# -*- coding: utf-8 -*-
"""
System optimization module.

Covers performance optimization, security optimization, traffic protection,
cost optimization and stability optimization.
"""
|
||
|
|
|
||
|
|
import ast
import logging
import os
import threading
import time
from collections import defaultdict, deque
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any

import psutil
import redis

from ..config.config import Config
from .database import db_manager
|
||
|
|
|
||
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
class SystemOptimizer:
    """Host monitoring plus rate, cost and input guards backed by Redis.

    Constructing an instance tries to connect to Redis (failure is logged and
    leaves ``redis_client`` as ``None``) and then starts a daemon thread that
    samples host metrics once a minute, unless the ``system_monitoring``
    config flag is falsy.

    The Redis-backed checks (`check_rate_limit`, `check_cost_limit`) fail
    open: when Redis is unavailable or raises, the request is allowed.
    """

    # Redis list of metric samples; `lpush` keeps the newest entry at index 0.
    _METRICS_KEY = "system_metrics"

    # Counter keys end in a time stamp whose length identifies the window
    # size (minute / hour / day) and therefore the format to parse it with.
    _WINDOW_STAMP_FORMATS = {12: "%Y%m%d%H%M", 10: "%Y%m%d%H", 8: "%Y%m%d"}

    def __init__(self):
        """Connect to Redis, set the default limits and start monitoring."""
        self.redis_client = None
        self._init_redis()

        # Performance monitoring (bounded in-memory history).
        self.performance_metrics = deque(maxlen=1000)
        self.request_counts = defaultdict(int)
        self.response_times = deque(maxlen=1000)

        # Traffic control: maximum requests per user and window.
        self.rate_limits = {
            "per_minute": 60,
            "per_hour": 1000,
            "per_day": 10000,
        }

        # Cost control (amounts in yuan, per the original comments).
        self.cost_limits = {
            "daily": 100.0,
            "hourly": 20.0,
            "per_request": 0.1,
        }

        # Security settings.
        self.security_settings = {
            "max_input_length": 10000,
            "max_output_length": 5000,
            "blocked_keywords": ["恶意", "攻击", "病毒"],
            "max_concurrent_users": 50,
        }

        self._start_monitoring()

    def _init_redis(self):
        """Open the Redis connection; on any failure leave the client as None.

        Host, port and password can be overridden with the ``REDIS_HOST``,
        ``REDIS_PORT`` and ``REDIS_PASSWORD`` environment variables.
        """
        try:
            # NOTE(review): the fallback literals are kept only for backward
            # compatibility. Credentials should not live in source control;
            # rotate this password and drop the defaults once every
            # deployment sets the environment variables.
            self.redis_client = redis.Redis(
                host=os.environ.get("REDIS_HOST", "43.134.68.207"),
                port=int(os.environ.get("REDIS_PORT", "6379")),
                password=os.environ.get("REDIS_PASSWORD", "123456"),
                decode_responses=True,
                socket_connect_timeout=5,
                socket_timeout=5,
                retry_on_timeout=True,
            )
            self.redis_client.ping()
            logger.info("系统优化Redis连接成功")
        except Exception as e:
            logger.error(f"系统优化Redis连接失败: {e}")
            self.redis_client = None

    def _start_monitoring(self):
        """Start the daemon monitoring thread unless monitoring is disabled."""
        try:
            enable_monitoring = Config.get_config().get('system_monitoring', True)
            if not enable_monitoring:
                logger.info("系统监控已禁用")
                return

            monitor_thread = threading.Thread(target=self._monitor_system, daemon=True)
            monitor_thread.start()
            logger.info("系统监控线程已启动")
        except Exception as e:
            logger.error(f"启动监控线程失败: {e}")

    def _monitor_system(self):
        """Run the collect/check cycle forever, pausing 60 seconds per round."""
        while True:
            try:
                self._collect_metrics()
                self._check_performance()
                self._check_security()
            except Exception as e:
                logger.error(f"系统监控异常: {e}")
            time.sleep(60)

    def _collect_metrics(self):
        """Sample CPU, memory, disk, network and connection figures.

        The sample is appended to ``performance_metrics`` and, when Redis is
        available, pushed onto the ``system_metrics`` list, which is trimmed
        to its 1000 newest entries.
        """
        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            memory_percent = psutil.virtual_memory().percent
            disk_percent = psutil.disk_usage('/').percent
            network = psutil.net_io_counters()

            # Count only this process's connections, not the whole host's.
            try:
                proc = psutil.Process()
                # psutil 6.0 deprecated Process.connections() in favour of
                # net_connections(); prefer the new name when it exists.
                list_connections = getattr(proc, "net_connections", None) or proc.connections
                app_connections = len(list_connections())
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Placeholder estimate used when the count cannot be read.
                app_connections = 5

            metrics = {
                "timestamp": datetime.now().isoformat(),
                "cpu_percent": cpu_percent,
                "memory_percent": memory_percent,
                "disk_percent": disk_percent,
                "network_bytes_sent": network.bytes_sent,
                "network_bytes_recv": network.bytes_recv,
                "active_connections": app_connections,
            }
            self.performance_metrics.append(metrics)

            if self.redis_client:
                # Stored as the dict's repr (not JSON); `cleanup_old_metrics`
                # parses the same format back with ast.literal_eval.
                self.redis_client.lpush(self._METRICS_KEY, str(metrics))
                self.redis_client.ltrim(self._METRICS_KEY, 0, 999)
        except Exception as e:
            logger.error(f"收集系统指标失败: {e}")

    def _check_performance(self):
        """Raise an alert for each resource whose 5-sample average is too high."""
        try:
            if len(self.performance_metrics) < 5:
                return

            recent = list(self.performance_metrics)[-5:]
            # (sample field, threshold in percent, alert type, message label)
            checks = (
                ("cpu_percent", 80, "high_cpu", "CPU使用率过高"),
                ("memory_percent", 85, "high_memory", "内存使用率过高"),
                ("disk_percent", 90, "high_disk", "磁盘使用率过高"),
            )
            for field, threshold, alert_type, label in checks:
                average = sum(sample[field] for sample in recent) / len(recent)
                if average > threshold:
                    self._trigger_performance_alert(alert_type, f"{label}: {average:.1f}%")
        except Exception as e:
            logger.error(f"检查性能指标失败: {e}")

    def _check_security(self):
        """Alert when the 3-sample average connection count exceeds the cap."""
        try:
            # Averaging over three samples avoids alerting on a single spike.
            if len(self.performance_metrics) < 3:
                return

            recent = list(self.performance_metrics)[-3:]
            avg_connections = sum(s.get("active_connections", 0) for s in recent) / len(recent)
            if avg_connections > self.security_settings["max_concurrent_users"]:
                self._trigger_security_alert(
                    "high_connections", f"平均并发连接数过高: {avg_connections:.1f}"
                )
        except Exception as e:
            logger.error(f"检查安全指标失败: {e}")

    def _save_alert(self, rule_prefix: str, alert_type: str, level: str,
                    severity: str, message: str):
        """Persist one active Alert row; exceptions propagate to the caller."""
        # Imported lazily, as in the original code.
        from ..core.models import Alert

        with db_manager.get_session() as session:
            session.add(Alert(
                rule_name=f"{rule_prefix}_{alert_type}",
                alert_type=alert_type,
                level=level,
                severity=severity,
                message=message,
                is_active=True,
                created_at=datetime.now(),
            ))
            session.commit()

    def _trigger_performance_alert(self, alert_type: str, message: str):
        """Store and log a warning-level, medium-severity performance alert."""
        try:
            self._save_alert("性能监控", alert_type, "warning", "medium", message)
            logger.warning(f"性能预警: {message}")
        except Exception as e:
            logger.error(f"触发性能预警失败: {e}")

    def _trigger_security_alert(self, alert_type: str, message: str):
        """Store and log an error-level, high-severity security alert."""
        try:
            self._save_alert("安全监控", alert_type, "error", "high", message)
            logger.warning(f"安全预警: {message}")
        except Exception as e:
            logger.error(f"触发安全预警失败: {e}")

    def check_rate_limit(self, user_id: str) -> bool:
        """Return True if the user may make another request, counting it.

        Per-minute, per-hour and per-day counters are kept in Redis under
        ``rate_limit:<user_id>:<stamp>``. A request rejected by any window is
        not counted. Returns True when Redis is unavailable or raises.
        """
        try:
            if not self.redis_client:
                return True

            now = datetime.now()
            # (limit name, stamp format, TTL in seconds, log label)
            windows = (
                ("per_minute", "%Y%m%d%H%M", 60, "每分钟"),
                ("per_hour", "%Y%m%d%H", 3600, "每小时"),
                ("per_day", "%Y%m%d", 86400, "每日"),
            )
            keys = [f"rate_limit:{user_id}:{now.strftime(fmt)}" for _, fmt, _, _ in windows]

            counts = self.redis_client.mget(keys)
            for (limit_name, _, _, label), count in zip(windows, counts):
                if int(count or 0) >= self.rate_limits[limit_name]:
                    logger.warning(f"用户 {user_id} 触发{label}频率限制")
                    return False

            # One MULTI/EXEC batch so a counter is never left without a TTL.
            # The read above and this write are still not atomic together, so
            # concurrent requests can overshoot a limit slightly.
            pipe = self.redis_client.pipeline()
            for key, (_, _, ttl, _) in zip(keys, windows):
                pipe.incr(key)
                pipe.expire(key, ttl)
            pipe.execute()
            return True
        except Exception as e:
            logger.error(f"检查频率限制失败: {e}")
            return True

    def check_input_security(self, user_input: str) -> Dict[str, Any]:
        """Check input length and blocked keywords.

        Returns a dict with ``is_safe``, ``blocked_keywords``,
        ``length_check`` and ``message``. An over-long input is rejected
        without a keyword scan. If the check itself raises (for example on a
        non-string input) the input is reported as safe.
        """
        try:
            max_length = self.security_settings["max_input_length"]
            result = {
                "is_safe": True,
                "blocked_keywords": [],
                "length_check": True,
                "message": "输入安全",
            }

            if len(user_input) > max_length:
                result["is_safe"] = False
                result["length_check"] = False
                result["message"] = f"输入长度超过限制: {len(user_input)} > {max_length}"
                return result

            hits = [
                keyword
                for keyword in self.security_settings["blocked_keywords"]
                if keyword in user_input
            ]
            if hits:
                result["is_safe"] = False
                result["blocked_keywords"] = hits
                result["message"] = f"包含屏蔽关键词: {', '.join(hits)}"
            return result
        except Exception as e:
            logger.error(f"检查输入安全性失败: {e}")
            return {
                "is_safe": True,
                "blocked_keywords": [],
                "length_check": True,
                "message": "安全检查异常,允许通过",
            }

    def check_cost_limit(self, estimated_cost: float) -> bool:
        """Return True if the estimated cost fits every budget, booking it.

        The per-request cap is checked first and needs no Redis. The hourly
        and daily totals live in Redis under ``cost_limit:<stamp>``; when
        Redis is unavailable or raises, those two checks are skipped and the
        request is allowed.
        """
        try:
            # Enforced even when Redis is down: it depends on no shared state.
            if estimated_cost > self.cost_limits["per_request"]:
                logger.warning(
                    f"单次请求成本超限: {estimated_cost:.4f} > {self.cost_limits['per_request']}"
                )
                return False

            if not self.redis_client:
                return True

            now = datetime.now()
            hour_key = f"cost_limit:{now.strftime('%Y%m%d%H')}"
            day_key = f"cost_limit:{now.strftime('%Y%m%d')}"
            hour_raw, day_raw = self.redis_client.mget([hour_key, day_key])

            hour_cost = float(hour_raw or 0)
            if hour_cost + estimated_cost > self.cost_limits["hourly"]:
                logger.warning(
                    f"每小时成本超限: {hour_cost + estimated_cost:.4f} > {self.cost_limits['hourly']}"
                )
                return False

            day_cost = float(day_raw or 0)
            if day_cost + estimated_cost > self.cost_limits["daily"]:
                logger.warning(
                    f"每日成本超限: {day_cost + estimated_cost:.4f} > {self.cost_limits['daily']}"
                )
                return False

            # One MULTI/EXEC batch so a total is never left without a TTL.
            pipe = self.redis_client.pipeline()
            pipe.incrbyfloat(hour_key, estimated_cost)
            pipe.incrbyfloat(day_key, estimated_cost)
            pipe.expire(hour_key, 3600)
            pipe.expire(day_key, 86400)
            pipe.execute()
            return True
        except Exception as e:
            logger.error(f"检查成本限制失败: {e}")
            return True

    def optimize_response_time(self, response_time: float) -> Dict[str, Any]:
        """Record a response time and return the average with suggestions.

        With fewer than 10 recorded samples the result echoes the given value
        and reports ``insufficient_data``. Returns ``{}`` if anything raises.
        """
        try:
            self.response_times.append(response_time)

            if len(self.response_times) < 10:
                return {
                    "avg_response_time": response_time,
                    "suggestions": [],
                    "performance_level": "insufficient_data",
                }

            avg_response_time = sum(self.response_times) / len(self.response_times)
            # Thresholds are cumulative: a slower average collects every
            # suggestion whose threshold it exceeds.
            advice = (
                (5.0, "考虑增加缓存层"),
                (10.0, "考虑优化数据库查询"),
                (15.0, "考虑使用异步处理"),
            )
            return {
                "avg_response_time": avg_response_time,
                "suggestions": [text for limit, text in advice if avg_response_time > limit],
                "performance_level": self._get_performance_level(avg_response_time),
            }
        except Exception as e:
            logger.error(f"优化响应时间失败: {e}")
            return {}

    def _get_performance_level(self, response_time: float) -> str:
        """Map a response time to excellent / good / fair / poor."""
        if response_time < 2.0:
            return "excellent"
        if response_time < 5.0:
            return "good"
        if response_time < 10.0:
            return "fair"
        return "poor"

    @staticmethod
    def _trend(values, tolerance) -> str:
        """Compare the last value to the first, allowing ``tolerance`` of drift."""
        if values[-1] > values[0] + tolerance:
            return "increasing"
        if values[-1] < values[0] - tolerance:
            return "decreasing"
        return "stable"

    def get_system_status(self) -> Dict[str, Any]:
        """Return the latest sample plus CPU and memory trends.

        Trends compare the newest of the last five samples with the oldest
        (CPU tolerance 10 points, memory 5). Returns ``{"status": "no_data"}``
        before the first sample and an error dict if anything raises.
        """
        try:
            if not self.performance_metrics:
                return {"status": "no_data"}

            latest = self.performance_metrics[-1]

            if len(self.performance_metrics) >= 5:
                recent = list(self.performance_metrics)[-5:]
                cpu_trend = self._trend([s["cpu_percent"] for s in recent], 10)
                memory_trend = self._trend([s["memory_percent"] for s in recent], 5)
            else:
                cpu_trend = "insufficient_data"
                memory_trend = "insufficient_data"

            return {
                "status": "healthy",
                "cpu_percent": latest["cpu_percent"],
                "memory_percent": latest["memory_percent"],
                "disk_percent": latest["disk_percent"],
                "active_connections": latest["active_connections"],
                "cpu_trend": cpu_trend,
                "memory_trend": memory_trend,
                "timestamp": latest["timestamp"],
            }
        except Exception as e:
            logger.error(f"获取系统状态失败: {e}")
            return {"status": "error", "message": str(e)}

    @staticmethod
    def _metric_is_older_than(raw_entry, cutoff) -> bool:
        """Return True if a stored metric entry's timestamp precedes ``cutoff``.

        Entries are the ``str()`` of the metrics dict. Anything that cannot be
        parsed is treated as not old, so it is never deleted by mistake.
        """
        try:
            sample = ast.literal_eval(raw_entry)
            return datetime.fromisoformat(sample["timestamp"]) < cutoff
        except (ValueError, SyntaxError, KeyError, TypeError):
            return False

    @staticmethod
    def _window_key_is_older_than(key, cutoff) -> bool:
        """Return True if the time stamp ending a counter key precedes ``cutoff``.

        Only the last ``:``-separated segment is read, so user ids containing
        colons are harmless. Keys with an unrecognised stamp are kept.
        """
        stamp = key.rsplit(":", 1)[-1]
        stamp_format = SystemOptimizer._WINDOW_STAMP_FORMATS.get(len(stamp))
        if stamp_format is None:
            return False
        try:
            return datetime.strptime(stamp, stamp_format) < cutoff
        except ValueError:
            return False

    def _trim_old_metric_entries(self, cutoff) -> int:
        """Drop metric entries older than ``cutoff`` from the list tail."""
        entries = self.redis_client.lrange(self._METRICS_KEY, 0, -1)

        # Newest entries sit at the head, so the old ones form a run at the
        # tail; stop at the first entry that is recent or unparseable.
        stale = 0
        for raw_entry in reversed(entries):
            if not self._metric_is_older_than(raw_entry, cutoff):
                break
            stale += 1

        if stale:
            # A negative end index counts from the tail, so samples pushed at
            # the head since the read above are not affected.
            self.redis_client.ltrim(self._METRICS_KEY, 0, -(stale + 1))
        return stale

    def _delete_old_window_keys(self, pattern, cutoff):
        """Delete counter keys matching ``pattern`` whose window is too old."""
        # scan_iter walks the keyspace incrementally instead of blocking the
        # server the way KEYS does.
        for key in self.redis_client.scan_iter(match=pattern):
            if self._window_key_is_older_than(key, cutoff):
                self.redis_client.delete(key)

    def cleanup_old_metrics(self, days: int = 7) -> int:
        """Remove metric entries and limit counters older than ``days`` days.

        Counters for windows newer than the cutoff are kept, so active quotas
        are not reset; ``days=0`` purges every counter.

        Returns the number of metric entries removed from the
        ``system_metrics`` list, or 0 when Redis is unavailable or raises.
        """
        try:
            if not self.redis_client:
                return 0

            cutoff = datetime.now() - timedelta(days=days)
            removed_count = self._trim_old_metric_entries(cutoff)
            self._delete_old_window_keys("rate_limit:*", cutoff)
            self._delete_old_window_keys("cost_limit:*", cutoff)

            logger.info(f"清理系统优化数据成功: 数量={removed_count}")
            return removed_count
        except Exception as e:
            logger.error(f"清理系统优化数据失败: {e}")
            return 0
|