Merge branch 'main' of http://jeason.online:3000/zhaojie/iov_data_analysis_agent

2026-04-19 16:29:59 +08:00
parent c5083736e2 b033eb61cc
commit b256aa27d9
22 changed files with 2060 additions and 916 deletions
--- a/main.py
+++ b/main.py
@@ -1,58 +1,34 @@
-from data_analysis_agent import DataAnalysisAgent
-from config.llm_config import LLMConfig
+# -*- coding: utf-8 -*-
+"""
+CLI 入口 - 数据分析智能体
+"""

-import sys
+import glob
 import os
+import sys
 from datetime import datetime

+from data_analysis_agent import DataAnalysisAgent
+from config.llm_config import LLMConfig
 from utils.create_session_dir import create_session_output_dir
-
-class DualLogger:
-    """同时输出到终端和文件的日志记录器"""
-    def __init__(self, log_dir, filename="log.txt"):
-        self.terminal = sys.stdout
-        log_path = os.path.join(log_dir, filename)
-        self.log = open(log_path, "a", encoding="utf-8")
-        
-    def write(self, message):
-        self.terminal.write(message)
-        # 过滤掉生成的代码块，不写入日志文件
-        if "[TOOL] 执行代码:" in message:
-            return
-        self.log.write(message)
-        self.log.flush()
-        
-    def flush(self):
-        self.terminal.flush()
-        self.log.flush()
-
-def setup_logging(log_dir):
-    """配置日志记录"""
-    # 记录开始时间
-    logger = DualLogger(log_dir)
-    sys.stdout = logger
-    # 可选：也将错误输出重定向
-    # sys.stderr = logger 
-    print(f"\n{'='*20} Run Started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {'='*20}\n")
-    print(f"[DOC] 日志文件已保存至: {os.path.join(log_dir, 'log.txt')}")
+from utils.logger import PrintCapture


 def main():
    llm_config = LLMConfig()
-    import glob
-    import os
-    # 自动查找当前目录及remotecontrol目录下的所有数据文件
-    data_extensions = ['*.csv', '*.xlsx', '*.xls']
-    search_dirs = ['cleaned_data']
+
+    # 自动查找数据文件
+    data_extensions = ["*.csv", "*.xlsx", "*.xls"]
+    search_dirs = ["cleaned_data"]
    files = []
-    
+
    for search_dir in search_dirs:
        for ext in data_extensions:
            pattern = os.path.join(search_dir, ext)
            files.extend(glob.glob(pattern))
-    
+
    if not files:
-        print("[WARN] 未在当前目录找到数据文件 (.csv, .xlsx)，尝试使用默认文件")
+        print("[WARN] 未在 cleaned_data 目录找到数据文件，尝试使用默认文件")
        files = ["./cleaned_data.csv"]
    else:
        print(f"[DIR] 自动识别到以下数据文件: {files}")
@@ -63,46 +39,43 @@ def main():
 通过多轮交叉分析与趋势洞察，为提升车联网服务质量、优化资源配置及降低运营风险提供数据驱动的决策依据，问题总揽，高频问题、重点问题分析，输出若干个重要的统计指标，并绘制相关图表；
 结合图表，总结一份，车联网运维工单健康度报告，汇报给我。
    """
-    
-    # 在主函数中先创建会话目录，以便存放日志
-    # 默认输出目录为 'outputs'
+
+    # 创建会话目录
    base_output_dir = "outputs"
    session_output_dir = create_session_output_dir(base_output_dir, analysis_requirement)
-    
-    # 设置日志
-    setup_logging(session_output_dir)

-    # 如果希望强制运行到最大轮数，设置 force_max_rounds=True
-    agent = DataAnalysisAgent(llm_config, force_max_rounds=False)
+    # 使用 PrintCapture 替代全局 stdout 劫持
+    log_path = os.path.join(session_output_dir, "log.txt")
+
+    with PrintCapture(log_path):
+        print(f"\n{'='*20} Run Started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {'='*20}\n")
+        print(f"[DOC] 日志文件已保存至: {log_path}")
+
+        agent = DataAnalysisAgent(llm_config, force_max_rounds=False)
+
+        # 交互式分析循环
+        while True:
+            is_first_run = agent.current_round == 0 and not agent.conversation_history
+
+            report = agent.analyze(
+                user_input=analysis_requirement,
+                files=files if is_first_run else None,
+                session_output_dir=session_output_dir,
+                reset_session=is_first_run,
+                max_rounds=None if is_first_run else 10,
+            )
+            print("\n" + "=" * 30 + " 当前阶段分析完成 " + "=" * 30)
+
+            print("\n[TIP] 你可以继续对数据提出分析需求，或者输入 'exit'/'quit' 结束程序。")
+            user_response = input("[>] 请输入后续分析需求 (直接回车退出): ").strip()
+
+            if not user_response or user_response.lower() in ["exit", "quit", "n", "no"]:
+                print("[BYE] 分析结束，再见！")
+                break
+
+            analysis_requirement = user_response
+            print(f"\n[LOOP] 收到新需求，正在继续分析...")

-    # --- 交互式分析循环 ---
-    while True:
-        # 执行分析
-        # 首次运行时 reset_session=True (默认)
-        # 后续运行时 reset_session=False
-        is_first_run = (agent.current_round == 0 and not agent.conversation_history)
-        
-        report = agent.analyze(
-            user_input=analysis_requirement,
-            files=files if is_first_run else None, # 后续轮次不需要重复传文件路径，agent已有上下文
-            session_output_dir=session_output_dir,
-            reset_session=is_first_run,
-            max_rounds=None if is_first_run else 10 # 追问时限制为10轮
-        )
-        print("\n" + "="*30 + " 当前阶段分析完成 " + "="*30)
-        
-        # 询问用户是否继续
-        print("\n[TIP] 你可以继续对数据提出分析需求，或者输入 'exit'/'quit' 结束程序。")
-        user_response = input("[>] 请输入后续分析需求 (直接回车退出): ").strip()
-        
-        if not user_response or user_response.lower() in ['exit', 'quit', 'n', 'no']:
-            print("[BYE] 分析结束，再见！")
-            break
-        
-        # 更新需求，进入下一轮循环
-        analysis_requirement = user_response
-        print(f"\n[LOOP] 收到新需求，正在继续分析...")
- 

 if __name__ == "__main__":
    main()