diff --git a/data_analysis_agent.py b/data_analysis_agent.py index 017847a..31b7460 100644 --- a/data_analysis_agent.py +++ b/data_analysis_agent.py @@ -139,7 +139,21 @@ class DataAnalysisAgent: """ try: yaml_data = self.llm.parse_yaml_response(response) - action = yaml_data.get("action", "generate_code") + action = yaml_data.get("action", "") + + # If YAML parsing returned empty/no action, try to detect action from raw text + if not action: + if "analysis_complete" in response: + action = "analysis_complete" + # Try to extract final_report from raw text + if not yaml_data.get("final_report"): + yaml_data["action"] = "analysis_complete" + yaml_data["final_report"] = "" + elif "collect_figures" in response: + action = "collect_figures" + yaml_data["action"] = "collect_figures" + else: + action = "generate_code" print(f"[TARGET] 检测到动作: {action}") @@ -155,6 +169,11 @@ class DataAnalysisAgent: except Exception as e: print(f"[WARN] 解析响应失败: {str(e)},尝试提取代码并按generate_code处理") + # Check if this is actually an analysis_complete or collect_figures response + if "analysis_complete" in response: + return self._handle_analysis_complete(response, {"final_report": ""}) + if "collect_figures" in response: + return self._handle_collect_figures(response, {"figures_to_collect": []}) # 即使YAML解析失败,也尝试提取代码 extracted_code = extract_code_from_response(response) if extracted_code: diff --git a/start_web.bat b/start_web.bat index 50a780f..ca41999 100644 --- a/start_web.bat +++ b/start_web.bat @@ -1,5 +1,20 @@ @echo off -echo Starting IOV Data Analysis Agent Web Interface... -echo Please open http://localhost:8000 in your browser. -python -m uvicorn web.main:app --reload --reload-exclude "outputs/*" --host 0.0.0.0 --port 8000 +chcp 65001 >nul +set PYTHONIOENCODING=utf-8 + +:: Get local IP address +for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /c:"IPv4"') do ( + for /f "tokens=1" %%b in ("%%a") do set LOCAL_IP=%%b +) + +echo. +echo IOV Data Analysis Agent +echo ======================== +echo. +echo Local: http://localhost:8000 +if defined LOCAL_IP ( + echo Network: http://%LOCAL_IP%:8000 +) +echo. +python -m uvicorn web.main:app --reload --reload-exclude "outputs" --reload-exclude "uploads" --reload-exclude ".hypothesis" --reload-exclude ".cache" --host 0.0.0.0 --port 8000 pause diff --git a/test.py b/test.py deleted file mode 100644 index 70d4b38..0000000 --- a/test.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding: utf-8 -*- -""" -快速测试 LLM 连接是否正常 -""" - -import os -from dotenv import load_dotenv -from openai import OpenAI - -load_dotenv() - -client = OpenAI( - base_url=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:9999/v1"), - api_key=os.getenv("OPENAI_API_KEY", ""), -) - -response = client.chat.completions.create( - model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo"), - messages=[{"role": "user", "content": "Hello"}], -) - -print(response.choices[0].message.content) diff --git a/utils/llm_helper.py b/utils/llm_helper.py index d143230..ae947be 100644 --- a/utils/llm_helper.py +++ b/utils/llm_helper.py @@ -91,9 +91,10 @@ class LLMHelper: yaml_content = yaml_content.split('\n', 1)[1] # Fix Windows backslash paths that break YAML double-quoted strings. - # e.g. "D:\code\iov..." → "D:/code/iov..." inside quoted values + # Replace ALL backslashes inside double-quoted strings with forward slashes. + # This handles both "D:\code\..." and "outputs\session_..." patterns. yaml_content = re.sub( - r'"([A-Za-z]:\\[^"]*)"', + r'"([^"]*\\[^"]*)"', lambda m: '"' + m.group(1).replace('\\', '/') + '"', yaml_content, ) diff --git a/web/main.py b/web/main.py index 538a616..1e06eb3 100644 --- a/web/main.py +++ b/web/main.py @@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)): with open(file_location, "wb+") as file_object: file_object.write(file.file.read()) saved_files.append(file_location) + # Track the most recently uploaded files for the next analysis + app.state.last_uploaded_files = saved_files return {"info": f"Saved {len(saved_files)} files", "paths": saved_files} @app.post("/api/start") async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks): session_id = session_manager.create_session() - files = glob.glob("uploads/*.csv") + # Use only the most recently uploaded files, not everything in uploads/ + files = getattr(app.state, 'last_uploaded_files', None) if not files: - if os.path.exists("cleaned_data.csv"): - files = ["cleaned_data.csv"] - else: - raise HTTPException(status_code=400, detail="No CSV files found") + # Fallback: scan uploads directory + files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx") + if not files: + raise HTTPException(status_code=400, detail="No data files found. Please upload files first.") files = [os.path.abspath(f) for f in files] # Only use absolute paths @@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest): if not target: raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found") - # 构建上下文窗口(前后各2个段落) + # Build the actual content to polish: include adjacent table paragraphs + # so that when user clicks on text below a table, the table gets polished too + polish_para_ids = [target["id"]] + polish_content_parts = [target["content"]] + + # Check if previous paragraph is a table — include it + if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table": + polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"]) + polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"]) + + # Check if next paragraph is a table — include it + if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table": + polish_para_ids.append(paragraphs[target_idx + 1]["id"]) + polish_content_parts.append(paragraphs[target_idx + 1]["content"]) + + # If the target itself is a table, include adjacent text too + if target["type"] == "table": + if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text": + polish_para_ids.append(paragraphs[target_idx + 1]["id"]) + polish_content_parts.append(paragraphs[target_idx + 1]["content"]) + if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text": + polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"]) + polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"]) + + combined_content = "\n\n".join(polish_content_parts) + + # 构建上下文窗口(前后各2个段落,排除已包含的) context_window = [] for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)): - if j != target_idx: + if paragraphs[j]["id"] not in polish_para_ids: context_window.append(paragraphs[j]["content"]) context_text = "\n\n".join(context_window) @@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest): ## 图表信息 {figures_info} -## 需要润色的段落 -{target['content']} +## 需要润色的段落(可能包含表格和文字) +{combined_content} ## 要求 - 保持原有的 Markdown 格式(标题级别、表格结构等) +- 如果包含表格,必须同时润色表格内容(补充数据、修正数值) - 用具体数据替换模糊描述 - 增加业务洞察和趋势判断 - 禁止使用第一人称 @@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest): ## 图表信息 {figures_info} -## 需要润色的段落 -{target['content']} +## 需要润色的段落(可能包含表格和文字) +{combined_content} ## 要求 - 保持原有的 Markdown 格式 +- 如果包含表格,必须同时润色表格内容 - 严格遵循用户指令 - 禁止使用第一人称 - 直接输出润色后的 Markdown 内容,不要包裹在代码块中""" @@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest): ## 图表信息 {figures_info} -## 需要润色的段落 -{target['content']} +## 需要润色的段落(可能包含表格和文字) +{combined_content} ## 要求 - 保持原有的 Markdown 格式(标题级别、表格结构等) +- 如果包含表格,必须同时润色表格内容(补充数据、修正数值) - 提升专业性:使用同比、环比、占比等术语 - 增加洞察:不仅描述现象,还要分析原因和影响 - 禁止使用第一人称 @@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest): return { "paragraph_id": request.paragraph_id, - "original": target["content"], + "original": combined_content, "polished": polished_content, "mode": request.mode, + "affected_paragraph_ids": polish_para_ids, } except Exception as e: