YAML 反斜杠修复扩大范围 — 之前只匹配 "D:\..." 格式,现在匹配所有双引号内含反斜杠的字符串。"outputs\session_20260420..." 会被正确转成 "outputs/session_20260420...",不再导致 YAML 解析失败。这直接解决了第 10-19 轮的死循环。

_process_response 的 analysis_complete 检测已经在上一轮修好了,配合反斜杠修复,YAML 能正确解析出 action: "analysis_complete",不会再 fallback 到代码执行。

文件选择改为只用最近一次上传的文件 — app.state.last_uploaded_files 记录上传的文件列表,/api/start 优先使用它,不再 glob("uploads/*.csv") 把所有历史文件都拿来分析。
This commit is contained in:
2026-04-20 13:09:54 +08:00
parent 7303008f48
commit c7224153b1
5 changed files with 88 additions and 42 deletions

View File

@@ -139,7 +139,21 @@ class DataAnalysisAgent:
""" """
try: try:
yaml_data = self.llm.parse_yaml_response(response) yaml_data = self.llm.parse_yaml_response(response)
action = yaml_data.get("action", "generate_code") action = yaml_data.get("action", "")
# If YAML parsing returned empty/no action, try to detect action from raw text
if not action:
if "analysis_complete" in response:
action = "analysis_complete"
# Try to extract final_report from raw text
if not yaml_data.get("final_report"):
yaml_data["action"] = "analysis_complete"
yaml_data["final_report"] = ""
elif "collect_figures" in response:
action = "collect_figures"
yaml_data["action"] = "collect_figures"
else:
action = "generate_code"
print(f"[TARGET] 检测到动作: {action}") print(f"[TARGET] 检测到动作: {action}")
@@ -155,6 +169,11 @@ class DataAnalysisAgent:
except Exception as e: except Exception as e:
print(f"[WARN] 解析响应失败: {str(e)}尝试提取代码并按generate_code处理") print(f"[WARN] 解析响应失败: {str(e)}尝试提取代码并按generate_code处理")
# Check if this is actually an analysis_complete or collect_figures response
if "analysis_complete" in response:
return self._handle_analysis_complete(response, {"final_report": ""})
if "collect_figures" in response:
return self._handle_collect_figures(response, {"figures_to_collect": []})
# 即使YAML解析失败也尝试提取代码 # 即使YAML解析失败也尝试提取代码
extracted_code = extract_code_from_response(response) extracted_code = extract_code_from_response(response)
if extracted_code: if extracted_code:

View File

@@ -1,5 +1,20 @@
@echo off @echo off
echo Starting IOV Data Analysis Agent Web Interface... chcp 65001 >nul
echo Please open http://localhost:8000 in your browser. set PYTHONIOENCODING=utf-8
python -m uvicorn web.main:app --reload --reload-exclude "outputs/*" --host 0.0.0.0 --port 8000
:: Get local IP address
for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /c:"IPv4"') do (
for /f "tokens=1" %%b in ("%%a") do set LOCAL_IP=%%b
)
echo.
echo IOV Data Analysis Agent
echo ========================
echo.
echo Local: http://localhost:8000
if defined LOCAL_IP (
echo Network: http://%LOCAL_IP%:8000
)
echo.
python -m uvicorn web.main:app --reload --reload-exclude "outputs" --reload-exclude "uploads" --reload-exclude ".hypothesis" --reload-exclude ".cache" --host 0.0.0.0 --port 8000
pause pause

test.py (22 lines deleted)
View File

@@ -1,22 +0,0 @@
# -*- coding: utf-8 -*-
"""
快速测试 LLM 连接是否正常
"""
import os
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
client = OpenAI(
base_url=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:9999/v1"),
api_key=os.getenv("OPENAI_API_KEY", ""),
)
response = client.chat.completions.create(
model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo"),
messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)

View File

@@ -91,9 +91,10 @@ class LLMHelper:
yaml_content = yaml_content.split('\n', 1)[1] yaml_content = yaml_content.split('\n', 1)[1]
# Fix Windows backslash paths that break YAML double-quoted strings. # Fix Windows backslash paths that break YAML double-quoted strings.
# e.g. "D:\code\iov..." → "D:/code/iov..." inside quoted values # Replace ALL backslashes inside double-quoted strings with forward slashes.
# This handles both "D:\code\..." and "outputs\session_..." patterns.
yaml_content = re.sub( yaml_content = re.sub(
r'"([A-Za-z]:\\[^"]*)"', r'"([^"]*\\[^"]*)"',
lambda m: '"' + m.group(1).replace('\\', '/') + '"', lambda m: '"' + m.group(1).replace('\\', '/') + '"',
yaml_content, yaml_content,
) )

View File

@@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)):
with open(file_location, "wb+") as file_object: with open(file_location, "wb+") as file_object:
file_object.write(file.file.read()) file_object.write(file.file.read())
saved_files.append(file_location) saved_files.append(file_location)
# Track the most recently uploaded files for the next analysis
app.state.last_uploaded_files = saved_files
return {"info": f"Saved {len(saved_files)} files", "paths": saved_files} return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}
@app.post("/api/start") @app.post("/api/start")
async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks): async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
session_id = session_manager.create_session() session_id = session_manager.create_session()
files = glob.glob("uploads/*.csv") # Use only the most recently uploaded files, not everything in uploads/
files = getattr(app.state, 'last_uploaded_files', None)
if not files: if not files:
if os.path.exists("cleaned_data.csv"): # Fallback: scan uploads directory
files = ["cleaned_data.csv"] files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx")
else: if not files:
raise HTTPException(status_code=400, detail="No CSV files found") raise HTTPException(status_code=400, detail="No data files found. Please upload files first.")
files = [os.path.abspath(f) for f in files] # Only use absolute paths files = [os.path.abspath(f) for f in files] # Only use absolute paths
@@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest):
if not target: if not target:
raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found") raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")
# 构建上下文窗口前后各2个段落 # Build the actual content to polish: include adjacent table paragraphs
# so that when user clicks on text below a table, the table gets polished too
polish_para_ids = [target["id"]]
polish_content_parts = [target["content"]]
# Check if previous paragraph is a table — include it
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table":
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
# Check if next paragraph is a table — include it
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table":
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
# If the target itself is a table, include adjacent text too
if target["type"] == "table":
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text":
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text":
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
combined_content = "\n\n".join(polish_content_parts)
# 构建上下文窗口前后各2个段落排除已包含的
context_window = [] context_window = []
for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)): for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
if j != target_idx: if paragraphs[j]["id"] not in polish_para_ids:
context_window.append(paragraphs[j]["content"]) context_window.append(paragraphs[j]["content"])
context_text = "\n\n".join(context_window) context_text = "\n\n".join(context_window)
@@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest):
## 图表信息 ## 图表信息
{figures_info} {figures_info}
## 需要润色的段落 ## 需要润色的段落(可能包含表格和文字)
{target['content']} {combined_content}
## 要求 ## 要求
- 保持原有的 Markdown 格式(标题级别、表格结构等) - 保持原有的 Markdown 格式(标题级别、表格结构等)
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
- 用具体数据替换模糊描述 - 用具体数据替换模糊描述
- 增加业务洞察和趋势判断 - 增加业务洞察和趋势判断
- 禁止使用第一人称 - 禁止使用第一人称
@@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest):
## 图表信息 ## 图表信息
{figures_info} {figures_info}
## 需要润色的段落 ## 需要润色的段落(可能包含表格和文字)
{target['content']} {combined_content}
## 要求 ## 要求
- 保持原有的 Markdown 格式 - 保持原有的 Markdown 格式
- 如果包含表格,必须同时润色表格内容
- 严格遵循用户指令 - 严格遵循用户指令
- 禁止使用第一人称 - 禁止使用第一人称
- 直接输出润色后的 Markdown 内容,不要包裹在代码块中""" - 直接输出润色后的 Markdown 内容,不要包裹在代码块中"""
@@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest):
## 图表信息 ## 图表信息
{figures_info} {figures_info}
## 需要润色的段落 ## 需要润色的段落(可能包含表格和文字)
{target['content']} {combined_content}
## 要求 ## 要求
- 保持原有的 Markdown 格式(标题级别、表格结构等) - 保持原有的 Markdown 格式(标题级别、表格结构等)
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
- 提升专业性:使用同比、环比、占比等术语 - 提升专业性:使用同比、环比、占比等术语
- 增加洞察:不仅描述现象,还要分析原因和影响 - 增加洞察:不仅描述现象,还要分析原因和影响
- 禁止使用第一人称 - 禁止使用第一人称
@@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest):
return { return {
"paragraph_id": request.paragraph_id, "paragraph_id": request.paragraph_id,
"original": target["content"], "original": combined_content,
"polished": polished_content, "polished": polished_content,
"mode": request.mode, "mode": request.mode,
"affected_paragraph_ids": polish_para_ids,
} }
except Exception as e: except Exception as e: