YAML 反斜杠修复扩大范围 — 之前只匹配 "D:\..." 格式,现在匹配所有双引号内含反斜杠的字符串。"outputs\session_20260420..." 会被正确转成 "outputs/session_20260420...",不再导致 YAML 解析失败。这直接解决了第 10-19 轮的死循环。
_process_response 的 analysis_complete 检测已经在上一轮修好了:当 YAML 没有解析出 action,或解析过程抛出异常时,会直接在原始响应文本中查找 "analysis_complete"(以及 "collect_figures")。配合反斜杠修复,YAML 能正确解析出 action: "analysis_complete",不会再回退(fallback)到代码执行。
文件选择改为优先使用最近一次上传的文件 — app.state.last_uploaded_files 记录最近一次上传的文件列表,/api/start 优先使用它,不再默认用 glob("uploads/*.csv") 把所有历史文件都拿来分析。只有在没有上传记录时,才回退到扫描 uploads/ 目录(*.csv 和 *.xlsx);仍然找不到文件则返回 400 错误。
This commit is contained in:
@@ -139,7 +139,21 @@ class DataAnalysisAgent:
|
||||
"""
|
||||
try:
|
||||
yaml_data = self.llm.parse_yaml_response(response)
|
||||
action = yaml_data.get("action", "generate_code")
|
||||
action = yaml_data.get("action", "")
|
||||
|
||||
# If YAML parsing returned empty/no action, try to detect action from raw text
|
||||
if not action:
|
||||
if "analysis_complete" in response:
|
||||
action = "analysis_complete"
|
||||
# Try to extract final_report from raw text
|
||||
if not yaml_data.get("final_report"):
|
||||
yaml_data["action"] = "analysis_complete"
|
||||
yaml_data["final_report"] = ""
|
||||
elif "collect_figures" in response:
|
||||
action = "collect_figures"
|
||||
yaml_data["action"] = "collect_figures"
|
||||
else:
|
||||
action = "generate_code"
|
||||
|
||||
print(f"[TARGET] 检测到动作: {action}")
|
||||
|
||||
@@ -155,6 +169,11 @@ class DataAnalysisAgent:
|
||||
|
||||
except Exception as e:
|
||||
print(f"[WARN] 解析响应失败: {str(e)},尝试提取代码并按generate_code处理")
|
||||
# Check if this is actually an analysis_complete or collect_figures response
|
||||
if "analysis_complete" in response:
|
||||
return self._handle_analysis_complete(response, {"final_report": ""})
|
||||
if "collect_figures" in response:
|
||||
return self._handle_collect_figures(response, {"figures_to_collect": []})
|
||||
# 即使YAML解析失败,也尝试提取代码
|
||||
extracted_code = extract_code_from_response(response)
|
||||
if extracted_code:
|
||||
|
||||
@@ -1,5 +1,20 @@
|
||||
@echo off
|
||||
echo Starting IOV Data Analysis Agent Web Interface...
|
||||
echo Please open http://localhost:8000 in your browser.
|
||||
python -m uvicorn web.main:app --reload --reload-exclude "outputs/*" --host 0.0.0.0 --port 8000
|
||||
chcp 65001 >nul
|
||||
set PYTHONIOENCODING=utf-8
|
||||
|
||||
:: Get local IP address
|
||||
for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /c:"IPv4"') do (
|
||||
for /f "tokens=1" %%b in ("%%a") do set LOCAL_IP=%%b
|
||||
)
|
||||
|
||||
echo.
|
||||
echo IOV Data Analysis Agent
|
||||
echo ========================
|
||||
echo.
|
||||
echo Local: http://localhost:8000
|
||||
if defined LOCAL_IP (
|
||||
echo Network: http://%LOCAL_IP%:8000
|
||||
)
|
||||
echo.
|
||||
python -m uvicorn web.main:app --reload --reload-exclude "outputs" --reload-exclude "uploads" --reload-exclude ".hypothesis" --reload-exclude ".cache" --host 0.0.0.0 --port 8000
|
||||
pause
|
||||
|
||||
22
test.py
22
test.py
@@ -1,22 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
快速测试 LLM 连接是否正常
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from openai import OpenAI
|
||||
|
||||
load_dotenv()
|
||||
|
||||
client = OpenAI(
|
||||
base_url=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:9999/v1"),
|
||||
api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo"),
|
||||
messages=[{"role": "user", "content": "Hello"}],
|
||||
)
|
||||
|
||||
print(response.choices[0].message.content)
|
||||
@@ -91,9 +91,10 @@ class LLMHelper:
|
||||
yaml_content = yaml_content.split('\n', 1)[1]
|
||||
|
||||
# Fix Windows backslash paths that break YAML double-quoted strings.
|
||||
# e.g. "D:\code\iov..." → "D:/code/iov..." inside quoted values
|
||||
# Replace ALL backslashes inside double-quoted strings with forward slashes.
|
||||
# This handles both "D:\code\..." and "outputs\session_..." patterns.
|
||||
yaml_content = re.sub(
|
||||
r'"([A-Za-z]:\\[^"]*)"',
|
||||
r'"([^"]*\\[^"]*)"',
|
||||
lambda m: '"' + m.group(1).replace('\\', '/') + '"',
|
||||
yaml_content,
|
||||
)
|
||||
|
||||
61
web/main.py
61
web/main.py
@@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)):
|
||||
with open(file_location, "wb+") as file_object:
|
||||
file_object.write(file.file.read())
|
||||
saved_files.append(file_location)
|
||||
# Track the most recently uploaded files for the next analysis
|
||||
app.state.last_uploaded_files = saved_files
|
||||
return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}
|
||||
|
||||
@app.post("/api/start")
|
||||
async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
|
||||
session_id = session_manager.create_session()
|
||||
|
||||
files = glob.glob("uploads/*.csv")
|
||||
# Use only the most recently uploaded files, not everything in uploads/
|
||||
files = getattr(app.state, 'last_uploaded_files', None)
|
||||
if not files:
|
||||
if os.path.exists("cleaned_data.csv"):
|
||||
files = ["cleaned_data.csv"]
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="No CSV files found")
|
||||
# Fallback: scan uploads directory
|
||||
files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx")
|
||||
if not files:
|
||||
raise HTTPException(status_code=400, detail="No data files found. Please upload files first.")
|
||||
|
||||
files = [os.path.abspath(f) for f in files] # Only use absolute paths
|
||||
|
||||
@@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest):
|
||||
if not target:
|
||||
raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")
|
||||
|
||||
# 构建上下文窗口(前后各2个段落)
|
||||
# Build the actual content to polish: include adjacent table paragraphs
|
||||
# so that when user clicks on text below a table, the table gets polished too
|
||||
polish_para_ids = [target["id"]]
|
||||
polish_content_parts = [target["content"]]
|
||||
|
||||
# Check if previous paragraph is a table — include it
|
||||
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table":
|
||||
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||
|
||||
# Check if next paragraph is a table — include it
|
||||
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table":
|
||||
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||
|
||||
# If the target itself is a table, include adjacent text too
|
||||
if target["type"] == "table":
|
||||
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text":
|
||||
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text":
|
||||
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||
|
||||
combined_content = "\n\n".join(polish_content_parts)
|
||||
|
||||
# 构建上下文窗口(前后各2个段落,排除已包含的)
|
||||
context_window = []
|
||||
for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
|
||||
if j != target_idx:
|
||||
if paragraphs[j]["id"] not in polish_para_ids:
|
||||
context_window.append(paragraphs[j]["content"])
|
||||
context_text = "\n\n".join(context_window)
|
||||
|
||||
@@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||
- 用具体数据替换模糊描述
|
||||
- 增加业务洞察和趋势判断
|
||||
- 禁止使用第一人称
|
||||
@@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式
|
||||
- 如果包含表格,必须同时润色表格内容
|
||||
- 严格遵循用户指令
|
||||
- 禁止使用第一人称
|
||||
- 直接输出润色后的 Markdown 内容,不要包裹在代码块中"""
|
||||
@@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||
- 提升专业性:使用同比、环比、占比等术语
|
||||
- 增加洞察:不仅描述现象,还要分析原因和影响
|
||||
- 禁止使用第一人称
|
||||
@@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest):
|
||||
|
||||
return {
|
||||
"paragraph_id": request.paragraph_id,
|
||||
"original": target["content"],
|
||||
"original": combined_content,
|
||||
"polished": polished_content,
|
||||
"mode": request.mode,
|
||||
"affected_paragraph_ids": polish_para_ids,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user