YAML 反斜杠修复扩大范围 — 之前只匹配 "D:\..." 格式,现在匹配所有双引号内含反斜杠的字符串。"outputs\session_20260420..." 会被正确转成 "outputs/session_20260420...",不再导致 YAML 解析失败。这直接解决了第 10-19 轮的死循环。
_process_response 的 analysis_complete 检测已经在上一轮修好了,配合反斜杠修复,YAML 能正确解析出 action: "analysis_complete",不会再 fallback 到代码执行。
文件选择改为优先使用最近一次上传的文件 — upload_files 把本次保存的文件列表记录到 app.state.last_uploaded_files,/api/start 优先使用它;只有在没有上传记录时才回退到扫描 uploads/ 目录(*.csv 和 *.xlsx),不再默认 glob("uploads/*.csv") 把所有历史文件都拿来分析。
This commit is contained in:
@@ -139,7 +139,21 @@ class DataAnalysisAgent:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
yaml_data = self.llm.parse_yaml_response(response)
|
yaml_data = self.llm.parse_yaml_response(response)
|
||||||
action = yaml_data.get("action", "generate_code")
|
action = yaml_data.get("action", "")
|
||||||
|
|
||||||
|
# If YAML parsing returned empty/no action, try to detect action from raw text
|
||||||
|
if not action:
|
||||||
|
if "analysis_complete" in response:
|
||||||
|
action = "analysis_complete"
|
||||||
|
# Try to extract final_report from raw text
|
||||||
|
if not yaml_data.get("final_report"):
|
||||||
|
yaml_data["action"] = "analysis_complete"
|
||||||
|
yaml_data["final_report"] = ""
|
||||||
|
elif "collect_figures" in response:
|
||||||
|
action = "collect_figures"
|
||||||
|
yaml_data["action"] = "collect_figures"
|
||||||
|
else:
|
||||||
|
action = "generate_code"
|
||||||
|
|
||||||
print(f"[TARGET] 检测到动作: {action}")
|
print(f"[TARGET] 检测到动作: {action}")
|
||||||
|
|
||||||
@@ -155,6 +169,11 @@ class DataAnalysisAgent:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 解析响应失败: {str(e)},尝试提取代码并按generate_code处理")
|
print(f"[WARN] 解析响应失败: {str(e)},尝试提取代码并按generate_code处理")
|
||||||
|
# Check if this is actually an analysis_complete or collect_figures response
|
||||||
|
if "analysis_complete" in response:
|
||||||
|
return self._handle_analysis_complete(response, {"final_report": ""})
|
||||||
|
if "collect_figures" in response:
|
||||||
|
return self._handle_collect_figures(response, {"figures_to_collect": []})
|
||||||
# 即使YAML解析失败,也尝试提取代码
|
# 即使YAML解析失败,也尝试提取代码
|
||||||
extracted_code = extract_code_from_response(response)
|
extracted_code = extract_code_from_response(response)
|
||||||
if extracted_code:
|
if extracted_code:
|
||||||
|
|||||||
@@ -1,5 +1,20 @@
|
|||||||
@echo off
|
@echo off
|
||||||
echo Starting IOV Data Analysis Agent Web Interface...
|
chcp 65001 >nul
|
||||||
echo Please open http://localhost:8000 in your browser.
|
set PYTHONIOENCODING=utf-8
|
||||||
python -m uvicorn web.main:app --reload --reload-exclude "outputs/*" --host 0.0.0.0 --port 8000
|
|
||||||
|
:: Get local IP address
|
||||||
|
for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /c:"IPv4"') do (
|
||||||
|
for /f "tokens=1" %%b in ("%%a") do set LOCAL_IP=%%b
|
||||||
|
)
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo IOV Data Analysis Agent
|
||||||
|
echo ========================
|
||||||
|
echo.
|
||||||
|
echo Local: http://localhost:8000
|
||||||
|
if defined LOCAL_IP (
|
||||||
|
echo Network: http://%LOCAL_IP%:8000
|
||||||
|
)
|
||||||
|
echo.
|
||||||
|
python -m uvicorn web.main:app --reload --reload-exclude "outputs" --reload-exclude "uploads" --reload-exclude ".hypothesis" --reload-exclude ".cache" --host 0.0.0.0 --port 8000
|
||||||
pause
|
pause
|
||||||
|
|||||||
22
test.py
22
test.py
@@ -1,22 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
快速测试 LLM 连接是否正常
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
base_url=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:9999/v1"),
|
|
||||||
api_key=os.getenv("OPENAI_API_KEY", ""),
|
|
||||||
)
|
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo"),
|
|
||||||
messages=[{"role": "user", "content": "Hello"}],
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response.choices[0].message.content)
|
|
||||||
@@ -91,9 +91,10 @@ class LLMHelper:
|
|||||||
yaml_content = yaml_content.split('\n', 1)[1]
|
yaml_content = yaml_content.split('\n', 1)[1]
|
||||||
|
|
||||||
# Fix Windows backslash paths that break YAML double-quoted strings.
|
# Fix Windows backslash paths that break YAML double-quoted strings.
|
||||||
# e.g. "D:\code\iov..." → "D:/code/iov..." inside quoted values
|
# Replace ALL backslashes inside double-quoted strings with forward slashes.
|
||||||
|
# This handles both "D:\code\..." and "outputs\session_..." patterns.
|
||||||
yaml_content = re.sub(
|
yaml_content = re.sub(
|
||||||
r'"([A-Za-z]:\\[^"]*)"',
|
r'"([^"]*\\[^"]*)"',
|
||||||
lambda m: '"' + m.group(1).replace('\\', '/') + '"',
|
lambda m: '"' + m.group(1).replace('\\', '/') + '"',
|
||||||
yaml_content,
|
yaml_content,
|
||||||
)
|
)
|
||||||
|
|||||||
61
web/main.py
61
web/main.py
@@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)):
|
|||||||
with open(file_location, "wb+") as file_object:
|
with open(file_location, "wb+") as file_object:
|
||||||
file_object.write(file.file.read())
|
file_object.write(file.file.read())
|
||||||
saved_files.append(file_location)
|
saved_files.append(file_location)
|
||||||
|
# Track the most recently uploaded files for the next analysis
|
||||||
|
app.state.last_uploaded_files = saved_files
|
||||||
return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}
|
return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}
|
||||||
|
|
||||||
@app.post("/api/start")
|
@app.post("/api/start")
|
||||||
async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
|
async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
|
||||||
session_id = session_manager.create_session()
|
session_id = session_manager.create_session()
|
||||||
|
|
||||||
files = glob.glob("uploads/*.csv")
|
# Use only the most recently uploaded files, not everything in uploads/
|
||||||
|
files = getattr(app.state, 'last_uploaded_files', None)
|
||||||
if not files:
|
if not files:
|
||||||
if os.path.exists("cleaned_data.csv"):
|
# Fallback: scan uploads directory
|
||||||
files = ["cleaned_data.csv"]
|
files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx")
|
||||||
else:
|
if not files:
|
||||||
raise HTTPException(status_code=400, detail="No CSV files found")
|
raise HTTPException(status_code=400, detail="No data files found. Please upload files first.")
|
||||||
|
|
||||||
files = [os.path.abspath(f) for f in files] # Only use absolute paths
|
files = [os.path.abspath(f) for f in files] # Only use absolute paths
|
||||||
|
|
||||||
@@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest):
|
|||||||
if not target:
|
if not target:
|
||||||
raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")
|
raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")
|
||||||
|
|
||||||
# 构建上下文窗口(前后各2个段落)
|
# Build the actual content to polish: include adjacent table paragraphs
|
||||||
|
# so that when user clicks on text below a table, the table gets polished too
|
||||||
|
polish_para_ids = [target["id"]]
|
||||||
|
polish_content_parts = [target["content"]]
|
||||||
|
|
||||||
|
# Check if previous paragraph is a table — include it
|
||||||
|
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table":
|
||||||
|
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||||
|
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||||
|
|
||||||
|
# Check if next paragraph is a table — include it
|
||||||
|
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table":
|
||||||
|
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||||
|
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||||
|
|
||||||
|
# If the target itself is a table, include adjacent text too
|
||||||
|
if target["type"] == "table":
|
||||||
|
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text":
|
||||||
|
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||||
|
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||||
|
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text":
|
||||||
|
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||||
|
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||||
|
|
||||||
|
combined_content = "\n\n".join(polish_content_parts)
|
||||||
|
|
||||||
|
# 构建上下文窗口(前后各2个段落,排除已包含的)
|
||||||
context_window = []
|
context_window = []
|
||||||
for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
|
for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
|
||||||
if j != target_idx:
|
if paragraphs[j]["id"] not in polish_para_ids:
|
||||||
context_window.append(paragraphs[j]["content"])
|
context_window.append(paragraphs[j]["content"])
|
||||||
context_text = "\n\n".join(context_window)
|
context_text = "\n\n".join(context_window)
|
||||||
|
|
||||||
@@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest):
|
|||||||
## 图表信息
|
## 图表信息
|
||||||
{figures_info}
|
{figures_info}
|
||||||
|
|
||||||
## 需要润色的段落
|
## 需要润色的段落(可能包含表格和文字)
|
||||||
{target['content']}
|
{combined_content}
|
||||||
|
|
||||||
## 要求
|
## 要求
|
||||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||||
|
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||||
- 用具体数据替换模糊描述
|
- 用具体数据替换模糊描述
|
||||||
- 增加业务洞察和趋势判断
|
- 增加业务洞察和趋势判断
|
||||||
- 禁止使用第一人称
|
- 禁止使用第一人称
|
||||||
@@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest):
|
|||||||
## 图表信息
|
## 图表信息
|
||||||
{figures_info}
|
{figures_info}
|
||||||
|
|
||||||
## 需要润色的段落
|
## 需要润色的段落(可能包含表格和文字)
|
||||||
{target['content']}
|
{combined_content}
|
||||||
|
|
||||||
## 要求
|
## 要求
|
||||||
- 保持原有的 Markdown 格式
|
- 保持原有的 Markdown 格式
|
||||||
|
- 如果包含表格,必须同时润色表格内容
|
||||||
- 严格遵循用户指令
|
- 严格遵循用户指令
|
||||||
- 禁止使用第一人称
|
- 禁止使用第一人称
|
||||||
- 直接输出润色后的 Markdown 内容,不要包裹在代码块中"""
|
- 直接输出润色后的 Markdown 内容,不要包裹在代码块中"""
|
||||||
@@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest):
|
|||||||
## 图表信息
|
## 图表信息
|
||||||
{figures_info}
|
{figures_info}
|
||||||
|
|
||||||
## 需要润色的段落
|
## 需要润色的段落(可能包含表格和文字)
|
||||||
{target['content']}
|
{combined_content}
|
||||||
|
|
||||||
## 要求
|
## 要求
|
||||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||||
|
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||||
- 提升专业性:使用同比、环比、占比等术语
|
- 提升专业性:使用同比、环比、占比等术语
|
||||||
- 增加洞察:不仅描述现象,还要分析原因和影响
|
- 增加洞察:不仅描述现象,还要分析原因和影响
|
||||||
- 禁止使用第一人称
|
- 禁止使用第一人称
|
||||||
@@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"paragraph_id": request.paragraph_id,
|
"paragraph_id": request.paragraph_id,
|
||||||
"original": target["content"],
|
"original": combined_content,
|
||||||
"polished": polished_content,
|
"polished": polished_content,
|
||||||
"mode": request.mode,
|
"mode": request.mode,
|
||||||
|
"affected_paragraph_ids": polish_para_ids,
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user