扩大 YAML 反斜杠修复的匹配范围:之前只匹配 "D:\..." 这类以盘符开头的格式,现在匹配所有双引号内含反斜杠的字符串。例如 "outputs\session_20260420..." 会被正确转换为 "outputs/session_20260420...",不再导致 YAML 解析失败。这直接解决了第 10-19 轮的死循环问题。
_process_response 中的 analysis_complete 检测已在上一轮修复;配合本次的反斜杠修复,YAML 能正确解析出 action: "analysis_complete",不会再回退(fallback)到代码执行。
文件选择改为只使用最近一次上传的文件:app.state.last_uploaded_files 记录最近一次上传的文件列表,/api/start 优先使用它;仅当该列表为空时才回退为扫描 uploads/ 目录(*.csv 和 *.xlsx),不再默认用 glob("uploads/*.csv") 把所有历史文件都拿来分析。
This commit is contained in:
61
web/main.py
61
web/main.py
@@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)):
|
||||
with open(file_location, "wb+") as file_object:
|
||||
file_object.write(file.file.read())
|
||||
saved_files.append(file_location)
|
||||
# Track the most recently uploaded files for the next analysis
|
||||
app.state.last_uploaded_files = saved_files
|
||||
return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}
|
||||
|
||||
@app.post("/api/start")
|
||||
async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
|
||||
session_id = session_manager.create_session()
|
||||
|
||||
files = glob.glob("uploads/*.csv")
|
||||
# Use only the most recently uploaded files, not everything in uploads/
|
||||
files = getattr(app.state, 'last_uploaded_files', None)
|
||||
if not files:
|
||||
if os.path.exists("cleaned_data.csv"):
|
||||
files = ["cleaned_data.csv"]
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="No CSV files found")
|
||||
# Fallback: scan uploads directory
|
||||
files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx")
|
||||
if not files:
|
||||
raise HTTPException(status_code=400, detail="No data files found. Please upload files first.")
|
||||
|
||||
files = [os.path.abspath(f) for f in files] # Only use absolute paths
|
||||
|
||||
@@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest):
|
||||
if not target:
|
||||
raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")
|
||||
|
||||
# 构建上下文窗口(前后各2个段落)
|
||||
# Build the actual content to polish: include adjacent table paragraphs
|
||||
# so that when user clicks on text below a table, the table gets polished too
|
||||
polish_para_ids = [target["id"]]
|
||||
polish_content_parts = [target["content"]]
|
||||
|
||||
# Check if previous paragraph is a table — include it
|
||||
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table":
|
||||
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||
|
||||
# Check if next paragraph is a table — include it
|
||||
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table":
|
||||
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||
|
||||
# If the target itself is a table, include adjacent text too
|
||||
if target["type"] == "table":
|
||||
if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text":
|
||||
polish_para_ids.append(paragraphs[target_idx + 1]["id"])
|
||||
polish_content_parts.append(paragraphs[target_idx + 1]["content"])
|
||||
if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text":
|
||||
polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
|
||||
polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
|
||||
|
||||
combined_content = "\n\n".join(polish_content_parts)
|
||||
|
||||
# 构建上下文窗口(前后各2个段落,排除已包含的)
|
||||
context_window = []
|
||||
for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
|
||||
if j != target_idx:
|
||||
if paragraphs[j]["id"] not in polish_para_ids:
|
||||
context_window.append(paragraphs[j]["content"])
|
||||
context_text = "\n\n".join(context_window)
|
||||
|
||||
@@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||
- 用具体数据替换模糊描述
|
||||
- 增加业务洞察和趋势判断
|
||||
- 禁止使用第一人称
|
||||
@@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式
|
||||
- 如果包含表格,必须同时润色表格内容
|
||||
- 严格遵循用户指令
|
||||
- 禁止使用第一人称
|
||||
- 直接输出润色后的 Markdown 内容,不要包裹在代码块中"""
|
||||
@@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest):
|
||||
## 图表信息
|
||||
{figures_info}
|
||||
|
||||
## 需要润色的段落
|
||||
{target['content']}
|
||||
## 需要润色的段落(可能包含表格和文字)
|
||||
{combined_content}
|
||||
|
||||
## 要求
|
||||
- 保持原有的 Markdown 格式(标题级别、表格结构等)
|
||||
- 如果包含表格,必须同时润色表格内容(补充数据、修正数值)
|
||||
- 提升专业性:使用同比、环比、占比等术语
|
||||
- 增加洞察:不仅描述现象,还要分析原因和影响
|
||||
- 禁止使用第一人称
|
||||
@@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest):
|
||||
|
||||
return {
|
||||
"paragraph_id": request.paragraph_id,
|
||||
"original": target["content"],
|
||||
"original": combined_content,
|
||||
"polished": polished_content,
|
||||
"mode": request.mode,
|
||||
"affected_paragraph_ids": polish_para_ids,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user