扩大 YAML 反斜杠修复的匹配范围 — 之前只匹配 "D:\..." 格式，现在匹配所有双引号内含反斜杠的字符串。例如 "outputs\session_20260420..." 会被正确转成 "outputs/session_20260420..."，不再导致 YAML 解析失败。这直接解决了第 10-19 轮的死循环。

_process_response 的 analysis_complete 检测已经在上一轮修好了；配合本次的反斜杠修复，YAML 能正确解析出 action: "analysis_complete"，不会再 fallback 到代码执行。

文件选择改为优先使用最近一次上传的文件 — app.state.last_uploaded_files 记录最近一次上传的文件列表，/api/start 优先使用它；只有在没有上传记录时，才回退到扫描 uploads/ 目录（*.csv 和 *.xlsx），不再默认用 glob("uploads/*.csv") 把所有历史文件都拿来分析，也不再回退到 cleaned_data.csv。
2026-04-20 13:09:54 +08:00
parent 7303008f48
commit c7224153b1
5 changed files with 88 additions and 42 deletions
--- a/web/main.py
+++ b/web/main.py
@@ -375,18 +375,21 @@ async def upload_files(files: list[UploadFile] = File(...)):
        with open(file_location, "wb+") as file_object:
            file_object.write(file.file.read())
        saved_files.append(file_location)
+    # Track the most recently uploaded files for the next analysis
+    app.state.last_uploaded_files = saved_files
    return {"info": f"Saved {len(saved_files)} files", "paths": saved_files}

@app.post("/api/start")
 async def start_analysis(request: StartRequest, background_tasks: BackgroundTasks):
    session_id = session_manager.create_session()
    
-    files = glob.glob("uploads/*.csv")
+    # Use only the most recently uploaded files, not everything in uploads/
+    files = getattr(app.state, 'last_uploaded_files', None)
    if not files:
-        if os.path.exists("cleaned_data.csv"):
-            files = ["cleaned_data.csv"]
-        else:
-            raise HTTPException(status_code=400, detail="No CSV files found")
+        # Fallback: scan uploads directory
+        files = glob.glob("uploads/*.csv") + glob.glob("uploads/*.xlsx")
+    if not files:
+        raise HTTPException(status_code=400, detail="No data files found. Please upload files first.")
            
    files = [os.path.abspath(f) for f in files] # Only use absolute paths
    
@@ -948,10 +951,36 @@ async def polish_paragraph(request: PolishRequest):
    if not target:
        raise HTTPException(status_code=404, detail=f"Paragraph {request.paragraph_id} not found")

-    # 构建上下文窗口（前后各2个段落）
+    # Build the actual content to polish: include adjacent table paragraphs
+    # so that when user clicks on text below a table, the table gets polished too
+    polish_para_ids = [target["id"]]
+    polish_content_parts = [target["content"]]
+
+    # Check if previous paragraph is a table — include it
+    if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "table":
+        polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
+        polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
+
+    # Check if next paragraph is a table — include it
+    if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "table":
+        polish_para_ids.append(paragraphs[target_idx + 1]["id"])
+        polish_content_parts.append(paragraphs[target_idx + 1]["content"])
+
+    # If the target itself is a table, include adjacent text too
+    if target["type"] == "table":
+        if target_idx + 1 < len(paragraphs) and paragraphs[target_idx + 1]["type"] == "text":
+            polish_para_ids.append(paragraphs[target_idx + 1]["id"])
+            polish_content_parts.append(paragraphs[target_idx + 1]["content"])
+        if target_idx > 0 and paragraphs[target_idx - 1]["type"] == "text":
+            polish_para_ids.insert(0, paragraphs[target_idx - 1]["id"])
+            polish_content_parts.insert(0, paragraphs[target_idx - 1]["content"])
+
+    combined_content = "\n\n".join(polish_content_parts)
+
+    # 构建上下文窗口（前后各2个段落，排除已包含的）
    context_window = []
    for j in range(max(0, target_idx - 2), min(len(paragraphs), target_idx + 3)):
-        if j != target_idx:
+        if paragraphs[j]["id"] not in polish_para_ids:
            context_window.append(paragraphs[j]["content"])
    context_text = "\n\n".join(context_window)

@@ -985,11 +1014,12 @@ async def polish_paragraph(request: PolishRequest):
 ## 图表信息
 {figures_info}

-## 需要润色的段落
-{target['content']}
+## 需要润色的段落（可能包含表格和文字）
+{combined_content}

 ## 要求
 - 保持原有的 Markdown 格式（标题级别、表格结构等）
+- 如果包含表格，必须同时润色表格内容（补充数据、修正数值）
 - 用具体数据替换模糊描述
 - 增加业务洞察和趋势判断
 - 禁止使用第一人称
@@ -1007,11 +1037,12 @@ async def polish_paragraph(request: PolishRequest):
 ## 图表信息
 {figures_info}

-## 需要润色的段落
-{target['content']}
+## 需要润色的段落（可能包含表格和文字）
+{combined_content}

 ## 要求
 - 保持原有的 Markdown 格式
+- 如果包含表格，必须同时润色表格内容
 - 严格遵循用户指令
 - 禁止使用第一人称
 - 直接输出润色后的 Markdown 内容，不要包裹在代码块中"""
@@ -1025,11 +1056,12 @@ async def polish_paragraph(request: PolishRequest):
 ## 图表信息
 {figures_info}

-## 需要润色的段落
-{target['content']}
+## 需要润色的段落（可能包含表格和文字）
+{combined_content}

 ## 要求
 - 保持原有的 Markdown 格式（标题级别、表格结构等）
+- 如果包含表格，必须同时润色表格内容（补充数据、修正数值）
 - 提升专业性：使用同比、环比、占比等术语
 - 增加洞察：不仅描述现象，还要分析原因和影响
 - 禁止使用第一人称
@@ -1056,9 +1088,10 @@ async def polish_paragraph(request: PolishRequest):

        return {
            "paragraph_id": request.paragraph_id,
-            "original": target["content"],
+            "original": combined_content,
            "polished": polished_content,
            "mode": request.mode,
+            "affected_paragraph_ids": polish_para_ids,
        }

    except Exception as e: