前后端页面同步策略,支持分析模板热编辑以及yaml配置,修改提示词编码、占位符等问题,优化文件扫描

This commit is contained in:
2026-04-20 09:50:35 +08:00
parent 00bd48e7e7
commit 3e1ecf2549
14 changed files with 539 additions and 287 deletions

View File

@@ -352,9 +352,13 @@ class DataAnalysisAgent:
def _compress_trimmed_messages(self, messages: list) -> str:
"""Compress trimmed messages into a concise summary string.
Extracts the action type from each assistant message and the execution
outcome (success / failure) from the subsequent user feedback message.
Code blocks and raw execution output are excluded.
Extracts the action type from each assistant message, the execution
outcome (success / failure), and completed SOP stages from the
subsequent user feedback message. Code blocks and raw execution
output are excluded.
The summary explicitly lists completed SOP stages so the LLM does
not restart from stage 1 after conversation trimming.
Args:
messages: List of conversation message dicts to compress.
@@ -364,6 +368,17 @@ class DataAnalysisAgent:
"""
summary_parts = ["[分析摘要] 以下是之前分析轮次的概要:"]
round_num = 0
completed_stages = set()
# SOP stage keywords to detect from assistant messages
stage_keywords = {
"阶段1": "数据探索与加载",
"阶段2": "基础分布分析",
"阶段3": "时序与来源分析",
"阶段4": "深度交叉分析",
"阶段5": "效率分析",
"阶段6": "高级挖掘",
}
for msg in messages:
content = msg["content"]
@@ -375,12 +390,27 @@ class DataAnalysisAgent:
action = "collect_figures"
elif "action: \"analysis_complete\"" in content or "action: analysis_complete" in content:
action = "analysis_complete"
# Detect completed SOP stages
for stage_key, stage_name in stage_keywords.items():
if stage_key in content or stage_name in content:
completed_stages.add(f"{stage_key}: {stage_name}")
summary_parts.append(f"- 轮次{round_num}: 动作={action}")
elif msg["role"] == "user" and "代码执行反馈" in content:
success = "失败" if "[ERROR]" in content or "执行错误" in content else "成功"
if summary_parts and summary_parts[-1].startswith("- 轮次"):
summary_parts[-1] += f", 执行结果={success}"
# Append completed stages so the LLM knows where to continue
if completed_stages:
summary_parts.append("")
summary_parts.append("**已完成的SOP阶段** (请勿重复执行):")
for stage in sorted(completed_stages):
summary_parts.append(f"{stage}")
summary_parts.append("")
summary_parts.append("请从下一个未完成的阶段继续,不要重新执行已完成的阶段。")
return "\n".join(summary_parts)
def _profile_files_parallel(self, file_paths: list) -> tuple:
@@ -948,6 +978,17 @@ class DataAnalysisAgent:
- 注意:必须使用实际生成的图片文件名,严禁使用占位符
"""
# Append actual data files list so the LLM uses real filenames in the report
if self._session_ref and self._session_ref.data_files:
data_files_summary = "\n**已生成的数据文件列表** (请在报告中使用这些实际文件名,替换模板中的占位文件名如 [4-1TSP问题聚类.xlsx])\n"
for df_meta in self._session_ref.data_files:
fname = df_meta.get("filename", "")
desc = df_meta.get("description", "")
rows = df_meta.get("rows", 0)
data_files_summary += f"- {fname} ({rows}行): {desc}\n"
data_files_summary += "\n注意:报告模板中的 `[4-1TSP问题聚类.xlsx]` 等占位文件名必须替换为上述实际文件名。如果某类聚类文件未生成,请说明原因(如数据量不足或该分类不适用),不要保留占位符。\n"
prompt += data_files_summary
return prompt
def reset(self):