修复了 evidence 捕获的优先级逻辑：

2026-04-20 10:04:47 +08:00
parent 3e1ecf2549
commit 7303008f48
2 changed files with 33 additions and 4 deletions
--- a/utils/code_executor.py
+++ b/utils/code_executor.py
@@ -387,10 +387,14 @@ from IPython.display import display
            sanitized.append(clean)
        return sanitized

-    def _capture_evidence_rows(self, result, shell) -> List[Dict]:
+    def _capture_evidence_rows(self, result, shell, df_snapshot_before=None) -> List[Dict]:
        """
        Capture up to 10 evidence rows from the execution result.
-        First checks result.result, then falls back to the last DataFrame in namespace.
+
+        Priority order:
+        1. result.result if it's a DataFrame (direct code output)
+        2. Smallest newly-created DataFrame this round (most likely an analysis result)
+        3. Last DataFrame in namespace (fallback)
        """
        try:
            # Primary: check if result.result is a DataFrame
@@ -401,6 +405,31 @@ from IPython.display import display
        except Exception:
            pass

+        # Secondary: find the smallest NEW DataFrame created this round
+        # (e.g. groupby result, crosstab, etc. — more relevant than the main df)
+        if df_snapshot_before is not None:
+            try:
+                after = self._snapshot_dataframes(shell)
+                new_names = [n for n in after if n not in df_snapshot_before]
+                if new_names:
+                    # Pick the smallest new DataFrame (most likely a summary/aggregation)
+                    best_df = None
+                    best_size = float('inf')
+                    for name in new_names:
+                        try:
+                            obj = shell.user_ns[name]
+                            if isinstance(obj, pd.DataFrame) and len(obj) < best_size:
+                                best_df = obj
+                                best_size = len(obj)
+                        except Exception:
+                            continue
+                    if best_df is not None:
+                        return self._sanitize_for_json(
+                            best_df.head(10).to_dict(orient="records")
+                        )
+            except Exception:
+                pass
+
        # Fallback: find the last-assigned DataFrame variable in namespace
        try:
            last_df = None
@@ -634,7 +663,7 @@ from IPython.display import display
            # --- 自动保存机制 end ---

            # --- Task 5: Evidence capture ---
-            evidence_rows = self._capture_evidence_rows(result, self.shell)
+            evidence_rows = self._capture_evidence_rows(result, self.shell, df_snapshot_before)

            # --- Task 6.2-6.4: DataFrame auto-detection and export ---
            auto_exported_files = []