修复了 evidence 捕获的优先级逻辑。

This commit is contained in:
2026-04-20 10:04:47 +08:00
parent 3e1ecf2549
commit 7303008f48
2 changed files with 33 additions and 4 deletions

View File

@@ -387,10 +387,14 @@ from IPython.display import display
sanitized.append(clean)
return sanitized
def _capture_evidence_rows(self, result, shell) -> List[Dict]:
def _capture_evidence_rows(self, result, shell, df_snapshot_before=None) -> List[Dict]:
"""
Capture up to 10 evidence rows from the execution result.
First checks result.result, then falls back to the last DataFrame in namespace.
Priority order:
1. result.result if it's a DataFrame (direct code output)
2. Smallest newly-created DataFrame this round (most likely an analysis result)
3. Last DataFrame in namespace (fallback)
"""
try:
# Primary: check if result.result is a DataFrame
@@ -401,6 +405,31 @@ from IPython.display import display
except Exception:
pass
# Secondary: find the smallest NEW DataFrame created this round
# (e.g. groupby result, crosstab, etc. — more relevant than the main df)
if df_snapshot_before is not None:
try:
after = self._snapshot_dataframes(shell)
new_names = [n for n in after if n not in df_snapshot_before]
if new_names:
# Pick the smallest new DataFrame (most likely a summary/aggregation)
best_df = None
best_size = float('inf')
for name in new_names:
try:
obj = shell.user_ns[name]
if isinstance(obj, pd.DataFrame) and len(obj) < best_size:
best_df = obj
best_size = len(obj)
except Exception:
continue
if best_df is not None:
return self._sanitize_for_json(
best_df.head(10).to_dict(orient="records")
)
except Exception:
pass
# Fallback: find the last-assigned DataFrame variable in namespace
try:
last_df = None
@@ -634,7 +663,7 @@ from IPython.display import display
# --- 自动保存机制 end ---
# --- Task 5: Evidence capture ---
evidence_rows = self._capture_evidence_rows(result, self.shell)
evidence_rows = self._capture_evidence_rows(result, self.shell, df_snapshot_before)
# --- Task 6.2-6.4: DataFrame auto-detection and export ---
auto_exported_files = []