前后端页面同步策略，支持分析模板热编辑以及 YAML 配置，修复提示词编码、占位符等问题，优化文件扫描

2026-04-20 09:50:35 +08:00
parent 00bd48e7e7
commit 3e1ecf2549
14 changed files with 539 additions and 287 deletions
--- a/utils/code_executor.py
+++ b/utils/code_executor.py
@@ -92,12 +92,29 @@ class CodeExecutor:
    AUTO_EXPORT_MAX_ROWS = 50000

    # Variable names to skip during DataFrame auto-export
-    # (common import aliases and built-in namespace names)
+    # (common import aliases, built-in namespace names, and typical
+    #  temporary/intermediate variable names that shouldn't be persisted)
    _SKIP_EXPORT_NAMES = {
+        # Import aliases
        "pd", "np", "plt", "sns", "os", "json", "sys", "re", "io",
        "csv", "glob", "duckdb", "display", "math", "datetime", "time",
        "warnings", "logging", "copy", "pickle", "pathlib", "collections",
        "itertools", "functools", "operator", "random", "networkx",
+        # Common data variable — the main loaded DataFrame should not be
+        # auto-exported every round; the LLM can save it explicitly via
+        # DATA_FILE_SAVED if needed.
+        "df",
+        # Typical intermediate/temporary variable names from analysis code
+        "cross_table", "cross_table_filtered",
+        "module_issue_table", "module_issue_filtered",
+        "correlation_matrix",
+        "feature_data", "person_stats", "top_persons",
+        "abnormal_durations", "abnormal_orders",
+        "missing_df", "missing_values", "missing_percent",
+        "monthly_counts", "monthly_summary",
+        "distribution_results", "phrase_freq",
+        "normal_durations",
+        "df_check", "df_temp",
    }

    # Regex for parsing DATA_FILE_SAVED markers
@@ -341,15 +358,31 @@ from IPython.display import display

    @staticmethod
    def _sanitize_for_json(rows: List[Dict]) -> List[Dict]:
-        """Replace NaN/inf/-inf with None so the data is JSON-serializable."""
+        """Make evidence row values JSON-serializable.
+
+        Handles NaN/inf → None, Timestamp/datetime → isoformat string,
+        numpy scalars → Python native types.
+        """
        import math
        sanitized = []
        for row in rows:
            clean = {}
            for k, v in row.items():
-                if isinstance(v, float) and (math.isnan(v) or math.isinf(v)):
+                if v is None:
                    clean[k] = None
+                elif isinstance(v, float) and (math.isnan(v) or math.isinf(v)):
+                    clean[k] = None
+                elif hasattr(v, 'isoformat'):  # Timestamp, datetime
+                    clean[k] = v.isoformat()
+                elif hasattr(v, 'item'):  # numpy scalar
+                    clean[k] = v.item()
                else:
+                    try:
+                        if pd.isna(v):
+                            clean[k] = None
+                            continue
+                    except (TypeError, ValueError):
+                        pass
                    clean[k] = v
            sanitized.append(clean)
        return sanitized
@@ -405,12 +438,17 @@ from IPython.display import display
    def _detect_new_dataframes(
        self, before: Dict[str, int], after: Dict[str, int]
    ) -> List[str]:
-        """Return variable names of new or changed DataFrames."""
-        new_or_changed = []
+        """Return variable names of truly NEW DataFrames only.
+
+        Only returns names that did not exist in the before-snapshot.
+        Changed DataFrames (same name, different id) are excluded to avoid
+        re-exporting the main 'df' or other modified variables every round.
+        """
+        new_only = []
        for name, obj_id in after.items():
-            if name not in before or before[name] != obj_id:
-                new_or_changed.append(name)
-        return new_or_changed
+            if name not in before:
+                new_only.append(name)
+        return new_only

    def _export_dataframe(self, var_name: str, df) -> Optional[Dict[str, Any]]:
        """