前后端页面同步策略,支持分析模板热编辑以及yaml配置,修改提示词编码,占位符等问题,优化文件扫描
This commit is contained in:
@@ -1,289 +1,153 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
分析模板系统 - 提供预定义的分析场景
|
||||
分析模板系统 - 从 config/templates/*.yaml 加载模板
|
||||
|
||||
模板文件格式:
|
||||
name: 模板显示名称
|
||||
description: 模板描述
|
||||
steps:
|
||||
- name: 步骤名称
|
||||
description: 步骤描述
|
||||
prompt: 给LLM的指令
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
import glob
|
||||
import yaml
|
||||
from typing import List, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config", "templates")
|
||||
|
||||
|
||||
@dataclass
class AnalysisStep:
    """One step of an analysis template.

    ``analysis_type`` and ``prompt`` default to empty strings so a step can be
    created from just ``name``/``description``/``prompt`` keywords (as the
    YAML loader in this module does) while positional callers that still pass
    all four values keep working, because the field order is unchanged.
    """

    name: str  # short title of the step
    description: str  # human-readable explanation of the step
    analysis_type: str = ""  # explore, visualize, calculate, report
    prompt: str = ""  # instruction text handed to the LLM
|
||||
|
||||
|
||||
class AnalysisTemplate:
    """An analysis template: a named, described, ordered list of steps.

    Instances are normally created from a YAML file, in which case ``name`` is
    the template key, ``display_name`` the human-readable title and
    ``filepath`` the file the template came from.  Subclasses that only pass
    ``name``/``description`` and override :meth:`build_steps` are supported
    as well.
    """

    def __init__(
        self,
        name: str,
        display_name: str = "",
        description: str = "",
        steps: "List[AnalysisStep] | None" = None,
        filepath: str = "",
    ):
        """Store the template metadata.

        Args:
            name: Template key.
            display_name: Title used as the prompt heading; falls back to
                ``name`` when empty.
            description: Free-text description placed under the heading.
            steps: Ordered analysis steps; ``None`` means no stored steps.
            filepath: Source file of the template, empty when unknown.
        """
        self.name = name
        # Callers that predate ``display_name`` only supply ``name``.
        self.display_name = display_name or name
        self.description = description
        self.steps = steps if steps is not None else []
        self.filepath = filepath

    def build_steps(self, **kwargs) -> "List[AnalysisStep]":
        """Return the steps to render; subclasses may build them from kwargs."""
        return self.steps

    def get_full_prompt(self, **kwargs) -> str:
        """Render the template as a Markdown prompt.

        Keyword arguments are forwarded to :meth:`build_steps`.

        Returns:
            A heading with the display name, the description, then one
            numbered section per step containing its description and its
            prompt inside a fenced block.
        """
        parts = [
            f"# {self.display_name}\n\n{self.description}\n\n",
            "## 分析步骤:\n\n",
        ]
        for index, step in enumerate(self.build_steps(**kwargs), 1):
            parts.append(f"### {index}. {step.name}\n")
            parts.append(f"{step.description}\n\n")
            parts.append(f"```\n{step.prompt}\n```\n\n")
        return "".join(parts)
|
||||
|
||||
|
||||
class HealthReportTemplate(AnalysisTemplate):
    """Health report template for connected-vehicle support tickets."""

    # (name, description, analysis_type, prompt) of every step, in order.
    _STEP_SPECS = (
        ("数据概览与质量检查", "检查数据完整性、缺失值、异常值等", "explore",
         "加载数据并进行质量检查,输出数据概况和潜在问题"),
        ("工单总量分析", "统计总工单数、时间分布、趋势变化", "calculate",
         "计算总工单数,按时间维度统计工单量,绘制时间序列趋势图"),
        ("车型维度分析", "分析不同车型的工单分布和问题特征", "visualize",
         "统计各车型工单数量,绘制车型分布饼图和柱状图,识别高风险车型"),
        ("模块维度分析", "分析工单涉及的技术模块分布", "visualize",
         "统计各技术模块的工单量,绘制模块分布图,识别高频问题模块"),
        ("功能维度分析", "分析具体功能点的问题分布", "visualize",
         "统计各功能的工单量,绘制TOP功能问题排行,分析功能稳定性"),
        ("问题严重程度分析", "分析工单的严重程度分布", "visualize",
         "统计不同严重程度的工单比例,绘制严重程度分布图"),
        ("处理时长分析", "分析工单处理时效性", "calculate",
         "计算平均处理时长、SLA达成率,识别超时工单,绘制时长分布图"),
        ("责任人工作负载分析", "分析各责任人的工单负载和处理效率", "visualize",
         "统计各责任人的工单数和处理效率,绘制负载分布图,识别超负荷人员"),
        ("来源渠道分析", "分析工单来源渠道分布", "visualize",
         "统计各来源渠道的工单量,绘制渠道分布图"),
        ("高频问题深度分析", "识别并深入分析高频问题", "explore",
         "提取TOP10高频问题,分析问题原因、影响范围和解决方案"),
        ("综合健康度评分", "基于多个维度计算综合健康度评分", "calculate",
         "综合考虑工单量、处理时长、问题严重度等指标,计算健康度评分"),
        ("生成最终报告", "整合所有分析结果,生成完整报告", "report",
         "整合所有图表和分析结论,生成一份完整的车联网工单健康度报告"),
    )

    def __init__(self):
        title = "车联网工单健康度报告"
        summary = "全面分析车联网技术支持工单的健康状况,从多个维度评估工单处理效率和质量"
        super().__init__(name=title, description=summary)

    def build_steps(self, **kwargs) -> List[AnalysisStep]:
        """Build the steps of the health report; keyword arguments are unused."""
        return [
            AnalysisStep(
                name=step_name,
                description=step_desc,
                analysis_type=step_type,
                prompt=step_prompt,
            )
            for step_name, step_desc, step_type, step_prompt in self._STEP_SPECS
        ]
|
||||
def to_dict(self) -> Dict[str, Any]:
    """Return the template as a plain dict of built-in types.

    Each step is reduced to its ``name``, ``description`` and ``prompt``.
    """
    step_dicts = []
    for step in self.steps:
        step_dicts.append(
            {
                "name": step.name,
                "description": step.description,
                "prompt": step.prompt,
            }
        )

    result = {}
    result["name"] = self.name
    result["display_name"] = self.display_name
    result["description"] = self.description
    result["steps"] = step_dicts
    return result
|
||||
|
||||
|
||||
class TrendAnalysisTemplate(AnalysisTemplate):
    """Time-series trend analysis template."""

    def __init__(self):
        title = "时间序列趋势分析"
        summary = "分析数据的时间趋势、季节性和周期性特征"
        super().__init__(name=title, description=summary)

    def build_steps(self, time_column: str = "日期", value_column: str = "数值", **kwargs) -> List[AnalysisStep]:
        """Build the trend-analysis steps.

        ``time_column`` and ``value_column`` are interpolated into the prompts
        of the first two steps; other keyword arguments are unused.
        """
        # (name, description, analysis_type, prompt) of every step, in order.
        specs = (
            ("时间序列数据准备", "将数据转换为时间序列格式", "explore",
             f"将 '{time_column}' 列转换为日期格式,按时间排序数据"),
            ("趋势可视化", "绘制时间序列图", "visualize",
             f"绘制 '{value_column}' 随 '{time_column}' 的变化趋势图,添加移动平均线"),
            ("趋势分析", "识别上升、下降或平稳趋势", "calculate",
             "计算趋势线斜率,判断整体趋势方向和变化速率"),
            ("季节性分析", "检测季节性模式", "visualize",
             "分析月度、季度等周期性模式,绘制季节性分解图"),
            ("异常点检测", "识别时间序列中的异常点", "calculate",
             "使用统计方法检测时间序列中的异常值,标注在图表上"),
        )
        return [
            AnalysisStep(
                name=step_name,
                description=step_desc,
                analysis_type=step_type,
                prompt=step_prompt,
            )
            for step_name, step_desc, step_type, step_prompt in specs
        ]
|
||||
def _load_template_from_file(filepath: str) -> AnalysisTemplate:
    """Load one analysis template from a YAML file.

    The template key is the file name without its extension.  Missing or
    null ``name``/``description``/``steps`` entries fall back to the template
    key, an empty string and an empty step list respectively.

    Args:
        filepath: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The template described by the file.

    Raises:
        ValueError: If the file is empty, its top level is not a mapping,
            ``steps`` is not a list, or a step is not a mapping.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # An empty document parses to None and a scalar/list document has no
    # keys; fail with a clear message instead of an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(f"模板文件内容必须是 YAML 映射: {filepath}")

    template_name = os.path.splitext(os.path.basename(filepath))[0]

    # A bare ``steps:`` key parses to None; treat it like an absent key.
    raw_steps = data.get("steps") or []
    if not isinstance(raw_steps, list):
        raise ValueError(f"模板 steps 必须是列表: {filepath}")

    steps = []
    for position, raw_step in enumerate(raw_steps, 1):
        if not isinstance(raw_step, dict):
            raise ValueError(f"模板第 {position} 个步骤必须是映射: {filepath}")
        # ``or ""`` keeps explicit nulls from showing up as "None" in prompts.
        steps.append(AnalysisStep(
            name=raw_step.get("name") or "",
            description=raw_step.get("description") or "",
            prompt=raw_step.get("prompt") or "",
        ))

    return AnalysisTemplate(
        name=template_name,
        display_name=data.get("name") or template_name,
        description=data.get("description") or "",
        steps=steps,
        filepath=filepath,
    )
|
||||
|
||||
|
||||
class AnomalyDetectionTemplate(AnalysisTemplate):
    """Outlier / anomaly detection template."""

    # (name, description, analysis_type, prompt) of every step, in order.
    _STEP_SPECS = (
        ("数值列统计分析", "计算数值列的统计特征", "calculate",
         "计算所有数值列的均值、标准差、四分位数等统计量"),
        ("箱线图可视化", "使用箱线图识别异常值", "visualize",
         "为每个数值列绘制箱线图,直观展示异常值分布"),
        ("Z-Score异常检测", "使用Z-Score方法检测异常值", "calculate",
         "计算每个数值的Z-Score,标记|Z|>3的异常值"),
        ("IQR异常检测", "使用四分位距方法检测异常值", "calculate",
         "使用IQR方法(Q1-1.5*IQR, Q3+1.5*IQR)检测异常值"),
        ("异常值汇总报告", "整理所有检测到的异常值", "report",
         "汇总所有异常值,分析其特征和可能原因,提供处理建议"),
    )

    def __init__(self):
        title = "异常值检测分析"
        summary = "识别数据中的异常值和离群点"
        super().__init__(name=title, description=summary)

    def build_steps(self, **kwargs) -> List[AnalysisStep]:
        """Build the anomaly-detection steps; keyword arguments are unused."""
        return [
            AnalysisStep(
                name=step_name,
                description=step_desc,
                analysis_type=step_type,
                prompt=step_prompt,
            )
            for step_name, step_desc, step_type, step_prompt in self._STEP_SPECS
        ]
|
||||
def _scan_templates() -> Dict[str, AnalysisTemplate]:
    """Load every ``*.yaml`` template found in ``TEMPLATES_DIR``.

    The directory is created when it does not exist yet, in which case the
    result is empty.  Files are visited in sorted path order; a file that
    fails to load is reported with a warning on stdout and skipped.

    Returns:
        Mapping from template name to the loaded template.
    """
    loaded: Dict[str, AnalysisTemplate] = {}

    if not os.path.exists(TEMPLATES_DIR):
        os.makedirs(TEMPLATES_DIR, exist_ok=True)
        return loaded

    pattern = os.path.join(TEMPLATES_DIR, "*.yaml")
    for yaml_path in sorted(glob.glob(pattern)):
        try:
            template = _load_template_from_file(yaml_path)
            loaded[template.name] = template
        except Exception as err:
            print(f"[WARN] 加载模板失败 {yaml_path}: {err}")

    return loaded
|
||||
|
||||
|
||||
class ComparisonAnalysisTemplate(AnalysisTemplate):
    """Group comparison analysis template."""

    def __init__(self):
        title = "分组对比分析"
        summary = "对比不同分组之间的差异和特征"
        super().__init__(name=title, description=summary)

    def build_steps(self, group_column: str = "分组", value_column: str = "数值", **kwargs) -> List[AnalysisStep]:
        """Build the comparison steps.

        ``group_column`` and ``value_column`` are interpolated into the prompt
        of the first step; other keyword arguments are unused.
        """
        # (name, description, analysis_type, prompt) of every step, in order.
        specs = (
            ("分组统计", "计算各组的统计指标", "calculate",
             f"按 '{group_column}' 分组,计算 '{value_column}' 的均值、中位数、标准差"),
            ("分组可视化对比", "绘制对比图表", "visualize",
             "绘制各组的柱状图和箱线图,直观对比差异"),
            ("差异显著性检验", "统计检验组间差异", "calculate",
             "进行t检验或方差分析,判断组间差异是否显著"),
            ("对比结论", "总结对比结果", "report",
             "总结各组特征、主要差异和业务洞察"),
        )
        return [
            AnalysisStep(
                name=step_name,
                description=step_desc,
                analysis_type=step_type,
                prompt=step_prompt,
            )
            for step_name, step_desc, step_type, step_prompt in specs
        ]
|
||||
def _get_registry() -> Dict[str, AnalysisTemplate]:
    """Return a registry rebuilt from disk on every call.

    Nothing is cached here, so edits made to the template files are picked
    up by the next call (hot-editing support).
    """
    fresh_registry = _scan_templates()
    return fresh_registry
|
||||
|
||||
|
||||
# Template registry: a snapshot of the templates on disk, taken when this
# module is imported and kept as a module attribute for backward
# compatibility (e.g. tests that read it directly).  save_template() and
# delete_template() rebind it after changing the files.
TEMPLATE_REGISTRY = _scan_templates()
|
||||
|
||||
|
||||
def get_template(template_name: str) -> AnalysisTemplate:
    """Return the template registered under ``template_name``.

    The registry is rebuilt through ``_get_registry()`` on every call so that
    templates edited on disk are picked up without a restart.

    Args:
        template_name: Key of the template to look up.

    Returns:
        The matching template instance.

    Raises:
        ValueError: If no template with that name exists; the message lists
            the available template names.
    """
    registry = _get_registry()
    try:
        return registry[template_name]
    except KeyError:
        raise ValueError(
            f"未找到模板: {template_name}。可用模板: {list(registry.keys())}"
        ) from None
|
||||
|
||||
|
||||
def list_templates() -> List[Dict[str, str]]:
    """List all available templates.

    The registry is rebuilt through ``_get_registry()`` on every call, so the
    result reflects the template files currently on disk.

    Returns:
        One dict per template with the keys ``name``, ``display_name`` and
        ``description``, in registry order.
    """
    return [
        {
            "name": tpl.name,
            "display_name": tpl.display_name,
            "description": tpl.description,
        }
        for tpl in _get_registry().values()
    ]
|
||||
|
||||
|
||||
def save_template(template_name: str, data: Dict[str, Any]) -> str:
    """Create or overwrite the YAML file of a template.

    Args:
        template_name: Template key; becomes the file name (without the
            ``.yaml`` extension) inside ``TEMPLATES_DIR``.
        data: Template content.  ``display_name`` (or, failing that,
            ``name``) is written as the YAML ``name``; ``description`` and
            ``steps`` are written as given, defaulting to ``""`` and ``[]``.

    Returns:
        Path of the written file.

    Raises:
        ValueError: If ``template_name`` is empty, is ``.``/``..`` or
            contains a path separator or NUL character.
    """
    # The name is joined into a filesystem path; refuse anything that could
    # point outside TEMPLATES_DIR (e.g. "../x").
    if (
        not template_name
        or template_name in (".", "..")
        or any(ch in template_name for ch in ("/", "\\", "\0"))
    ):
        raise ValueError(f"非法的模板名称: {template_name!r}")

    os.makedirs(TEMPLATES_DIR, exist_ok=True)
    filepath = os.path.join(TEMPLATES_DIR, f"{template_name}.yaml")

    yaml_data = {
        "name": data.get("display_name", data.get("name", template_name)),
        "description": data.get("description", ""),
        "steps": data.get("steps", []),
    }

    with open(filepath, "w", encoding="utf-8") as f:
        yaml.dump(yaml_data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    # Refresh the module-level snapshot so it reflects the new file.
    global TEMPLATE_REGISTRY
    TEMPLATE_REGISTRY = _scan_templates()

    return filepath
|
||||
|
||||
|
||||
def delete_template(template_name: str) -> bool:
    """Delete the YAML file of a template.

    Args:
        template_name: Template key (file name without ``.yaml``).

    Returns:
        ``True`` if the file was removed, ``False`` if no such template file
        exists or the name is not a plain file name.
    """
    # The name is joined into a filesystem path; a name with separators can
    # never refer to a template in TEMPLATES_DIR, so treat it as "not found"
    # instead of deleting a file somewhere else.
    if (
        not template_name
        or template_name in (".", "..")
        or any(ch in template_name for ch in ("/", "\\", "\0"))
    ):
        return False

    filepath = os.path.join(TEMPLATES_DIR, f"{template_name}.yaml")
    try:
        os.remove(filepath)
    except FileNotFoundError:
        return False

    # Refresh the module-level snapshot so it no longer lists the template.
    global TEMPLATE_REGISTRY
    TEMPLATE_REGISTRY = _scan_templates()
    return True
|
||||
|
||||
@@ -92,12 +92,29 @@ class CodeExecutor:
|
||||
AUTO_EXPORT_MAX_ROWS = 50000
|
||||
|
||||
# Variable names to skip during DataFrame auto-export
|
||||
# (common import aliases and built-in namespace names)
|
||||
# (common import aliases, built-in namespace names, and typical
|
||||
# temporary/intermediate variable names that shouldn't be persisted)
|
||||
_SKIP_EXPORT_NAMES = {
|
||||
# Import aliases
|
||||
"pd", "np", "plt", "sns", "os", "json", "sys", "re", "io",
|
||||
"csv", "glob", "duckdb", "display", "math", "datetime", "time",
|
||||
"warnings", "logging", "copy", "pickle", "pathlib", "collections",
|
||||
"itertools", "functools", "operator", "random", "networkx",
|
||||
# Common data variable — the main loaded DataFrame should not be
|
||||
# auto-exported every round; the LLM can save it explicitly via
|
||||
# DATA_FILE_SAVED if needed.
|
||||
"df",
|
||||
# Typical intermediate/temporary variable names from analysis code
|
||||
"cross_table", "cross_table_filtered",
|
||||
"module_issue_table", "module_issue_filtered",
|
||||
"correlation_matrix",
|
||||
"feature_data", "person_stats", "top_persons",
|
||||
"abnormal_durations", "abnormal_orders",
|
||||
"missing_df", "missing_values", "missing_percent",
|
||||
"monthly_counts", "monthly_summary",
|
||||
"distribution_results", "phrase_freq",
|
||||
"normal_durations",
|
||||
"df_check", "df_temp",
|
||||
}
|
||||
|
||||
# Regex for parsing DATA_FILE_SAVED markers
|
||||
@@ -341,15 +358,31 @@ from IPython.display import display
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_for_json(rows: List[Dict]) -> List[Dict]:
|
||||
"""Replace NaN/inf/-inf with None so the data is JSON-serializable."""
|
||||
"""Make evidence row values JSON-serializable.
|
||||
|
||||
Handles NaN/inf → None, Timestamp/datetime → isoformat string,
|
||||
numpy scalars → Python native types.
|
||||
"""
|
||||
import math
|
||||
sanitized = []
|
||||
for row in rows:
|
||||
clean = {}
|
||||
for k, v in row.items():
|
||||
if isinstance(v, float) and (math.isnan(v) or math.isinf(v)):
|
||||
if v is None:
|
||||
clean[k] = None
|
||||
elif isinstance(v, float) and (math.isnan(v) or math.isinf(v)):
|
||||
clean[k] = None
|
||||
elif hasattr(v, 'isoformat'): # Timestamp, datetime
|
||||
clean[k] = v.isoformat()
|
||||
elif hasattr(v, 'item'): # numpy scalar
|
||||
clean[k] = v.item()
|
||||
else:
|
||||
try:
|
||||
if pd.isna(v):
|
||||
clean[k] = None
|
||||
continue
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
clean[k] = v
|
||||
sanitized.append(clean)
|
||||
return sanitized
|
||||
@@ -405,12 +438,17 @@ from IPython.display import display
|
||||
def _detect_new_dataframes(
|
||||
self, before: Dict[str, int], after: Dict[str, int]
|
||||
) -> List[str]:
|
||||
"""Return variable names of new or changed DataFrames."""
|
||||
new_or_changed = []
|
||||
"""Return variable names of truly NEW DataFrames only.
|
||||
|
||||
Only returns names that did not exist in the before-snapshot.
|
||||
Changed DataFrames (same name, different id) are excluded to avoid
|
||||
re-exporting the main 'df' or other modified variables every round.
|
||||
"""
|
||||
new_only = []
|
||||
for name, obj_id in after.items():
|
||||
if name not in before or before[name] != obj_id:
|
||||
new_or_changed.append(name)
|
||||
return new_or_changed
|
||||
if name not in before:
|
||||
new_only.append(name)
|
||||
return new_only
|
||||
|
||||
def _export_dataframe(self, var_name: str, df) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
|
||||
@@ -84,6 +84,20 @@ class LLMHelper:
|
||||
else:
|
||||
yaml_content = response.strip()
|
||||
|
||||
# Strip language identifier if LLM used ```python instead of ```yaml
|
||||
# e.g. "python\naction: ..." → "action: ..."
|
||||
import re
|
||||
if re.match(r'^[a-zA-Z]+\n', yaml_content):
|
||||
yaml_content = yaml_content.split('\n', 1)[1]
|
||||
|
||||
# Fix Windows backslash paths that break YAML double-quoted strings.
|
||||
# e.g. "D:\code\iov..." → "D:/code/iov..." inside quoted values
|
||||
yaml_content = re.sub(
|
||||
r'"([A-Za-z]:\\[^"]*)"',
|
||||
lambda m: '"' + m.group(1).replace('\\', '/') + '"',
|
||||
yaml_content,
|
||||
)
|
||||
|
||||
parsed = yaml.safe_load(yaml_content)
|
||||
return parsed if parsed is not None else {}
|
||||
except Exception as e:
|
||||
|
||||
@@ -71,6 +71,59 @@ def clean_code_block(code: str) -> str:
|
||||
return '\n'.join(result_lines)
|
||||
|
||||
|
||||
def _is_verification_code(code: str) -> bool:
|
||||
"""Detect code blocks that only check/list files without doing real analysis.
|
||||
|
||||
These are typically generated when the LLM runs os.listdir / os.path.exists
|
||||
loops to verify outputs, and should not appear in the reusable script.
|
||||
"""
|
||||
lines = [l.strip() for l in code.strip().splitlines() if l.strip() and not l.strip().startswith('#')]
|
||||
if not lines:
|
||||
return True
|
||||
|
||||
verification_indicators = 0
|
||||
analysis_indicators = 0
|
||||
|
||||
for line in lines:
|
||||
# Verification patterns
|
||||
if any(kw in line for kw in [
|
||||
'os.listdir(', 'os.path.exists(', 'os.path.getsize(',
|
||||
'os.path.isfile(', '✓', '✗', 'all_exist',
|
||||
]):
|
||||
verification_indicators += 1
|
||||
# Analysis patterns (actual computation / plotting / saving)
|
||||
if any(kw in line for kw in [
|
||||
'.plot(', 'plt.', '.to_csv(', '.value_counts()',
|
||||
'.groupby(', '.corr(', '.fit_transform(', '.fit_predict(',
|
||||
'pd.read_csv(', 'pd.crosstab(', '.describe()',
|
||||
]):
|
||||
analysis_indicators += 1
|
||||
|
||||
# If the block is dominated by verification with no real analysis, skip it
|
||||
return verification_indicators > 0 and analysis_indicators == 0
|
||||
|
||||
|
||||
def _is_duplicate_data_load(code: str, seen_load_blocks: set) -> bool:
|
||||
"""Detect duplicate data loading blocks (LLM 'amnesia' repeats).
|
||||
|
||||
Computes a fingerprint from the code's structural lines (ignoring
|
||||
whitespace and comments) and returns True if we've seen it before.
|
||||
"""
|
||||
# Extract structural fingerprint: non-empty, non-comment lines
|
||||
structural_lines = []
|
||||
for line in code.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith('#'):
|
||||
structural_lines.append(stripped)
|
||||
|
||||
fingerprint = '\n'.join(structural_lines[:30]) # First 30 lines are enough
|
||||
|
||||
if fingerprint in seen_load_blocks:
|
||||
return True
|
||||
seen_load_blocks.add(fingerprint)
|
||||
return False
|
||||
|
||||
|
||||
def generate_reusable_script(
|
||||
analysis_results: List[Dict[str, Any]],
|
||||
data_files: List[str],
|
||||
@@ -92,17 +145,29 @@ def generate_reusable_script(
|
||||
# 收集所有成功执行的代码
|
||||
all_imports = set()
|
||||
code_blocks = []
|
||||
seen_load_blocks: Set[str] = set()
|
||||
|
||||
for result in analysis_results:
|
||||
# 只处理 generate_code 类型的结果
|
||||
if result.get("action") == "collect_figures":
|
||||
continue
|
||||
# Skip retry attempts
|
||||
if result.get("retry"):
|
||||
continue
|
||||
|
||||
code = result.get("code", "")
|
||||
exec_result = result.get("result", {})
|
||||
|
||||
# 只收集成功执行的代码
|
||||
if code and exec_result.get("success", False):
|
||||
# Skip pure verification/file-check code (e.g. os.listdir loops)
|
||||
if _is_verification_code(code):
|
||||
continue
|
||||
|
||||
# Skip duplicate data-loading blocks (LLM amnesia repeats)
|
||||
if _is_duplicate_data_load(code, seen_load_blocks):
|
||||
continue
|
||||
|
||||
# 提取 imports
|
||||
imports = extract_imports(code)
|
||||
all_imports.update(imports)
|
||||
|
||||
Reference in New Issue
Block a user