前后端页面同步策略，支持分析模板热编辑以及 YAML 配置，修改提示词编码、占位符等问题，优化文件扫描
This commit is contained in:
@@ -71,6 +71,59 @@ def clean_code_block(code: str) -> str:
|
||||
return '\n'.join(result_lines)
|
||||
|
||||
|
||||
def _is_verification_code(code: str) -> bool:
|
||||
"""Detect code blocks that only check/list files without doing real analysis.
|
||||
|
||||
These are typically generated when the LLM runs os.listdir / os.path.exists
|
||||
loops to verify outputs, and should not appear in the reusable script.
|
||||
"""
|
||||
lines = [l.strip() for l in code.strip().splitlines() if l.strip() and not l.strip().startswith('#')]
|
||||
if not lines:
|
||||
return True
|
||||
|
||||
verification_indicators = 0
|
||||
analysis_indicators = 0
|
||||
|
||||
for line in lines:
|
||||
# Verification patterns
|
||||
if any(kw in line for kw in [
|
||||
'os.listdir(', 'os.path.exists(', 'os.path.getsize(',
|
||||
'os.path.isfile(', '✓', '✗', 'all_exist',
|
||||
]):
|
||||
verification_indicators += 1
|
||||
# Analysis patterns (actual computation / plotting / saving)
|
||||
if any(kw in line for kw in [
|
||||
'.plot(', 'plt.', '.to_csv(', '.value_counts()',
|
||||
'.groupby(', '.corr(', '.fit_transform(', '.fit_predict(',
|
||||
'pd.read_csv(', 'pd.crosstab(', '.describe()',
|
||||
]):
|
||||
analysis_indicators += 1
|
||||
|
||||
# If the block is dominated by verification with no real analysis, skip it
|
||||
return verification_indicators > 0 and analysis_indicators == 0
|
||||
|
||||
|
||||
def _is_duplicate_data_load(code: str, seen_load_blocks: set) -> bool:
|
||||
"""Detect duplicate data loading blocks (LLM 'amnesia' repeats).
|
||||
|
||||
Computes a fingerprint from the code's structural lines (ignoring
|
||||
whitespace and comments) and returns True if we've seen it before.
|
||||
"""
|
||||
# Extract structural fingerprint: non-empty, non-comment lines
|
||||
structural_lines = []
|
||||
for line in code.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith('#'):
|
||||
structural_lines.append(stripped)
|
||||
|
||||
fingerprint = '\n'.join(structural_lines[:30]) # First 30 lines are enough
|
||||
|
||||
if fingerprint in seen_load_blocks:
|
||||
return True
|
||||
seen_load_blocks.add(fingerprint)
|
||||
return False
|
||||
|
||||
|
||||
def generate_reusable_script(
|
||||
analysis_results: List[Dict[str, Any]],
|
||||
data_files: List[str],
|
||||
@@ -92,17 +145,29 @@ def generate_reusable_script(
|
||||
# 收集所有成功执行的代码
|
||||
all_imports = set()
|
||||
code_blocks = []
|
||||
seen_load_blocks: Set[str] = set()
|
||||
|
||||
for result in analysis_results:
|
||||
# 只处理 generate_code 类型的结果
|
||||
if result.get("action") == "collect_figures":
|
||||
continue
|
||||
# Skip retry attempts
|
||||
if result.get("retry"):
|
||||
continue
|
||||
|
||||
code = result.get("code", "")
|
||||
exec_result = result.get("result", {})
|
||||
|
||||
# 只收集成功执行的代码
|
||||
if code and exec_result.get("success", False):
|
||||
# Skip pure verification/file-check code (e.g. os.listdir loops)
|
||||
if _is_verification_code(code):
|
||||
continue
|
||||
|
||||
# Skip duplicate data-loading blocks (LLM amnesia repeats)
|
||||
if _is_duplicate_data_load(code, seen_load_blocks):
|
||||
continue
|
||||
|
||||
# 提取 imports
|
||||
imports = extract_imports(code)
|
||||
all_imports.update(imports)
|
||||
|
||||
Reference in New Issue
Block a user