前后端页面同步策略,支持分析模板热编辑以及yaml配置,修改提示词编码,占位符等问题,优化文件扫描

This commit is contained in:
2026-04-20 09:50:35 +08:00
parent 00bd48e7e7
commit 3e1ecf2549
14 changed files with 539 additions and 287 deletions

View File

@@ -71,6 +71,59 @@ def clean_code_block(code: str) -> str:
return '\n'.join(result_lines)
def _is_verification_code(code: str) -> bool:
"""Detect code blocks that only check/list files without doing real analysis.
These are typically generated when the LLM runs os.listdir / os.path.exists
loops to verify outputs, and should not appear in the reusable script.
"""
lines = [l.strip() for l in code.strip().splitlines() if l.strip() and not l.strip().startswith('#')]
if not lines:
return True
verification_indicators = 0
analysis_indicators = 0
for line in lines:
# Verification patterns
if any(kw in line for kw in [
'os.listdir(', 'os.path.exists(', 'os.path.getsize(',
'os.path.isfile(', '', '', 'all_exist',
]):
verification_indicators += 1
# Analysis patterns (actual computation / plotting / saving)
if any(kw in line for kw in [
'.plot(', 'plt.', '.to_csv(', '.value_counts()',
'.groupby(', '.corr(', '.fit_transform(', '.fit_predict(',
'pd.read_csv(', 'pd.crosstab(', '.describe()',
]):
analysis_indicators += 1
# If the block is dominated by verification with no real analysis, skip it
return verification_indicators > 0 and analysis_indicators == 0
def _is_duplicate_data_load(code: str, seen_load_blocks: set) -> bool:
"""Detect duplicate data loading blocks (LLM 'amnesia' repeats).
Computes a fingerprint from the code's structural lines (ignoring
whitespace and comments) and returns True if we've seen it before.
"""
# Extract structural fingerprint: non-empty, non-comment lines
structural_lines = []
for line in code.splitlines():
stripped = line.strip()
if stripped and not stripped.startswith('#'):
structural_lines.append(stripped)
fingerprint = '\n'.join(structural_lines[:30]) # First 30 lines are enough
if fingerprint in seen_load_blocks:
return True
seen_load_blocks.add(fingerprint)
return False
def generate_reusable_script(
analysis_results: List[Dict[str, Any]],
data_files: List[str],
@@ -92,17 +145,29 @@ def generate_reusable_script(
# 收集所有成功执行的代码
all_imports = set()
code_blocks = []
seen_load_blocks: Set[str] = set()
for result in analysis_results:
# 只处理 generate_code 类型的结果
if result.get("action") == "collect_figures":
continue
# Skip retry attempts
if result.get("retry"):
continue
code = result.get("code", "")
exec_result = result.get("result", {})
# 只收集成功执行的代码
if code and exec_result.get("success", False):
# Skip pure verification/file-check code (e.g. os.listdir loops)
if _is_verification_code(code):
continue
# Skip duplicate data-loading blocks (LLM amnesia repeats)
if _is_duplicate_data_load(code, seen_load_blocks):
continue
# 提取 imports
imports = extract_imports(code)
all_imports.update(imports)