大更新,架构调整,数据分析能力提升。

This commit is contained in:
2026-04-19 21:30:08 +08:00
parent 9d01f004d4
commit 00bd48e7e7
26 changed files with 4375 additions and 252 deletions

View File

@@ -154,6 +154,82 @@ def sanitize_execution_feedback(feedback: str, max_lines: int = 30) -> str:
return "\n".join(safe_lines)
def _extract_column_from_error(error_message: str) -> Optional[str]:
"""Extract column name from error message patterns like KeyError: 'col_name'.
Supports:
- KeyError: 'column_name' or KeyError: "column_name"
- column 'column_name' or column "column_name" (case-insensitive)
Returns:
The extracted column name, or None if no column reference is found.
"""
match = re.search(r"KeyError:\s*['\"](.+?)['\"]", error_message)
if match:
return match.group(1)
match = re.search(r"column\s+['\"](.+?)['\"]", error_message, re.IGNORECASE)
if match:
return match.group(1)
return None
def _lookup_column_in_profile(column_name: Optional[str], safe_profile: str) -> Optional[dict]:
"""Look up column metadata in the safe profile markdown table.
Parses the markdown table rows produced by build_safe_profile() and returns
a dict with keys: dtype, null_rate, unique_count, description.
Args:
column_name: The column name to look up (may be None).
safe_profile: The safe profile markdown string.
Returns:
A dict of column metadata, or None if not found.
"""
if not column_name:
return None
for line in safe_profile.split("\n"):
if line.startswith("|") and column_name in line:
parts = [p.strip() for p in line.split("|") if p.strip()]
if len(parts) >= 5 and parts[0] == column_name:
return {
"dtype": parts[1],
"null_rate": parts[2],
"unique_count": parts[3],
"description": parts[4],
}
return None
def generate_enriched_hint(error_message: str, safe_profile: str) -> str:
    """Build a retry hint for a data-context error using the safe profile.

    The column referenced by the error is extracted with
    _extract_column_from_error() and looked up with
    _lookup_column_in_profile(). The hint always contains the retry header,
    the error message and a closing instruction; when the column is found in
    the profile, its dtype, unique count, null rate and description are
    listed in between.

    Args:
        error_message: The error message from code execution.
        safe_profile: The safe profile markdown string.

    Returns:
        A hint string with retry context and column metadata (if found).
    """
    referenced = _extract_column_from_error(error_message)
    meta = _lookup_column_in_profile(referenced, safe_profile)

    segments = [
        "[RETRY CONTEXT] 上一次代码执行因数据上下文错误失败。\n",
        f"错误信息: {error_message}\n",
    ]
    if meta:
        # Schema-level details of the referenced column, one per line.
        segments.extend(
            [
                f"相关列 '{referenced}' 的结构信息:\n",
                f" - 数据类型: {meta['dtype']}\n",
                f" - 唯一值数量: {meta['unique_count']}\n",
                f" - 空值率: {meta['null_rate']}\n",
                f" - 特征描述: {meta['description']}\n",
            ]
        )
    segments.append("请根据以上结构信息修正代码,不要假设具体的数据值。")
    return "".join(segments)
def _load_dataframe(file_path: str):
"""加载 DataFrame支持多种格式和编码"""
import os