大更新,架构调整,数据分析能力提升,
This commit is contained in:
@@ -154,6 +154,82 @@ def sanitize_execution_feedback(feedback: str, max_lines: int = 30) -> str:
|
||||
return "\n".join(safe_lines)
|
||||
|
||||
|
||||
def _extract_column_from_error(error_message: str) -> Optional[str]:
|
||||
"""Extract column name from error message patterns like KeyError: 'col_name'.
|
||||
|
||||
Supports:
|
||||
- KeyError: 'column_name' or KeyError: "column_name"
|
||||
- column 'column_name' or column "column_name" (case-insensitive)
|
||||
|
||||
Returns:
|
||||
The extracted column name, or None if no column reference is found.
|
||||
"""
|
||||
match = re.search(r"KeyError:\s*['\"](.+?)['\"]", error_message)
|
||||
if match:
|
||||
return match.group(1)
|
||||
match = re.search(r"column\s+['\"](.+?)['\"]", error_message, re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
|
||||
def _lookup_column_in_profile(column_name: Optional[str], safe_profile: str) -> Optional[dict]:
|
||||
"""Look up column metadata in the safe profile markdown table.
|
||||
|
||||
Parses the markdown table rows produced by build_safe_profile() and returns
|
||||
a dict with keys: dtype, null_rate, unique_count, description.
|
||||
|
||||
Args:
|
||||
column_name: The column name to look up (may be None).
|
||||
safe_profile: The safe profile markdown string.
|
||||
|
||||
Returns:
|
||||
A dict of column metadata, or None if not found.
|
||||
"""
|
||||
if not column_name:
|
||||
return None
|
||||
for line in safe_profile.split("\n"):
|
||||
if line.startswith("|") and column_name in line:
|
||||
parts = [p.strip() for p in line.split("|") if p.strip()]
|
||||
if len(parts) >= 5 and parts[0] == column_name:
|
||||
return {
|
||||
"dtype": parts[1],
|
||||
"null_rate": parts[2],
|
||||
"unique_count": parts[3],
|
||||
"description": parts[4],
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def generate_enriched_hint(error_message: str, safe_profile: str) -> str:
    """Build a retry hint for a data-context error from the safe profile.

    The column referenced by the error is extracted with
    ``_extract_column_from_error`` and looked up in the safe profile with
    ``_lookup_column_in_profile``. When metadata is found, its dtype, unique
    count, null rate and description are listed in the hint.

    Args:
        error_message: The error message from code execution.
        safe_profile: The safe profile markdown string.

    Returns:
        The hint text: a retry-context header, the error message, the column
        metadata section (only when the column was found in the profile), and
        a closing instruction.
    """
    referenced = _extract_column_from_error(error_message)
    meta = _lookup_column_in_profile(referenced, safe_profile)

    sections = [
        "[RETRY CONTEXT] 上一次代码执行因数据上下文错误失败。\n",
        f"错误信息: {error_message}\n",
    ]
    if meta:
        sections.extend(
            [
                f"相关列 '{referenced}' 的结构信息:\n",
                f" - 数据类型: {meta['dtype']}\n",
                f" - 唯一值数量: {meta['unique_count']}\n",
                f" - 空值率: {meta['null_rate']}\n",
                f" - 特征描述: {meta['description']}\n",
            ]
        )
    # NOTE(review): the closing instruction is assumed to be appended
    # whether or not column metadata was found — confirm against the
    # original indentation.
    sections.append("请根据以上结构信息修正代码,不要假设具体的数据值。")
    return "".join(sections)
|
||||
|
||||
|
||||
def _load_dataframe(file_path: str):
|
||||
"""加载 DataFrame,支持多种格式和编码"""
|
||||
import os
|
||||
|
||||
Reference in New Issue
Block a user