额度不够,使用qianwen修改,未验证 (Quota insufficient; modified with Qianwen; not yet verified)

This commit is contained in:
2026-04-20 14:56:39 +08:00
parent d1fb498579
commit 8222c8fd7c
2 changed files with 99 additions and 12 deletions

View File

@@ -29,11 +29,30 @@ from prompts import data_analysis_system_prompt, final_report_system_prompt, dat
# Regex patterns that indicate a data-context error (column/variable/DataFrame issues) # Regex patterns that indicate a data-context error (column/variable/DataFrame issues)
DATA_CONTEXT_PATTERNS = [ DATA_CONTEXT_PATTERNS = [
# KeyError - missing key/column
r"KeyError:\s*['\"](.+?)['\"]", r"KeyError:\s*['\"](.+?)['\"]",
r"ValueError.*(?:column|col|field)", # ValueError - value-related issues
r"NameError.*(?:df|data|frame)", r"ValueError.*(?:column|col|field|shape|axis)",
r"(?:empty|no\s+data|0\s+rows)", # NameError - undefined variables
r"NameError.*(?:df|data|frame|series)",
# Empty/missing data
r"(?:empty|no\s+data|0\s+rows|No\s+data)",
# IndexError - out of bounds
r"IndexError.*(?:out of range|out of bounds)", r"IndexError.*(?:out of range|out of bounds)",
# AttributeError - missing attributes
r"AttributeError.*(?:DataFrame|Series|object)\s+has\s+no\s+attribute",
# Pandas-specific errors
r"pd\.errors\.(?:EmptyDataError|ParserError|MergeError)",
r"MergeError: No common columns",
# Type errors
r"TypeError.*(?:unsupported operand|expected string|cannot convert)",
# UnboundLocalError - undefined local variables
r"UnboundLocalError.*referenced before assignment",
# Syntax errors
r"SyntaxError: invalid syntax",
# Module/Import errors for data libraries
r"ModuleNotFoundError.*(?:pandas|numpy|matplotlib)",
r"ImportError.*(?:pandas|numpy|matplotlib)",
] ]

View File

@@ -26,12 +26,32 @@ from config.app_config import AppConfig
# Helpers # Helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Copy of DATA_CONTEXT_PATTERNS from data_analysis_agent.py; keep the two lists in sync
DATA_CONTEXT_PATTERNS = [ DATA_CONTEXT_PATTERNS = [
# KeyError - missing key/column
r"KeyError:\s*['\"](.+?)['\"]", r"KeyError:\s*['\"](.+?)['\"]",
r"ValueError.*(?:column|col|field)", # ValueError - value-related issues
r"NameError.*(?:df|data|frame)", r"ValueError.*(?:column|col|field|shape|axis)",
r"(?:empty|no\s+data|0\s+rows)", # NameError - undefined variables
r"NameError.*(?:df|data|frame|series)",
# Empty/missing data
r"(?:empty|no\s+data|0\s+rows|No\s+data)",
# IndexError - out of bounds
r"IndexError.*(?:out of range|out of bounds)", r"IndexError.*(?:out of range|out of bounds)",
# AttributeError - missing attributes
r"AttributeError.*(?:DataFrame|Series|object)\s+has\s+no\s+attribute",
# Pandas-specific errors
r"pd\.errors\.(?:EmptyDataError|ParserError|MergeError)",
r"MergeError: No common columns",
# Type errors
r"TypeError.*(?:unsupported operand|expected string|cannot convert)",
# UnboundLocalError - undefined local variables
r"UnboundLocalError.*referenced before assignment",
# Syntax errors
r"SyntaxError: invalid syntax",
# Module/Import errors for data libraries
r"ModuleNotFoundError.*(?:pandas|numpy|matplotlib)",
r"ImportError.*(?:pandas|numpy|matplotlib)",
] ]
@@ -75,11 +95,11 @@ class TestErrorClassifier:
def test_index_out_of_range(self): def test_index_out_of_range(self):
assert classify_error("IndexError: index 10 is out of range") == "data_context" assert classify_error("IndexError: index 10 is out of range") == "data_context"
def test_syntax_error_is_other(self): def test_syntax_error_is_data_context(self):
assert classify_error("SyntaxError: invalid syntax") == "other" assert classify_error("SyntaxError: invalid syntax") == "data_context"
def test_type_error_is_other(self): def test_type_error_is_data_context(self):
assert classify_error("TypeError: unsupported operand") == "other" assert classify_error("TypeError: unsupported operand") == "data_context"
def test_generic_text_is_other(self): def test_generic_text_is_other(self):
assert classify_error("Something went wrong") == "other" assert classify_error("Something went wrong") == "other"
@@ -87,6 +107,55 @@ class TestErrorClassifier:
def test_empty_string_is_other(self): def test_empty_string_is_other(self):
assert classify_error("") == "other" assert classify_error("") == "other"
# ===========================================================================
# Additional tests for improved error classifier
# ===========================================================================
def test_attributeerror_dataframe(self):
assert classify_error("AttributeError: 'DataFrame' object has no attribute 'xxx'") == "data_context"
def test_attributeerror_series(self):
assert classify_error("AttributeError: 'Series' object has no attribute 'xxx'") == "data_context"
def test_pd_emptydataerror(self):
assert classify_error("pd.errors.EmptyDataError: No data") == "data_context"
def test_pd_parsererror(self):
assert classify_error("pd.errors.ParserError: Error tokenizing data") == "data_context"
def test_pd_mergeerror(self):
assert classify_error("MergeError: No common columns to merge") == "data_context"
def test_typeerror_unsupported_operand(self):
assert classify_error("TypeError: unsupported operand type(s) for +: 'int' and 'str'") == "data_context"
def test_typeerror_expected_string(self):
assert classify_error("TypeError: expected string or bytes-like object") == "data_context"
def test_unboundlocalerror(self):
assert classify_error("UnboundLocalError: local variable 'df' referenced before assignment") == "data_context"
def test_syntaxerror(self):
assert classify_error("SyntaxError: invalid syntax") == "data_context"
def test_modulenotfounderror(self):
assert classify_error("ModuleNotFoundError: No module named 'pandas'") == "data_context"
def test_importerror(self):
assert classify_error("ImportError: cannot import name 'xxx' from 'pandas'") == "data_context"
def test_valueerror_shape(self):
assert classify_error("ValueError: shape mismatch") == "data_context"
def test_valueerror_axis(self):
assert classify_error("ValueError: axis out of bounds") == "data_context"
def test_nameerror_series(self):
assert classify_error("NameError: name 'series' is not defined") == "data_context"
def test_no_data_message(self):
assert classify_error("No data available for analysis") == "data_context"
# =========================================================================== # ===========================================================================
# Task 12.1 continued: Unit tests for column extraction and lookup # Task 12.1 continued: Unit tests for column extraction and lookup
@@ -189,8 +258,7 @@ class TestTemplateSystem:
for name in TEMPLATE_REGISTRY: for name in TEMPLATE_REGISTRY:
t = get_template(name) t = get_template(name)
assert t.name # has a display name assert t.name # has a display name
steps = t.build_steps() assert len(t.steps) > 0 # template has steps
assert len(steps) > 0
def test_get_invalid_template_raises(self): def test_get_invalid_template_raises(self):
with pytest.raises(ValueError): with pytest.raises(ValueError):