From 8222c8fd7c8c727a7b818b727662f92b0a1e65d5 Mon Sep 17 00:00:00 2001 From: Jeason <1710884619@qq.com> Date: Mon, 20 Apr 2026 14:56:39 +0800 Subject: [PATCH] =?UTF-8?q?=E9=A2=9D=E5=BA=A6=E4=B8=8D=E5=A4=9F=EF=BC=8C?= =?UTF-8?q?=E4=BD=BF=E7=94=A8qianwen=E4=BF=AE=E6=94=B9=EF=BC=8C=E6=9C=AA?= =?UTF-8?q?=E9=AA=8C=E8=AF=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_analysis_agent.py | 25 ++++++++++-- tests/test_unit.py | 86 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 99 insertions(+), 12 deletions(-) diff --git a/data_analysis_agent.py b/data_analysis_agent.py index 31b7460..63255fb 100644 --- a/data_analysis_agent.py +++ b/data_analysis_agent.py @@ -29,11 +29,30 @@ from prompts import data_analysis_system_prompt, final_report_system_prompt, dat # Regex patterns that indicate a data-context error (column/variable/DataFrame issues) DATA_CONTEXT_PATTERNS = [ + # KeyError - missing key/column r"KeyError:\s*['\"](.+?)['\"]", - r"ValueError.*(?:column|col|field)", - r"NameError.*(?:df|data|frame)", - r"(?:empty|no\s+data|0\s+rows)", + # ValueError - value-related issues + r"ValueError.*(?:column|col|field|shape|axis)", + # NameError - undefined variables + r"NameError.*(?:df|data|frame|series)", + # Empty/missing data + r"(?:empty|no\s+data|0\s+rows|No\s+data)", + # IndexError - out of bounds r"IndexError.*(?:out of range|out of bounds)", + # AttributeError - missing attributes + r"AttributeError.*(?:DataFrame|Series|object)\s+has\s+no\s+attribute", + # Pandas-specific errors + r"pd\.errors\.(?:EmptyDataError|ParserError|MergeError)", + r"MergeError: No common columns", + # Type errors + r"TypeError.*(?:unsupported operand|expected string|cannot convert)", + # UnboundLocalError - undefined local variables + r"UnboundLocalError.*referenced before assignment", + # Syntax errors + r"SyntaxError: invalid syntax", + # Module/Import errors for data libraries + r"ModuleNotFoundError.*(?:pandas|numpy|matplotlib)", + r"ImportError.*(?:pandas|numpy|matplotlib)", ] diff --git a/tests/test_unit.py b/tests/test_unit.py index 69da91a..5c0b25a 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -26,12 +26,32 @@ from config.app_config import AppConfig # Helpers # --------------------------------------------------------------------------- +# Updated patterns matching data_analysis_agent.py DATA_CONTEXT_PATTERNS = [ + # KeyError - missing key/column r"KeyError:\s*['\"](.+?)['\"]", - r"ValueError.*(?:column|col|field)", - r"NameError.*(?:df|data|frame)", - r"(?:empty|no\s+data|0\s+rows)", + # ValueError - value-related issues + r"ValueError.*(?:column|col|field|shape|axis)", + # NameError - undefined variables + r"NameError.*(?:df|data|frame|series)", + # Empty/missing data + r"(?:empty|no\s+data|0\s+rows|No\s+data)", + # IndexError - out of bounds r"IndexError.*(?:out of range|out of bounds)", + # AttributeError - missing attributes + r"AttributeError.*(?:DataFrame|Series|object)\s+has\s+no\s+attribute", + # Pandas-specific errors + r"pd\.errors\.(?:EmptyDataError|ParserError|MergeError)", + r"MergeError: No common columns", + # Type errors + r"TypeError.*(?:unsupported operand|expected string|cannot convert)", + # UnboundLocalError - undefined local variables + r"UnboundLocalError.*referenced before assignment", + # Syntax errors + r"SyntaxError: invalid syntax", + # Module/Import errors for data libraries + r"ModuleNotFoundError.*(?:pandas|numpy|matplotlib)", + r"ImportError.*(?:pandas|numpy|matplotlib)", ] @@ -75,11 +95,11 @@ class TestErrorClassifier: def test_index_out_of_range(self): assert classify_error("IndexError: index 10 is out of range") == "data_context" - def test_syntax_error_is_other(self): - assert classify_error("SyntaxError: invalid syntax") == "other" + def test_syntax_error_is_data_context(self): + assert classify_error("SyntaxError: invalid syntax") == "data_context" - def test_type_error_is_other(self): - assert classify_error("TypeError: unsupported operand") == "other" + def test_type_error_is_data_context(self): + assert classify_error("TypeError: unsupported operand") == "data_context" def test_generic_text_is_other(self): assert classify_error("Something went wrong") == "other" @@ -87,6 +107,55 @@ class TestErrorClassifier: def test_empty_string_is_other(self): assert classify_error("") == "other" + # =========================================================================== + # Additional tests for improved error classifier + # =========================================================================== + + def test_attributeerror_dataframe(self): + assert classify_error("AttributeError: 'DataFrame' object has no attribute 'xxx'") == "data_context" + + def test_attributeerror_series(self): + assert classify_error("AttributeError: 'Series' object has no attribute 'xxx'") == "data_context" + + def test_pd_emptydataerror(self): + assert classify_error("pd.errors.EmptyDataError: No data") == "data_context" + + def test_pd_parsererror(self): + assert classify_error("pd.errors.ParserError: Error tokenizing data") == "data_context" + + def test_pd_mergeerror(self): + assert classify_error("MergeError: No common columns to merge") == "data_context" + + def test_typeerror_unsupported_operand(self): + assert classify_error("TypeError: unsupported operand type(s) for +: 'int' and 'str'") == "data_context" + + def test_typeerror_expected_string(self): + assert classify_error("TypeError: expected string or bytes-like object") == "data_context" + + def test_unboundlocalerror(self): + assert classify_error("UnboundLocalError: local variable 'df' referenced before assignment") == "data_context" + + def test_syntaxerror(self): + assert classify_error("SyntaxError: invalid syntax") == "data_context" + + def test_modulenotfounderror(self): + assert classify_error("ModuleNotFoundError: No module named 'pandas'") == "data_context" + + def test_importerror(self): + assert classify_error("ImportError: cannot import name 'xxx' from 'pandas'") == "data_context" + + def test_valueerror_shape(self): + assert classify_error("ValueError: shape mismatch") == "data_context" + + def test_valueerror_axis(self): + assert classify_error("ValueError: axis out of bounds") == "data_context" + + def test_nameerror_series(self): + assert classify_error("NameError: name 'series' is not defined") == "data_context" + + def test_no_data_message(self): + assert classify_error("No data available for analysis") == "data_context" + # =========================================================================== # Task 12.1 continued: Unit tests for column extraction and lookup @@ -189,8 +258,7 @@ class TestTemplateSystem: for name in TEMPLATE_REGISTRY: t = get_template(name) assert t.name # has a display name - steps = t.build_steps() - assert len(steps) > 0 + assert len(t.steps) > 0 # template has steps def test_get_invalid_template_raises(self): with pytest.raises(ValueError):