# -*- coding: utf-8 -*-
"""
Property-based tests for agent-robustness-optimization features.

Uses hypothesis with reduced examples (max_examples=20) for fast execution.

Run: python -m pytest tests/test_properties.py -v
"""

import contextlib
import os
import sys
import re

# Ensure project root is on path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import pytest
from hypothesis import given, settings, assume
from hypothesis import strategies as st

from utils.data_privacy import (
    _extract_column_from_error,
    _lookup_column_in_profile,
    generate_enriched_hint,
)
from utils.analysis_templates import get_template, list_templates, TEMPLATE_REGISTRY


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Regexes that mark an error message as being caused by missing/incorrect
# knowledge of the data (as opposed to a plain programming error).
DATA_CONTEXT_PATTERNS = [
    r"KeyError:\s*['\"](.+?)['\"]",
    r"ValueError.*(?:column|col|field)",
    r"NameError.*(?:df|data|frame)",
    r"(?:empty|no\s+data|0\s+rows)",
    r"IndexError.*(?:out of range|out of bounds)",
]


def classify_error(error_message: str) -> str:
    """Mirror of DataAnalysisAgent._classify_error for testing without IPython.

    Returns ``"data_context"`` when any pattern in ``DATA_CONTEXT_PATTERNS``
    matches ``error_message`` (case-insensitively), otherwise ``"other"``.
    """
    for pattern in DATA_CONTEXT_PATTERNS:
        if re.search(pattern, error_message, re.IGNORECASE):
            return "data_context"
    return "other"


@contextlib.contextmanager
def _temporary_env(name: str, value: str):
    """Set environment variable ``name`` to ``value`` inside a ``with`` block.

    On exit the previous state is restored: the old value is written back if
    the variable existed before, otherwise the variable is removed. This keeps
    a test from clobbering a value that was configured outside the test run.
    """
    # Environment values are always strings, so None unambiguously means
    # "the variable was not set before".
    previous = os.environ.get(name)
    os.environ[name] = value
    try:
        yield
    finally:
        if previous is None:
            # pop() instead of del: tolerate the variable having been removed
            # by the code under test.
            os.environ.pop(name, None)
        else:
            os.environ[name] = previous


# NOTE(review): the line breaks inside this fixture were reconstructed from a
# whitespace-collapsed copy of the file — confirm the blank-line layout against
# what build_safe_profile actually emits.
SAMPLE_SAFE_PROFILE = """# 数据结构概览 (Schema Profile)

## 文件: test.csv

- **维度**: 100 行 x 3 列
- **列名**: `车型, 模块, 问题类型`

### 列结构:

| 列名 | 数据类型 | 空值率 | 唯一值数 | 特征描述 |
|------|---------|--------|---------|----------|
| 车型 | object | 0.0% | 5 | 低基数分类(5类) |
| 模块 | object | 2.0% | 12 | 中基数分类(12类) |
| 问题类型 | object | 0.0% | 8 | 低基数分类(8类) |
"""


# ===========================================================================
# Property 1: Error Classification Correctness (Task 11.1)
# ===========================================================================

# Strategy: generate error messages that contain data-context patterns
data_context_error_st = st.one_of(
    st.from_regex(r"KeyError: '[a-zA-Z_]+'", fullmatch=True),
    st.from_regex(r'KeyError: "[a-zA-Z_]+"', fullmatch=True),
    st.just("ValueError: column 'x' not found"),
    st.just("NameError: name 'df' is not defined"),
    st.just("empty DataFrame"),
    st.just("0 rows returned"),
    st.just("IndexError: index 5 is out of range"),
)

# Strategy: fixed non-data errors plus arbitrary text that matches none of the
# data-context patterns.
non_data_error_st = st.one_of(
    st.just("SyntaxError: invalid syntax"),
    st.just("TypeError: unsupported operand"),
    st.just("ZeroDivisionError: division by zero"),
    st.just("ImportError: No module named 'foo'"),
    st.text(min_size=1, max_size=50).filter(
        lambda s: not any(re.search(p, s, re.IGNORECASE) for p in DATA_CONTEXT_PATTERNS)
    ),
)


@settings(max_examples=20)
@given(err=data_context_error_st)
def test_prop1_data_context_errors_classified(err):
    """Data-context error messages must be classified as 'data_context'."""
    assert classify_error(err) == "data_context"


@settings(max_examples=20)
@given(err=non_data_error_st)
def test_prop1_non_data_errors_classified(err):
    """Non-data error messages must be classified as 'other'."""
    assert classify_error(err) == "other"


# ===========================================================================
# Property 3: Enriched Hint Contains Column Metadata Without Real Data (11.2)
# ===========================================================================

# Column names present in SAMPLE_SAFE_PROFILE.
known_columns = ["车型", "模块", "问题类型"]
column_st = st.sampled_from(known_columns)


@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_contains_column_meta(col):
    """Enriched hint for a known column must contain its metadata."""
    error_msg = f"KeyError: '{col}'"
    hint = generate_enriched_hint(error_msg, SAMPLE_SAFE_PROFILE)
    assert col in hint
    assert "数据类型" in hint
    assert "唯一值数量" in hint
    assert "空值率" in hint
    assert "特征描述" in hint


@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_no_real_data(col):
    """Enriched hint must NOT contain real data values (min/max/mean/sample rows)."""
    error_msg = f"KeyError: '{col}'"
    hint = generate_enriched_hint(error_msg, SAMPLE_SAFE_PROFILE)
    # Should not contain statistical values or sample data
    for forbidden in ["Min=", "Max=", "Mean=", "TOP 5 高频值"]:
        assert forbidden not in hint


# ===========================================================================
# Property 4: Env Var Config Override (Task 11.3)
# ===========================================================================

@settings(max_examples=10)
@given(val=st.integers(min_value=1, max_value=100))
def test_prop4_env_override_max_data_context_retries(val):
    """APP_MAX_DATA_CONTEXT_RETRIES env var must override config."""
    from config.app_config import AppConfig

    # Restore (not just delete) the variable afterwards so a value set by the
    # surrounding environment survives the test.
    with _temporary_env("APP_MAX_DATA_CONTEXT_RETRIES", str(val)):
        config = AppConfig.from_env()
        assert config.max_data_context_retries == val


# ===========================================================================
# Property 5: Sliding Window Trimming Invariants (Task 11.4)
# ===========================================================================

def make_history(n_pairs: int, first_msg: str = "initial requirement"):
    """Build a fake conversation history with n_pairs of user+assistant messages.

    The result starts with one user message holding ``first_msg`` and is
    followed by ``n_pairs`` (assistant, user-feedback) message pairs, so its
    length is ``2 * n_pairs + 1``.
    """
    history = [{"role": "user", "content": first_msg}]
    for i in range(n_pairs):
        history.append({"role": "assistant", "content": f'action: "generate_code"\ncode: | print({i})'})
        history.append({"role": "user", "content": f"代码执行反馈:\n成功 round {i}"})
    return history


@settings(max_examples=20)
@given(
    n_pairs=st.integers(min_value=1, max_value=30),
    window=st.integers(min_value=1, max_value=10),
)
def test_prop5_trimming_preserves_first_message(n_pairs, window):
    """After trimming, the first user message is always at index 0."""
    history = make_history(n_pairs, first_msg="ORIGINAL_REQ")
    max_messages = window * 2

    if len(history) <= max_messages:
        return  # no trimming needed, invariant trivially holds

    first_message = history[0]

    # Skip an existing summary message (if any) so it is not counted as part
    # of the sliding window.
    start_idx = 1
    has_summary = (
        len(history) > 1
        and history[1]["role"] == "user"
        and history[1]["content"].startswith("[分析摘要]")
    )
    if has_summary:
        start_idx = 2

    messages_to_consider = history[start_idx:]
    messages_to_trim = messages_to_consider[:-max_messages]
    messages_to_keep = messages_to_consider[-max_messages:]

    if not messages_to_trim:
        return

    # Rebuild: original requirement, one summary placeholder, then the window.
    new_history = [first_message]
    new_history.append({"role": "user", "content": "[分析摘要] summary"})
    new_history.extend(messages_to_keep)

    assert new_history[0]["content"] == "ORIGINAL_REQ"
    assert len(new_history) <= max_messages + 2  # first + summary + window


# ===========================================================================
# Property 6: Trimming Summary Content (Task 11.5)
# ===========================================================================

@settings(max_examples=20)
@given(n_pairs=st.integers(min_value=2, max_value=15))
def test_prop6_summary_excludes_code_blocks(n_pairs):
    """Compressed summary must not contain code blocks or raw output."""
    history = make_history(n_pairs)

    # Simulate _compress_trimmed_messages logic
    summary_parts = ["[分析摘要] 以下是之前分析轮次的概要:"]
    round_num = 0
    for msg in history[1:]:  # skip first
        content = msg["content"]
        if msg["role"] == "assistant":
            round_num += 1
            action = "generate_code"
            if "collect_figures" in content:
                action = "collect_figures"
            summary_parts.append(f"- 轮次{round_num}: 动作={action}")
        elif msg["role"] == "user" and "代码执行反馈" in content:
            success = "失败" if "[ERROR]" in content or "执行错误" in content else "成功"
            # Attach the execution result to the round line it belongs to.
            if summary_parts and summary_parts[-1].startswith("- 轮次"):
                summary_parts[-1] += f", 执行结果={success}"

    summary = "\n".join(summary_parts)
    assert "```" not in summary
    assert "print(" not in summary
    assert "[分析摘要]" in summary


# ===========================================================================
# Property 7: Template Prompt Integration (Task 11.6)
# ===========================================================================

valid_template_names = list(TEMPLATE_REGISTRY.keys())


@settings(max_examples=len(valid_template_names))
@given(name=st.sampled_from(valid_template_names))
def test_prop7_template_prompt_prepended(name):
    """For any valid template, get_full_prompt() output must be non-empty."""
    template = get_template(name)
    prompt = template.get_full_prompt()
    assert len(prompt) > 0
    assert template.display_name in prompt


# ===========================================================================
# Property 8: Invalid Template Name Raises Error (Task 11.7)
# ===========================================================================

@settings(max_examples=20)
@given(name=st.text(min_size=1, max_size=30).filter(lambda s: s not in TEMPLATE_REGISTRY))
def test_prop8_invalid_template_raises_error(name):
    """Invalid template names must raise ValueError listing available templates."""
    with pytest.raises(ValueError) as exc_info:
        get_template(name)
    error_msg = str(exc_info.value)
    # Must list available template names
    for valid_name in TEMPLATE_REGISTRY:
        assert valid_name in error_msg


# ===========================================================================
# Property 11: Parallel Profile Merge With Error Resilience (Task 11.8)
# ===========================================================================

def test_prop11_parallel_profile_error_resilience():
    """Parallel profiling with mix of valid/invalid files includes all entries."""
    from utils.data_privacy import build_safe_profile, build_local_profile

    # NOTE: relative path, resolved against the current working directory.
    valid_file = "uploads/data_simple_200.csv"
    invalid_file = "/nonexistent/fake_file.csv"

    # Test build_safe_profile handles missing files gracefully
    safe = build_safe_profile([valid_file, invalid_file])
    assert "fake_file.csv" in safe  # error entry present
    if os.path.exists(valid_file):
        assert "data_simple_200.csv" in safe  # valid entry present