大更新：架构调整，数据分析能力提升。

2026-04-19 21:30:08 +08:00
parent 9d01f004d4
commit 00bd48e7e7
26 changed files with 4375 additions and 252 deletions
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -0,0 +1,285 @@
+# -*- coding: utf-8 -*-
+"""
+Property-based tests for agent-robustness-optimization features.
+Uses hypothesis with reduced example counts (max_examples=20 or fewer for most tests) for fast execution.
+
+Run: python -m pytest tests/test_properties.py -v
+"""
+
+import os
+import sys
+import re
+
+# Ensure project root is on path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import pytest
+from hypothesis import given, settings, assume
+from hypothesis import strategies as st
+
+from utils.data_privacy import (
+    _extract_column_from_error,
+    _lookup_column_in_profile,
+    generate_enriched_hint,
+)
+from utils.analysis_templates import get_template, list_templates, TEMPLATE_REGISTRY
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+# Regular expressions that mark an error message as data-context related.
+# classify_error() searches each one case-insensitively and stops at the first hit.
+DATA_CONTEXT_PATTERNS = [
+    r"KeyError:\s*['\"](.+?)['\"]",  # KeyError followed by a quoted key
+    r"ValueError.*(?:column|col|field)",  # ValueError mentioning column / col / field
+    r"NameError.*(?:df|data|frame)",  # NameError mentioning df / data / frame
+    r"(?:empty|no\s+data|0\s+rows)",  # "empty", "no data" or "0 rows" anywhere in the text
+    r"IndexError.*(?:out of range|out of bounds)",  # IndexError reporting an out-of-range access
+]
+
+
+def classify_error(error_message: str) -> str:
+    """Label an error message as ``"data_context"`` or ``"other"``.
+
+    Intended as a stand-in for DataAnalysisAgent._classify_error so the
+    classification rule can be tested without IPython.
+
+    Args:
+        error_message: Raw error text to inspect.
+
+    Returns:
+        ``"data_context"`` when any entry of DATA_CONTEXT_PATTERNS is found in
+        the message (case-insensitive search), otherwise ``"other"``.
+    """
+    is_data_related = any(
+        re.search(candidate, error_message, re.IGNORECASE)
+        for candidate in DATA_CONTEXT_PATTERNS
+    )
+    return "data_context" if is_data_related else "other"
+
+
+# Markdown schema-profile fixture describing a single file (test.csv) with three
+# columns. The Property 3 tests pass it to generate_enriched_hint() as the profile
+# to look columns up in. The text is runtime data: keep it byte-for-byte stable.
+SAMPLE_SAFE_PROFILE = """# 数据结构概览 (Schema Profile)
+
+## 文件: test.csv
+
+- **维度**: 100 行 x 3 列
+- **列名**: `车型, 模块, 问题类型`
+
+### 列结构:
+
+| 列名 | 数据类型 | 空值率 | 唯一值数 | 特征描述 |
+|------|---------|--------|---------|----------|
+| 车型 | object | 0.0% | 5 | 低基数分类（5类） |
+| 模块 | object | 2.0% | 12 | 中基数分类（12类） |
+| 问题类型 | object | 0.0% | 8 | 低基数分类（8类） |
+"""
+
+
+# ===========================================================================
+# Property 1: Error Classification Correctness (Task 11.1)
+# ===========================================================================
+
+# Strategies for Property 1.
+# data_context_error_st: messages expected to hit one of DATA_CONTEXT_PATTERNS.
+_DATA_CONTEXT_LITERALS = (
+    "ValueError: column 'x' not found",
+    "NameError: name 'df' is not defined",
+    "empty DataFrame",
+    "0 rows returned",
+    "IndexError: index 5 is out of range",
+)
+
+data_context_error_st = st.one_of(
+    st.from_regex(r"KeyError: '[a-zA-Z_]+'", fullmatch=True),
+    st.from_regex(r'KeyError: "[a-zA-Z_]+"', fullmatch=True),
+    *(st.just(message) for message in _DATA_CONTEXT_LITERALS),
+)
+
+# non_data_error_st: fixed non-data errors plus arbitrary short text that is
+# filtered so that it matches none of DATA_CONTEXT_PATTERNS.
+_NON_DATA_LITERALS = (
+    "SyntaxError: invalid syntax",
+    "TypeError: unsupported operand",
+    "ZeroDivisionError: division by zero",
+    "ImportError: No module named 'foo'",
+)
+
+
+def _matches_no_data_pattern(text: str) -> bool:
+    """Return True when *text* triggers none of DATA_CONTEXT_PATTERNS."""
+    return not any(
+        re.search(candidate, text, re.IGNORECASE)
+        for candidate in DATA_CONTEXT_PATTERNS
+    )
+
+
+non_data_error_st = st.one_of(
+    *(st.just(message) for message in _NON_DATA_LITERALS),
+    st.text(min_size=1, max_size=50).filter(_matches_no_data_pattern),
+)
+
+
+@settings(max_examples=20)
+@given(err=data_context_error_st)
+def test_prop1_data_context_errors_classified(err):
+    """Every message drawn from data_context_error_st is labelled 'data_context'."""
+    category = classify_error(err)
+    assert category == "data_context"
+
+
+@settings(max_examples=20)
+@given(err=non_data_error_st)
+def test_prop1_non_data_errors_classified(err):
+    """Every message drawn from non_data_error_st is labelled 'other'."""
+    category = classify_error(err)
+    assert category == "other"
+
+
+# ===========================================================================
+# Property 3: Enriched Hint Contains Column Metadata Without Real Data (11.2)
+# ===========================================================================
+
+# The three column names listed in SAMPLE_SAFE_PROFILE.
+known_columns = ["车型", "模块", "问题类型"]
+# Strategy that draws one of the known column names per example.
+column_st = st.sampled_from(known_columns)
+
+
+@settings(max_examples=20)
+@given(col=column_st)
+def test_prop3_enriched_hint_contains_column_meta(col):
+    """The hint built for a KeyError on a known column names it and its metadata labels."""
+    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)
+    # The column itself first, then each metadata label, in the original check order.
+    expected_fragments = (col, "数据类型", "唯一值数量", "空值率", "特征描述")
+    for fragment in expected_fragments:
+        assert fragment in hint
+
+
+@settings(max_examples=20)
+@given(col=column_st)
+def test_prop3_enriched_hint_no_real_data(col):
+    """The hint must not carry statistic or sample-value markers (Min=/Max=/Mean=/top values)."""
+    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)
+    # Markers that would indicate statistical values or sampled data in the hint.
+    forbidden_markers = ("Min=", "Max=", "Mean=", "TOP 5 高频值")
+    leaked = [marker for marker in forbidden_markers if marker in hint]
+    assert not leaked
+
+
+# ===========================================================================
+# Property 4: Env Var Config Override (Task 11.3)
+# ===========================================================================
+
+@settings(max_examples=10)
+@given(val=st.integers(min_value=1, max_value=100))
+def test_prop4_env_override_max_data_context_retries(val):
+    """APP_MAX_DATA_CONTEXT_RETRIES env var must override config.
+
+    The variable is set for the duration of one example and then put back to
+    whatever it was before, so a value exported by the developer or CI is not
+    lost, and the cleanup cannot raise if the key has already been removed.
+    """
+    from config.app_config import AppConfig
+
+    env_key = "APP_MAX_DATA_CONTEXT_RETRIES"
+    previous = os.environ.get(env_key)  # None means the variable was not set
+    os.environ[env_key] = str(val)
+    try:
+        config = AppConfig.from_env()
+        assert config.max_data_context_retries == val
+    finally:
+        if previous is None:
+            os.environ.pop(env_key, None)
+        else:
+            os.environ[env_key] = previous
+
+
+# ===========================================================================
+# Property 5: Sliding Window Trimming Invariants (Task 11.4)
+# ===========================================================================
+
+def make_history(n_pairs: int, first_msg: str = "initial requirement"):
+    """Create a synthetic conversation history.
+
+    Args:
+        n_pairs: Number of assistant/user rounds to append after the opener.
+        first_msg: Content of the opening user message.
+
+    Returns:
+        A list of ``{"role", "content"}`` dicts: the opening user message,
+        then for each round an assistant "generate_code" message followed by a
+        user execution-feedback message.
+    """
+    rounds = (
+        message
+        for i in range(n_pairs)
+        for message in (
+            {"role": "assistant", "content": f'action: "generate_code"\ncode: |  print({i})'},
+            {"role": "user", "content": f"代码执行反馈:\n成功 round {i}"},
+        )
+    )
+    return [{"role": "user", "content": first_msg}, *rounds]
+
+
+@settings(max_examples=20)
+@given(
+    n_pairs=st.integers(min_value=1, max_value=30),
+    window=st.integers(min_value=1, max_value=10),
+)
+def test_prop5_trimming_preserves_first_message(n_pairs, window):
+    """After trimming, the first user message is always at index 0.
+
+    The sliding-window steps are re-implemented locally on a synthetic history
+    from make_history(); no production code is called. Examples in which nothing
+    would be trimmed are rejected with ``assume`` instead of returning early, so
+    they no longer count as passing examples without checking anything.
+    """
+    history = make_history(n_pairs, first_msg="ORIGINAL_REQ")
+    max_messages = window * 2
+
+    # Only histories longer than the window are interesting; this also
+    # guarantees len(history) >= 3, so history[1] below always exists.
+    assume(len(history) > max_messages)
+
+    first_message = history[0]
+    has_summary = (
+        history[1]["role"] == "user"
+        and history[1]["content"].startswith("[分析摘要]")
+    )
+    # Skip an existing summary message (if any) when selecting what to trim.
+    start_idx = 2 if has_summary else 1
+
+    messages_to_consider = history[start_idx:]
+    messages_to_trim = messages_to_consider[:-max_messages]
+    messages_to_keep = messages_to_consider[-max_messages:]
+
+    # Reject examples where the window already covers every candidate message.
+    assume(messages_to_trim)
+
+    new_history = [
+        first_message,
+        {"role": "user", "content": "[分析摘要] summary"},
+        *messages_to_keep,
+    ]
+
+    assert new_history[0]["content"] == "ORIGINAL_REQ"
+    assert new_history[1]["content"].startswith("[分析摘要]")
+    # The retained tail is exactly the most recent max_messages messages.
+    assert new_history[2:] == history[-max_messages:]
+    assert len(new_history) == max_messages + 2  # first + summary + window
+
+
+# ===========================================================================
+# Property 6: Trimming Summary Content (Task 11.5)
+# ===========================================================================
+
+@settings(max_examples=20)
+@given(n_pairs=st.integers(min_value=2, max_value=15))
+def test_prop6_summary_excludes_code_blocks(n_pairs):
+    """A per-round summary built from the history carries no code fences or raw code.
+
+    The summary is assembled locally (simulating _compress_trimmed_messages):
+    one line per assistant message, annotated with the outcome of the
+    execution-feedback message that follows it.
+    """
+    history = make_history(n_pairs)
+    lines = ["[分析摘要] 以下是之前分析轮次的概要:"]
+    rounds_seen = 0
+    for message in history[1:]:  # the opening requirement is not summarised
+        role, text = message["role"], message["content"]
+        if role == "assistant":
+            rounds_seen += 1
+            action = "collect_figures" if "collect_figures" in text else "generate_code"
+            lines.append(f"- 轮次{rounds_seen}: 动作={action}")
+            continue
+        if role != "user" or "代码执行反馈" not in text:
+            continue
+        failed = "[ERROR]" in text or "执行错误" in text
+        outcome = "失败" if failed else "成功"
+        # Attach the outcome only to a round line, never to the header line.
+        if lines and lines[-1].startswith("- 轮次"):
+            lines[-1] += f", 执行结果={outcome}"
+
+    summary = "\n".join(lines)
+    for banned in ("```", "print("):
+        assert banned not in summary
+    assert "[分析摘要]" in summary
+
+
+# ===========================================================================
+# Property 7: Template Prompt Integration (Task 11.6)
+# ===========================================================================
+
+# Names of every registered template; one hypothesis example is budgeted per name.
+valid_template_names = list(TEMPLATE_REGISTRY)
+
+
+@settings(max_examples=len(valid_template_names))
+@given(name=st.sampled_from(valid_template_names))
+def test_prop7_template_prompt_prepended(name):
+    """Each registered template yields a non-empty full prompt that mentions its own name."""
+    template = get_template(name)
+    full_prompt = template.get_full_prompt()
+    assert len(full_prompt) >= 1
+    assert template.name in full_prompt
+
+
+# ===========================================================================
+# Property 8: Invalid Template Name Raises Error (Task 11.7)
+# ===========================================================================
+
+@settings(max_examples=20)
+@given(name=st.text(min_size=1, max_size=30).filter(lambda s: s not in TEMPLATE_REGISTRY))
+def test_prop8_invalid_template_raises_error(name):
+    """An unregistered name makes get_template raise ValueError naming every valid template."""
+    with pytest.raises(ValueError) as exc_info:
+        get_template(name)
+    message = str(exc_info.value)
+    # Every registered template name has to appear in the error text.
+    missing = [known for known in TEMPLATE_REGISTRY if known not in message]
+    assert not missing
+
+
+# ===========================================================================
+# Property 11: Parallel Profile Merge With Error Resilience (Task 11.8)
+# ===========================================================================
+
+def test_prop11_parallel_profile_error_resilience():
+    """build_safe_profile keeps an entry for every input file, including unreadable ones.
+
+    Only build_safe_profile is exercised here. The sample CSV path is relative
+    to the current working directory, so the check for the valid file runs only
+    when that file is actually present.
+    """
+    # Imported locally; only the helper that is actually exercised is pulled in.
+    from utils.data_privacy import build_safe_profile
+
+    valid_file = "uploads/data_simple_200.csv"
+    invalid_file = "/nonexistent/fake_file.csv"
+
+    # A missing file must produce an error entry rather than abort the profile.
+    safe = build_safe_profile([valid_file, invalid_file])
+    assert "fake_file.csv" in safe  # error entry present
+    if os.path.exists(valid_file):
+        assert "data_simple_200.csv" in safe  # valid entry present