Files
iov_data_analysis_agent/tests/test_properties.py

286 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
Property-based tests for agent-robustness-optimization features.
Uses hypothesis with reduced examples (max_examples=20) for fast execution.
Run: python -m pytest tests/test_properties.py -v
"""
import os
import sys
import re
# Ensure project root is on path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pytest
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from utils.data_privacy import (
_extract_column_from_error,
_lookup_column_in_profile,
generate_enriched_hint,
)
from utils.analysis_templates import get_template, list_templates, TEMPLATE_REGISTRY
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Regular expressions that mark an error message as a "data_context" error.
# classify_error() searches each one case-insensitively anywhere in the
# message; the same list is reused to filter the non-data strategy below.
DATA_CONTEXT_PATTERNS = [
    # "KeyError:" followed by a quoted (single or double) key name.
    r"KeyError:\s*['\"](.+?)['\"]",
    # A ValueError whose text mentions column / col / field.
    r"ValueError.*(?:column|col|field)",
    # A NameError whose text mentions df / data / frame.
    r"NameError.*(?:df|data|frame)",
    # Wording for an empty result: "empty", "no data", or "0 rows".
    r"(?:empty|no\s+data|0\s+rows)",
    # An IndexError reporting "out of range" or "out of bounds".
    r"IndexError.*(?:out of range|out of bounds)",
]
def classify_error(error_message: str) -> str:
    """Test-side mirror of DataAnalysisAgent._classify_error (no IPython needed).

    Returns "data_context" when at least one entry of DATA_CONTEXT_PATTERNS
    is found, case-insensitively, anywhere in ``error_message``; returns
    "other" when none of them match.
    """
    is_data_context = any(
        re.search(candidate, error_message, re.IGNORECASE) is not None
        for candidate in DATA_CONTEXT_PATTERNS
    )
    return "data_context" if is_data_context else "other"
# Markdown fixture used as the "safe profile" argument of
# generate_enriched_hint in the Property 3 tests. It describes one file
# (test.csv, 100 rows x 3 columns) and, per column, only the dtype, null
# rate, unique-value count and a short description.
SAMPLE_SAFE_PROFILE = """# 数据结构概览 (Schema Profile)
## 文件: test.csv
- **维度**: 100 行 x 3 列
- **列名**: `车型, 模块, 问题类型`
### 列结构:
| 列名 | 数据类型 | 空值率 | 唯一值数 | 特征描述 |
|------|---------|--------|---------|----------|
| 车型 | object | 0.0% | 5 | 低基数分类5类 |
| 模块 | object | 2.0% | 12 | 中基数分类12类 |
| 问题类型 | object | 0.0% | 8 | 低基数分类8类 |
"""
# ===========================================================================
# Property 1: Error Classification Correctness (Task 11.1)
# ===========================================================================
# Strategy: generate error messages that contain data-context patterns
# Two generated KeyError shapes (single- and double-quoted key) followed by a
# handful of fixed messages; the Property 1 test expects every one of them to
# be classified as "data_context".
data_context_error_st = st.one_of(
    *(
        st.from_regex(key_error_regex, fullmatch=True)
        for key_error_regex in (
            r"KeyError: '[a-zA-Z_]+'",
            r'KeyError: "[a-zA-Z_]+"',
        )
    ),
    *(
        st.just(fixed_message)
        for fixed_message in (
            "ValueError: column 'x' not found",
            "NameError: name 'df' is not defined",
            "empty DataFrame",
            "0 rows returned",
            "IndexError: index 5 is out of range",
        )
    ),
)
# Fixed non-data error messages plus arbitrary short text from which anything
# matching a DATA_CONTEXT_PATTERNS entry has been filtered out.
non_data_error_st = st.one_of(
    *(
        st.just(unrelated_message)
        for unrelated_message in (
            "SyntaxError: invalid syntax",
            "TypeError: unsupported operand",
            "ZeroDivisionError: division by zero",
            "ImportError: No module named 'foo'",
        )
    ),
    st.text(min_size=1, max_size=50).filter(
        lambda text: all(
            re.search(pattern, text, re.IGNORECASE) is None
            for pattern in DATA_CONTEXT_PATTERNS
        )
    ),
)
@settings(max_examples=20)
@given(err=data_context_error_st)
def test_prop1_data_context_errors_classified(err):
    """Every generated data-context message is labelled 'data_context'."""
    category = classify_error(err)
    assert category == "data_context"
@settings(max_examples=20)
@given(err=non_data_error_st)
def test_prop1_non_data_errors_classified(err):
    """Every generated non-data message is labelled 'other'."""
    category = classify_error(err)
    assert category == "other"
# ===========================================================================
# Property 3: Enriched Hint Contains Column Metadata Without Real Data (11.2)
# ===========================================================================
# Column names that appear in the SAMPLE_SAFE_PROFILE table; the Property 3
# tests draw one of them per example.
known_columns = ["车型", "模块", "问题类型"]
column_st = st.sampled_from(known_columns)
@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_contains_column_meta(col):
    """The hint built for a known column names it and carries its metadata labels."""
    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)
    # The column itself first, then each metadata label the hint must expose.
    expected_fragments = (col, "数据类型", "唯一值数量", "空值率", "特征描述")
    for fragment in expected_fragments:
        assert fragment in hint
@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_no_real_data(col):
    """The hint must not expose statistics or frequent-value markers."""
    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)
    # Markers that would indicate statistical values or sampled data leaked in.
    forbidden_markers = ("Min=", "Max=", "Mean=", "TOP 5 高频值")
    leaked = [marker for marker in forbidden_markers if marker in hint]
    assert not leaked
# ===========================================================================
# Property 4: Env Var Config Override (Task 11.3)
# ===========================================================================
@settings(max_examples=10)
@given(val=st.integers(min_value=1, max_value=100))
def test_prop4_env_override_max_data_context_retries(val):
    """APP_MAX_DATA_CONTEXT_RETRIES env var must override config.

    The variable is set only while one example runs. Whatever value it held
    beforehand (or its absence) is put back afterwards, so the test neither
    leaks its own value nor destroys one configured by the caller.
    """
    from config.app_config import AppConfig

    env_key = "APP_MAX_DATA_CONTEXT_RETRIES"
    previous = os.environ.get(env_key)
    os.environ[env_key] = str(val)
    try:
        config = AppConfig.from_env()
        assert config.max_data_context_retries == val
    finally:
        if previous is None:
            # pop() with a default cannot raise, so a failure inside the try
            # block is never masked by a KeyError raised during cleanup.
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
# ===========================================================================
# Property 5: Sliding Window Trimming Invariants (Task 11.4)
# ===========================================================================
def make_history(n_pairs: int, first_msg: str = "initial requirement"):
    """Return a fake chat history: one opening user message plus n_pairs rounds.

    Each round contributes an assistant "generate_code" message followed by a
    user message reporting a successful execution for that round index.
    """
    turns = [{"role": "user", "content": first_msg}]
    for round_no in range(n_pairs):
        turns.extend(
            (
                {"role": "assistant", "content": f'action: "generate_code"\ncode: | print({round_no})'},
                {"role": "user", "content": f"代码执行反馈:\n成功 round {round_no}"},
            )
        )
    return turns
@settings(max_examples=20)
@given(
    n_pairs=st.integers(min_value=1, max_value=30),
    window=st.integers(min_value=1, max_value=10),
)
def test_prop5_trimming_preserves_first_message(n_pairs, window):
    """Trimming keeps the original request at index 0 and bounds the length.

    The trimming steps are re-enacted inline on a synthetic history. Examples
    in which nothing would be trimmed make no assertion.
    """
    conversation = make_history(n_pairs, first_msg="ORIGINAL_REQ")
    keep_limit = 2 * window
    if len(conversation) > keep_limit:
        opening = conversation[0]
        # A summary left by an earlier trim would sit right after the opening
        # message and must not be counted among the trimmable messages.
        summary_present = (
            len(conversation) > 1
            and conversation[1]["role"] == "user"
            and conversation[1]["content"].startswith("[分析摘要]")
        )
        candidates = conversation[2 if summary_present else 1:]
        dropped, retained = candidates[:-keep_limit], candidates[-keep_limit:]
        if dropped:
            rebuilt = [
                opening,
                {"role": "user", "content": "[分析摘要] summary"},
                *retained,
            ]
            assert rebuilt[0]["content"] == "ORIGINAL_REQ"
            # opening message + summary + at most keep_limit retained messages
            assert len(rebuilt) <= keep_limit + 2
# ===========================================================================
# Property 6: Trimming Summary Content (Task 11.5)
# ===========================================================================
@settings(max_examples=20)
@given(n_pairs=st.integers(min_value=2, max_value=15))
def test_prop6_summary_excludes_code_blocks(n_pairs):
    """The compressed summary carries round/action/result only, never code."""
    # Inline re-enactment of the _compress_trimmed_messages logic.
    lines = ["[分析摘要] 以下是之前分析轮次的概要:"]
    rounds_seen = 0
    for entry in make_history(n_pairs)[1:]:  # the opening request is skipped
        role, text = entry["role"], entry["content"]
        if role == "assistant":
            rounds_seen += 1
            action = "collect_figures" if "collect_figures" in text else "generate_code"
            lines.append(f"- 轮次{rounds_seen}: 动作={action}")
            continue
        if role != "user" or "代码执行反馈" not in text:
            continue
        failed = "[ERROR]" in text or "执行错误" in text
        outcome = "失败" if failed else "成功"
        # Attach the result to the round line it belongs to, if there is one.
        if lines and lines[-1].startswith("- 轮次"):
            lines[-1] = lines[-1] + f", 执行结果={outcome}"
    digest = "\n".join(lines)
    assert "```" not in digest
    assert "print(" not in digest
    assert "[分析摘要]" in digest
# ===========================================================================
# Property 7: Template Prompt Integration (Task 11.6)
# ===========================================================================
# Names of every registered template, in registry order.
valid_template_names = [*TEMPLATE_REGISTRY.keys()]


@settings(max_examples=len(valid_template_names))
@given(name=st.sampled_from(valid_template_names))
def test_prop7_template_prompt_prepended(name):
    """A valid template yields a non-empty full prompt that includes its display name."""
    template = get_template(name)
    rendered = template.get_full_prompt()
    assert len(rendered) > 0
    assert template.display_name in rendered
# ===========================================================================
# Property 8: Invalid Template Name Raises Error (Task 11.7)
# ===========================================================================
@settings(max_examples=20)
@given(name=st.text(min_size=1, max_size=30).filter(lambda s: s not in TEMPLATE_REGISTRY))
def test_prop8_invalid_template_raises_error(name):
    """An unknown template name raises ValueError whose text names every valid template."""
    with pytest.raises(ValueError) as raised:
        get_template(name)
    message = str(raised.value)
    # Any registered name absent from the error text is a failure.
    missing = [known for known in TEMPLATE_REGISTRY if known not in message]
    assert not missing
# ===========================================================================
# Property 11: Parallel Profile Merge With Error Resilience (Task 11.8)
# ===========================================================================
def test_prop11_parallel_profile_error_resilience():
    """Profiling a mix of valid and invalid files includes all entries.

    The missing file must still appear in the profile text. The valid file is
    checked only when the fixture is actually present on disk, so a checkout
    without the uploads directory does not fail here.
    """
    from utils.data_privacy import build_safe_profile

    # Anchor the fixture at the project root (parent of this tests directory)
    # so the result does not depend on the directory pytest is started from.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    valid_file = os.path.join(project_root, "uploads", "data_simple_200.csv")
    invalid_file = "/nonexistent/fake_file.csv"

    # build_safe_profile is expected to handle the missing file gracefully.
    safe = build_safe_profile([valid_file, invalid_file])
    assert "fake_file.csv" in safe  # error entry present
    if os.path.exists(valid_file):
        assert "data_simple_200.csv" in safe  # valid entry present