2026-04-19 21:30:08 +08:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
"""
|
|
|
|
|
|
Property-based tests for agent-robustness-optimization features.
|
|
|
|
|
|
Uses hypothesis with reduced examples (max_examples=20) for fast execution.
|
|
|
|
|
|
|
|
|
|
|
|
Run: python -m pytest tests/test_properties.py -v
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
# Ensure project root is on path
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
from hypothesis import given, settings, assume
|
|
|
|
|
|
from hypothesis import strategies as st
|
|
|
|
|
|
|
|
|
|
|
|
from utils.data_privacy import (
|
|
|
|
|
|
_extract_column_from_error,
|
|
|
|
|
|
_lookup_column_in_profile,
|
|
|
|
|
|
generate_enriched_hint,
|
|
|
|
|
|
)
|
|
|
|
|
|
from utils.analysis_templates import get_template, list_templates, TEMPLATE_REGISTRY
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Helpers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# Regular expressions (applied case-insensitively by classify_error) whose
# presence in an error message marks the error as a "data_context" error.
DATA_CONTEXT_PATTERNS = [
    r"KeyError:\s*['\"](.+?)['\"]",
    r"ValueError.*(?:column|col|field)",
    r"NameError.*(?:df|data|frame)",
    r"(?:empty|no\s+data|0\s+rows)",
    r"IndexError.*(?:out of range|out of bounds)",
]


def classify_error(error_message: str) -> str:
    """Label an error message as ``"data_context"`` or ``"other"``.

    Test-local stand-in mirroring DataAnalysisAgent._classify_error so the
    classification rule can be exercised without IPython.

    Args:
        error_message: Raw error text to inspect.

    Returns:
        ``"data_context"`` when any entry of DATA_CONTEXT_PATTERNS is found
        in the message (case-insensitive search), otherwise ``"other"``.
    """
    is_data_related = any(
        re.search(regex, error_message, re.IGNORECASE) is not None
        for regex in DATA_CONTEXT_PATTERNS
    )
    return "data_context" if is_data_related else "other"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Markdown schema profile (one file, three columns) used as the fixture that
# is passed to generate_enriched_hint in the enriched-hint tests.
SAMPLE_SAFE_PROFILE = (
    "# 数据结构概览 (Schema Profile)\n"
    "\n"
    "## 文件: test.csv\n"
    "\n"
    "- **维度**: 100 行 x 3 列\n"
    "- **列名**: `车型, 模块, 问题类型`\n"
    "\n"
    "### 列结构:\n"
    "\n"
    "| 列名 | 数据类型 | 空值率 | 唯一值数 | 特征描述 |\n"
    "|------|---------|--------|---------|----------|\n"
    "| 车型 | object | 0.0% | 5 | 低基数分类(5类) |\n"
    "| 模块 | object | 2.0% | 12 | 中基数分类(12类) |\n"
    "| 问题类型 | object | 0.0% | 8 | 低基数分类(8类) |\n"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 1: Error Classification Correctness (Task 11.1)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
# Strategy: generate error messages that contain data-context patterns
|
|
|
|
|
|
# Alternatives are listed in a fixed order: two regex-driven KeyError shapes
# (single- and double-quoted key) followed by fixed sample messages.
_data_context_alternatives = [
    st.from_regex(r"KeyError: '[a-zA-Z_]+'", fullmatch=True),
    st.from_regex(r'KeyError: "[a-zA-Z_]+"', fullmatch=True),
]
_data_context_alternatives += [
    st.just(sample)
    for sample in (
        "ValueError: column 'x' not found",
        "NameError: name 'df' is not defined",
        "empty DataFrame",
        "0 rows returned",
        "IndexError: index 5 is out of range",
    )
]
data_context_error_st = st.one_of(*_data_context_alternatives)


def _matches_no_data_context_pattern(text):
    """Return True when *text* matches none of DATA_CONTEXT_PATTERNS."""
    return not any(
        re.search(pattern, text, re.IGNORECASE) for pattern in DATA_CONTEXT_PATTERNS
    )


# Fixed non-data error messages plus arbitrary short text, the latter
# filtered so it cannot accidentally contain a data-context pattern.
_non_data_alternatives = [
    st.just(sample)
    for sample in (
        "SyntaxError: invalid syntax",
        "TypeError: unsupported operand",
        "ZeroDivisionError: division by zero",
        "ImportError: No module named 'foo'",
    )
]
_non_data_alternatives.append(
    st.text(min_size=1, max_size=50).filter(_matches_no_data_context_pattern)
)
non_data_error_st = st.one_of(*_non_data_alternatives)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(err=data_context_error_st)
def test_prop1_data_context_errors_classified(err):
    """Every message drawn from data_context_error_st is labelled 'data_context'."""
    label = classify_error(err)
    assert label == "data_context"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(err=non_data_error_st)
def test_prop1_non_data_errors_classified(err):
    """Every message drawn from non_data_error_st is labelled 'other'."""
    label = classify_error(err)
    assert label == "other"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 3: Enriched Hint Contains Column Metadata Without Real Data (11.2)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
# Column names that appear in the SAMPLE_SAFE_PROFILE fixture.
known_columns = ["车型", "模块", "问题类型"]
column_st = st.sampled_from(known_columns)


@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_contains_column_meta(col):
    """The hint built for a KeyError on a known column names it and its metadata labels."""
    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)

    # The column itself, followed by each metadata label the hint must carry.
    expected_fragments = (col, "数据类型", "唯一值数量", "空值率", "特征描述")
    for fragment in expected_fragments:
        assert fragment in hint
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(col=column_st)
def test_prop3_enriched_hint_no_real_data(col):
    """The hint must not expose real data values (min/max/mean/top-frequency samples)."""
    hint = generate_enriched_hint(f"KeyError: '{col}'", SAMPLE_SAFE_PROFILE)

    # Markers of statistical values or sample data that must never reach the hint.
    forbidden_markers = ("Min=", "Max=", "Mean=", "TOP 5 高频值")
    leaked = [marker for marker in forbidden_markers if marker in hint]
    assert not leaked
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 4: Env Var Config Override (Task 11.3)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=10)
@given(val=st.integers(min_value=1, max_value=100))
def test_prop4_env_override_max_data_context_retries(val):
    """APP_MAX_DATA_CONTEXT_RETRIES must override max_data_context_retries.

    Sets the environment variable to ``val``, builds the config through
    ``AppConfig.from_env()`` and checks the parsed value. The process
    environment is restored to its prior state afterwards, including any
    value the variable had before the test ran.
    """
    from config.app_config import AppConfig

    env_key = "APP_MAX_DATA_CONTEXT_RETRIES"
    # Remember a pre-existing value so it is restored rather than deleted.
    previous = os.environ.get(env_key)
    os.environ[env_key] = str(val)
    try:
        config = AppConfig.from_env()
        assert config.max_data_context_retries == val
    finally:
        if previous is None:
            # pop() with a default cannot raise even if the variable was
            # already removed while the config was being built.
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 5: Sliding Window Trimming Invariants (Task 11.4)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
def make_history(n_pairs: int, first_msg: str = "initial requirement"):
    """Create a synthetic conversation history.

    Args:
        n_pairs: Number of assistant/user exchanges appended after the
            opening message.
        first_msg: Content of the opening user message.

    Returns:
        A list of ``{"role", "content"}`` dicts: the opening user message,
        then for each round an assistant "generate_code" message followed by
        a user execution-feedback message.
    """
    turns = [{"role": "user", "content": first_msg}]
    for round_idx in range(n_pairs):
        assistant_turn = {
            "role": "assistant",
            "content": f'action: "generate_code"\ncode: | print({round_idx})',
        }
        feedback_turn = {
            "role": "user",
            "content": f"代码执行反馈:\n成功 round {round_idx}",
        }
        turns.extend((assistant_turn, feedback_turn))
    return turns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(
    n_pairs=st.integers(min_value=1, max_value=30),
    window=st.integers(min_value=1, max_value=10),
)
def test_prop5_trimming_preserves_first_message(n_pairs, window):
    """Trimming keeps the original request at index 0 and bounds the length.

    The sliding-window trimming is simulated inline: the first message is
    kept, an existing "[分析摘要]" summary (if any) is skipped, the last
    ``window * 2`` messages are retained and a summary message is inserted
    in place of the rest.
    """
    history = make_history(n_pairs, first_msg="ORIGINAL_REQ")
    max_messages = 2 * window

    # Short histories are never trimmed, so there is nothing to check.
    if len(history) <= max_messages:
        return

    head, *tail = history
    summary_present = (
        bool(tail)
        and tail[0]["role"] == "user"
        and tail[0]["content"].startswith("[分析摘要]")
    )
    # An existing summary sits right after the first message and is not
    # itself a candidate for the window.
    candidates = tail[1:] if summary_present else tail

    dropped = candidates[:-max_messages]
    kept = candidates[-max_messages:]
    if not dropped:
        return

    trimmed = [head, {"role": "user", "content": "[分析摘要] summary"}, *kept]

    assert trimmed[0]["content"] == "ORIGINAL_REQ"
    # first message + summary + sliding window
    assert len(trimmed) <= max_messages + 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 6: Trimming Summary Content (Task 11.5)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(n_pairs=st.integers(min_value=2, max_value=15))
def test_prop6_summary_excludes_code_blocks(n_pairs):
    """The compressed summary carries no code fences or raw code output.

    The summary is built inline, simulating _compress_trimmed_messages: one
    line per assistant round naming the action, extended with the execution
    result when the following user message is execution feedback.
    """
    history = make_history(n_pairs)

    lines = ["[分析摘要] 以下是之前分析轮次的概要:"]
    rounds_seen = 0
    for message in history[1:]:  # the opening request is never summarised
        role, text = message["role"], message["content"]

        if role == "assistant":
            rounds_seen += 1
            action = "collect_figures" if "collect_figures" in text else "generate_code"
            lines.append(f"- 轮次{rounds_seen}: 动作={action}")
            continue

        if role != "user" or "代码执行反馈" not in text:
            continue

        failed = "[ERROR]" in text or "执行错误" in text
        outcome = "失败" if failed else "成功"
        # Attach the result only to a round line, never to the header.
        if lines and lines[-1].startswith("- 轮次"):
            lines[-1] += f", 执行结果={outcome}"

    summary = "\n".join(lines)
    assert "```" not in summary
    assert "print(" not in summary
    assert "[分析摘要]" in summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 7: Template Prompt Integration (Task 11.6)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
# Names of every registered analysis template; one test case is generated
# per name below.
valid_template_names = list(TEMPLATE_REGISTRY.keys())


# Parametrised rather than sampled: every registered template is exercised
# exactly once, and an empty registry yields a skipped test instead of an
# invalid ``max_examples=0`` setting that would fail at import time.
@pytest.mark.parametrize("name", valid_template_names)
def test_prop7_template_prompt_prepended(name):
    """Each valid template yields a non-empty prompt containing its display name.

    Args:
        name: A key of TEMPLATE_REGISTRY.
    """
    template = get_template(name)
    prompt = template.get_full_prompt()
    assert len(prompt) > 0
    assert template.display_name in prompt
|
2026-04-19 21:30:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 8: Invalid Template Name Raises Error (Task 11.7)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
@settings(max_examples=20)
@given(name=st.text(min_size=1, max_size=30).filter(lambda s: s not in TEMPLATE_REGISTRY))
def test_prop8_invalid_template_raises_error(name):
    """An unknown template name raises ValueError whose message lists every valid name."""
    with pytest.raises(ValueError) as raised:
        get_template(name)

    message = str(raised.value)
    # Collect registered names missing from the message; none may be missing.
    unlisted = [known for known in TEMPLATE_REGISTRY if known not in message]
    assert not unlisted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
# Property 11: Parallel Profile Merge With Error Resilience (Task 11.8)
|
|
|
|
|
|
# ===========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
def test_prop11_parallel_profile_error_resilience():
    """Profiling a mix of valid and missing files reports an entry for each.

    The missing file must still appear in the safe profile (as an error
    entry). The valid file is checked only when the fixture exists on disk.
    """
    from utils.data_privacy import build_safe_profile

    # Resolve the fixture against the project root (parent of this test
    # directory) so the check does not depend on the current working
    # directory pytest was started from.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    valid_file = os.path.join(project_root, "uploads", "data_simple_200.csv")
    invalid_file = "/nonexistent/fake_file.csv"

    # build_safe_profile must handle the missing file gracefully.
    safe = build_safe_profile([valid_file, invalid_file])
    assert "fake_file.csv" in safe  # error entry present
    if os.path.exists(valid_file):
        assert "data_simple_200.csv" in safe  # valid entry present
|