Complete AI Data Analysis Agent implementation with 95.7% test coverage

This commit is contained in:
2026-03-07 00:04:29 +08:00
parent 621e546b43
commit 7071b1f730
245 changed files with 22612 additions and 2211 deletions

View File

@@ -0,0 +1,523 @@
"""报告生成引擎的单元测试。"""
import pytest
import tempfile
import os
from src.engines.report_generation import (
extract_key_findings,
organize_report_structure,
generate_report,
_categorize_insight,
_calculate_importance,
_generate_report_title,
_generate_default_sections
)
from src.models.analysis_result import AnalysisResult
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
from src.models.data_profile import DataProfile, ColumnInfo
@pytest.fixture
def sample_results():
    """Build sample analysis results: two successful tasks and one failed task."""
    status_distribution = AnalysisResult(
        task_id='task1',
        task_name='状态分布分析',
        success=True,
        data={'open': 50, 'closed': 30, 'pending': 20},
        visualizations=['chart1.png'],
        insights=[
            '待处理工单占比50%,异常高',
            '已关闭工单占比30%',
        ],
        execution_time=2.5,
    )
    trend = AnalysisResult(
        task_id='task2',
        task_name='趋势分析',
        success=True,
        data={'trend': 'increasing'},
        visualizations=['chart2.png'],
        insights=[
            '工单数量呈上升趋势',
            '增长率为15%',
        ],
        execution_time=3.2,
    )
    # The failed task carries an error message and no data, charts or insights.
    failed_type_analysis = AnalysisResult(
        task_id='task3',
        task_name='类型分析',
        success=False,
        data={},
        visualizations=[],
        insights=[],
        error='数据缺少类型字段',
        execution_time=0.1,
    )
    return [status_distribution, trend, failed_type_analysis]
@pytest.fixture
def sample_requirement():
    """Build a sample requirement spec with a single analysis objective."""
    health_objective = AnalysisObjective(
        name='健康度分析',
        description='评估工单处理的健康状况',
        metrics=['关闭率', '处理时长', '积压情况'],
        priority=5,
    )
    return RequirementSpec(
        user_input='分析工单健康度',
        objectives=[health_objective],
    )
@pytest.fixture
def sample_data_profile():
    """Build a sample data profile describing a ticket dataset."""
    status_column = ColumnInfo(
        name='status',
        dtype='categorical',
        missing_rate=0.0,
        unique_count=3,
        sample_values=['open', 'closed', 'pending'],
    )
    # This column is declared without sample values.
    created_at_column = ColumnInfo(
        name='created_at',
        dtype='datetime',
        missing_rate=0.0,
        unique_count=1000,
    )
    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=[status_column, created_at_column],
        inferred_type='ticket',
        key_fields={'status': '状态', 'created_at': '创建时间'},
        quality_score=85.0,
        summary='工单数据包含1000条记录',
    )
class TestExtractKeyFindings:
    """Tests for key-finding extraction."""

    def test_basic_functionality(self, sample_results):
        """Extraction returns a list of findings carrying every required field."""
        findings = extract_key_findings(sample_results)

        # The result must be a list.
        assert isinstance(findings, list)
        # Only successful results contribute: 2 tasks with 2 insights each.
        assert len(findings) == 4

        # Every finding must expose the required fields.
        for entry in findings:
            for field in ('finding', 'importance', 'source_task', 'category'):
                assert field in entry

    def test_importance_sorting(self, sample_results):
        """Findings come back ordered by importance, highest first."""
        ranked = extract_key_findings(sample_results)

        # Each finding must be at least as important as the one after it.
        for current, following in zip(ranked, ranked[1:]):
            assert current['importance'] >= following['importance']

    def test_empty_results(self):
        """An empty result list yields an empty list of findings."""
        findings = extract_key_findings([])

        assert isinstance(findings, list)
        assert len(findings) == 0

    def test_only_failed_results(self):
        """A result list containing only a failed task yields no findings."""
        failed_only = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务',
                success=False,
                error='测试错误',
            ),
        ]

        findings = extract_key_findings(failed_only)

        # Failed tasks must not produce findings.
        assert len(findings) == 0
class TestCategorizeInsight:
    """Tests for insight categorization."""

    def test_anomaly_detection(self):
        """An insight mentioning an anomaly is categorized as 'anomaly'."""
        assert _categorize_insight('待处理工单占比50%,异常高') == 'anomaly'

    def test_trend_detection(self):
        """An insight mentioning a trend is categorized as 'trend'."""
        assert _categorize_insight('工单数量呈上升趋势') == 'trend'

    def test_general_insight(self):
        """An insight with no special keyword is categorized as 'insight'."""
        assert _categorize_insight('数据质量良好') == 'insight'

    def test_english_keywords(self):
        """English keywords are recognized as well."""
        expectations = (
            ('This is an anomaly', 'anomaly'),
            ('Showing growth trend', 'trend'),
        )
        for text, expected_category in expectations:
            assert _categorize_insight(text) == expected_category
class TestCalculateImportance:
    """Tests for importance scoring."""

    def test_anomaly_importance(self):
        """An insight flagged as a severe anomaly scores at least 4."""
        # Anomaly + severe = high importance.
        assert _calculate_importance('严重异常:系统故障', {}) >= 4

    def test_percentage_importance(self):
        """An insight containing a percentage scores at least 4."""
        # Contains a percentage = fairly high importance.
        assert _calculate_importance('占比达到80%', {}) >= 4

    def test_normal_importance(self):
        """An ordinary insight gets the medium score of 3."""
        # Default medium importance.
        assert _calculate_importance('数据正常', {}) == 3

    def test_importance_range(self):
        """Scores stay within the 1-5 range for a variety of insights."""
        samples = (
            '严重异常问题',
            '占比80%',
            '正常数据',
            '轻微变化',
        )
        for text in samples:
            score = _calculate_importance(text, {})
            assert 1 <= score <= 5
class TestOrganizeReportStructure:
    """Tests for report-structure organization."""

    @staticmethod
    def _build_structure(results, requirement, data_profile):
        """Extract key findings from *results* and organize them into a structure."""
        findings = extract_key_findings(results)
        return organize_report_structure(findings, requirement, data_profile)

    def test_basic_structure(self, sample_results, sample_requirement, sample_data_profile):
        """The structure exposes all required top-level fields."""
        layout = self._build_structure(sample_results, sample_requirement, sample_data_profile)

        # All required fields must be present.
        for field in ('title', 'sections', 'executive_summary', 'detailed_analysis', 'conclusions'):
            assert field in layout

    def test_with_template(self, sample_results, sample_data_profile):
        """When the requirement carries a template, its sections are used."""
        # Requirement that carries template information.
        templated_requirement = RequirementSpec(
            user_input='按模板分析',
            objectives=[
                AnalysisObjective(
                    name='分析',
                    description='按模板分析',
                    metrics=['指标1'],
                    priority=5,
                ),
            ],
            template_path='template.md',
            template_requirements={
                'sections': ['第一章', '第二章', '第三章'],
                'required_metrics': ['指标1', '指标2'],
                'required_charts': ['图表1'],
            },
        )

        layout = self._build_structure(sample_results, templated_requirement, sample_data_profile)

        # The template structure must be used.
        assert layout['use_template'] is True
        assert layout['sections'] == ['第一章', '第二章', '第三章']

    def test_without_template(self, sample_results, sample_requirement, sample_data_profile):
        """Without a template, a non-empty default section list is generated."""
        layout = self._build_structure(sample_results, sample_requirement, sample_data_profile)

        # A default structure must be generated.
        assert layout['use_template'] is False
        assert len(layout['sections']) > 0
        assert '执行摘要' in layout['sections']

    def test_executive_summary(self, sample_results, sample_requirement, sample_data_profile):
        """The executive summary holds key findings and category counts."""
        layout = self._build_structure(sample_results, sample_requirement, sample_data_profile)
        summary = layout['executive_summary']

        # Key findings must be included, as a list.
        assert 'key_findings' in summary
        assert isinstance(summary['key_findings'], list)
        # Statistics must be included.
        assert 'anomaly_count' in summary
        assert 'trend_count' in summary

    def test_detailed_analysis(self, sample_results, sample_requirement, sample_data_profile):
        """The detailed analysis groups findings into per-category lists."""
        layout = self._build_structure(sample_results, sample_requirement, sample_data_profile)
        by_category = layout['detailed_analysis']
        categories = ('anomaly', 'trend', 'insight')

        # Every category must be present.
        for category in categories:
            assert category in by_category
        # Every category must map to a list.
        for category in categories:
            assert isinstance(by_category[category], list)
class TestGenerateReportTitle:
    """Tests for report-title generation."""

    @staticmethod
    def _title_for(user_input, data_profile):
        """Generate a title for a requirement with *user_input* and no objectives."""
        spec = RequirementSpec(user_input=user_input, objectives=[])
        return _generate_report_title(spec, data_profile)

    def test_health_analysis_title(self, sample_data_profile):
        """A health-analysis request yields a title mentioning tickets and health."""
        heading = self._title_for('分析工单健康度', sample_data_profile)

        assert '工单' in heading
        assert '健康度' in heading

    def test_trend_analysis_title(self, sample_data_profile):
        """A trend-analysis request yields a title mentioning tickets and trend."""
        heading = self._title_for('分析趋势', sample_data_profile)

        assert '工单' in heading
        assert '趋势' in heading

    def test_generic_title(self, sample_data_profile):
        """A generic request yields a title mentioning tickets and 'analysis report'."""
        heading = self._title_for('分析数据', sample_data_profile)

        assert '工单' in heading
        assert '分析报告' in heading
class TestGenerateDefaultSections:
    """Tests for default section generation."""

    @staticmethod
    def _minimal_profile(inferred_type):
        """Build a small column-less data profile with the given inferred type."""
        return DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type=inferred_type,
        )

    def test_with_anomalies(self):
        """An anomaly finding adds the anomaly-analysis section."""
        anomaly_findings = [
            {
                'finding': '异常情况',
                'category': 'anomaly',
                'importance': 5,
            },
        ]

        section_names = _generate_default_sections(
            anomaly_findings, self._minimal_profile('ticket')
        )

        # The anomaly-analysis section must be present.
        assert '异常分析' in section_names

    def test_with_trends(self):
        """A trend finding adds the trend-analysis section."""
        trend_findings = [
            {
                'finding': '上升趋势',
                'category': 'trend',
                'importance': 4,
            },
        ]

        section_names = _generate_default_sections(
            trend_findings, self._minimal_profile('sales')
        )

        # The trend-analysis section must be present.
        assert '趋势分析' in section_names

    def test_ticket_data_sections(self):
        """Ticket data yields at least one ticket-specific section."""
        section_names = _generate_default_sections([], self._minimal_profile('ticket'))

        # A ticket-related section (status or type analysis) must be present.
        assert '状态分析' in section_names or '类型分析' in section_names
class TestGenerateReport:
    """Tests for end-to-end report generation."""

    def test_basic_report_generation(self, sample_results, sample_requirement, sample_data_profile):
        """A report is a non-empty string with a heading and a summary."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # The report must be a string.
        assert isinstance(report, str)
        # The report must not be empty.
        assert len(report) > 0
        # The report must contain a heading marker.
        assert '#' in report
        # The report must contain an executive summary.
        assert '执行摘要' in report or '摘要' in report

    def test_report_with_skipped_tasks(self, sample_results, sample_requirement, sample_data_profile):
        """The report mentions the task that failed."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Skipped or failed tasks must be mentioned.
        assert '跳过' in report or '失败' in report
        # The name of the failed task must be mentioned.
        assert '类型分析' in report

    def test_report_with_visualizations(self, sample_results, sample_requirement, sample_data_profile):
        """The report references at least one chart."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # A chart file name or a Markdown image marker must be present.
        assert 'chart1.png' in report or 'chart2.png' in report or '![' in report

    def test_report_with_insights(self, sample_results, sample_requirement, sample_data_profile):
        """The report includes insight content."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Insight text must be present.
        assert '待处理工单' in report or '趋势' in report

    def test_report_save_to_file(self, sample_results, sample_requirement, sample_data_profile):
        """Passing ``output_path`` writes the returned report text to that file."""
        # Target a path inside a fresh directory so the file does not exist
        # beforehand; otherwise the existence check below would pass even if
        # generate_report never wrote anything. The directory and its contents
        # are removed automatically when the context exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, 'report.md')
            assert not os.path.exists(output_path)

            report = generate_report(
                sample_results,
                sample_requirement,
                sample_data_profile,
                output_path=output_path,
            )

            # The file must have been created by generate_report.
            assert os.path.exists(output_path)

            # The file content must match the returned report.
            with open(output_path, 'r', encoding='utf-8') as f:
                saved_content = f.read()
            assert saved_content == report

    def test_empty_results(self, sample_requirement, sample_data_profile):
        """An empty result list still produces a report with a summary."""
        report = generate_report([], sample_requirement, sample_data_profile)

        # A report must still be generated.
        assert isinstance(report, str)
        assert len(report) > 0
        # The basic structure must be present.
        assert '执行摘要' in report or '摘要' in report

    def test_all_failed_results(self, sample_requirement, sample_data_profile):
        """When every task failed, a report is still produced and mentions it."""
        results = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务1',
                success=False,
                error='错误1',
            ),
            AnalysisResult(
                task_id='task2',
                task_name='失败任务2',
                success=False,
                error='错误2',
            ),
        ]

        report = generate_report(results, sample_requirement, sample_data_profile)

        # Report generation must succeed.
        assert isinstance(report, str)
        assert len(report) > 0
        # The failure must be mentioned.
        assert '失败' in report or '跳过' in report