Files
iov_data_analysis_agent/tests/test_phase2.py

218 lines
9.4 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
"""
Unit tests for Phase 2: CodeExecutor Enhancements
Run: python -m pytest tests/test_phase2.py -v
"""
import os
import sys
import tempfile
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pytest
import pandas as pd
from utils.code_executor import CodeExecutor
@pytest.fixture
def executor(tmp_path):
    """Provide a CodeExecutor that writes its output into pytest's tmp_path."""
    output_dir = str(tmp_path)
    return CodeExecutor(output_dir=output_dir)
# ===========================================================================
# Task 5: Evidence capture
# ===========================================================================
class TestEvidenceCapture:
    """Tests for the ``evidence_rows`` entry of the ``execute_code`` result."""

    def test_evidence_from_result_dataframe(self, executor):
        """5.1: A DataFrame as the final expression is captured as evidence_rows."""
        code = "import pandas as pd\npd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert "evidence_rows" in result
        assert len(result["evidence_rows"]) == 3
        # Each evidence row is expected to be a column -> value mapping.
        assert result["evidence_rows"][0] == {"a": 1, "b": 4}

    def test_evidence_capped_at_10(self, executor):
        """5.1: Evidence rows are capped at 10 even for a 100-row DataFrame."""
        code = "import pandas as pd\npd.DataFrame({'x': list(range(100))})"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert len(result["evidence_rows"]) == 10

    def test_evidence_fallback_to_namespace(self, executor):
        """5.2: With no DataFrame expression result, evidence comes from a namespace DataFrame."""
        # The last statement is a print(), so the only DataFrame is `my_data`.
        code = "import pandas as pd\nmy_data = pd.DataFrame({'col': [10, 20]})\nprint('done')"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert len(result["evidence_rows"]) == 2
        assert result["evidence_rows"][0] == {"col": 10}

    def test_evidence_empty_when_no_dataframe(self, executor):
        """5.3: evidence_rows is an empty list when no DataFrame is produced."""
        # Reset first so no DataFrame left in the environment can be picked up.
        executor.reset_environment()
        code = "x = 42"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert result["evidence_rows"] == []

    def test_evidence_key_in_failure(self, executor):
        """5.3: evidence_rows is present (and empty) even when execution fails."""
        code = "import not_a_real_module"
        result = executor.execute_code(code)
        # Verify the precondition: without this the test would pass vacuously
        # if the executor ever accepted the snippet.
        assert result["success"] is False
        assert "evidence_rows" in result
        assert result["evidence_rows"] == []
# ===========================================================================
# Task 6: DataFrame auto-detection and export
# ===========================================================================
class TestDataFrameAutoExport:
    """Tests for the ``auto_exported_files`` entry of the ``execute_code`` result."""

    def test_new_dataframe_exported(self, executor, tmp_path):
        """6.1-6.4: A new DataFrame is reported and written out as a CSV file."""
        outcome = executor.execute_code(
            "import pandas as pd\nresult_df = pd.DataFrame({'a': [1], 'b': [2]})"
        )
        assert outcome["success"] is True
        assert len(outcome["auto_exported_files"]) >= 1
        record = outcome["auto_exported_files"][0]
        assert record["variable_name"] == "result_df"
        assert record["filename"] == "result_df.csv"
        assert record["rows"] == 1
        assert record["cols"] == 2
        assert record["columns"] == ["a", "b"]
        # The CSV must actually be on disk in the temp output directory.
        assert (tmp_path / "result_df.csv").exists()

    def test_dedup_suffix(self, executor, tmp_path):
        """6.3: A numeric suffix is appended when the target file already exists."""
        # First run creates the initial export for `my_df`.
        first_run = executor.execute_code(
            "import pandas as pd\nmy_df = pd.DataFrame({'x': [1]})"
        )
        assert first_run["success"] is True
        # Rebinding the name yields a new DataFrame object (new id).
        second_run = executor.execute_code("my_df = pd.DataFrame({'x': [2]})")
        assert second_run["success"] is True
        records = second_run["auto_exported_files"]
        assert len(records) >= 1
        # The second export is expected to carry the _1 suffix.
        assert records[0]["filename"] == "my_df_1.csv"

    def test_skip_module_names(self, executor):
        """6.1: Module-level names like pd, np are never exported."""
        code = "x = 42"  # pd and np already in namespace from setup
        outcome = executor.execute_code(code)
        exported_names = [
            record["variable_name"] for record in outcome["auto_exported_files"]
        ]
        # None of the exported records may be named after a module alias.
        for exported_name in exported_names:
            assert exported_name not in ("pd", "np", "plt", "sns")

    def test_auto_exported_files_key_in_result(self, executor):
        """6.5: auto_exported_files is always present and is a list."""
        outcome = executor.execute_code("x = 1")
        assert "auto_exported_files" in outcome
        assert isinstance(outcome["auto_exported_files"], list)

    def test_changed_dataframe_detected(self, executor, tmp_path):
        """6.2: Rebinding a name to a new DataFrame is detected as a change."""
        executor.execute_code(
            "import pandas as pd\ndf_test = pd.DataFrame({'a': [1]})"
        )
        second_run = executor.execute_code(
            "df_test = pd.DataFrame({'a': [1, 2, 3]})"
        )
        assert second_run["success"] is True
        matching = [
            record
            for record in second_run["auto_exported_files"]
            if record["variable_name"] == "df_test"
        ]
        assert len(matching) == 1
        assert matching[0]["rows"] == 3
# ===========================================================================
# Task 7: DATA_FILE_SAVED marker parsing
# ===========================================================================
class TestDataFileSavedMarkerParsing:
    """Tests for the ``prompt_saved_files`` entry parsed from printed markers."""

    def test_parse_single_marker(self, executor):
        """7.1-7.2: One DATA_FILE_SAVED marker on stdout yields one parsed record."""
        outcome = executor.execute_code(
            'print("[DATA_FILE_SAVED] filename: output.csv, rows: 42, description: Test data")'
        )
        assert outcome["success"] is True
        assert len(outcome["prompt_saved_files"]) == 1
        record = outcome["prompt_saved_files"][0]
        assert record["filename"] == "output.csv"
        assert record["rows"] == 42
        assert record["description"] == "Test data"

    def test_parse_multiple_markers(self, executor):
        """7.1-7.2: Several markers are parsed in the order they were printed."""
        statements = [
            'print("[DATA_FILE_SAVED] filename: a.csv, rows: 10, description: File A")',
            'print("[DATA_FILE_SAVED] filename: b.xlsx, rows: 20, description: File B")',
        ]
        outcome = executor.execute_code("\n".join(statements))
        assert outcome["success"] is True
        assert len(outcome["prompt_saved_files"]) == 2
        assert outcome["prompt_saved_files"][0]["filename"] == "a.csv"
        assert outcome["prompt_saved_files"][1]["filename"] == "b.xlsx"

    def test_no_markers(self, executor):
        """7.3: Output without markers gives an empty list."""
        outcome = executor.execute_code('print("hello world")')
        assert outcome["success"] is True
        assert outcome["prompt_saved_files"] == []

    def test_prompt_saved_files_key_in_result(self, executor):
        """7.3: prompt_saved_files is always present and is a list."""
        outcome = executor.execute_code("x = 1")
        assert "prompt_saved_files" in outcome
        assert isinstance(outcome["prompt_saved_files"], list)

    def test_malformed_marker_skipped(self, executor):
        """7.1: A marker without the expected fields is skipped, not an error."""
        outcome = executor.execute_code('print("[DATA_FILE_SAVED] this is not valid")')
        assert outcome["success"] is True
        assert outcome["prompt_saved_files"] == []

    def test_chinese_filename_and_description(self, executor):
        """7.2: Chinese characters in filename and description are preserved."""
        outcome = executor.execute_code(
            'print("[DATA_FILE_SAVED] filename: 数据汇总.csv, rows: 100, description: 各类型TOP问题聚合统计")'
        )
        assert outcome["success"] is True
        assert len(outcome["prompt_saved_files"]) == 1
        record = outcome["prompt_saved_files"][0]
        assert record["filename"] == "数据汇总.csv"
        assert record["description"] == "各类型TOP问题聚合统计"
# ===========================================================================
# Return structure integrity
# ===========================================================================
class TestReturnStructure:
    """Tests that the ``execute_code`` result always carries the full key set."""

    # The seven keys every result dict must contain, whatever the outcome.
    _EXPECTED_KEYS = frozenset(
        (
            "success",
            "output",
            "error",
            "variables",
            "evidence_rows",
            "auto_exported_files",
            "prompt_saved_files",
        )
    )

    def _assert_has_all_keys(self, result):
        """Assert that no expected key is missing from *result*."""
        missing = self._EXPECTED_KEYS - set(result.keys())
        assert not missing

    def test_success_return_has_all_keys(self, executor):
        """All 7 keys present on success."""
        self._assert_has_all_keys(executor.execute_code("x = 1"))

    def test_safety_failure_has_all_keys(self, executor):
        """All 7 keys present on safety check failure."""
        self._assert_has_all_keys(executor.execute_code("import socket"))

    def test_execution_error_has_all_keys(self, executor):
        """All 7 keys present on execution error."""
        self._assert_has_all_keys(executor.execute_code("1/0"))