218 lines
9.4 KiB
Python
218 lines
9.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
Unit tests for Phase 2: CodeExecutor Enhancements

Run: python -m pytest tests/test_phase2.py -v
"""
|
|
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
from utils.code_executor import CodeExecutor
|
|
|
|
|
|
@pytest.fixture
def executor(tmp_path):
    """Build a CodeExecutor whose output directory is pytest's ``tmp_path``."""
    # CodeExecutor is handed the directory as a plain string, not a Path.
    output_dir = str(tmp_path)
    return CodeExecutor(output_dir=output_dir)
|
|
|
|
|
|
# ===========================================================================
# Task 5: Evidence capture
# ===========================================================================
|
|
|
|
class TestEvidenceCapture:
    """Checks on the ``evidence_rows`` entry returned by ``execute_code``."""

    def test_evidence_from_result_dataframe(self, executor):
        """5.1: When result.result is a DataFrame, capture head(10) as evidence_rows."""
        code = "import pandas as pd\npd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert "evidence_rows" in result
        assert len(result["evidence_rows"]) == 3
        assert result["evidence_rows"][0] == {"a": 1, "b": 4}

    def test_evidence_capped_at_10(self, executor):
        """5.1: Evidence rows are capped at 10."""
        code = "import pandas as pd\npd.DataFrame({'x': list(range(100))})"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert len(result["evidence_rows"]) == 10

    def test_evidence_fallback_to_namespace(self, executor):
        """5.2: When result.result is not a DataFrame, fallback to namespace."""
        # The snippet ends with print(), so the last expression is not a
        # DataFrame; only the `my_data` variable holds one.
        code = "import pandas as pd\nmy_data = pd.DataFrame({'col': [10, 20]})\nprint('done')"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert len(result["evidence_rows"]) == 2
        assert result["evidence_rows"][0] == {"col": 10}

    def test_evidence_empty_when_no_dataframe(self, executor):
        """5.3: Returns empty list when no DataFrame is produced."""
        executor.reset_environment()
        code = "x = 42"
        result = executor.execute_code(code)
        assert result["success"] is True
        assert result["evidence_rows"] == []

    def test_evidence_key_in_failure(self, executor):
        """5.3: evidence_rows key present even on failure."""
        code = "import not_a_real_module"
        result = executor.execute_code(code)
        # Confirm the run really failed; otherwise the assertions below would
        # pass without exercising the failure path this test is named for.
        assert not result["success"]
        assert "evidence_rows" in result
        assert result["evidence_rows"] == []
|
|
|
|
|
|
# ===========================================================================
# Task 6: DataFrame auto-detection and export
# ===========================================================================
|
|
|
|
class TestDataFrameAutoExport:
    """Checks on the ``auto_exported_files`` entry returned by ``execute_code``."""

    @staticmethod
    def _exports_for(result, variable_name):
        """Return the auto-export records in *result* for *variable_name*."""
        return [
            record
            for record in result["auto_exported_files"]
            if record["variable_name"] == variable_name
        ]

    def test_new_dataframe_exported(self, executor, tmp_path):
        """6.1-6.4: New DataFrame is detected and exported to CSV."""
        code = "import pandas as pd\nresult_df = pd.DataFrame({'a': [1], 'b': [2]})"
        result = executor.execute_code(code)
        assert result["success"] is True
        # Look the record up by variable name rather than by position so the
        # test does not depend on the order of the export list.
        matches = self._exports_for(result, "result_df")
        assert matches, "result_df was not auto-exported"
        exported = matches[0]
        assert exported["filename"] == "result_df.csv"
        assert exported["rows"] == 1
        assert exported["cols"] == 2
        assert exported["columns"] == ["a", "b"]
        # Verify file actually exists
        assert (tmp_path / "result_df.csv").exists()

    def test_dedup_suffix(self, executor, tmp_path):
        """6.3: Numeric suffix deduplication when file exists."""
        # Create first file
        code1 = "import pandas as pd\nmy_df = pd.DataFrame({'x': [1]})"
        result1 = executor.execute_code(code1)
        assert result1["success"] is True

        # Rebind the name to a fresh DataFrame object; `pd` is reused from the
        # first snippet, so the executor must keep its namespace between calls.
        code2 = "my_df = pd.DataFrame({'x': [2]})"
        result2 = executor.execute_code(code2)
        assert result2["success"] is True
        matches = self._exports_for(result2, "my_df")
        assert matches, "my_df was not auto-exported on the second run"
        # The second export should have _1 suffix
        assert matches[0]["filename"] == "my_df_1.csv"

    def test_skip_module_names(self, executor):
        """6.1: Module-level names like pd, np are skipped."""
        code = "x = 42"  # pd and np already in namespace from setup
        result = executor.execute_code(code)
        # Should not export pd or np as DataFrames
        for record in result["auto_exported_files"]:
            assert record["variable_name"] not in ("pd", "np", "plt", "sns")

    def test_auto_exported_files_key_in_result(self, executor):
        """6.5: auto_exported_files key always present."""
        code = "x = 1"
        result = executor.execute_code(code)
        assert "auto_exported_files" in result
        assert isinstance(result["auto_exported_files"], list)

    def test_changed_dataframe_detected(self, executor, tmp_path):
        """6.2: Changed DataFrame (same name, new object) is detected."""
        code1 = "import pandas as pd\ndf_test = pd.DataFrame({'a': [1]})"
        executor.execute_code(code1)

        code2 = "df_test = pd.DataFrame({'a': [1, 2, 3]})"
        result2 = executor.execute_code(code2)
        assert result2["success"] is True
        exported = self._exports_for(result2, "df_test")
        assert len(exported) == 1
        assert exported[0]["rows"] == 3
|
|
|
|
|
|
# ===========================================================================
# Task 7: DATA_FILE_SAVED marker parsing
# ===========================================================================
|
|
|
|
class TestDataFileSavedMarkerParsing:
    """Checks on the ``prompt_saved_files`` entry returned by ``execute_code``."""

    def test_parse_single_marker(self, executor):
        """7.1-7.2: One DATA_FILE_SAVED line on stdout yields one parsed record."""
        snippet = 'print("[DATA_FILE_SAVED] filename: output.csv, rows: 42, description: Test data")'
        outcome = executor.execute_code(snippet)
        assert outcome["success"] is True
        saved = outcome["prompt_saved_files"]
        assert len(saved) == 1
        entry = saved[0]
        assert entry["filename"] == "output.csv"
        assert entry["rows"] == 42
        assert entry["description"] == "Test data"

    def test_parse_multiple_markers(self, executor):
        """7.1-7.2: Two marker lines yield two records, in print order."""
        snippet = (
            'print("[DATA_FILE_SAVED] filename: a.csv, rows: 10, description: File A")\n'
            'print("[DATA_FILE_SAVED] filename: b.xlsx, rows: 20, description: File B")'
        )
        outcome = executor.execute_code(snippet)
        assert outcome["success"] is True
        saved = outcome["prompt_saved_files"]
        assert len(saved) == 2
        first, second = saved[0], saved[1]
        assert first["filename"] == "a.csv"
        assert second["filename"] == "b.xlsx"

    def test_no_markers(self, executor):
        """7.3: Output without any marker gives an empty list."""
        outcome = executor.execute_code('print("hello world")')
        assert outcome["success"] is True
        assert outcome["prompt_saved_files"] == []

    def test_prompt_saved_files_key_in_result(self, executor):
        """7.3: The prompt_saved_files key is present and holds a list."""
        outcome = executor.execute_code("x = 1")
        assert "prompt_saved_files" in outcome
        saved = outcome["prompt_saved_files"]
        assert isinstance(saved, list)

    def test_malformed_marker_skipped(self, executor):
        """7.1: A marker line lacking the expected fields is ignored, not an error."""
        snippet = 'print("[DATA_FILE_SAVED] this is not valid")'
        outcome = executor.execute_code(snippet)
        assert outcome["success"] is True
        assert outcome["prompt_saved_files"] == []

    def test_chinese_filename_and_description(self, executor):
        """7.2: Chinese characters in filename and description are preserved."""
        snippet = 'print("[DATA_FILE_SAVED] filename: 数据汇总.csv, rows: 100, description: 各类型TOP问题聚合统计")'
        outcome = executor.execute_code(snippet)
        assert outcome["success"] is True
        saved = outcome["prompt_saved_files"]
        assert len(saved) == 1
        entry = saved[0]
        assert entry["filename"] == "数据汇总.csv"
        assert entry["description"] == "各类型TOP问题聚合统计"
|
|
|
|
|
|
# ===========================================================================
# Return structure integrity
# ===========================================================================
|
|
|
|
class TestReturnStructure:
    """Each kind of ``execute_code`` outcome must expose the same seven keys."""

    # Keys every result dict is expected to contain, whatever the outcome.
    REQUIRED_KEYS = frozenset({
        "success",
        "output",
        "error",
        "variables",
        "evidence_rows",
        "auto_exported_files",
        "prompt_saved_files",
    })

    def _assert_has_required_keys(self, result):
        """Fail when any required key is absent; extra keys are allowed."""
        missing = self.REQUIRED_KEYS - set(result.keys())
        assert not missing

    def test_success_return_has_all_keys(self, executor):
        """All 7 keys present on success."""
        self._assert_has_required_keys(executor.execute_code("x = 1"))

    def test_safety_failure_has_all_keys(self, executor):
        """All 7 keys present on safety check failure."""
        self._assert_has_required_keys(executor.execute_code("import socket"))

    def test_execution_error_has_all_keys(self, executor):
        """All 7 keys present on execution error."""
        self._assert_has_required_keys(executor.execute_code("1/0"))
|