From 00bd48e7e72a590fd10149403cdc62df6ee4a457 Mon Sep 17 00:00:00 2001 From: Jeason <1710884619@qq.com> Date: Sun, 19 Apr 2026 21:30:08 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=A7=E6=9B=B4=E6=96=B0=EF=BC=8C=E6=9E=B6?= =?UTF-8?q?=E6=9E=84=E8=B0=83=E6=95=B4=EF=BC=8C=E6=95=B0=E6=8D=AE=E5=88=86?= =?UTF-8?q?=E6=9E=90=E8=83=BD=E5=8A=9B=E6=8F=90=E5=8D=87=EF=BC=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent-robustness-optimization/tasks.md | 96 +-- .../analysis-dashboard-redesign/.config.kiro | 1 + .../analysis-dashboard-redesign/design.md | 393 +++++++++++ .../requirements.md | 159 +++++ .../analysis-dashboard-redesign/tasks.md | 102 +++ __init__.py | 17 +- config/app_config.py | 8 +- conftest.py | 6 + data_analysis_agent.py | 351 +++++++++- prompts.py | 17 +- pyproject.toml | 3 + tests/__init__.py | 1 + tests/conftest.py | 11 + tests/test_dashboard_properties.py | 651 ++++++++++++++++++ tests/test_phase1.py | 238 +++++++ tests/test_phase2.py | 217 ++++++ tests/test_phase3.py | 233 +++++++ tests/test_properties.py | 285 ++++++++ tests/test_unit.py | 229 ++++++ utils/code_executor.py | 202 +++++- utils/data_loader.py | 105 +++ utils/data_privacy.py | 76 ++ web/main.py | 212 +++++- web/static/clean_style.css | 493 +++++++++++-- web/static/index.html | 76 +- web/static/script.js | 445 +++++++++--- 26 files changed, 4375 insertions(+), 252 deletions(-) create mode 100644 .kiro/specs/analysis-dashboard-redesign/.config.kiro create mode 100644 .kiro/specs/analysis-dashboard-redesign/design.md create mode 100644 .kiro/specs/analysis-dashboard-redesign/requirements.md create mode 100644 .kiro/specs/analysis-dashboard-redesign/tasks.md create mode 100644 conftest.py create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_dashboard_properties.py create mode 100644 tests/test_phase1.py create mode 100644 tests/test_phase2.py create mode 
100644 tests/test_phase3.py create mode 100644 tests/test_properties.py create mode 100644 tests/test_unit.py diff --git a/.kiro/specs/agent-robustness-optimization/tasks.md b/.kiro/specs/agent-robustness-optimization/tasks.md index e1c7097..f05da91 100644 --- a/.kiro/specs/agent-robustness-optimization/tasks.md +++ b/.kiro/specs/agent-robustness-optimization/tasks.md @@ -9,66 +9,66 @@ ## Priority 2: Data Privacy Fallback (R1–R3) -- [ ] 2. Implement error classification - - [-] 2.1 Add `_classify_error(error_message: str) -> str` method to `DataAnalysisAgent` in `data_analysis_agent.py` with regex patterns for KeyError, ValueError, NameError, empty DataFrame - - [-] 2.2 Add `_extract_column_from_error(error_message: str) -> Optional[str]` function to `utils/data_privacy.py` - - [-] 2.3 Add `_lookup_column_in_profile(column_name, safe_profile) -> Optional[dict]` function to `utils/data_privacy.py` -- [ ] 3. Implement enriched hint generation - - [-] 3.1 Add `generate_enriched_hint(error_message: str, safe_profile: str) -> str` function to `utils/data_privacy.py` - - [-] 3.2 Integrate retry logic into the `analyze()` loop in `data_analysis_agent.py`: add per-round retry counter, call `_classify_error` on failures, generate enriched hint when below retry limit, fall back to normal error handling at limit +- [x] 2. Implement error classification + - [x] 2.1 Add `_classify_error(error_message: str) -> str` method to `DataAnalysisAgent` in `data_analysis_agent.py` with regex patterns for KeyError, ValueError, NameError, empty DataFrame + - [x] 2.2 Add `_extract_column_from_error(error_message: str) -> Optional[str]` function to `utils/data_privacy.py` + - [x] 2.3 Add `_lookup_column_in_profile(column_name, safe_profile) -> Optional[dict]` function to `utils/data_privacy.py` +- [x] 3. 
Implement enriched hint generation + - [x] 3.1 Add `generate_enriched_hint(error_message: str, safe_profile: str) -> str` function to `utils/data_privacy.py` + - [x] 3.2 Integrate retry logic into the `analyze()` loop in `data_analysis_agent.py`: add per-round retry counter, call `_classify_error` on failures, generate enriched hint when below retry limit, fall back to normal error handling at limit ## Priority 3: Conversation History Trimming (R4–R5) -- [ ] 4. Implement conversation trimming - - [~] 4.1 Add `_trim_conversation_history()` method to `DataAnalysisAgent` implementing sliding window with first-message preservation - - [~] 4.2 Add `_compress_trimmed_messages(messages: list) -> str` method to `DataAnalysisAgent` that generates summary with action types and success/failure, excluding code blocks and raw output - - [~] 4.3 Call `_trim_conversation_history()` at the start of each round in the `analyze()` loop, after the first round +- [x] 4. Implement conversation trimming + - [x] 4.1 Add `_trim_conversation_history()` method to `DataAnalysisAgent` implementing sliding window with first-message preservation + - [x] 4.2 Add `_compress_trimmed_messages(messages: list) -> str` method to `DataAnalysisAgent` that generates summary with action types and success/failure, excluding code blocks and raw output + - [x] 4.3 Call `_trim_conversation_history()` at the start of each round in the `analyze()` loop, after the first round ## Priority 4: Analysis Template System (R6–R8) -- [ ] 5. Backend template integration - - [~] 5.1 Add optional `template_name` parameter to `DataAnalysisAgent.analyze()` method; retrieve template via `get_template()`, prepend `get_full_prompt()` to user requirement - - [~] 5.2 Add `GET /api/templates` endpoint to `web/main.py` returning `list_templates()` result - - [~] 5.3 Add optional `template` field to `StartRequest` model in `web/main.py`; pass template name to agent in `run_analysis_task` -- [ ] 6. 
Frontend template selector - - [~] 6.1 Add template selector HTML section (cards above requirement input) to `web/static/index.html` - - [~] 6.2 Add template fetching, selection logic, and "No Template" default to `web/static/script.js` - - [~] 6.3 Add template card styles (`.template-card`, `.template-card.selected`) to `web/static/clean_style.css` +- [x] 5. Backend template integration + - [x] 5.1 Add optional `template_name` parameter to `DataAnalysisAgent.analyze()` method; retrieve template via `get_template()`, prepend `get_full_prompt()` to user requirement + - [x] 5.2 Add `GET /api/templates` endpoint to `web/main.py` returning `list_templates()` result + - [x] 5.3 Add optional `template` field to `StartRequest` model in `web/main.py`; pass template name to agent in `run_analysis_task` +- [x] 6. Frontend template selector + - [x] 6.1 Add template selector HTML section (cards above requirement input) to `web/static/index.html` + - [x] 6.2 Add template fetching, selection logic, and "No Template" default to `web/static/script.js` + - [x] 6.3 Add template card styles (`.template-card`, `.template-card.selected`) to `web/static/clean_style.css` ## Priority 5: Frontend Progress Bar (R9) -- [ ] 7. Backend progress updates - - [~] 7.1 Add `set_progress_callback(callback)` method to `DataAnalysisAgent`; call callback at start of each round in `analyze()` loop - - [~] 7.2 Wire progress callback in `run_analysis_task` in `web/main.py` to update `SessionData` progress fields - - [~] 7.3 Add `current_round`, `max_rounds`, `progress_percentage`, `status_message` to `GET /api/status` response in `web/main.py` -- [ ] 8. 
Frontend progress bar - - [~] 8.1 Add progress bar HTML element below the status bar area in `web/static/index.html` - - [~] 8.2 Add `updateProgressBar(percentage, message)` function to `web/static/script.js`; call it during polling when `is_running` is true; set to 100% on completion - - [~] 8.3 Add progress bar styles with CSS transition animation to `web/static/clean_style.css` +- [x] 7. Backend progress updates + - [x] 7.1 Add `set_progress_callback(callback)` method to `DataAnalysisAgent`; call callback at start of each round in `analyze()` loop + - [x] 7.2 Wire progress callback in `run_analysis_task` in `web/main.py` to update `SessionData` progress fields + - [x] 7.3 Add `current_round`, `max_rounds`, `progress_percentage`, `status_message` to `GET /api/status` response in `web/main.py` +- [x] 8. Frontend progress bar + - [x] 8.1 Add progress bar HTML element below the status bar area in `web/static/index.html` + - [x] 8.2 Add `updateProgressBar(percentage, message)` function to `web/static/script.js`; call it during polling when `is_running` is true; set to 100% on completion + - [x] 8.3 Add progress bar styles with CSS transition animation to `web/static/clean_style.css` ## Priority 6: Multi-File Chunked & Parallel Loading (R10–R11) -- [ ] 9. Chunked loading enhancement - - [~] 9.1 Add `_profile_chunked(file_path: str) -> str` function to `utils/data_loader.py` that profiles using first chunk + sampled subsequent chunks - - [~] 9.2 Add `load_and_profile_data_smart(file_paths, max_file_size_mb) -> str` function to `utils/data_loader.py` that selects chunked vs full loading based on file size threshold - - [~] 9.3 Update `DataAnalysisAgent.analyze()` to use smart loader and expose chunked iterator in Code_Executor namespace for large files -- [ ] 10. 
Parallel profiling - - [~] 10.1 Add `_profile_files_parallel(file_paths: list) -> tuple[str, str]` method to `DataAnalysisAgent` using `ThreadPoolExecutor` with `max_parallel_profiles` workers - - [~] 10.2 Update `DataAnalysisAgent.analyze()` to call `_profile_files_parallel` when multiple files are provided, replacing sequential `build_safe_profile` + `build_local_profile` calls +- [x] 9. Chunked loading enhancement + - [x] 9.1 Add `_profile_chunked(file_path: str) -> str` function to `utils/data_loader.py` that profiles using first chunk + sampled subsequent chunks + - [x] 9.2 Add `load_and_profile_data_smart(file_paths, max_file_size_mb) -> str` function to `utils/data_loader.py` that selects chunked vs full loading based on file size threshold + - [x] 9.3 Update `DataAnalysisAgent.analyze()` to use smart loader and expose chunked iterator in Code_Executor namespace for large files +- [x] 10. Parallel profiling + - [x] 10.1 Add `_profile_files_parallel(file_paths: list) -> tuple[str, str]` method to `DataAnalysisAgent` using `ThreadPoolExecutor` with `max_parallel_profiles` workers + - [x] 10.2 Update `DataAnalysisAgent.analyze()` to call `_profile_files_parallel` when multiple files are provided, replacing sequential `build_safe_profile` + `build_local_profile` calls ## Priority 7: Testing -- [ ] 11. 
Write property-based tests - - [ ] 11.1 ~PBT~ Property test for error classification correctness (Property 1) using `hypothesis` - - [ ] 11.2 ~PBT~ Property test for enriched hint content and privacy (Property 3) using `hypothesis` - - [ ] 11.3 ~PBT~ Property test for env var config override (Property 4) using `hypothesis` - - [ ] 11.4 ~PBT~ Property test for sliding window trimming invariants (Property 5) using `hypothesis` - - [ ] 11.5 ~PBT~ Property test for trimming summary content (Property 6) using `hypothesis` - - [ ] 11.6 ~PBT~ Property test for template prompt integration (Property 7) using `hypothesis` - - [ ] 11.7 ~PBT~ Property test for invalid template error (Property 8) using `hypothesis` - - [ ] 11.8 ~PBT~ Property test for parallel profile merge with error resilience (Property 11) using `hypothesis` -- [ ] 12. Write unit and integration tests - - [ ] 12.1 Unit tests for error classifier with known error messages - - [ ] 12.2 Unit tests for conversation trimming at boundary conditions - - [ ] 12.3 Integration tests for `GET /api/templates` and `POST /api/start` with template field - - [ ] 12.4 Integration tests for `GET /api/status` progress fields +- [x] 11. 
Write property-based tests + - [x] 11.1 ~PBT~ Property test for error classification correctness (Property 1) using `hypothesis` + - [x] 11.2 ~PBT~ Property test for enriched hint content and privacy (Property 3) using `hypothesis` + - [x] 11.3 ~PBT~ Property test for env var config override (Property 4) using `hypothesis` + - [x] 11.4 ~PBT~ Property test for sliding window trimming invariants (Property 5) using `hypothesis` + - [x] 11.5 ~PBT~ Property test for trimming summary content (Property 6) using `hypothesis` + - [x] 11.6 ~PBT~ Property test for template prompt integration (Property 7) using `hypothesis` + - [x] 11.7 ~PBT~ Property test for invalid template error (Property 8) using `hypothesis` + - [x] 11.8 ~PBT~ Property test for parallel profile merge with error resilience (Property 11) using `hypothesis` +- [x] 12. Write unit and integration tests + - [x] 12.1 Unit tests for error classifier with known error messages + - [x] 12.2 Unit tests for conversation trimming at boundary conditions + - [x] 12.3 Integration tests for `GET /api/templates` and `POST /api/start` with template field + - [x] 12.4 Integration tests for `GET /api/status` progress fields diff --git a/.kiro/specs/analysis-dashboard-redesign/.config.kiro b/.kiro/specs/analysis-dashboard-redesign/.config.kiro new file mode 100644 index 0000000..671bc7f --- /dev/null +++ b/.kiro/specs/analysis-dashboard-redesign/.config.kiro @@ -0,0 +1 @@ +{"specId": "ea41aaef-0737-4255-bcad-90f156a5b2d5", "workflowType": "requirements-first", "specType": "feature"} \ No newline at end of file diff --git a/.kiro/specs/analysis-dashboard-redesign/design.md b/.kiro/specs/analysis-dashboard-redesign/design.md new file mode 100644 index 0000000..d1dd4a6 --- /dev/null +++ b/.kiro/specs/analysis-dashboard-redesign/design.md @@ -0,0 +1,393 @@ +# Design Document: Analysis Dashboard Redesign + +## Overview + +This design transforms the Analysis Dashboard from a raw-log-centric 3-tab layout (Live Log, Report, Gallery) 
into a structured, evidence-driven 3-tab layout (Execution Process, Data Files, Report). The core architectural change is introducing a **Round_Data** structured data model that flows from the agent's execution loop through the API to the frontend, replacing the current raw text log approach. + +Key design decisions: +- **Round_Data as the central abstraction**: Every analysis round produces a structured object containing reasoning, code, result summary, data evidence, and raw log. This single model drives the Execution Process tab, evidence linking, and data file tracking. +- **Auto-detection at the CodeExecutor level**: DataFrame detection and CSV export happen transparently in `CodeExecutor.execute_code()`, requiring no LLM cooperation. Prompt guidance is additive — it encourages the LLM to save files explicitly, but the system doesn't depend on it. +- **Gallery absorbed into Report**: Images are already rendered inline via `marked.js` Markdown parsing. Removing the Gallery tab is a subtraction, not an addition. +- **Evidence linking via HTML comments**: The LLM annotates report paragraphs with `<!-- evidence:round_N -->` comments during final report generation. The backend parses these to build a `supporting_data` mapping. This is a best-effort approach — missing annotations simply mean no "查看支撑数据" button. + +## Architecture + +```mermaid +flowchart TD + subgraph Backend + A[DataAnalysisAgent] -->|produces| B[Round_Data objects] + A -->|calls| C[CodeExecutor] + C -->|auto-detects DataFrames| D[CSV export to session dir] + C -->|captures evidence rows| B + C -->|parses DATA_FILE_SAVED markers| E[File metadata] + B -->|stored on| F[SessionData] + E -->|stored on| F + F -->|serves| G[GET /api/status] + F -->|serves| H[GET /api/data-files] + F -->|serves| I[GET /api/report] + end + + subgraph Frontend + G -->|rounds array| J[Execution Process Tab] + H -->|file list + preview| K[Data Files Tab] + I -->|paragraphs + supporting_data| L[Report Tab] + end +``` + +### Data Flow + +1. 
**Agent loop** (`DataAnalysisAgent.analyze`): Each round calls `CodeExecutor.execute_code()`, which returns an enriched result dict containing `evidence_rows`, `auto_exported_files`, and `prompt_saved_files`. The agent wraps this into a `Round_Data` dict and appends it to `SessionData.rounds`. + +2. **Status polling**: Frontend polls `GET /api/status` every 2 seconds. The response now includes a `rounds` array. The frontend incrementally appends new `Round_Card` elements — it tracks the last-seen round count and only renders new entries. + +3. **Data Files**: `GET /api/data-files` reads `SessionData.data_files` plus scans the session directory for CSV/XLSX files (fallback discovery). Preview reads the first 5 rows via pandas. + +4. **Report with evidence**: `GET /api/report` parses `<!-- evidence:round_N -->` annotations, looks up the `evidence_rows` of the entry in `SessionData.rounds` whose `round` field equals N (round numbers are 1-indexed, so this is not list index N), and builds a `supporting_data` mapping keyed by paragraph ID. + +## Components and Interfaces + +### 1. CodeExecutor Enhancements (`utils/code_executor.py`) + +**New behavior in `execute_code()`:** + +```python +def execute_code(self, code: str) -> Dict[str, Any]: + """Returns dict with keys: success, output, error, variables, + evidence_rows, auto_exported_files, prompt_saved_files""" +``` + +- **DataFrame snapshot before/after**: Before execution, capture `{name: id(obj)}` for all DataFrame variables. After execution, detect new names or changed `id()` values. +- **Evidence capture**: If the execution result is a DataFrame (via `result.result`), call `.head(10).to_dict(orient='records')` to produce `evidence_rows`. Also check the last assigned DataFrame variable in the namespace. +- **Auto-export**: For each newly detected DataFrame, export to `{session_dir}/{var_name}.csv` with dedup suffix. Record metadata in `auto_exported_files` list. +- **Marker parsing**: Scan `captured.stdout` for `[DATA_FILE_SAVED]` lines, parse filename/rows/description, record in `prompt_saved_files` list. 
+ +**Interface contract:** +```python +# evidence_rows: list[dict] — up to 10 rows as dicts +# auto_exported_files: list[dict] — [{variable_name, filename, rows, cols, columns}] +# prompt_saved_files: list[dict] — [{filename, rows, description}] +``` + +### 2. DataAnalysisAgent Changes (`data_analysis_agent.py`) + +**Round_Data construction** in `_handle_generate_code()` and the main loop: + +```python +round_data = { + "round": self.current_round, + "reasoning": yaml_data.get("reasoning", ""), + "code": code, + "result_summary": self._summarize_result(result), + "evidence_rows": result.get("evidence_rows", []), + "raw_log": feedback, + "auto_exported_files": result.get("auto_exported_files", []), + "prompt_saved_files": result.get("prompt_saved_files", []), +} +``` + +The agent appends `round_data` to `SessionData.rounds` (accessed via the progress callback or a direct reference). File metadata from both `auto_exported_files` and `prompt_saved_files` is merged into `SessionData.data_files`. + +**`_summarize_result()`**: Produces a one-line summary from the execution result — e.g., "执行成功,输出 DataFrame (150行×8列)" or "执行失败: KeyError: 'col_x'". + +### 3. SessionData Extension (`web/main.py`) + +```python +class SessionData: + def __init__(self, session_id: str): + # ... existing fields ... + self.rounds: List[Dict] = [] # Round_Data objects + self.data_files: List[Dict] = [] # File metadata dicts +``` + +Persistence: `rounds` and `data_files` are written to `results.json` on analysis completion (existing pattern). + +### 4. API Changes (`web/main.py`) + +**`GET /api/status`** — add `rounds` to response: +```python +return { + # ... existing fields ... 
+ "rounds": session.rounds, +} +``` + +**`GET /api/data-files`** — new endpoint: +```python +@app.get("/api/data-files") +async def list_data_files(session_id: str = Query(...)): + # Returns session.data_files + fallback directory scan +``` + +**`GET /api/data-files/preview`** — new endpoint: +```python +@app.get("/api/data-files/preview") +async def preview_data_file(session_id: str = Query(...), filename: str = Query(...)): + # Reads CSV/XLSX, returns {columns: [...], rows: [...first 5...]} +``` + +**`GET /api/data-files/download`** — new endpoint: +```python +@app.get("/api/data-files/download") +async def download_data_file(session_id: str = Query(...), filename: str = Query(...)): + # Returns FileResponse with appropriate MIME type +``` + +**`GET /api/report`** — enhanced response: +```python +return { + "content": content, + "base_path": web_base_path, + "paragraphs": paragraphs, + "supporting_data": supporting_data_map, # NEW: {paragraph_id: [evidence_rows]} +} +``` + +### 5. Prompt Changes (`prompts.py`) + +Add to `data_analysis_system_prompt` after the existing code generation rules: + +``` +**中间数据保存规则**: +- 当你生成了有价值的中间数据(筛选子集、聚合表、聚类结果等),请主动保存为CSV/XLSX文件。 +- 保存后必须打印标记行:`[DATA_FILE_SAVED] filename: {文件名}, rows: {行数}, description: {描述}` +- 示例: + ```python + top_issues.to_csv(os.path.join(session_output_dir, "TOP问题汇总.csv"), index=False) + print(f"[DATA_FILE_SAVED] filename: TOP问题汇总.csv, rows: {len(top_issues)}, description: 各类型TOP问题聚合统计") + ``` +``` + +Add to `final_report_system_prompt` for evidence annotation: + +``` +**证据标注规则**: +- 当报告段落的结论来源于某一轮分析的数据,请在段落末尾添加HTML注释标注:`<!-- evidence:round_N -->` +- N 为产生该数据的分析轮次编号(从1开始) +- 示例:某段落描述了第3轮分析发现的车型分布规律,则在段落末尾添加 `<!-- evidence:round_3 -->` +``` + +### 6. 
Frontend Changes + +**`index.html`**: +- Replace tab labels: "Live Log" → "执行过程", add "数据文件", keep "Report" +- Remove Gallery tab HTML and carousel container +- Add Execution Process tab container with round card template +- Add Data Files tab container with file card template + +**`script.js`**: +- Remove gallery functions and state +- Add `renderRoundCards(rounds)` — incremental rendering using a `lastRenderedRound` counter +- Add `loadDataFiles()`, `previewDataFile(filename)`, `downloadDataFile(filename)` +- Modify `startPolling()` to call `renderRoundCards()` and `loadDataFiles()` on each cycle +- Add `showSupportingData(paraId)` for the evidence popover +- Modify `renderParagraphReport()` to add "查看支撑数据" buttons when `supporting_data[paraId]` exists +- Update `switchTab()` to handle `execution`, `datafiles`, `report` + +**`clean_style.css`**: +- Add `.round-card`, `.round-card-header`, `.round-card-body` styles +- Add `.data-file-card`, `.data-preview-table` styles +- Add `.supporting-data-btn`, `.supporting-data-popover` styles +- Remove `.carousel-*` styles + +## Data Models + +### Round_Data (Python dict) + +```python +{ + "round": int, # 1-indexed round number + "reasoning": str, # LLM reasoning text (may be empty) + "code": str, # Generated Python code + "result_summary": str, # One-line execution summary + "evidence_rows": list[dict], # Up to 10 rows as [{col: val, ...}] + "raw_log": str, # Full execution feedback text + "auto_exported_files": list[dict], # Auto-detected DataFrame exports + "prompt_saved_files": list[dict], # LLM-guided file saves +} +``` + +### File Metadata (Python dict) + +```python +{ + "filename": str, # e.g., "top_issues.csv" + "description": str, # Human-readable description + "rows": int, # Row count + "cols": int, # Column count (optional, may be 0) + "columns": list[str], # Column names (optional) + "size_bytes": int, # File size + "source": str, # "auto" | "prompt" — how the file was created +} +``` + +### SessionData 
Extension + +```python +class SessionData: + rounds: List[Dict] = [] # List of Round_Data dicts + data_files: List[Dict] = [] # List of File Metadata dicts +``` + +### API Response: GET /api/status (extended) + +```json +{ + "is_running": true, + "log": "...", + "has_report": false, + "rounds": [ + { + "round": 1, + "reasoning": "正在执行阶段1...", + "code": "import pandas as pd\n...", + "result_summary": "执行成功,输出 DataFrame (150行×8列)", + "evidence_rows": [{"车型": "...", "模块": "..."}], + "raw_log": "..." + } + ], + "progress_percentage": 25.0, + "current_round": 1, + "max_rounds": 20, + "status_message": "第1/20轮分析中..." +} +``` + +### API Response: GET /api/data-files + +```json +{ + "files": [ + { + "filename": "top_issues.csv", + "description": "各类型TOP问题聚合统计", + "rows": 25, + "cols": 6, + "size_bytes": 2048 + } + ] +} +``` + +### API Response: GET /api/report (extended) + +```json +{ + "content": "...", + "base_path": "/outputs/session_xxx", + "paragraphs": [...], + "supporting_data": { + "p-3": [{"车型": "A", "模块": "TSP", "数量": 42}], + "p-7": [{"问题类型": "远控", "占比": "35%"}] + } +} +``` + +## Correctness Properties + +*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.* + +### Property 1: Round_Data Structural Completeness and Ordering + +*For any* sequence of analysis rounds (varying in count from 1 to N, with varying execution results including successes, failures, and missing YAML fields), every Round_Data object appended to `SessionData.rounds` SHALL contain all required fields (`round`, `reasoning`, `code`, `result_summary`, `evidence_rows`, `raw_log`) with correct types, and the list SHALL preserve insertion order (i.e., `rounds[i].round <= rounds[i+1].round` for all consecutive pairs). 
+ +**Validates: Requirements 1.1, 1.3, 1.4** + +### Property 2: Evidence Capture Bounded and Correctly Serialized + +*For any* DataFrame of arbitrary size (0 to 10,000 rows, 1 to 50 columns) produced by code execution, the evidence capture SHALL return a list of at most 10 dictionaries, where each dictionary's keys exactly match the DataFrame's column names, and the list length equals `min(10, len(dataframe))`. + +**Validates: Requirements 4.1, 4.2, 4.3** + +### Property 3: Filename Deduplication Uniqueness + +*For any* sequence of auto-export operations (1 to 20) targeting the same variable name in the same session directory, all generated filenames SHALL be unique (no two exports produce the same filename), and no previously existing file SHALL be overwritten. + +**Validates: Requirements 5.3** + +### Property 4: Auto-Export Metadata Completeness + +*For any* newly detected DataFrame variable (with arbitrary variable name, row count, column count, and column names), the auto-export metadata dict SHALL contain all required fields (`variable_name`, `filename`, `rows`, `cols`, `columns`) with values matching the source DataFrame's actual properties. + +**Validates: Requirements 5.4, 5.5** + +### Property 5: DATA_FILE_SAVED Marker Parsing Round-Trip + +*For any* valid filename string (alphanumeric, Chinese characters, underscores, hyphens, with .csv or .xlsx extension), any positive integer row count, and any non-empty description string, formatting these values into the standardized marker format `[DATA_FILE_SAVED] filename: {name}, rows: {count}, description: {desc}` and then parsing the marker SHALL recover the original filename, row count, and description exactly. 
+ +**Validates: Requirements 6.3** + +### Property 6: Data File Preview Bounded Rows + +*For any* CSV file containing 0 to 10,000 rows and 1 to 50 columns, the preview function SHALL return a result with `columns` matching the file's column names exactly, and `rows` containing at most 5 dictionaries, where each dictionary's keys match the column names. + +**Validates: Requirements 7.2** + +### Property 7: Evidence Annotation Parsing Correctness + +*For any* Markdown report text containing a mix of paragraphs with and without `<!-- evidence:round_N -->` annotations (where N varies from 1 to 100), the annotation parser SHALL: (a) correctly extract the round number for every annotated paragraph, (b) exclude non-annotated paragraphs from the `supporting_data` mapping, and (c) produce a mapping where each key is a valid paragraph ID and each value references a valid round number. + +**Validates: Requirements 11.3, 11.4** + +### Property 8: SessionData JSON Serialization Round-Trip + +*For any* `SessionData` instance with arbitrary `rounds` (list of Round_Data dicts) and `data_files` (list of file metadata dicts), serializing these to JSON and deserializing back SHALL produce lists that are equal to the originals. + +**Validates: Requirements 12.4** + +## Error Handling + +### CodeExecutor Errors +- **DataFrame evidence capture failure**: If `.head(10).to_dict(orient='records')` raises an exception (e.g., mixed types, memory issues), catch the exception and return an empty `evidence_rows` list. Log a warning but do not fail the execution. +- **Auto-export failure**: If CSV writing fails for a detected DataFrame (e.g., permission error, disk full), catch the exception, log a warning with the variable name, and skip that export. Other detected DataFrames should still be exported. +- **Marker parsing failure**: If a `[DATA_FILE_SAVED]` line doesn't match the expected format, skip it silently. Malformed markers should not crash the execution pipeline. 
+ +### API Errors +- **Missing session**: All new endpoints return HTTP 404 with `{"detail": "Session not found"}` for invalid session IDs. +- **Missing file**: `GET /api/data-files/preview` and `GET /api/data-files/download` return HTTP 404 with `{"detail": "File not found: {filename}"}` when the requested file doesn't exist in the session directory. +- **Corrupt CSV**: If a CSV file can't be read by pandas during preview, return HTTP 500 with `{"detail": "Failed to read file: {error}"}`. + +### Frontend Errors +- **Polling with missing rounds**: If `rounds` is undefined or null in the status response, treat it as an empty array. Don't crash the rendering loop. +- **Evidence popover with empty data**: If `supporting_data[paraId]` is an empty array, don't show the button (same as missing). +- **Incremental rendering mismatch**: If `rounds.length < lastRenderedRound` (server restart scenario), reset `lastRenderedRound` to 0 and re-render all cards. + +### Agent Errors +- **Missing reasoning field**: Already handled — store empty string (Requirement 1.4). +- **Evidence annotation missing**: Already handled — paragraphs without annotations simply don't get supporting data buttons. This is by design, not an error. + +## Testing Strategy + +### Property-Based Tests (Hypothesis) + +The project already uses `hypothesis` with `max_examples=20` for fast execution (see `tests/test_properties.py`). New property tests will follow the same pattern. + +**Library**: `hypothesis` (already installed) +**Configuration**: `max_examples=100` minimum per property (increased from existing 20 for new properties) +**Tag format**: `Feature: analysis-dashboard-redesign, Property {N}: {title}` + +Properties to implement: +1. **Round_Data structural completeness** — Generate random execution results, verify Round_Data fields +2. **Evidence capture bounded** — Generate random DataFrames, verify evidence row count and format +3. 
**Filename deduplication** — Generate sequences of same-name exports, verify uniqueness +4. **Auto-export metadata** — Generate random DataFrames, verify metadata fields +5. **Marker parsing round-trip** — Generate random filenames/rows/descriptions, verify parse(format(x)) == x +6. **Preview bounded rows** — Generate random CSVs, verify preview row count and columns +7. **Evidence annotation parsing** — Generate random annotated Markdown, verify extraction +8. **SessionData JSON round-trip** — Generate random rounds/data_files, verify serialize/deserialize identity + +### Unit Tests + +- Prompt content assertions (6.1, 6.2, 11.2): Verify prompt strings contain required instruction text +- SessionData initialization (12.1, 12.2): Verify new attributes exist with correct defaults +- API response shape (2.1, 2.3): Verify status endpoint returns rounds array and log field +- Tab switching (9.4): Verify switchTab handles new tab identifiers + +### Integration Tests + +- End-to-end round capture: Run a mini analysis session, verify rounds are populated +- Data file API flow: Create files, call list/preview/download endpoints, verify responses +- Report evidence linking: Generate a report with annotations, call report API, verify supporting_data mapping + +### Manual Testing + +- UI layout verification (3.1-3.6, 8.1-8.5, 9.1-9.3, 10.1-10.4): Visual inspection of tab layout, round cards, data file cards, inline images, and supporting data popovers diff --git a/.kiro/specs/analysis-dashboard-redesign/requirements.md b/.kiro/specs/analysis-dashboard-redesign/requirements.md new file mode 100644 index 0000000..a8ea48f --- /dev/null +++ b/.kiro/specs/analysis-dashboard-redesign/requirements.md @@ -0,0 +1,159 @@ +# Requirements Document + +## Introduction + +This feature redesigns the Analysis Dashboard from the current 3-tab layout (Live Log, Report, Gallery) to a new 3-tab layout (Execution Process, Data Files, Report) with richer functionality. 
The redesign introduces structured round-by-round execution cards, intermediate data file browsing, inline image display within the report, and a data evidence/supporting data feature that links analysis conclusions to the specific data rows that support them. The Gallery tab is removed; its functionality is absorbed into the Report tab. + +## Glossary + +- **Dashboard**: The main analysis output panel in the web frontend (`index.html`) containing tabs for viewing analysis results. +- **Execution_Process_Tab**: The new first tab (执行过程) replacing the Live Log tab, displaying analysis rounds as collapsible cards. +- **Round_Card**: A collapsible UI card within the Execution_Process_Tab representing one analysis round, containing reasoning, code, result summary, data evidence, and raw log. +- **Data_Files_Tab**: The new second tab (数据文件) showing intermediate data files produced during analysis. +- **Report_Tab**: The enhanced third tab (报告) with inline images and supporting data links. +- **Data_Evidence**: Specific data rows extracted during analysis that support a particular analytical conclusion or claim. +- **CodeExecutor**: The Python class (`utils/code_executor.py`) responsible for executing generated analysis code in an IPython environment. +- **DataAnalysisAgent**: The Python class (`data_analysis_agent.py`) orchestrating the multi-round LLM-driven analysis workflow. +- **SessionData**: The Python class (`web/main.py`) tracking per-session state including running status, output directory, and analysis results. +- **Status_API**: The `GET /api/status` endpoint polled every 2 seconds by the frontend to retrieve analysis progress. +- **Data_Files_API**: The new set of API endpoints (`GET /api/data-files`, `GET /api/data-files/preview`, `GET /api/data-files/download`) for listing, previewing, and downloading intermediate data files. 
+- **Round_Data**: A structured JSON object representing one analysis round, containing fields for reasoning, code, execution result summary, data evidence rows, and raw log output. +- **Auto_Detection**: The mechanism by which CodeExecutor automatically detects new DataFrames created during code execution and exports them as files. +- **Prompt_Guidance**: Instructions embedded in the system prompt that direct the LLM to proactively save intermediate analysis results as files. + +## Requirements + +### Requirement 1: Structured Round Data Capture + +**User Story:** As a user, I want each analysis round's data to be captured in a structured format, so that the frontend can render rich execution cards instead of raw log text. + +#### Acceptance Criteria + +1. WHEN an analysis round completes, THE DataAnalysisAgent SHALL produce a Round_Data object containing the following fields: round number, AI reasoning text, generated code, execution result summary, data evidence rows (list of dictionaries), and raw log output. +2. WHEN the DataAnalysisAgent processes an LLM response with a YAML `reasoning` field, THE DataAnalysisAgent SHALL extract and store the reasoning text in the Round_Data object for that round. +3. THE DataAnalysisAgent SHALL append each completed Round_Data object to a list stored on the SessionData instance, preserving insertion order. +4. IF the LLM response does not contain a parseable `reasoning` field, THEN THE DataAnalysisAgent SHALL store an empty string as the reasoning text in the Round_Data object. + +### Requirement 2: Structured Status API Response + +**User Story:** As a frontend developer, I want the status API to return structured round data, so that I can render execution cards in real time. + +#### Acceptance Criteria + +1. 
WHEN the frontend polls `GET /api/status`, THE Status_API SHALL return a JSON response containing a `rounds` array of Round_Data objects in addition to the existing fields (`is_running`, `has_report`, `progress_percentage`, `current_round`, `max_rounds`, `status_message`). +2. WHEN a new analysis round completes between two polling intervals, THE Status_API SHALL include the newly completed Round_Data object in the `rounds` array on the next poll response. +3. THE Status_API SHALL continue to return the `log` field containing raw log text for backward compatibility. + +### Requirement 3: Execution Process Tab UI + +**User Story:** As a user, I want to see each analysis round as a collapsible card with reasoning, code, results, and data evidence, so that I can understand the step-by-step analysis process. + +#### Acceptance Criteria + +1. THE Dashboard SHALL display an "执行过程" (Execution Process) tab as the first tab, replacing the current "Live Log" tab. +2. WHEN the Execution_Process_Tab is active, THE Dashboard SHALL render one Round_Card for each entry in the `rounds` array returned by the Status_API. +3. THE Round_Card SHALL default to a collapsed state showing only the round number and a one-line execution result summary. +4. WHEN a user clicks on a collapsed Round_Card, THE Dashboard SHALL expand the card to reveal: AI reasoning text, generated code (in a collapsible sub-section), execution result summary, data evidence section (labeled "本轮数据案例"), and raw log output (in a collapsible sub-section). +5. WHEN a new Round_Data object appears in the polling response, THE Dashboard SHALL append a new Round_Card to the Execution_Process_Tab without removing or re-rendering existing cards. +6. WHILE analysis is running, THE Dashboard SHALL auto-scroll the Execution_Process_Tab to keep the latest Round_Card visible. 
+ +### Requirement 4: Data Evidence Capture + +**User Story:** As a user, I want to see the specific data rows that support each analytical conclusion, so that I can verify claims made by the AI agent. + +#### Acceptance Criteria + +1. WHEN the CodeExecutor executes code that produces a DataFrame result, THE CodeExecutor SHALL capture up to 10 representative rows from that DataFrame as the data evidence for the current round. +2. THE CodeExecutor SHALL serialize data evidence rows as a list of dictionaries (one dictionary per row, keys being column names) and include the list in the execution result returned to the DataAnalysisAgent. +3. IF the code execution does not produce a DataFrame result, THEN THE CodeExecutor SHALL return an empty list as the data evidence. +4. THE DataAnalysisAgent SHALL include the data evidence list in the Round_Data object for the corresponding round. + +### Requirement 5: DataFrame Auto-Detection and Export + +**User Story:** As a user, I want intermediate DataFrames created during analysis to be automatically saved as files, so that I can browse and download them from the Data Files tab. + +#### Acceptance Criteria + +1. WHEN code execution completes, THE CodeExecutor SHALL compare the set of DataFrame variables in the IPython namespace before and after execution to detect newly created DataFrames. +2. WHEN a new DataFrame variable is detected, THE CodeExecutor SHALL export the DataFrame to the session output directory as a CSV file named `{variable_name}.csv`. +3. IF a file with the same name already exists in the session output directory, THEN THE CodeExecutor SHALL append a numeric suffix (e.g., `_1`, `_2`) to avoid overwriting. +4. THE CodeExecutor SHALL record metadata for each auto-exported file: variable name, filename, row count, column count, and column names. +5. WHEN auto-export completes, THE CodeExecutor SHALL include the exported file metadata in the execution result returned to the DataAnalysisAgent. 
+ +### Requirement 6: Prompt Guidance for Intermediate File Saving + +**User Story:** As a user, I want the LLM to proactively save intermediate analysis results as files, so that important intermediate datasets are available for review. + +#### Acceptance Criteria + +1. THE system prompt (`prompts.py`) SHALL include instructions directing the LLM to save intermediate analysis results (filtered subsets, aggregation tables, clustering results) as CSV or XLSX files in the `session_output_dir`. +2. THE system prompt SHALL instruct the LLM to print a standardized marker line after saving each file, in the format: `[DATA_FILE_SAVED] filename: {name}, rows: {count}, description: {desc}`. +3. WHEN the CodeExecutor detects a `[DATA_FILE_SAVED]` marker in the execution output, THE CodeExecutor SHALL parse the marker and record the file metadata (filename, row count, description). + +### Requirement 7: Data Files API + +**User Story:** As a frontend developer, I want API endpoints to list, preview, and download intermediate data files, so that the Data Files tab can display and serve them. + +#### Acceptance Criteria + +1. WHEN the frontend requests `GET /api/data-files?session_id={id}`, THE Data_Files_API SHALL return a JSON array of file entries, each containing: filename, description, row count, column count, and file size in bytes. +2. WHEN the frontend requests `GET /api/data-files/preview?session_id={id}&filename={name}`, THE Data_Files_API SHALL return a JSON object containing: column names (list of strings), and up to 5 data rows (list of dictionaries). +3. WHEN the frontend requests `GET /api/data-files/download?session_id={id}&filename={name}`, THE Data_Files_API SHALL return the file as a downloadable attachment with the appropriate MIME type (`text/csv` for CSV, `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` for XLSX). +4. IF the requested file does not exist, THEN THE Data_Files_API SHALL return HTTP 404 with a descriptive error message. 
+ +### Requirement 8: Data Files Tab UI + +**User Story:** As a user, I want to browse intermediate data files produced during analysis, preview their contents, and download them individually. + +#### Acceptance Criteria + +1. THE Dashboard SHALL display a "数据文件" (Data Files) tab as the second tab. +2. WHEN the Data_Files_Tab is active, THE Dashboard SHALL fetch the file list from `GET /api/data-files` and render each file as a card showing: filename, description, and row count. +3. WHEN a user clicks on a file card, THE Dashboard SHALL fetch the preview from `GET /api/data-files/preview` and display a table showing column headers and up to 5 data rows. +4. WHEN a user clicks the download button on a file card, THE Dashboard SHALL initiate a file download via `GET /api/data-files/download`. +5. WHILE analysis is running, THE Dashboard SHALL refresh the file list on each polling cycle to show newly created files. + +### Requirement 9: Gallery Removal and Inline Images in Report + +**User Story:** As a user, I want images displayed inline within report paragraphs instead of in a separate Gallery tab, so that visual evidence is presented in context. + +#### Acceptance Criteria + +1. THE Dashboard SHALL remove the "Gallery" tab from the tab bar. +2. THE Dashboard SHALL remove the gallery carousel UI (carousel container, navigation buttons, image info panel) from the HTML. +3. THE Report_Tab SHALL render images inline within report paragraphs using standard Markdown image syntax (`![alt text](image_path)`), as already supported by the existing `marked.js` rendering. +4. THE `switchTab` function in `script.js` SHALL handle only the three new tab identifiers: `execution`, `datafiles`, and `report`. +5. THE frontend SHALL remove all gallery-related JavaScript functions (`loadGallery`, `renderGalleryImage`, `prevImage`, `nextImage`) and associated state variables (`galleryImages`, `currentImageIndex`).
+ +### Requirement 10: Supporting Data Button in Report + +**User Story:** As a user, I want report paragraphs that make data-driven claims to have a "查看支撑数据" button, so that I can view the evidence data that supports each conclusion. + +#### Acceptance Criteria + +1. WHEN the Report_Tab renders a paragraph of type `text` that has associated data evidence, THE Dashboard SHALL display a "查看支撑数据" (View Supporting Data) button below the paragraph content. +2. WHEN a user clicks the "查看支撑数据" button, THE Dashboard SHALL display a popover or modal showing the associated data evidence rows in a table format. +3. THE `GET /api/report` response SHALL include a `supporting_data` mapping (keyed by paragraph ID) containing the data evidence rows relevant to each paragraph. +4. IF a paragraph has no associated data evidence, THEN THE Dashboard SHALL not display the "查看支撑数据" button for that paragraph. + +### Requirement 11: Report-to-Evidence Linking in Backend + +**User Story:** As a backend developer, I want the system to associate data evidence from execution rounds with report paragraphs, so that the frontend can display supporting data buttons. + +#### Acceptance Criteria + +1. WHEN generating the final report, THE DataAnalysisAgent SHALL pass the collected data evidence from all rounds to the report generation prompt. +2. THE final report generation prompt SHALL instruct the LLM to annotate report paragraphs with round references (e.g., `<!-- evidence:round_3 -->`) when a paragraph's content is derived from a specific analysis round. +3. WHEN the `GET /api/report` endpoint parses the report, THE backend SHALL extract evidence annotations and build a `supporting_data` mapping by looking up the referenced round's data evidence from the SessionData. +4. IF a paragraph contains no evidence annotation, THEN THE backend SHALL exclude that paragraph from the `supporting_data` mapping.
+ +### Requirement 12: Session Data Model Extension + +**User Story:** As a backend developer, I want the SessionData model to store structured round data and data file metadata, so that the new API endpoints can serve this information. + +#### Acceptance Criteria + +1. THE SessionData class SHALL include a `rounds` attribute (list of Round_Data dictionaries) to store structured data for each completed analysis round. +2. THE SessionData class SHALL include a `data_files` attribute (list of file metadata dictionaries) to store information about intermediate data files. +3. WHEN a new data file is detected (via auto-detection or prompt-guided saving), THE DataAnalysisAgent SHALL append the file metadata to the SessionData `data_files` list. +4. THE SessionData class SHALL persist the `rounds` and `data_files` attributes to the session's `results.json` file upon analysis completion. diff --git a/.kiro/specs/analysis-dashboard-redesign/tasks.md b/.kiro/specs/analysis-dashboard-redesign/tasks.md new file mode 100644 index 0000000..83cf3bf --- /dev/null +++ b/.kiro/specs/analysis-dashboard-redesign/tasks.md @@ -0,0 +1,102 @@ +# Tasks: Analysis Dashboard Redesign + +## Phase 1: Backend Data Model + API Changes (Foundation) + +- [x] 1. Extend SessionData model + - [x] 1.1 Add `rounds: List[Dict]` attribute to `SessionData.__init__()` in `web/main.py`, initialized to empty list + - [x] 1.2 Add `data_files: List[Dict]` attribute to `SessionData.__init__()` in `web/main.py`, initialized to empty list + - [x] 1.3 Update `_reconstruct_session()` to load `rounds` and `data_files` from `results.json` when reconstructing historical sessions + - [x] 1.4 Update `run_analysis_task()` to persist `session.rounds` and `session.data_files` to `results.json` on analysis completion + +- [x] 2. 
Update Status API response + - [x] 2.1 Add `rounds` field to `GET /api/status` response dict, returning `session.rounds` + - [x] 2.2 Verify backward compatibility: ensure `log`, `is_running`, `has_report`, `progress_percentage`, `current_round`, `max_rounds`, `status_message` fields remain unchanged + +- [x] 3. Add Data Files API endpoints + - [x] 3.1 Implement `GET /api/data-files` endpoint: return `session.data_files` merged with fallback directory scan for CSV/XLSX files, each entry containing filename, description, rows, cols, size_bytes + - [x] 3.2 Implement `GET /api/data-files/preview` endpoint: read CSV/XLSX via pandas, return `{columns: [...], rows: [...first 5 rows as dicts...]}`; return 404 if file not found + - [x] 3.3 Implement `GET /api/data-files/download` endpoint: return `FileResponse` with correct MIME type (`text/csv` or `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`); return 404 if file not found + +- [x] 4. Enhance Report API for evidence linking + - [x] 4.1 Implement `_extract_evidence_annotations(paragraphs, session)` function: parse `<!-- evidence:round_N -->` comments from paragraph content, look up `session.rounds[N-1].evidence_rows`, build `supporting_data` mapping keyed by paragraph ID + - [x] 4.2 Update `GET /api/report` to include `supporting_data` mapping in response JSON + +## Phase 2: CodeExecutor Enhancements + +- [x] 5. Add evidence capture to CodeExecutor + - [x] 5.1 In `execute_code()`, after successful execution, check if `result.result` is a DataFrame; if so, capture `result.result.head(10).to_dict(orient='records')` as `evidence_rows`; wrap in try/except returning empty list on failure + - [x] 5.2 Also check the last-assigned DataFrame variable in the namespace as a fallback evidence source when `result.result` is not a DataFrame + - [x] 5.3 Include `evidence_rows` key in the returned result dict + +- [x] 6.
Add DataFrame auto-detection and export + - [x] 6.1 Before `shell.run_cell(code)`, snapshot DataFrame variables: `{name: id(obj) for name, obj in shell.user_ns.items() if isinstance(obj, pd.DataFrame)}` + - [x] 6.2 After execution, compare snapshots to detect new or changed DataFrame variables + - [x] 6.3 For each new DataFrame, export to `{output_dir}/{var_name}.csv` with numeric suffix deduplication if file exists + - [x] 6.4 Record metadata for each export: `{variable_name, filename, rows, cols, columns}` in `auto_exported_files` list + - [x] 6.5 Include `auto_exported_files` key in the returned result dict + +- [x] 7. Add DATA_FILE_SAVED marker parsing + - [x] 7.1 After execution, scan `captured.stdout` for lines matching `[DATA_FILE_SAVED] filename: {name}, rows: {count}, description: {desc}` + - [x] 7.2 Parse each marker line and record `{filename, rows, description}` in `prompt_saved_files` list + - [x] 7.3 Include `prompt_saved_files` key in the returned result dict + +## Phase 3: Agent Changes + +- [x] 8. Structured Round_Data construction in DataAnalysisAgent + - [x] 8.1 Add `_summarize_result(result)` method: produce one-line summary from execution result (e.g., "执行成功,输出 DataFrame (150行×8列)" or "执行失败: {error}") + - [x] 8.2 In `_handle_generate_code()`, construct `round_data` dict with fields: round, reasoning (from `yaml_data.get("reasoning", "")`), code, result_summary, evidence_rows, raw_log, auto_exported_files, prompt_saved_files + - [x] 8.3 After constructing round_data, append it to `SessionData.rounds` (via progress callback or direct reference) + - [x] 8.4 Merge file metadata from `auto_exported_files` and `prompt_saved_files` into `SessionData.data_files` + +- [x] 9. 
Update system prompts + - [x] 9.1 Add intermediate data saving instructions to `data_analysis_system_prompt` in `prompts.py`: instruct LLM to save intermediate results and print `[DATA_FILE_SAVED]` marker + - [x] 9.2 Add evidence annotation instructions to `final_report_system_prompt` in `prompts.py`: instruct LLM to add `<!-- evidence:round_N -->` comments to report paragraphs + - [x] 9.3 Update `_build_final_report_prompt()` in `data_analysis_agent.py` to include collected evidence data from all rounds in the prompt context + +## Phase 4: Frontend Tab Restructuring + +- [x] 10. HTML restructuring + - [x] 10.1 In `index.html`, replace tab labels: "Live Log" → "执行过程", add "数据文件" tab, keep "Report"; remove "Gallery" tab + - [x] 10.2 Replace the `logsTab` div content with an Execution Process container (`executionTab`) containing a scrollable round-cards wrapper + - [x] 10.3 Add a `datafilesTab` div with a file-cards grid container and a preview panel area + - [x] 10.4 Remove the Gallery tab HTML: carousel container, navigation buttons, image info panel + +- [x] 11. JavaScript: Execution Process Tab + - [x] 11.1 Add `lastRenderedRound` state variable and `renderRoundCards(rounds)` function: compare `rounds.length` with `lastRenderedRound`, create and append new Round_Card DOM elements for new entries only + - [x] 11.2 Implement Round_Card HTML generation: collapsed state shows round number + result_summary; expanded state shows reasoning, code (collapsible), result_summary, evidence table ("本轮数据案例"), raw log (collapsible) + - [x] 11.3 Add click handler for Round_Card toggle (collapse/expand) + - [x] 11.4 Add auto-scroll logic: when analysis is running, scroll Execution Process container to bottom after appending new cards + +- [x] 12.
JavaScript: Data Files Tab + - [x] 12.1 Implement `loadDataFiles()`: fetch `GET /api/data-files`, render file cards showing filename, description, row count + - [x] 12.2 Implement `previewDataFile(filename)`: fetch `GET /api/data-files/preview`, render a table with column headers and up to 5 rows + - [x] 12.3 Implement `downloadDataFile(filename)`: trigger download via `GET /api/data-files/download` + - [x] 12.4 In `startPolling()`, call `loadDataFiles()` on each polling cycle when Data Files tab is active or when analysis is running + +- [x] 13. JavaScript: Gallery removal and tab updates + - [x] 13.1 Remove gallery functions: `loadGallery`, `renderGalleryImage`, `prevImage`, `nextImage` and state variables `galleryImages`, `currentImageIndex` + - [x] 13.2 Update `switchTab()` to handle `execution`, `datafiles`, `report` identifiers instead of `logs`, `report`, `gallery` + - [x] 13.3 Update `startPolling()` to call `renderRoundCards()` with `data.rounds` on each polling cycle + +- [x] 14. JavaScript: Supporting data in Report + - [x] 14.1 Update `loadReport()` to store `supporting_data` mapping from API response + - [x] 14.2 Update `renderParagraphReport()` to add "查看支撑数据" button below paragraphs that have entries in `supporting_data` + - [x] 14.3 Implement `showSupportingData(paraId)`: display a popover/modal with evidence rows rendered as a table + +- [x] 15. CSS updates + - [x] 15.1 Add `.round-card`, `.round-card-header`, `.round-card-body`, `.round-card-collapsed`, `.round-card-expanded` styles + - [x] 15.2 Add `.data-file-card`, `.data-preview-table` styles + - [x] 15.3 Add `.supporting-data-btn`, `.supporting-data-popover` styles + - [x] 15.4 Remove `.carousel-*` styles (carousel-container, carousel-slide, carousel-btn, image-info, image-title, image-desc) + +## Phase 5: Property-Based Tests + +- [x] 16. 
Write property-based tests + - [x] 16.1 ~PBT~ Property 1: Round_Data structural completeness — generate random execution results, verify all required fields present with correct types and insertion order preserved + - [x] 16.2 ~PBT~ Property 2: Evidence capture bounded — generate random DataFrames (0-10000 rows, 1-50 cols), verify evidence_rows length <= 10 and each row dict has correct keys + - [x] 16.3 ~PBT~ Property 3: Filename deduplication — generate sequences of same-name exports (1-20), verify all filenames unique + - [x] 16.4 ~PBT~ Property 4: Auto-export metadata completeness — generate random DataFrames, verify metadata contains variable_name, filename, rows, cols, columns with correct values + - [x] 16.5 ~PBT~ Property 5: DATA_FILE_SAVED marker parsing round-trip — generate random filenames/rows/descriptions, verify parse(format(x)) == x + - [x] 16.6 ~PBT~ Property 6: Data file preview bounded rows — generate random CSVs (0-10000 rows), verify preview returns at most 5 rows with correct column names + - [x] 16.7 ~PBT~ Property 7: Evidence annotation parsing — generate random annotated Markdown, verify correct round extraction and non-annotated paragraph exclusion + - [x] 16.8 ~PBT~ Property 8: SessionData JSON round-trip — generate random rounds/data_files, verify serialize then deserialize produces equal data diff --git a/__init__.py b/__init__.py index ff71db5..b737d8c 100644 --- a/__init__.py +++ b/__init__.py @@ -5,9 +5,20 @@ Data Analysis Agent Package 一个基于LLM的智能数据分析代理,专门为Jupyter Notebook环境设计。 """ -from .core.notebook_agent import NotebookAgent -from .config.llm_config import LLMConfig -from .utils.code_executor import CodeExecutor +try: + from .core.notebook_agent import NotebookAgent +except (ImportError, ModuleNotFoundError): + NotebookAgent = None + +try: + from .config.llm_config import LLMConfig +except (ImportError, ModuleNotFoundError): + from config.llm_config import LLMConfig + +try: + from .utils.code_executor import CodeExecutor +except 
(ImportError, ModuleNotFoundError): + from utils.code_executor import CodeExecutor __version__ = "1.0.0" __author__ = "Data Analysis Agent Team" diff --git a/config/app_config.py b/config/app_config.py index 3e75b36..c95d76f 100644 --- a/config/app_config.py +++ b/config/app_config.py @@ -5,7 +5,7 @@ import os from dataclasses import dataclass, field -from typing import List, Optional +from typing import Optional @dataclass @@ -30,12 +30,6 @@ class AppConfig: # 代码执行配置 code_timeout: int = field(default=300) # 代码执行超时(秒) - allowed_imports: List[str] = field(default_factory=lambda: [ - 'pandas', 'numpy', 'matplotlib', 'seaborn', 'plotly', - 'scipy', 'sklearn', 'duckdb', 'datetime', 'json', - 'os', 're', 'pathlib', 'glob', 'typing', 'collections', - 'itertools', 'functools', 'warnings' - ]) # Web配置 web_host: str = field(default="0.0.0.0") diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..a4d552d --- /dev/null +++ b/conftest.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +"""Root conftest.py — configures pytest to find project modules.""" +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) diff --git a/data_analysis_agent.py b/data_analysis_agent.py index face4b7..4778e4f 100644 --- a/data_analysis_agent.py +++ b/data_analysis_agent.py @@ -10,20 +10,33 @@ import os import json +import re import yaml from typing import Dict, Any, List, Optional +from concurrent.futures import ThreadPoolExecutor, as_completed from utils.create_session_dir import create_session_output_dir from utils.format_execution_result import format_execution_result from utils.extract_code import extract_code_from_response -from utils.data_loader import load_and_profile_data +from utils.data_loader import load_and_profile_data, load_data_chunked, load_and_profile_data_smart from utils.llm_helper import LLMHelper from utils.code_executor import CodeExecutor from utils.script_generator import generate_reusable_script -from utils.data_privacy import 
build_safe_profile, build_local_profile, sanitize_execution_feedback +from utils.data_privacy import build_safe_profile, build_local_profile, sanitize_execution_feedback, generate_enriched_hint from config.llm_config import LLMConfig +from config.app_config import app_config from prompts import data_analysis_system_prompt, final_report_system_prompt, data_analysis_followup_prompt +# Regex patterns that indicate a data-context error (column/variable/DataFrame issues) +DATA_CONTEXT_PATTERNS = [ + r"KeyError:\s*['\"](.+?)['\"]", + r"ValueError.*(?:column|col|field)", + r"NameError.*(?:df|data|frame)", + r"(?:empty|no\s+data|0\s+rows)", + r"IndexError.*(?:out of range|out of bounds)", +] + + class DataAnalysisAgent: """ 数据分析智能体 @@ -66,6 +79,53 @@ class DataAnalysisAgent: self.data_profile_safe = "" # 存储安全画像(发给LLM) self.data_files = [] # 存储数据文件列表 self.user_requirement = "" # 存储用户需求 + self._progress_callback = None # 进度回调函数 + self._session_ref = None # Reference to SessionData for round tracking + + def set_session_ref(self, session): + """Set a reference to the SessionData instance for appending round data. + + Args: + session: The SessionData instance for the current analysis session. + """ + self._session_ref = session + + def set_progress_callback(self, callback): + """Set a callback function(current_round, max_rounds, message) for progress updates.""" + self._progress_callback = callback + + def _summarize_result(self, result: Dict[str, Any]) -> str: + """Produce a one-line summary from a code execution result. + + Args: + result: The execution result dict from CodeExecutor. + + Returns: + A concise summary string, e.g. "执行成功,输出 DataFrame (150行×8列)" + or "执行失败: KeyError: 'col_x'". 
+ """ + if result.get("success"): + evidence_rows = result.get("evidence_rows", []) + if evidence_rows: + num_rows = len(evidence_rows) + num_cols = len(evidence_rows[0]) if evidence_rows else 0 + # Check auto_exported_files for more accurate row/col counts + auto_files = result.get("auto_exported_files", []) + if auto_files: + last_file = auto_files[-1] + num_rows = last_file.get("rows", num_rows) + num_cols = last_file.get("cols", num_cols) + return f"执行成功,输出 DataFrame ({num_rows}行×{num_cols}列)" + output = result.get("output", "") + if output: + first_line = output.strip().split("\n")[0][:80] + return f"执行成功: {first_line}" + return "执行成功" + else: + error = result.get("error", "未知错误") + if len(error) > 100: + error = error[:100] + "..." + return f"执行失败: {error}" def _process_response(self, response: str) -> Dict[str, Any]: """ @@ -181,6 +241,7 @@ class DataAnalysisAgent: """处理代码生成和执行动作""" # 从YAML数据中获取代码(更准确) code = yaml_data.get("code", "") + reasoning = yaml_data.get("reasoning", "") # 如果YAML中没有代码,尝试从响应中提取 if not code: @@ -190,7 +251,6 @@ class DataAnalysisAgent: if code: code = code.strip() if code.startswith("```"): - import re # 去除开头的 ```python 或 ``` code = re.sub(r"^```[a-zA-Z]*\n", "", code) # 去除结尾的 ``` @@ -211,6 +271,7 @@ class DataAnalysisAgent: return { "action": "generate_code", "code": code, + "reasoning": reasoning, "result": result, "feedback": feedback, "response": response, @@ -221,12 +282,146 @@ class DataAnalysisAgent: print("[WARN] 未从响应中提取到可执行代码,要求LLM重新生成") return { "action": "invalid_response", + "reasoning": reasoning, "error": "响应中缺少可执行代码", "response": response, "continue": True, } - def analyze(self, user_input: str, files: List[str] = None, session_output_dir: str = None, reset_session: bool = True, max_rounds: int = None) -> Dict[str, Any]: + def _classify_error(self, error_message: str) -> str: + """Classify execution error as data-context or other. 
+ + Inspects the error message against DATA_CONTEXT_PATTERNS to determine + if the error is related to data context (missing columns, undefined + data variables, empty DataFrames, etc.). + + Args: + error_message: The error message string from code execution. + + Returns: + "data_context" if the error matches a data-context pattern, + "other" otherwise. + """ + for pattern in DATA_CONTEXT_PATTERNS: + if re.search(pattern, error_message, re.IGNORECASE): + return "data_context" + return "other" + + def _trim_conversation_history(self): + """Apply sliding window trimming to conversation history. + + Retains the first user message (original requirement + Safe_Profile) at + index 0, generates a compressed summary of old messages, and keeps only + the most recent ``conversation_window_size`` message pairs in full. + """ + window_size = app_config.conversation_window_size + max_messages = window_size * 2 # pairs of user+assistant messages + + if len(self.conversation_history) <= max_messages: + return # No trimming needed + + first_message = self.conversation_history[0] # Always retain + + # Determine trim boundary: skip first message + possible existing summary + start_idx = 1 + has_existing_summary = ( + len(self.conversation_history) > 1 + and self.conversation_history[1]["role"] == "user" + and self.conversation_history[1]["content"].startswith("[分析摘要]") + ) + if has_existing_summary: + start_idx = 2 + + # Messages to trim vs keep + messages_to_consider = self.conversation_history[start_idx:] + messages_to_trim = messages_to_consider[:-max_messages] + messages_to_keep = messages_to_consider[-max_messages:] + + if not messages_to_trim: + return + + # Generate summary of trimmed messages + summary = self._compress_trimmed_messages(messages_to_trim) + + # Rebuild history: first_message + summary + recent messages + self.conversation_history = [first_message] + if summary: + self.conversation_history.append({"role": "user", "content": summary}) + 
self.conversation_history.extend(messages_to_keep) + + def _compress_trimmed_messages(self, messages: list) -> str: + """Compress trimmed messages into a concise summary string. + + Extracts the action type from each assistant message and the execution + outcome (success / failure) from the subsequent user feedback message. + Code blocks and raw execution output are excluded. + + Args: + messages: List of conversation message dicts to compress. + + Returns: + A summary string prefixed with ``[分析摘要]``. + """ + summary_parts = ["[分析摘要] 以下是之前分析轮次的概要:"] + round_num = 0 + + for msg in messages: + content = msg["content"] + if msg["role"] == "assistant": + round_num += 1 + # Extract action type from YAML-like content + action = "generate_code" + if "action: \"collect_figures\"" in content or "action: collect_figures" in content: + action = "collect_figures" + elif "action: \"analysis_complete\"" in content or "action: analysis_complete" in content: + action = "analysis_complete" + summary_parts.append(f"- 轮次{round_num}: 动作={action}") + elif msg["role"] == "user" and "代码执行反馈" in content: + success = "失败" if "[ERROR]" in content or "执行错误" in content else "成功" + if summary_parts and summary_parts[-1].startswith("- 轮次"): + summary_parts[-1] += f", 执行结果={success}" + + return "\n".join(summary_parts) + + def _profile_files_parallel(self, file_paths: list) -> tuple: + """Profile multiple files concurrently using ThreadPoolExecutor. + + Each file is profiled independently via ``build_safe_profile`` and + ``build_local_profile``. Results are collected and merged. If any + individual file fails, an error entry is included for that file and + profiling continues for the remaining files. + + Args: + file_paths: List of file paths to profile. + + Returns: + A tuple ``(safe_profile, local_profile)`` of merged markdown strings. 
+ """ + max_workers = app_config.max_parallel_profiles + safe_profiles = [] + local_profiles = [] + + def profile_single(path): + safe = build_safe_profile([path]) + local = build_local_profile([path]) + return path, safe, local + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(profile_single, p): p for p in file_paths} + for future in as_completed(futures): + path = futures[future] + try: + _, safe, local = future.result() + safe_profiles.append(safe) + local_profiles.append(local) + except Exception as e: + error_entry = f"## 文件: {os.path.basename(path)}\n[ERROR] 分析失败: {e}\n\n" + safe_profiles.append(error_entry) + local_profiles.append(error_entry) + + return "\n".join(safe_profiles), "\n".join(local_profiles) + + def analyze(self, user_input: str, files: List[str] = None, session_output_dir: str = None, reset_session: bool = True, max_rounds: int = None, template_name: str = None) -> Dict[str, Any]: """ 开始分析流程 @@ -236,6 +431,7 @@ class DataAnalysisAgent: session_output_dir: 指定的会话输出目录(可选) reset_session: 是否重置会话 (True: 新开启分析; False: 在现有上下文中继续) max_rounds: 本次分析的最大轮数 (可选,如果不填则使用默认值) + template_name: 分析模板名称 (可选,如果提供则使用模板引导分析) Returns: 分析结果字典 @@ -244,6 +440,13 @@ class DataAnalysisAgent: # 确定本次运行的轮数限制 current_max_rounds = max_rounds if max_rounds is not None else self.max_rounds + # Template integration: prepend template prompt to user input if provided + if template_name: + from utils.analysis_templates import get_template + template = get_template(template_name) # Raises ValueError if invalid + template_prompt = template.get_full_prompt() + user_input = f"{template_prompt}\n\n{user_input}" + if reset_session: # --- 初始化新会话 --- self.conversation_history = [] @@ -272,11 +475,28 @@ class DataAnalysisAgent: if files: print("[SEARCH] 正在生成数据画像...") try: - data_profile_safe = build_safe_profile(files) - data_profile_local = build_local_profile(files) + if len(files) > 1: + # Parallel profiling for multiple files + 
data_profile_safe, data_profile_local = self._profile_files_parallel(files) + else: + data_profile_safe = build_safe_profile(files) + data_profile_local = build_local_profile(files) print("[OK] 数据画像生成完毕(安全级 + 本地级)") except Exception as e: print(f"[WARN] 数据画像生成失败: {e}") + + # Expose chunked iterators for large files in the Code_Executor namespace + for fp in files: + try: + if os.path.exists(fp): + file_size_mb = os.path.getsize(fp) / (1024 * 1024) + if file_size_mb > app_config.max_file_size_mb: + var_name = "chunked_iter_" + os.path.splitext(os.path.basename(fp))[0] + # Store a factory so the iterator can be re-created + self.executor.set_variable(var_name, lambda p=fp: load_data_chunked(p)) + print(f"[OK] 大文件 {os.path.basename(fp)} 的分块迭代器已注入为 {var_name}()") + except Exception as e: + print(f"[WARN] 注入分块迭代器失败 ({os.path.basename(fp)}): {e}") # 安全画像发给LLM,完整画像留给最终报告生成 self.data_profile = data_profile_local # 本地完整版用于最终报告 @@ -327,9 +547,23 @@ class DataAnalysisAgent: # 初始化连续失败计数器 consecutive_failures = 0 + # Per-round data-context retry counter + data_context_retries = 0 + last_retry_round = 0 while self.current_round < self.max_rounds: self.current_round += 1 + # Notify progress callback + if self._progress_callback: + self._progress_callback(self.current_round, self.max_rounds, f"第{self.current_round}/{self.max_rounds}轮分析中...") + # Reset data-context retry counter when entering a new round + if self.current_round != last_retry_round: + data_context_retries = 0 + + # Trim conversation history after the first round to bound token usage + if self.current_round > 1: + self._trim_conversation_history() + print(f"\n[LOOP] 第 {self.current_round} 轮分析") # 调用LLM生成响应 try: # 获取当前执行环境的变量信息 @@ -388,7 +622,40 @@ class DataAnalysisAgent: # 根据动作类型添加不同的反馈 if process_result["action"] == "generate_code": feedback = process_result.get("feedback", "") - # 对执行反馈进行脱敏,移除真实数据值后再发给LLM + result = process_result.get("result", {}) + execution_failed = not result.get("success", True) + + # --- 
Data-context retry logic --- + if execution_failed: + error_output = result.get("error", "") or feedback + error_class = self._classify_error(error_output) + + if error_class == "data_context" and data_context_retries < app_config.max_data_context_retries: + data_context_retries += 1 + last_retry_round = self.current_round + print(f"[RETRY] 数据上下文错误,重试 {data_context_retries}/{app_config.max_data_context_retries}") + # Generate enriched hint from safe profile + enriched_hint = generate_enriched_hint(error_output, self.data_profile_safe) + # Add enriched hint to conversation history (assistant response already added above) + self.conversation_history.append( + {"role": "user", "content": enriched_hint} + ) + # Record the failed attempt + self.analysis_results.append( + { + "round": self.current_round, + "code": process_result.get("code", ""), + "result": result, + "response": response, + "retry": True, + } + ) + # Retry within the same round: decrement round counter so the + # outer loop's increment brings us back to the same round number + self.current_round -= 1 + continue + + # Normal feedback path (no retry or non-data-context error or at limit) safe_feedback = sanitize_execution_feedback(feedback) self.conversation_history.append( {"role": "user", "content": f"代码执行反馈:\n{safe_feedback}"} @@ -403,6 +670,45 @@ class DataAnalysisAgent: "response": response, } ) + + # --- Construct Round_Data and append to session --- + result = process_result.get("result", {}) + round_data = { + "round": self.current_round, + "reasoning": process_result.get("reasoning", ""), + "code": process_result.get("code", ""), + "result_summary": self._summarize_result(result), + "evidence_rows": result.get("evidence_rows", []), + "raw_log": feedback, + "auto_exported_files": result.get("auto_exported_files", []), + "prompt_saved_files": result.get("prompt_saved_files", []), + } + + if self._session_ref: + self._session_ref.rounds.append(round_data) + # Merge file metadata into 
SessionData.data_files + for f in round_data.get("auto_exported_files", []): + if f.get("skipped"): + continue # Large DataFrame — not written to disk + self._session_ref.data_files.append({ + "filename": f.get("filename", ""), + "description": f"自动导出: {f.get('variable_name', '')}", + "rows": f.get("rows", 0), + "cols": f.get("cols", 0), + "columns": f.get("columns", []), + "size_bytes": 0, + "source": "auto", + }) + for f in round_data.get("prompt_saved_files", []): + self._session_ref.data_files.append({ + "filename": f.get("filename", ""), + "description": f.get("description", ""), + "rows": f.get("rows", 0), + "cols": 0, + "columns": [], + "size_bytes": 0, + "source": "prompt", + }) elif process_result["action"] == "collect_figures": # 记录图片收集结果 collected_figures = process_result.get("collected_figures", []) @@ -596,6 +902,23 @@ class DataAnalysisAgent: f"输出: {exec_result.get('output')[:]}\n\n" ) + # 构建各轮次证据数据摘要 + evidence_summary = "" + if self._session_ref and self._session_ref.rounds: + evidence_parts = [] + for rd in self._session_ref.rounds: + round_num = rd.get("round", 0) + summary = rd.get("result_summary", "") + evidence = rd.get("evidence_rows", []) + reasoning = rd.get("reasoning", "") + part = f"第{round_num}轮: {summary}" + if reasoning: + part += f"\n 推理: {reasoning[:200]}" + if evidence: + part += f"\n 数据样本({len(evidence)}行): {json.dumps(evidence[:3], ensure_ascii=False, default=str)}" + evidence_parts.append(part) + evidence_summary = "\n".join(evidence_parts) + # 使用 prompts.py 中的统一提示词模板,并添加相对路径使用说明 prompt = final_report_system_prompt.format( current_round=self.current_round, @@ -605,14 +928,24 @@ class DataAnalysisAgent: code_results_summary=code_results_summary, ) + # Append evidence data from all rounds for evidence annotation + if evidence_summary: + prompt += f""" + +**各轮次分析证据数据 (Evidence by Round)**: +以下是每轮分析的结果摘要和数据样本,请在报告中使用 `` 标注引用了哪一轮的数据: + +{evidence_summary} +""" + # 在提示词中明确要求使用相对路径 prompt += """ [FOLDER] **图片路径使用说明**: 
报告和图片都在同一目录下,请在报告中使用相对路径引用图片: -- 格式: +- 格式: - 示例: -- 这样可以确保报告在不同环境下都能正确显示图片 +- 注意:必须使用实际生成的图片文件名,严禁使用占位符 """ return prompt diff --git a/prompts.py b/prompts.py index 57a6238..f43f4df 100644 --- a/prompts.py +++ b/prompts.py @@ -56,6 +56,16 @@ jupyter notebook环境当前变量: - **N-gram提取**:必须使用 `CountVectorizer(ngram_range=(2, 3))` 提取短语(如 "remote control")。 - **停用词过滤**:必须构建 `stop_words` 列表,剔除年份(2025)、通用动词(fix, check)、通用介词(the, for)等。 +**4. 中间数据保存规则**: +- 当你生成了有价值的中间数据(筛选子集、聚合表、聚类结果等),请主动保存为CSV/XLSX文件。 +- 保存后必须打印标记行:`[DATA_FILE_SAVED] filename: {{文件名}}, rows: {{行数}}, description: {{描述}}` +- 示例: + ```python + top_issues.to_csv(os.path.join(session_output_dir, "TOP问题汇总.csv"), index=False) + print(f"[DATA_FILE_SAVED] filename: TOP问题汇总.csv, rows: {{len(top_issues)}}, description: 各类型TOP问题聚合统计") + ``` +- 这些文件会自动出现在"数据文件"面板中,方便用户浏览和下载。 + --- **标准化分析SOP (Standard Operating Procedure)**: @@ -150,7 +160,7 @@ final_report_system_prompt = """你是一位**资深数据分析专家 (Senior D - **可视化证据链 (Visual Evidence)**: {figures_summary} -> **警告**:你必须仔细检查上述列表。如果在 `figures_summary` 中列出了图表,你的报告中就必须引用它。**严禁遗漏任何已生成的图表**。引用格式必须为 ``。 +> **警告**:你必须仔细检查上述列表。如果在 `figures_summary` 中列出了图表,你的报告中就必须引用它。**严禁遗漏任何已生成的图表**。引用格式必须为 ``。 ### 报告核心要求 1. **角色定位**: @@ -160,6 +170,11 @@ final_report_system_prompt = """你是一位**资深数据分析专家 (Senior D - **禁止**:使用第一人称(我、我们)、使用模糊推测词(大概、可能)。 - **强制**:客观陈述事实,使用专业术语(同比、环比、占比、TOPN),结论要有数据支撑。 3. **结构化输出**:必须严格遵守下方的 5 章节结构,确保逻辑严密。 +4. 
**证据标注规则**: + - 当报告段落的结论来源于某一轮分析的数据,请在段落末尾添加HTML注释标注:`` + - N 为产生该数据的分析轮次编号(从1开始) + - 示例:某段落描述了第3轮分析发现的车型分布规律,则在段落末尾添加 `` + - 这些标注不会在报告中显示,但会被系统用于关联支撑数据 ### 报告结构模板使用说明 (Template Instructions) - **固定格式 (Format)**:所有的 Markdown 标题 (`#`, `##`)、列表项前缀 (`- **...**`)、表格表头是必须保留的**骨架**。 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f0e4c8a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.pytest.ini_options] +pythonpath = ["."] +testpaths = ["tests"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..65140f2 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# tests package diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d491ceb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +""" +Conftest for property-based tests. +Ensures the project root is on sys.path for direct module imports. +""" +import sys +import os + +# Add project root to sys.path so we can import modules directly +# (e.g., `from config.app_config import AppConfig`) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) diff --git a/tests/test_dashboard_properties.py b/tests/test_dashboard_properties.py new file mode 100644 index 0000000..e10034d --- /dev/null +++ b/tests/test_dashboard_properties.py @@ -0,0 +1,651 @@ +# -*- coding: utf-8 -*- +""" +Property-based tests for analysis-dashboard-redesign features. +Uses hypothesis with max_examples=100 as specified in the design document. 
+ +Run: python -m pytest tests/test_dashboard_properties.py -v +""" + +import os +import sys +import re +import json +import tempfile + +# Ensure project root is on path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +import pandas as pd +from hypothesis import given, settings, assume +from hypothesis import strategies as st +from hypothesis.extra.pandas import column, data_frames, range_indexes + + +# --------------------------------------------------------------------------- +# Helpers / Strategies +# --------------------------------------------------------------------------- + +# Strategy for generating random execution results (success or failure) +execution_result_st = st.fixed_dictionaries({ + "success": st.booleans(), + "output": st.text(min_size=0, max_size=200), + "error": st.text(min_size=0, max_size=200), + "variables": st.just({}), + "evidence_rows": st.lists( + st.dictionaries( + keys=st.text(min_size=1, max_size=10, alphabet="abcdefghijklmnopqrstuvwxyz"), + values=st.one_of(st.integers(), st.text(min_size=0, max_size=20), st.floats(allow_nan=False)), + min_size=1, + max_size=5, + ), + min_size=0, + max_size=10, + ), + "auto_exported_files": st.just([]), + "prompt_saved_files": st.just([]), +}) + +# Strategy for reasoning text (may be empty, simulating missing YAML field) +reasoning_st = st.one_of(st.just(""), st.text(min_size=1, max_size=200)) + +# Strategy for code text +code_st = st.text(min_size=1, max_size=500, alphabet="abcdefghijklmnopqrstuvwxyz0123456789 =()._\n") + +# Strategy for feedback/raw_log text +feedback_st = st.text(min_size=0, max_size=300) + + +def build_round_data(round_num, reasoning, code, result, feedback): + """Construct a Round_Data dict the same way DataAnalysisAgent._handle_generate_code does.""" + def summarize_result(r): + if r.get("success"): + evidence_rows = r.get("evidence_rows", []) + if evidence_rows: + num_rows = len(evidence_rows) + num_cols = len(evidence_rows[0]) 
if evidence_rows else 0 + return f"执行成功,输出 DataFrame ({num_rows}行×{num_cols}列)" + output = r.get("output", "") + if output: + first_line = output.strip().split("\n")[0][:80] + return f"执行成功: {first_line}" + return "执行成功" + else: + error = r.get("error", "未知错误") + if len(error) > 100: + error = error[:100] + "..." + return f"执行失败: {error}" + + return { + "round": round_num, + "reasoning": reasoning, + "code": code, + "result_summary": summarize_result(result), + "evidence_rows": result.get("evidence_rows", []), + "raw_log": feedback, + "auto_exported_files": result.get("auto_exported_files", []), + "prompt_saved_files": result.get("prompt_saved_files", []), + } + + +# Regex for parsing DATA_FILE_SAVED markers (same as CodeExecutor) +_DATA_FILE_SAVED_RE = re.compile( + r"\[DATA_FILE_SAVED\]\s*filename:\s*(.+?),\s*rows:\s*(\d+),\s*description:\s*(.+)" +) + + +def parse_data_file_saved_markers(stdout_text): + """Parse [DATA_FILE_SAVED] marker lines — mirrors CodeExecutor._parse_data_file_saved_markers.""" + results = [] + for line in stdout_text.splitlines(): + m = _DATA_FILE_SAVED_RE.search(line) + if m: + results.append({ + "filename": m.group(1).strip(), + "rows": int(m.group(2)), + "description": m.group(3).strip(), + }) + return results + + +# Evidence annotation regex (same as web/main.py) +_EVIDENCE_PATTERN = re.compile(r"") + + +def split_report_to_paragraphs(markdown_content): + """Mirrors _split_report_to_paragraphs from web/main.py.""" + lines = markdown_content.split("\n") + paragraphs = [] + current_block = [] + current_type = "text" + para_id = 0 + + def flush_block(): + nonlocal para_id, current_block, current_type + text = "\n".join(current_block).strip() + if text: + paragraphs.append({ + "id": f"p-{para_id}", + "type": current_type, + "content": text, + }) + para_id += 1 + current_block = [] + current_type = "text" + + in_table = False + in_code = False + + for line in lines: + stripped = line.strip() + + if stripped.startswith("```"): + if in_code: + 
current_block.append(line) + flush_block() + in_code = False + continue + else: + flush_block() + current_block.append(line) + current_type = "code" + in_code = True + continue + + if in_code: + current_block.append(line) + continue + + if re.match(r"^#{1,6}\s", stripped): + flush_block() + current_block.append(line) + current_type = "heading" + flush_block() + continue + + if re.match(r"^!\[.*\]\(.*\)", stripped): + flush_block() + current_block.append(line) + current_type = "image" + flush_block() + continue + + if stripped.startswith("|"): + if not in_table: + flush_block() + in_table = True + current_type = "table" + current_block.append(line) + continue + else: + if in_table: + flush_block() + in_table = False + + if not stripped: + flush_block() + continue + + current_block.append(line) + + flush_block() + return paragraphs + + +def extract_evidence_annotations(paragraphs, rounds): + """Mirrors _extract_evidence_annotations from web/main.py, using a rounds list instead of session.""" + supporting_data = {} + for para in paragraphs: + content = para.get("content", "") + match = _EVIDENCE_PATTERN.search(content) + if match: + round_num = int(match.group(1)) + idx = round_num - 1 + if 0 <= idx < len(rounds): + evidence_rows = rounds[idx].get("evidence_rows", []) + if evidence_rows: + supporting_data[para["id"]] = evidence_rows + return supporting_data + + + +# =========================================================================== +# Property 1: Round_Data Structural Completeness (Task 16.1) +# Feature: analysis-dashboard-redesign, Property 1: Round_Data structural completeness +# Validates: Requirements 1.1, 1.3, 1.4 +# =========================================================================== + +ROUND_DATA_REQUIRED_FIELDS = { + "round": int, + "reasoning": str, + "code": str, + "result_summary": str, + "evidence_rows": list, + "raw_log": str, +} + + +@settings(max_examples=100) +@given( + num_rounds=st.integers(min_value=1, max_value=20), + 
results=st.lists(execution_result_st, min_size=1, max_size=20), + reasonings=st.lists(reasoning_st, min_size=1, max_size=20), + codes=st.lists(code_st, min_size=1, max_size=20), + feedbacks=st.lists(feedback_st, min_size=1, max_size=20), +) +def test_prop1_round_data_structural_completeness(num_rounds, results, reasonings, codes, feedbacks): + """Round_Data objects must contain all required fields with correct types and preserve insertion order. + + **Validates: Requirements 1.1, 1.3, 1.4** + """ + # Build a list of rounds using the same number of entries + count = min(num_rounds, len(results), len(reasonings), len(codes), len(feedbacks)) + rounds_list = [] + for i in range(count): + rd = build_round_data(i + 1, reasonings[i], codes[i], results[i], feedbacks[i]) + rounds_list.append(rd) + + # Verify all required fields present with correct types + for rd in rounds_list: + for field, expected_type in ROUND_DATA_REQUIRED_FIELDS.items(): + assert field in rd, f"Missing field: {field}" + assert isinstance(rd[field], expected_type), ( + f"Field '{field}' expected {expected_type.__name__}, got {type(rd[field]).__name__}" + ) + + # Verify insertion order preserved + for i in range(len(rounds_list) - 1): + assert rounds_list[i]["round"] <= rounds_list[i + 1]["round"], ( + f"Insertion order violated: round {rounds_list[i]['round']} > {rounds_list[i + 1]['round']}" + ) + + +# =========================================================================== +# Property 2: Evidence Capture Bounded (Task 16.2) +# Feature: analysis-dashboard-redesign, Property 2: Evidence capture bounded +# Validates: Requirements 4.1, 4.2, 4.3 +# =========================================================================== + +# Strategy for generating random DataFrames with 0-10000 rows and 1-50 columns +col_name_st = st.text( + min_size=1, max_size=10, + alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz_"), +).filter(lambda s: s[0] != "_") # column names shouldn't start with _ + + 
+@settings(max_examples=100)
+@given(
+    num_rows=st.integers(min_value=0, max_value=10000),
+    num_cols=st.integers(min_value=1, max_value=50),
+)
+def test_prop2_evidence_capture_bounded(num_rows, num_cols):
+    """Evidence capture must return at most 10 rows with keys matching DataFrame columns.
+
+    The frame is filled deterministically from the generated dimensions, so a
+    failing example reported by hypothesis can be replayed exactly.
+
+    Args:
+        num_rows: Number of rows of the generated DataFrame (0-10000).
+        num_cols: Number of columns of the generated DataFrame (1-50).
+
+    **Validates: Requirements 4.1, 4.2, 4.3**
+    """
+    # Local import: numpy is only needed to build the sample frame
+    import numpy as np
+
+    columns = [f"col_{i}" for i in range(num_cols)]
+    if num_rows == 0:
+        df = pd.DataFrame(columns=columns)
+    else:
+        # Deterministic values in [0, 100) instead of unseeded np.random:
+        # every input of the test is then controlled by hypothesis.
+        data = np.arange(num_rows * num_cols).reshape(num_rows, num_cols) % 100
+        df = pd.DataFrame(data, columns=columns)
+
+    # Simulate the evidence capture logic: df.head(10).to_dict(orient='records')
+    evidence_rows = df.head(10).to_dict(orient="records")
+
+    # Verify length constraints
+    assert len(evidence_rows) <= 10
+    assert len(evidence_rows) == min(10, len(df))
+
+    # Verify each row dict has keys matching the DataFrame's column names
+    expected_keys = set(df.columns)
+    for row in evidence_rows:
+        assert set(row.keys()) == expected_keys
+
+
+# ===========================================================================
+# Property 3: Filename Deduplication (Task 16.3)
+# Feature: analysis-dashboard-redesign, Property 3: Filename deduplication
+# Validates: Requirements 5.3
+# ===========================================================================
+
+@settings(max_examples=100)
+@given(
+    num_exports=st.integers(min_value=1, max_value=20),
+    var_name=st.text(min_size=1, max_size=20, alphabet="abcdefghijklmnopqrstuvwxyz_0123456789").filter(
+        lambda s: s[0].isalpha()
+    ),
+)
+def test_prop3_filename_deduplication(num_exports, var_name):
+    """All generated filenames from same-name exports must be unique.
+
+    Exports the same variable name ``num_exports`` times into one directory
+    and checks that the ``_<n>`` suffixing never yields a repeated filename.
+
+    Args:
+        num_exports: How many times the same variable is exported (1-20).
+        var_name: Variable name used as the CSV base name; starts with a letter.
+
+    **Validates: Requirements 5.3**
+    """
+    generated_filenames = []
+
+    # TemporaryDirectory removes the scratch files when the example finishes;
+    # a bare mkdtemp() would leave one directory behind per hypothesis example.
+    with tempfile.TemporaryDirectory() as output_dir:
+        for _ in range(num_exports):
+            # Simulate _export_dataframe dedup logic
+            base_filename = f"{var_name}.csv"
+            filepath = os.path.join(output_dir, base_filename)
+
+            if os.path.exists(filepath):
+                suffix = 1
+                while True:
+                    dedup_filename = f"{var_name}_{suffix}.csv"
+                    filepath = os.path.join(output_dir, dedup_filename)
+                    if not os.path.exists(filepath):
+                        base_filename = dedup_filename
+                        break
+                    suffix += 1
+
+            # Create the file to simulate the export
+            with open(filepath, "w") as f:
+                f.write("dummy")
+
+            generated_filenames.append(base_filename)
+
+    # Verify all filenames are unique
+    assert len(generated_filenames) == len(set(generated_filenames)), (
+        f"Duplicate filenames found: {generated_filenames}"
+    )
+
+
+# ===========================================================================
+# Property 4: Auto-Export Metadata Completeness (Task 16.4)
+# Feature: analysis-dashboard-redesign, Property 4: Auto-export metadata completeness
+# Validates: Requirements 5.4, 5.5
+# ===========================================================================
+
+@settings(max_examples=100)
+@given(
+    var_name=st.text(min_size=1, max_size=20, alphabet="abcdefghijklmnopqrstuvwxyz_0123456789").filter(
+        lambda s: s[0].isalpha()
+    ),
+    num_rows=st.integers(min_value=0, max_value=1000),
+    num_cols=st.integers(min_value=1, max_value=50),
+)
+def test_prop4_auto_export_metadata_completeness(var_name, num_rows, num_cols):
+    """Auto-export metadata must contain all required fields with correct values.
+
+    Writes a generated DataFrame to CSV using the same naming logic as the
+    export simulation above and checks the metadata dict built alongside it.
+
+    Args:
+        var_name: Variable name used as the CSV base name; starts with a letter.
+        num_rows: Number of rows of the generated DataFrame (0-1000).
+        num_cols: Number of columns of the generated DataFrame (1-50).
+
+    **Validates: Requirements 5.4, 5.5**
+    """
+    # Local import: numpy is only needed to build the sample frame
+    import numpy as np
+
+    columns = [f"col_{i}" for i in range(num_cols)]
+
+    if num_rows == 0:
+        df = pd.DataFrame(columns=columns)
+    else:
+        # Deterministic values in [0, 100) so hypothesis can replay failures
+        data = np.arange(num_rows * num_cols).reshape(num_rows, num_cols) % 100
+        df = pd.DataFrame(data, columns=columns)
+
+    # The scratch directory is removed on exit; only the metadata dict is
+    # needed by the assertions below.
+    with tempfile.TemporaryDirectory() as output_dir:
+        # Simulate _export_dataframe logic
+        base_filename = f"{var_name}.csv"
+        filepath = os.path.join(output_dir, base_filename)
+
+        if os.path.exists(filepath):
+            suffix = 1
+            while True:
+                dedup_filename = f"{var_name}_{suffix}.csv"
+                filepath = os.path.join(output_dir, dedup_filename)
+                if not os.path.exists(filepath):
+                    base_filename = dedup_filename
+                    break
+                suffix += 1
+
+        df.to_csv(filepath, index=False)
+        metadata = {
+            "variable_name": var_name,
+            "filename": base_filename,
+            "rows": len(df),
+            "cols": len(df.columns),
+            "columns": list(df.columns),
+        }
+
+    # Verify all required fields present
+    for field in ("variable_name", "filename", "rows", "cols", "columns"):
+        assert field in metadata, f"Missing field: {field}"
+
+    # Verify values match the source DataFrame
+    assert metadata["rows"] == len(df)
+    assert metadata["cols"] == len(df.columns)
+    assert metadata["columns"] == list(df.columns)
+    assert metadata["variable_name"] == var_name
+
+
+
+# ===========================================================================
+# Property 5: DATA_FILE_SAVED Marker Parsing Round-Trip (Task 16.5)
+# Feature: analysis-dashboard-redesign, Property 5: DATA_FILE_SAVED marker parsing round-trip
+# Validates: Requirements 6.3
+# ===========================================================================
+
+# Strategy for filenames: alphanumeric + Chinese + underscores + hyphens, with extension
+# (commas are excluded because the marker format uses ", " as its field separator)
+filename_base_st = st.text(
+    min_size=1,
+    max_size=30,
+    alphabet=st.sampled_from(
+        "abcdefghijklmnopqrstuvwxyz"
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "0123456789"
+        "_-"
+        "数据分析结果汇总报告"
+    ),
+).filter(lambda s: len(s.strip()) > 0 and "," not in s)
+
+filename_ext_st = st.sampled_from([".csv", ".xlsx"]) + +filename_st = st.builds(lambda base, ext: base.strip() + ext, filename_base_st, filename_ext_st) + +description_st = st.text( + min_size=1, + max_size=100, + alphabet=st.sampled_from( + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789 " + "各类型问题聚合统计分析结果" + ), +).filter(lambda s: len(s.strip()) > 0) + + +@settings(max_examples=100) +@given( + filename=filename_st, + rows=st.integers(min_value=1, max_value=1000000), + description=description_st, +) +def test_prop5_data_file_saved_marker_round_trip(filename, rows, description): + """Formatting then parsing a DATA_FILE_SAVED marker must recover original values. + + **Validates: Requirements 6.3** + """ + # Format the marker + marker = f"[DATA_FILE_SAVED] filename: {filename}, rows: {rows}, description: {description}" + + # Parse using the same logic as CodeExecutor + parsed = parse_data_file_saved_markers(marker) + + assert len(parsed) == 1, f"Expected 1 parsed result, got {len(parsed)}" + assert parsed[0]["filename"] == filename.strip() + assert parsed[0]["rows"] == rows + assert parsed[0]["description"] == description.strip() + + +# =========================================================================== +# Property 6: Data File Preview Bounded Rows (Task 16.6) +# Feature: analysis-dashboard-redesign, Property 6: Data file preview bounded rows +# Validates: Requirements 7.2 +# =========================================================================== + +@settings(max_examples=100) +@given( + num_rows=st.integers(min_value=0, max_value=10000), + num_cols=st.integers(min_value=1, max_value=50), +) +def test_prop6_data_file_preview_bounded_rows(num_rows, num_cols): + """Preview of a CSV file must return at most 5 rows with correct column names. 
+ + **Validates: Requirements 7.2** + """ + import numpy as np + columns = [f"col_{i}" for i in range(num_cols)] + + if num_rows == 0: + df = pd.DataFrame(columns=columns) + else: + data = np.random.randint(0, 100, size=(num_rows, num_cols)) + df = pd.DataFrame(data, columns=columns) + + # Write to a temp CSV file + tmp_dir = tempfile.mkdtemp() + csv_path = os.path.join(tmp_dir, "test_data.csv") + df.to_csv(csv_path, index=False) + + # Read back using the same logic as the preview endpoint + preview_df = pd.read_csv(csv_path, nrows=5) + + # Verify at most 5 rows + assert len(preview_df) <= 5 + assert len(preview_df) == min(5, num_rows) + + # Verify column names match exactly + assert list(preview_df.columns) == columns + + +# =========================================================================== +# Property 7: Evidence Annotation Parsing (Task 16.7) +# Feature: analysis-dashboard-redesign, Property 7: Evidence annotation parsing +# Validates: Requirements 11.3, 11.4 +# =========================================================================== + +# Strategy for generating paragraphs with/without evidence annotations +annotated_paragraph_st = st.builds( + lambda text, round_num: f"{text} ", + st.text(min_size=1, max_size=100, alphabet="abcdefghijklmnopqrstuvwxyz .,!"), + st.integers(min_value=1, max_value=100), +) + +plain_paragraph_st = st.text( + min_size=1, + max_size=100, + alphabet="abcdefghijklmnopqrstuvwxyz .,!", +).filter(lambda s: "evidence:" not in s and len(s.strip()) > 0) + + +@settings(max_examples=100) +@given( + annotated=st.lists(annotated_paragraph_st, min_size=0, max_size=10), + plain=st.lists(plain_paragraph_st, min_size=0, max_size=10), +) +def test_prop7_evidence_annotation_parsing(annotated, plain): + """Annotated paragraphs must be correctly extracted; non-annotated must be excluded. 
+ + **Validates: Requirements 11.3, 11.4** + """ + assume(len(annotated) + len(plain) > 0) + + # Build markdown by interleaving annotated and plain paragraphs + all_paragraphs = [] + for p in annotated: + all_paragraphs.append(("annotated", p)) + for p in plain: + all_paragraphs.append(("plain", p)) + + # Build markdown content with blank lines between paragraphs + markdown = "\n\n".join(text for _, text in all_paragraphs) + + # Parse into paragraphs + paragraphs = split_report_to_paragraphs(markdown) + + # Build fake rounds data (up to 100 rounds, each with some evidence) + rounds = [ + {"evidence_rows": [{"key": f"value_{i}"}]} + for i in range(100) + ] + + # Extract evidence annotations + supporting_data = extract_evidence_annotations(paragraphs, rounds) + + # Verify: annotated paragraphs with valid round numbers should be in supporting_data + for para in paragraphs: + content = para.get("content", "") + match = _EVIDENCE_PATTERN.search(content) + if match: + round_num = int(match.group(1)) + idx = round_num - 1 + if 0 <= idx < len(rounds) and rounds[idx].get("evidence_rows"): + assert para["id"] in supporting_data, ( + f"Annotated paragraph {para['id']} with round {round_num} not in supporting_data" + ) + else: + # Non-annotated paragraphs must NOT be in supporting_data + assert para["id"] not in supporting_data, ( + f"Non-annotated paragraph {para['id']} should not be in supporting_data" + ) + + +# =========================================================================== +# Property 8: SessionData JSON Round-Trip (Task 16.8) +# Feature: analysis-dashboard-redesign, Property 8: SessionData JSON round-trip +# Validates: Requirements 12.4 +# =========================================================================== + +# Strategy for Round_Data dicts +round_data_st = st.fixed_dictionaries({ + "round": st.integers(min_value=1, max_value=100), + "reasoning": st.text(min_size=0, max_size=200), + "code": st.text(min_size=0, max_size=200), + "result_summary": 
st.text(min_size=0, max_size=200), + "evidence_rows": st.lists( + st.dictionaries( + keys=st.text(min_size=1, max_size=10, alphabet="abcdefghijklmnopqrstuvwxyz"), + values=st.one_of( + st.integers(min_value=-1000, max_value=1000), + st.text(min_size=0, max_size=20), + ), + min_size=0, + max_size=5, + ), + min_size=0, + max_size=10, + ), + "raw_log": st.text(min_size=0, max_size=200), +}) + +# Strategy for file metadata dicts +file_metadata_st = st.fixed_dictionaries({ + "filename": st.text(min_size=1, max_size=30, alphabet="abcdefghijklmnopqrstuvwxyz0123456789_."), + "description": st.text(min_size=0, max_size=100), + "rows": st.integers(min_value=0, max_value=100000), + "cols": st.integers(min_value=0, max_value=100), + "columns": st.lists(st.text(min_size=1, max_size=10, alphabet="abcdefghijklmnopqrstuvwxyz"), max_size=10), + "size_bytes": st.integers(min_value=0, max_value=10000000), + "source": st.sampled_from(["auto", "prompt"]), +}) + + +@settings(max_examples=100) +@given( + rounds=st.lists(round_data_st, min_size=0, max_size=20), + data_files=st.lists(file_metadata_st, min_size=0, max_size=20), +) +def test_prop8_session_data_json_round_trip(rounds, data_files): + """Serializing rounds and data_files to JSON and back must produce equal data. 
+ + **Validates: Requirements 12.4** + """ + data = { + "rounds": rounds, + "data_files": data_files, + } + + # Serialize using the same approach as the codebase + serialized = json.dumps(data, default=str) + deserialized = json.loads(serialized) + + assert deserialized["rounds"] == rounds + assert deserialized["data_files"] == data_files diff --git a/tests/test_phase1.py b/tests/test_phase1.py new file mode 100644 index 0000000..b76e8c5 --- /dev/null +++ b/tests/test_phase1.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for Phase 1: Backend Data Model + API Changes + +Run: python -m pytest tests/test_phase1.py -v +""" + +import os +import sys +import json +import tempfile + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest + + +# =========================================================================== +# Task 1: SessionData model extension +# =========================================================================== + +class TestSessionDataExtension: + def test_rounds_initialized_to_empty_list(self): + """Task 1.1: rounds attribute exists and defaults to []""" + from web.main import SessionData + session = SessionData("test-id") + assert hasattr(session, "rounds") + assert session.rounds == [] + assert isinstance(session.rounds, list) + + def test_data_files_initialized_to_empty_list(self): + """Task 1.2: data_files attribute exists and defaults to []""" + from web.main import SessionData + session = SessionData("test-id") + assert hasattr(session, "data_files") + assert session.data_files == [] + assert isinstance(session.data_files, list) + + def test_existing_fields_unchanged(self): + """Existing SessionData fields still work.""" + from web.main import SessionData + session = SessionData("test-id") + assert session.session_id == "test-id" + assert session.is_running is False + assert session.analysis_results == [] + assert session.current_round == 0 + assert session.max_rounds == 20 + + def 
test_reconstruct_session_loads_rounds_and_data_files(self): + """Task 1.3: _reconstruct_session loads rounds and data_files from results.json""" + from web.main import SessionManager + + with tempfile.TemporaryDirectory() as tmpdir: + session_dir = os.path.join(tmpdir, "session_test123") + os.makedirs(session_dir) + + test_rounds = [{"round": 1, "reasoning": "test", "code": "x=1"}] + test_data_files = [{"filename": "out.csv", "rows": 10}] + results = { + "analysis_results": [{"round": 1}], + "rounds": test_rounds, + "data_files": test_data_files, + } + with open(os.path.join(session_dir, "results.json"), "w") as f: + json.dump(results, f) + + sm = SessionManager() + session = sm._reconstruct_session("test123", session_dir) + + assert session.rounds == test_rounds + assert session.data_files == test_data_files + assert session.analysis_results == [{"round": 1}] + + def test_reconstruct_session_legacy_format(self): + """Task 1.3: _reconstruct_session handles legacy list format gracefully""" + from web.main import SessionManager + + with tempfile.TemporaryDirectory() as tmpdir: + session_dir = os.path.join(tmpdir, "session_legacy") + os.makedirs(session_dir) + + # Legacy format: results.json is a plain list + legacy_results = [{"round": 1, "code": "x=1"}] + with open(os.path.join(session_dir, "results.json"), "w") as f: + json.dump(legacy_results, f) + + sm = SessionManager() + session = sm._reconstruct_session("legacy", session_dir) + + assert session.analysis_results == legacy_results + assert session.rounds == [] + assert session.data_files == [] + + +# =========================================================================== +# Task 2: Status API response +# =========================================================================== + +class TestStatusAPIResponse: + def test_status_response_contains_rounds(self): + """Task 2.1: GET /api/status response includes rounds field""" + from web.main import SessionData, session_manager + + session = 
SessionData("status-test") + session.rounds = [{"round": 1, "reasoning": "r1"}] + with session_manager.lock: + session_manager.sessions["status-test"] = session + + try: + # Simulate what the endpoint returns + response = { + "is_running": session.is_running, + "log": "", + "has_report": session.generated_report is not None, + "rounds": session.rounds, + "current_round": session.current_round, + "max_rounds": session.max_rounds, + "progress_percentage": session.progress_percentage, + "status_message": session.status_message, + } + assert "rounds" in response + assert response["rounds"] == [{"round": 1, "reasoning": "r1"}] + finally: + with session_manager.lock: + del session_manager.sessions["status-test"] + + def test_status_backward_compat_fields(self): + """Task 2.2: Existing fields remain unchanged""" + from web.main import SessionData + + session = SessionData("compat-test") + session.status_message = "分析中" + session.progress_percentage = 50.0 + session.current_round = 5 + session.max_rounds = 20 + + response = { + "is_running": session.is_running, + "log": "", + "has_report": session.generated_report is not None, + "progress_percentage": session.progress_percentage, + "current_round": session.current_round, + "max_rounds": session.max_rounds, + "status_message": session.status_message, + "rounds": session.rounds, + } + + assert response["is_running"] is False + assert response["has_report"] is False + assert response["progress_percentage"] == 50.0 + assert response["current_round"] == 5 + assert response["max_rounds"] == 20 + assert response["status_message"] == "分析中" + assert "log" in response + + +# =========================================================================== +# Task 4: Evidence extraction +# =========================================================================== + +class TestEvidenceExtraction: + def test_extract_evidence_basic(self): + """Task 4.1: Parse evidence annotations and build supporting_data""" + from web.main import 
_extract_evidence_annotations, SessionData + + session = SessionData("ev-test") + session.rounds = [ + {"round": 1, "evidence_rows": [{"col": "val1"}]}, + {"round": 2, "evidence_rows": [{"col": "val2"}]}, + ] + + paragraphs = [ + {"id": "p-0", "type": "text", "content": "Some intro text"}, + {"id": "p-1", "type": "text", "content": "Analysis result "}, + {"id": "p-2", "type": "text", "content": "More analysis "}, + ] + + result = _extract_evidence_annotations(paragraphs, session) + + assert "p-0" not in result # no annotation + assert result["p-1"] == [{"col": "val1"}] + assert result["p-2"] == [{"col": "val2"}] + + def test_extract_evidence_no_annotations(self): + """Task 4.1: No annotations means empty mapping""" + from web.main import _extract_evidence_annotations, SessionData + + session = SessionData("ev-test2") + session.rounds = [{"round": 1, "evidence_rows": [{"a": 1}]}] + + paragraphs = [ + {"id": "p-0", "type": "text", "content": "No evidence here"}, + ] + + result = _extract_evidence_annotations(paragraphs, session) + assert result == {} + + def test_extract_evidence_out_of_range_round(self): + """Task 4.1: Round number beyond available rounds is ignored""" + from web.main import _extract_evidence_annotations, SessionData + + session = SessionData("ev-test3") + session.rounds = [{"round": 1, "evidence_rows": [{"a": 1}]}] + + paragraphs = [ + {"id": "p-0", "type": "text", "content": "Ref to round 5 "}, + ] + + result = _extract_evidence_annotations(paragraphs, session) + assert result == {} + + def test_extract_evidence_empty_evidence_rows(self): + """Task 4.1: Round with empty evidence_rows is excluded""" + from web.main import _extract_evidence_annotations, SessionData + + session = SessionData("ev-test4") + session.rounds = [{"round": 1, "evidence_rows": []}] + + paragraphs = [ + {"id": "p-0", "type": "text", "content": "Has annotation "}, + ] + + result = _extract_evidence_annotations(paragraphs, session) + assert result == {} + + def 
test_extract_evidence_whitespace_in_comment(self): + """Task 4.1: Handles whitespace variations in HTML comment""" + from web.main import _extract_evidence_annotations, SessionData + + session = SessionData("ev-test5") + session.rounds = [{"round": 1, "evidence_rows": [{"x": 42}]}] + + paragraphs = [ + {"id": "p-0", "type": "text", "content": "Text "}, + ] + + result = _extract_evidence_annotations(paragraphs, session) + assert result["p-0"] == [{"x": 42}] diff --git a/tests/test_phase2.py b/tests/test_phase2.py new file mode 100644 index 0000000..9e88d65 --- /dev/null +++ b/tests/test_phase2.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for Phase 2: CodeExecutor Enhancements + +Run: python -m pytest tests/test_phase2.py -v +""" + +import os +import sys +import tempfile + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +import pandas as pd +from utils.code_executor import CodeExecutor + + +@pytest.fixture +def executor(tmp_path): + """Create a CodeExecutor with a temp output directory.""" + return CodeExecutor(output_dir=str(tmp_path)) + + +# =========================================================================== +# Task 5: Evidence capture +# =========================================================================== + +class TestEvidenceCapture: + def test_evidence_from_result_dataframe(self, executor): + """5.1: When result.result is a DataFrame, capture head(10) as evidence_rows.""" + code = "import pandas as pd\npd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})" + result = executor.execute_code(code) + assert result["success"] is True + assert "evidence_rows" in result + assert len(result["evidence_rows"]) == 3 + assert result["evidence_rows"][0] == {"a": 1, "b": 4} + + def test_evidence_capped_at_10(self, executor): + """5.1: Evidence rows are capped at 10.""" + code = "import pandas as pd\npd.DataFrame({'x': list(range(100))})" + result = executor.execute_code(code) + assert result["success"] is 
True + assert len(result["evidence_rows"]) == 10 + + def test_evidence_fallback_to_namespace(self, executor): + """5.2: When result.result is not a DataFrame, fallback to namespace.""" + code = "import pandas as pd\nmy_data = pd.DataFrame({'col': [10, 20]})\nprint('done')" + result = executor.execute_code(code) + assert result["success"] is True + assert len(result["evidence_rows"]) == 2 + assert result["evidence_rows"][0] == {"col": 10} + + def test_evidence_empty_when_no_dataframe(self, executor): + """5.3: Returns empty list when no DataFrame is produced.""" + executor.reset_environment() + code = "x = 42" + result = executor.execute_code(code) + assert result["success"] is True + assert result["evidence_rows"] == [] + + def test_evidence_key_in_failure(self, executor): + """5.3: evidence_rows key present even on failure.""" + code = "import not_a_real_module" + result = executor.execute_code(code) + assert "evidence_rows" in result + assert result["evidence_rows"] == [] + + +# =========================================================================== +# Task 6: DataFrame auto-detection and export +# =========================================================================== + +class TestDataFrameAutoExport: + def test_new_dataframe_exported(self, executor, tmp_path): + """6.1-6.4: New DataFrame is detected and exported to CSV.""" + code = "import pandas as pd\nresult_df = pd.DataFrame({'a': [1], 'b': [2]})" + result = executor.execute_code(code) + assert result["success"] is True + assert len(result["auto_exported_files"]) >= 1 + exported = result["auto_exported_files"][0] + assert exported["variable_name"] == "result_df" + assert exported["filename"] == "result_df.csv" + assert exported["rows"] == 1 + assert exported["cols"] == 2 + assert exported["columns"] == ["a", "b"] + # Verify file actually exists + assert os.path.exists(os.path.join(str(tmp_path), "result_df.csv")) + + def test_dedup_suffix(self, executor, tmp_path): + """6.3: Numeric suffix 
deduplication when file exists.""" + # Create first file + code1 = "import pandas as pd\nmy_df = pd.DataFrame({'x': [1]})" + result1 = executor.execute_code(code1) + assert result1["success"] is True + + # Reset the DataFrame to force a new id + code2 = "my_df = pd.DataFrame({'x': [2]})" + result2 = executor.execute_code(code2) + assert result2["success"] is True + exported_files = result2["auto_exported_files"] + assert len(exported_files) >= 1 + # The second export should have _1 suffix + assert exported_files[0]["filename"] == "my_df_1.csv" + + def test_skip_module_names(self, executor): + """6.1: Module-level names like pd, np are skipped.""" + code = "x = 42" # pd and np already in namespace from setup + result = executor.execute_code(code) + # Should not export pd or np as DataFrames + for f in result["auto_exported_files"]: + assert f["variable_name"] not in ("pd", "np", "plt", "sns") + + def test_auto_exported_files_key_in_result(self, executor): + """6.5: auto_exported_files key always present.""" + code = "x = 1" + result = executor.execute_code(code) + assert "auto_exported_files" in result + assert isinstance(result["auto_exported_files"], list) + + def test_changed_dataframe_detected(self, executor, tmp_path): + """6.2: Changed DataFrame (same name, new object) is detected.""" + code1 = "import pandas as pd\ndf_test = pd.DataFrame({'a': [1]})" + executor.execute_code(code1) + + code2 = "df_test = pd.DataFrame({'a': [1, 2, 3]})" + result2 = executor.execute_code(code2) + assert result2["success"] is True + exported = [f for f in result2["auto_exported_files"] if f["variable_name"] == "df_test"] + assert len(exported) == 1 + assert exported[0]["rows"] == 3 + + +# =========================================================================== +# Task 7: DATA_FILE_SAVED marker parsing +# =========================================================================== + +class TestDataFileSavedMarkerParsing: + def test_parse_single_marker(self, executor): + 
"""7.1-7.2: Parse a single DATA_FILE_SAVED marker from stdout.""" + code = 'print("[DATA_FILE_SAVED] filename: output.csv, rows: 42, description: Test data")' + result = executor.execute_code(code) + assert result["success"] is True + assert len(result["prompt_saved_files"]) == 1 + parsed = result["prompt_saved_files"][0] + assert parsed["filename"] == "output.csv" + assert parsed["rows"] == 42 + assert parsed["description"] == "Test data" + + def test_parse_multiple_markers(self, executor): + """7.1-7.2: Parse multiple markers.""" + code = ( + 'print("[DATA_FILE_SAVED] filename: a.csv, rows: 10, description: File A")\n' + 'print("[DATA_FILE_SAVED] filename: b.xlsx, rows: 20, description: File B")' + ) + result = executor.execute_code(code) + assert result["success"] is True + assert len(result["prompt_saved_files"]) == 2 + assert result["prompt_saved_files"][0]["filename"] == "a.csv" + assert result["prompt_saved_files"][1]["filename"] == "b.xlsx" + + def test_no_markers(self, executor): + """7.3: No markers means empty list.""" + code = 'print("hello world")' + result = executor.execute_code(code) + assert result["success"] is True + assert result["prompt_saved_files"] == [] + + def test_prompt_saved_files_key_in_result(self, executor): + """7.3: prompt_saved_files key always present.""" + code = "x = 1" + result = executor.execute_code(code) + assert "prompt_saved_files" in result + assert isinstance(result["prompt_saved_files"], list) + + def test_malformed_marker_skipped(self, executor): + """7.1: Malformed markers are silently skipped.""" + code = 'print("[DATA_FILE_SAVED] this is not valid")' + result = executor.execute_code(code) + assert result["success"] is True + assert result["prompt_saved_files"] == [] + + def test_chinese_filename_and_description(self, executor): + """7.2: Chinese characters in filename and description.""" + code = 'print("[DATA_FILE_SAVED] filename: 数据汇总.csv, rows: 100, description: 各类型TOP问题聚合统计")' + result = 
executor.execute_code(code) + assert result["success"] is True + assert len(result["prompt_saved_files"]) == 1 + assert result["prompt_saved_files"][0]["filename"] == "数据汇总.csv" + assert result["prompt_saved_files"][0]["description"] == "各类型TOP问题聚合统计" + + +# =========================================================================== +# Return structure integrity +# =========================================================================== + +class TestReturnStructure: + def test_success_return_has_all_keys(self, executor): + """All 7 keys present on success.""" + result = executor.execute_code("x = 1") + expected_keys = {"success", "output", "error", "variables", + "evidence_rows", "auto_exported_files", "prompt_saved_files"} + assert expected_keys.issubset(set(result.keys())) + + def test_safety_failure_has_all_keys(self, executor): + """All 7 keys present on safety check failure.""" + result = executor.execute_code("import socket") + expected_keys = {"success", "output", "error", "variables", + "evidence_rows", "auto_exported_files", "prompt_saved_files"} + assert expected_keys.issubset(set(result.keys())) + + def test_execution_error_has_all_keys(self, executor): + """All 7 keys present on execution error.""" + result = executor.execute_code("1/0") + expected_keys = {"success", "output", "error", "variables", + "evidence_rows", "auto_exported_files", "prompt_saved_files"} + assert expected_keys.issubset(set(result.keys())) diff --git a/tests/test_phase3.py b/tests/test_phase3.py new file mode 100644 index 0000000..567c0cf --- /dev/null +++ b/tests/test_phase3.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for Phase 3: Agent Changes + +Run: python -m pytest tests/test_phase3.py -v +""" + +import os +import sys +import json + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +from data_analysis_agent import DataAnalysisAgent +from prompts import data_analysis_system_prompt, final_report_system_prompt 
+ + +# =========================================================================== +# Task 8.1: _summarize_result +# =========================================================================== + +class TestSummarizeResult: + @pytest.fixture + def agent(self): + """Create a minimal DataAnalysisAgent for testing.""" + agent = DataAnalysisAgent.__new__(DataAnalysisAgent) + agent._session_ref = None + return agent + + def test_success_with_evidence_rows(self, agent): + """8.1: Success with evidence rows produces DataFrame summary.""" + result = { + "success": True, + "evidence_rows": [{"a": 1, "b": 2}, {"a": 3, "b": 4}], + "auto_exported_files": [{"variable_name": "df", "filename": "df.csv", "rows": 150, "cols": 8, "columns": []}], + } + summary = agent._summarize_result(result) + assert "执行成功" in summary + assert "DataFrame" in summary + assert "150" in summary + assert "8" in summary + + def test_success_with_evidence_no_auto_files(self, agent): + """8.1: Success with evidence but no auto_exported_files uses evidence length.""" + result = { + "success": True, + "evidence_rows": [{"x": 1}, {"x": 2}, {"x": 3}], + "auto_exported_files": [], + } + summary = agent._summarize_result(result) + assert "执行成功" in summary + assert "DataFrame" in summary + + def test_success_with_output(self, agent): + """8.1: Success with output but no evidence shows first line.""" + result = { + "success": True, + "evidence_rows": [], + "output": "Hello World\nSecond line", + } + summary = agent._summarize_result(result) + assert "执行成功" in summary + assert "Hello World" in summary + + def test_success_no_output(self, agent): + """8.1: Success with no output or evidence.""" + result = {"success": True, "evidence_rows": [], "output": ""} + summary = agent._summarize_result(result) + assert summary == "执行成功" + + def test_failure_short_error(self, agent): + """8.1: Failure with short error message.""" + result = {"success": False, "error": "KeyError: 'col_x'"} + summary = 
agent._summarize_result(result) + assert "执行失败" in summary + assert "KeyError" in summary + + def test_failure_long_error_truncated(self, agent): + """8.1: Failure with long error is truncated to 100 chars.""" + long_error = "A" * 200 + result = {"success": False, "error": long_error} + summary = agent._summarize_result(result) + assert "执行失败" in summary + assert "..." in summary + # The error portion should be at most 103 chars (100 + "...") + error_part = summary.split("执行失败: ")[1] + assert len(error_part) <= 104 + + def test_failure_no_error_field(self, agent): + """8.1: Failure with missing error field.""" + result = {"success": False} + summary = agent._summarize_result(result) + assert "执行失败" in summary + + +# =========================================================================== +# Task 8.2-8.4: Round_Data construction and session integration +# =========================================================================== + +class TestRoundDataConstruction: + def test_handle_generate_code_returns_reasoning(self): + """8.2: _handle_generate_code returns reasoning from yaml_data.""" + agent = DataAnalysisAgent.__new__(DataAnalysisAgent) + agent._session_ref = None + # We need a minimal executor mock + from unittest.mock import MagicMock + agent.executor = MagicMock() + agent.executor.execute_code.return_value = { + "success": True, "output": "ok", "error": "", + "variables": {}, "evidence_rows": [], + "auto_exported_files": [], "prompt_saved_files": [], + } + yaml_data = {"code": "x = 1", "reasoning": "Testing reasoning field"} + result = agent._handle_generate_code("response text", yaml_data) + assert result["reasoning"] == "Testing reasoning field" + + def test_handle_generate_code_empty_reasoning(self): + """8.2: _handle_generate_code returns empty reasoning when not in yaml_data.""" + agent = DataAnalysisAgent.__new__(DataAnalysisAgent) + agent._session_ref = None + from unittest.mock import MagicMock + agent.executor = MagicMock() + 
agent.executor.execute_code.return_value = { + "success": True, "output": "", "error": "", + "variables": {}, "evidence_rows": [], + "auto_exported_files": [], "prompt_saved_files": [], + } + yaml_data = {"code": "x = 1"} + result = agent._handle_generate_code("response text", yaml_data) + assert result["reasoning"] == "" + + +# =========================================================================== +# Task 8.3: set_session_ref +# =========================================================================== + +class TestSetSessionRef: + def test_session_ref_default_none(self): + """8.3: _session_ref defaults to None.""" + agent = DataAnalysisAgent() + assert agent._session_ref is None + + def test_set_session_ref(self): + """8.3: set_session_ref stores the session reference.""" + agent = DataAnalysisAgent() + + class FakeSession: + rounds = [] + data_files = [] + + session = FakeSession() + agent.set_session_ref(session) + assert agent._session_ref is session + + +# =========================================================================== +# Task 9.1: Prompt - intermediate data saving instructions +# =========================================================================== + +class TestPromptDataSaving: + def test_data_saving_instructions_in_system_prompt(self): + """9.1: data_analysis_system_prompt contains DATA_FILE_SAVED instructions.""" + assert "[DATA_FILE_SAVED]" in data_analysis_system_prompt + assert "中间数据保存规则" in data_analysis_system_prompt + + def test_data_saving_example_in_prompt(self): + """9.1: Prompt contains example of saving and printing marker.""" + assert "to_csv" in data_analysis_system_prompt + assert "session_output_dir" in data_analysis_system_prompt + + +# =========================================================================== +# Task 9.2: Prompt - evidence annotation instructions +# =========================================================================== + +class TestPromptEvidenceAnnotation: + def 
test_evidence_annotation_in_report_prompt(self): + """9.2: final_report_system_prompt contains evidence annotation instructions.""" + assert "evidence:round_" in final_report_system_prompt + assert "证据标注规则" in final_report_system_prompt + + def test_evidence_annotation_example(self): + """9.2: Prompt contains example of evidence annotation.""" + assert "" in final_report_system_prompt + + +# =========================================================================== +# Task 9.3: _build_final_report_prompt includes evidence +# =========================================================================== + +class TestBuildFinalReportPromptEvidence: + def test_evidence_included_when_session_has_rounds(self): + """9.3: _build_final_report_prompt includes evidence data when rounds exist.""" + agent = DataAnalysisAgent.__new__(DataAnalysisAgent) + agent.analysis_results = [] + agent.current_round = 2 + agent.session_output_dir = "/tmp/test" + agent.data_profile = "test profile" + + class FakeSession: + rounds = [ + { + "round": 1, + "reasoning": "分析车型分布", + "result_summary": "执行成功,输出 DataFrame (10行×3列)", + "evidence_rows": [{"车型": "A", "数量": 42}], + }, + { + "round": 2, + "reasoning": "分析模块分布", + "result_summary": "执行成功", + "evidence_rows": [], + }, + ] + + agent._session_ref = FakeSession() + prompt = agent._build_final_report_prompt([]) + assert "各轮次分析证据数据" in prompt + assert "第1轮" in prompt + assert "第2轮" in prompt + assert "车型" in prompt + + def test_no_evidence_when_no_session_ref(self): + """9.3: _build_final_report_prompt works without session ref.""" + agent = DataAnalysisAgent.__new__(DataAnalysisAgent) + agent.analysis_results = [] + agent.current_round = 1 + agent.session_output_dir = "/tmp/test" + agent.data_profile = "test profile" + agent._session_ref = None + prompt = agent._build_final_report_prompt([]) + assert "各轮次分析证据数据" not in prompt diff --git a/tests/test_properties.py b/tests/test_properties.py new file mode 100644 index 0000000..d709aa2 --- 
/dev/null +++ b/tests/test_properties.py @@ -0,0 +1,285 @@ +# -*- coding: utf-8 -*- +""" +Property-based tests for agent-robustness-optimization features. +Uses hypothesis with reduced examples (max_examples=20) for fast execution. + +Run: python -m pytest tests/test_properties.py -v +""" + +import os +import sys +import re + +# Ensure project root is on path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +from hypothesis import given, settings, assume +from hypothesis import strategies as st + +from utils.data_privacy import ( + _extract_column_from_error, + _lookup_column_in_profile, + generate_enriched_hint, +) +from utils.analysis_templates import get_template, list_templates, TEMPLATE_REGISTRY + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +DATA_CONTEXT_PATTERNS = [ + r"KeyError:\s*['\"](.+?)['\"]", + r"ValueError.*(?:column|col|field)", + r"NameError.*(?:df|data|frame)", + r"(?:empty|no\s+data|0\s+rows)", + r"IndexError.*(?:out of range|out of bounds)", +] + + +def classify_error(error_message: str) -> str: + """Mirror of DataAnalysisAgent._classify_error for testing without IPython.""" + for pattern in DATA_CONTEXT_PATTERNS: + if re.search(pattern, error_message, re.IGNORECASE): + return "data_context" + return "other" + + +SAMPLE_SAFE_PROFILE = """# 数据结构概览 (Schema Profile) + +## 文件: test.csv + +- **维度**: 100 行 x 3 列 +- **列名**: `车型, 模块, 问题类型` + +### 列结构: + +| 列名 | 数据类型 | 空值率 | 唯一值数 | 特征描述 | +|------|---------|--------|---------|----------| +| 车型 | object | 0.0% | 5 | 低基数分类(5类) | +| 模块 | object | 2.0% | 12 | 中基数分类(12类) | +| 问题类型 | object | 0.0% | 8 | 低基数分类(8类) | +""" + + +# =========================================================================== +# Property 1: Error Classification Correctness (Task 11.1) +# 
=========================================================================== + +# Strategy: generate error messages that contain data-context patterns +data_context_error_st = st.one_of( + st.from_regex(r"KeyError: '[a-zA-Z_]+'" , fullmatch=True), + st.from_regex(r'KeyError: "[a-zA-Z_]+"', fullmatch=True), + st.just("ValueError: column 'x' not found"), + st.just("NameError: name 'df' is not defined"), + st.just("empty DataFrame"), + st.just("0 rows returned"), + st.just("IndexError: index 5 is out of range"), +) + +non_data_error_st = st.one_of( + st.just("SyntaxError: invalid syntax"), + st.just("TypeError: unsupported operand"), + st.just("ZeroDivisionError: division by zero"), + st.just("ImportError: No module named 'foo'"), + st.text(min_size=1, max_size=50).filter( + lambda s: not any(re.search(p, s, re.IGNORECASE) for p in DATA_CONTEXT_PATTERNS) + ), +) + + +@settings(max_examples=20) +@given(err=data_context_error_st) +def test_prop1_data_context_errors_classified(err): + """Data-context error messages must be classified as 'data_context'.""" + assert classify_error(err) == "data_context" + + +@settings(max_examples=20) +@given(err=non_data_error_st) +def test_prop1_non_data_errors_classified(err): + """Non-data error messages must be classified as 'other'.""" + assert classify_error(err) == "other" + + +# =========================================================================== +# Property 3: Enriched Hint Contains Column Metadata Without Real Data (11.2) +# =========================================================================== + +known_columns = ["车型", "模块", "问题类型"] +column_st = st.sampled_from(known_columns) + + +@settings(max_examples=20) +@given(col=column_st) +def test_prop3_enriched_hint_contains_column_meta(col): + """Enriched hint for a known column must contain its metadata.""" + error_msg = f"KeyError: '{col}'" + hint = generate_enriched_hint(error_msg, SAMPLE_SAFE_PROFILE) + assert col in hint + assert "数据类型" in hint + assert "唯一值数量" in 
hint + assert "空值率" in hint + assert "特征描述" in hint + + +@settings(max_examples=20) +@given(col=column_st) +def test_prop3_enriched_hint_no_real_data(col): + """Enriched hint must NOT contain real data values (min/max/mean/sample rows).""" + error_msg = f"KeyError: '{col}'" + hint = generate_enriched_hint(error_msg, SAMPLE_SAFE_PROFILE) + # Should not contain statistical values or sample data + for forbidden in ["Min=", "Max=", "Mean=", "TOP 5 高频值"]: + assert forbidden not in hint + + +# =========================================================================== +# Property 4: Env Var Config Override (Task 11.3) +# =========================================================================== + +@settings(max_examples=10) +@given(val=st.integers(min_value=1, max_value=100)) +def test_prop4_env_override_max_data_context_retries(val): + """APP_MAX_DATA_CONTEXT_RETRIES env var must override config.""" + from config.app_config import AppConfig + os.environ["APP_MAX_DATA_CONTEXT_RETRIES"] = str(val) + try: + config = AppConfig.from_env() + assert config.max_data_context_retries == val + finally: + del os.environ["APP_MAX_DATA_CONTEXT_RETRIES"] + + +# =========================================================================== +# Property 5: Sliding Window Trimming Invariants (Task 11.4) +# =========================================================================== + +def make_history(n_pairs: int, first_msg: str = "initial requirement"): + """Build a fake conversation history with n_pairs of user+assistant messages.""" + history = [{"role": "user", "content": first_msg}] + for i in range(n_pairs): + history.append({"role": "assistant", "content": f'action: "generate_code"\ncode: | print({i})'}) + history.append({"role": "user", "content": f"代码执行反馈:\n成功 round {i}"}) + return history + + +@settings(max_examples=20) +@given( + n_pairs=st.integers(min_value=1, max_value=30), + window=st.integers(min_value=1, max_value=10), +) +def 
test_prop5_trimming_preserves_first_message(n_pairs, window): + """After trimming, the first user message is always at index 0.""" + history = make_history(n_pairs, first_msg="ORIGINAL_REQ") + max_messages = window * 2 + + if len(history) <= max_messages: + return # no trimming needed, invariant trivially holds + + first_message = history[0] + start_idx = 1 + has_summary = ( + len(history) > 1 + and history[1]["role"] == "user" + and history[1]["content"].startswith("[分析摘要]") + ) + if has_summary: + start_idx = 2 + + messages_to_consider = history[start_idx:] + messages_to_trim = messages_to_consider[:-max_messages] + messages_to_keep = messages_to_consider[-max_messages:] + + if not messages_to_trim: + return + + new_history = [first_message] + new_history.append({"role": "user", "content": "[分析摘要] summary"}) + new_history.extend(messages_to_keep) + + assert new_history[0]["content"] == "ORIGINAL_REQ" + assert len(new_history) <= max_messages + 2 # first + summary + window + + +# =========================================================================== +# Property 6: Trimming Summary Content (Task 11.5) +# =========================================================================== + +@settings(max_examples=20) +@given(n_pairs=st.integers(min_value=2, max_value=15)) +def test_prop6_summary_excludes_code_blocks(n_pairs): + """Compressed summary must not contain code blocks or raw output.""" + history = make_history(n_pairs) + # Simulate _compress_trimmed_messages logic + summary_parts = ["[分析摘要] 以下是之前分析轮次的概要:"] + round_num = 0 + for msg in history[1:]: # skip first + content = msg["content"] + if msg["role"] == "assistant": + round_num += 1 + action = "generate_code" + if "collect_figures" in content: + action = "collect_figures" + summary_parts.append(f"- 轮次{round_num}: 动作={action}") + elif msg["role"] == "user" and "代码执行反馈" in content: + success = "失败" if "[ERROR]" in content or "执行错误" in content else "成功" + if summary_parts and summary_parts[-1].startswith("- 
轮次"): + summary_parts[-1] += f", 执行结果={success}" + + summary = "\n".join(summary_parts) + assert "```" not in summary + assert "print(" not in summary + assert "[分析摘要]" in summary + + +# =========================================================================== +# Property 7: Template Prompt Integration (Task 11.6) +# =========================================================================== + +valid_template_names = list(TEMPLATE_REGISTRY.keys()) + + +@settings(max_examples=len(valid_template_names)) +@given(name=st.sampled_from(valid_template_names)) +def test_prop7_template_prompt_prepended(name): + """For any valid template, get_full_prompt() output must be non-empty.""" + template = get_template(name) + prompt = template.get_full_prompt() + assert len(prompt) > 0 + assert template.name in prompt + + +# =========================================================================== +# Property 8: Invalid Template Name Raises Error (Task 11.7) +# =========================================================================== + +@settings(max_examples=20) +@given(name=st.text(min_size=1, max_size=30).filter(lambda s: s not in TEMPLATE_REGISTRY)) +def test_prop8_invalid_template_raises_error(name): + """Invalid template names must raise ValueError listing available templates.""" + with pytest.raises(ValueError) as exc_info: + get_template(name) + error_msg = str(exc_info.value) + # Must list available template names + for valid_name in TEMPLATE_REGISTRY: + assert valid_name in error_msg + + +# =========================================================================== +# Property 11: Parallel Profile Merge With Error Resilience (Task 11.8) +# =========================================================================== + +def test_prop11_parallel_profile_error_resilience(): + """Parallel profiling with mix of valid/invalid files includes all entries.""" + from utils.data_privacy import build_safe_profile, build_local_profile + + valid_file = "uploads/data_simple_200.csv" 
# Regexes that mark an execution error as caused by the data context
# (missing column, undefined frame, empty result, out-of-range index).
DATA_CONTEXT_PATTERNS = [
    r"KeyError:\s*['\"](.+?)['\"]",
    r"ValueError.*(?:column|col|field)",
    r"NameError.*(?:df|data|frame)",
    r"(?:empty|no\s+data|0\s+rows)",
    r"IndexError.*(?:out of range|out of bounds)",
]


def classify_error(error_message: str) -> str:
    """Classify an execution error message.

    Args:
        error_message: Raw error text produced by a failed code run.

    Returns:
        ``"data_context"`` when any entry of ``DATA_CONTEXT_PATTERNS`` matches
        (case-insensitively) anywhere in the message, otherwise ``"other"``.

    NOTE(review): the ``0\\s+rows`` alternative is unanchored, so a message
    such as "10 rows returned" is also classified as ``"data_context"``.
    """
    matched = any(
        re.search(regex, error_message, re.IGNORECASE)
        for regex in DATA_CONTEXT_PATTERNS
    )
    return "data_context" if matched else "other"
+class TestErrorClassifier: + def test_keyerror_single_quotes(self): + assert classify_error("KeyError: '车型'") == "data_context" + + def test_keyerror_double_quotes(self): + assert classify_error('KeyError: "model_name"') == "data_context" + + def test_valueerror_column(self): + assert classify_error("ValueError: column 'x' not in DataFrame") == "data_context" + + def test_nameerror_df(self): + assert classify_error("NameError: name 'df' is not defined") == "data_context" + + def test_empty_dataframe(self): + assert classify_error("empty DataFrame after filtering") == "data_context" + + def test_zero_rows(self): + assert classify_error("0 rows returned from query") == "data_context" + + def test_index_out_of_range(self): + assert classify_error("IndexError: index 10 is out of range") == "data_context" + + def test_syntax_error_is_other(self): + assert classify_error("SyntaxError: invalid syntax") == "other" + + def test_type_error_is_other(self): + assert classify_error("TypeError: unsupported operand") == "other" + + def test_generic_text_is_other(self): + assert classify_error("Something went wrong") == "other" + + def test_empty_string_is_other(self): + assert classify_error("") == "other" + + +# =========================================================================== +# Task 12.1 continued: Unit tests for column extraction and lookup +# =========================================================================== + +class TestColumnExtraction: + def test_extract_from_keyerror(self): + assert _extract_column_from_error("KeyError: '车型'") == "车型" + + def test_extract_from_column_phrase(self): + assert _extract_column_from_error("column '模块' not found") == "模块" + + def test_extract_none_for_generic(self): + assert _extract_column_from_error("SyntaxError: bad") is None + + def test_lookup_existing_column(self): + result = _lookup_column_in_profile("车型", SAMPLE_PROFILE) + assert result is not None + assert result["dtype"] == "object" + assert result["unique_count"] 
== "5" + + def test_lookup_missing_column(self): + assert _lookup_column_in_profile("不存在", SAMPLE_PROFILE) is None + + def test_lookup_none_column(self): + assert _lookup_column_in_profile(None, SAMPLE_PROFILE) is None + + +# =========================================================================== +# Task 12.2: Unit tests for conversation trimming at boundary conditions +# =========================================================================== + +class TestConversationTrimming: + def _make_history(self, n_pairs): + history = [{"role": "user", "content": "ORIGINAL"}] + for i in range(n_pairs): + history.append({"role": "assistant", "content": f"response {i}"}) + history.append({"role": "user", "content": f"feedback {i}"}) + return history + + def test_no_trimming_when_under_limit(self): + """History with 3 pairs and window=5 should not be trimmed.""" + history = self._make_history(3) # 1 + 6 = 7 messages + window = 5 + max_messages = window * 2 # 10 + assert len(history) <= max_messages # no trimming + + def test_trimming_at_exact_boundary(self): + """History exactly at 2*window should not be trimmed.""" + window = 3 + history = self._make_history(3) # 1 + 6 = 7 messages + max_messages = window * 2 # 6 + # 7 > 6, so trimming should happen + assert len(history) > max_messages + + def test_first_message_always_preserved(self): + """After trimming, first message must be preserved.""" + history = self._make_history(10) + window = 2 + max_messages = window * 2 + + first = history[0] + to_consider = history[1:] + to_keep = to_consider[-max_messages:] + + new_history = [first, {"role": "user", "content": "[分析摘要] ..."}] + new_history.extend(to_keep) + + assert new_history[0]["content"] == "ORIGINAL" + + def test_summary_replaces_old_summary(self): + """If a summary already exists at index 1, it should be replaced.""" + history = [ + {"role": "user", "content": "ORIGINAL"}, + {"role": "user", "content": "[分析摘要] old summary"}, + ] + for i in range(8): + 
history.append({"role": "assistant", "content": f"resp {i}"}) + history.append({"role": "user", "content": f"fb {i}"}) + + # Simulate trimming with existing summary + has_summary = history[1]["content"].startswith("[分析摘要]") + assert has_summary + start_idx = 2 if has_summary else 1 + assert start_idx == 2 + + +# =========================================================================== +# Task 12.3: Tests for template API +# =========================================================================== + +class TestTemplateSystem: + def test_list_templates_returns_all(self): + templates = list_templates() + assert len(templates) == len(TEMPLATE_REGISTRY) + names = {t["name"] for t in templates} + assert names == set(TEMPLATE_REGISTRY.keys()) + + def test_get_valid_template(self): + for name in TEMPLATE_REGISTRY: + t = get_template(name) + assert t.name # has a display name + steps = t.build_steps() + assert len(steps) > 0 + + def test_get_invalid_template_raises(self): + with pytest.raises(ValueError): + get_template("nonexistent_template_xyz") + + def test_template_prompt_not_empty(self): + for name in TEMPLATE_REGISTRY: + t = get_template(name) + prompt = t.get_full_prompt() + assert len(prompt) > 50 # should be substantial + + +# =========================================================================== +# Task 12.4: Tests for config +# =========================================================================== + +class TestAppConfig: + def test_defaults(self): + config = AppConfig() + assert config.max_data_context_retries == 2 + assert config.conversation_window_size == 10 + assert config.max_parallel_profiles == 4 + + def test_env_override(self): + os.environ["APP_MAX_DATA_CONTEXT_RETRIES"] = "5" + os.environ["APP_CONVERSATION_WINDOW_SIZE"] = "20" + os.environ["APP_MAX_PARALLEL_PROFILES"] = "8" + try: + config = AppConfig.from_env() + assert config.max_data_context_retries == 5 + assert config.conversation_window_size == 20 + assert 
config.max_parallel_profiles == 8 + finally: + del os.environ["APP_MAX_DATA_CONTEXT_RETRIES"] + del os.environ["APP_CONVERSATION_WINDOW_SIZE"] + del os.environ["APP_MAX_PARALLEL_PROFILES"] diff --git a/utils/code_executor.py b/utils/code_executor.py index c2cab11..f8f85bf 100644 --- a/utils/code_executor.py +++ b/utils/code_executor.py @@ -4,6 +4,7 @@ """ import os +import re import sys import ast import traceback @@ -15,6 +16,7 @@ from IPython.utils.capture import capture_output import matplotlib import matplotlib.pyplot as plt import matplotlib.font_manager as fm +import pandas as pd class CodeExecutor: @@ -82,8 +84,27 @@ class CodeExecutor: "PIL", "random", "networkx", + "platform", } + # Maximum rows for auto-export; DataFrames larger than this are skipped + # to avoid heavy disk I/O on large datasets. + AUTO_EXPORT_MAX_ROWS = 50000 + + # Variable names to skip during DataFrame auto-export + # (common import aliases and built-in namespace names) + _SKIP_EXPORT_NAMES = { + "pd", "np", "plt", "sns", "os", "json", "sys", "re", "io", + "csv", "glob", "duckdb", "display", "math", "datetime", "time", + "warnings", "logging", "copy", "pickle", "pathlib", "collections", + "itertools", "functools", "operator", "random", "networkx", + } + + # Regex for parsing DATA_FILE_SAVED markers + _DATA_FILE_SAVED_RE = re.compile( + r"\[DATA_FILE_SAVED\]\s*filename:\s*(.+?),\s*rows:\s*(\d+),\s*description:\s*(.+)" + ) + def __init__(self, output_dir: str = "outputs"): """ 初始化代码执行器 @@ -318,6 +339,142 @@ from IPython.display import display return str(obj) + @staticmethod + def _sanitize_for_json(rows: List[Dict]) -> List[Dict]: + """Replace NaN/inf/-inf with None so the data is JSON-serializable.""" + import math + sanitized = [] + for row in rows: + clean = {} + for k, v in row.items(): + if isinstance(v, float) and (math.isnan(v) or math.isinf(v)): + clean[k] = None + else: + clean[k] = v + sanitized.append(clean) + return sanitized + + def _capture_evidence_rows(self, result, 
shell) -> List[Dict]: + """ + Capture up to 10 evidence rows from the execution result. + First checks result.result, then falls back to the last DataFrame in namespace. + """ + try: + # Primary: check if result.result is a DataFrame + if result.result is not None and isinstance(result.result, pd.DataFrame): + return self._sanitize_for_json( + result.result.head(10).to_dict(orient="records") + ) + except Exception: + pass + + # Fallback: find the last-assigned DataFrame variable in namespace + try: + last_df = None + for name, obj in shell.user_ns.items(): + if ( + not name.startswith("_") + and name not in self._SKIP_EXPORT_NAMES + and isinstance(obj, pd.DataFrame) + ): + last_df = obj + if last_df is not None: + return self._sanitize_for_json( + last_df.head(10).to_dict(orient="records") + ) + except Exception: + pass + + return [] + + def _snapshot_dataframes(self, shell) -> Dict[str, int]: + """Snapshot current DataFrame variables as {name: id(obj)}.""" + snapshot = {} + try: + for name, obj in shell.user_ns.items(): + if ( + not name.startswith("_") + and name not in self._SKIP_EXPORT_NAMES + and isinstance(obj, pd.DataFrame) + ): + snapshot[name] = id(obj) + except Exception: + pass + return snapshot + + def _detect_new_dataframes( + self, before: Dict[str, int], after: Dict[str, int] + ) -> List[str]: + """Return variable names of new or changed DataFrames.""" + new_or_changed = [] + for name, obj_id in after.items(): + if name not in before or before[name] != obj_id: + new_or_changed.append(name) + return new_or_changed + + def _export_dataframe(self, var_name: str, df) -> Optional[Dict[str, Any]]: + """ + Export a DataFrame to CSV with dedup suffix. Returns metadata dict or None. + Skips export for DataFrames exceeding AUTO_EXPORT_MAX_ROWS to avoid + heavy disk I/O on large datasets; only metadata is recorded. 
+ """ + try: + rows_count = len(df) + cols_count = len(df.columns) + col_names = list(df.columns) + + # Skip writing large DataFrames to disk — record metadata only + if rows_count > self.AUTO_EXPORT_MAX_ROWS: + return { + "variable_name": var_name, + "filename": f"(skipped: {var_name} has {rows_count} rows)", + "rows": rows_count, + "cols": cols_count, + "columns": col_names, + "skipped": True, + } + + base_filename = f"{var_name}.csv" + filepath = os.path.join(self.output_dir, base_filename) + + # Dedup: if file exists, try _1, _2, ... + if os.path.exists(filepath): + suffix = 1 + while True: + dedup_filename = f"{var_name}_{suffix}.csv" + filepath = os.path.join(self.output_dir, dedup_filename) + if not os.path.exists(filepath): + base_filename = dedup_filename + break + suffix += 1 + + df.to_csv(filepath, index=False) + return { + "variable_name": var_name, + "filename": base_filename, + "rows": rows_count, + "cols": cols_count, + "columns": col_names, + } + except Exception: + return None + + def _parse_data_file_saved_markers(self, stdout_text: str) -> List[Dict[str, Any]]: + """Parse [DATA_FILE_SAVED] marker lines from captured stdout.""" + results = [] + try: + for line in stdout_text.splitlines(): + m = self._DATA_FILE_SAVED_RE.search(line) + if m: + results.append({ + "filename": m.group(1).strip(), + "rows": int(m.group(2)), + "description": m.group(3).strip(), + }) + except Exception: + pass + return results + def execute_code(self, code: str) -> Dict[str, Any]: """ 执行代码并返回结果 @@ -330,7 +487,10 @@ from IPython.display import display 'success': bool, 'output': str, 'error': str, - 'variables': Dict[str, Any] # 新生成的重要变量 + 'variables': Dict[str, Any], # 新生成的重要变量 + 'evidence_rows': List[Dict], # up to 10 evidence rows + 'auto_exported_files': List[Dict], # auto-detected DataFrame exports + 'prompt_saved_files': List[Dict], # parsed DATA_FILE_SAVED markers } """ # 检查代码安全性 @@ -341,12 +501,18 @@ from IPython.display import display "output": "", "error": 
f"代码安全检查失败: {safety_error}", "variables": {}, + "evidence_rows": [], + "auto_exported_files": [], + "prompt_saved_files": [], } # 记录执行前的变量 vars_before = set(self.shell.user_ns.keys()) try: + # --- Task 6.1: Snapshot DataFrame variables before execution --- + df_snapshot_before = self._snapshot_dataframes(self.shell) + # 使用IPython的capture_output来捕获所有输出 with capture_output() as captured: result = self.shell.run_cell(code) @@ -359,6 +525,9 @@ from IPython.display import display "output": captured.stdout, "error": f"执行前错误: {error_msg}", "variables": {}, + "evidence_rows": [], + "auto_exported_files": [], + "prompt_saved_files": self._parse_data_file_saved_markers(captured.stdout), } if result.error_in_exec: @@ -368,6 +537,9 @@ from IPython.display import display "output": captured.stdout, "error": f"执行错误: {error_msg}", "variables": {}, + "evidence_rows": [], + "auto_exported_files": [], + "prompt_saved_files": self._parse_data_file_saved_markers(captured.stdout), } # 获取输出 @@ -423,11 +595,36 @@ from IPython.display import display print(f"[WARN] [Auto-Save Global] 异常: {e}") # --- 自动保存机制 end --- + # --- Task 5: Evidence capture --- + evidence_rows = self._capture_evidence_rows(result, self.shell) + + # --- Task 6.2-6.4: DataFrame auto-detection and export --- + auto_exported_files = [] + try: + df_snapshot_after = self._snapshot_dataframes(self.shell) + new_df_names = self._detect_new_dataframes(df_snapshot_before, df_snapshot_after) + for var_name in new_df_names: + try: + df_obj = self.shell.user_ns[var_name] + meta = self._export_dataframe(var_name, df_obj) + if meta is not None: + auto_exported_files.append(meta) + except Exception: + pass + except Exception: + pass + + # --- Task 7: DATA_FILE_SAVED marker parsing --- + prompt_saved_files = self._parse_data_file_saved_markers(captured.stdout) + return { "success": True, "output": output, "error": "", "variables": important_new_vars, + "evidence_rows": evidence_rows, + "auto_exported_files": auto_exported_files, + 
def _format_sampled_column_profile(col, series, total_rows: int) -> str:
    """Return the markdown section describing one column of the sample."""
    dtype = series.dtype
    null_count = series.isnull().sum()
    null_ratio = (null_count / total_rows) * 100 if total_rows > 0 else 0

    parts = [f"#### {col} ({dtype})\n"]
    if null_count > 0:
        parts.append(f"- [WARN] 空值: {null_count} ({null_ratio:.1f}%)\n")

    # Boolean columns satisfy is_numeric_dtype, but describe() reports
    # count/unique/top/freq for them (no min/max/mean), so they are profiled
    # like categorical columns instead of crashing on desc['min'].
    is_bool = pd.api.types.is_bool_dtype(dtype)
    if pd.api.types.is_numeric_dtype(dtype) and not is_bool:
        desc = series.describe()
        parts.append(
            f"- 统计: Min={desc['min']:.2f}, Max={desc['max']:.2f}, Mean={desc['mean']:.2f}\n"
        )
    elif (
        is_bool
        or pd.api.types.is_object_dtype(dtype)
        # isinstance check replaces the deprecated is_categorical_dtype().
        or isinstance(dtype, pd.CategoricalDtype)
    ):
        unique_count = series.nunique()
        parts.append(f"- 唯一值数量: {unique_count}\n")
        if unique_count > 0:
            top_n = series.value_counts().head(5)
            top_items_str = ", ".join(f"{k}({v})" for k, v in top_n.items())
            parts.append(f"- **TOP 5 高频值**: {top_items_str}\n")
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        parts.append(f"- 范围: {series.min()} 至 {series.max()}\n")

    parts.append("\n")
    return "".join(parts)


def _profile_chunked(file_path: str) -> str:
    """
    Profile a large file from a sample instead of loading it completely.

    The file is streamed with ``load_data_chunked()``. The first chunk is kept
    in full; of the chunks that follow, every 5th one (starting with the first
    of them) contributes at most 100 leading rows. A markdown profile is then
    generated from the concatenated sample.

    Args:
        file_path: Path to the data file.

    Returns:
        A markdown string containing the sampled profile for this file, or an
        ``[ERROR]`` section when no chunk could be read.
    """
    file_name = os.path.basename(file_path)
    chunks_iter = load_data_chunked(file_path)
    first_chunk = next(chunks_iter, None)
    if first_chunk is None:
        return f"## 文件: {file_name}\n\n[ERROR] 无法读取文件: {file_path}\n\n"

    sample_parts = [first_chunk]
    for i, chunk in enumerate(chunks_iter):
        if i % 5 == 0:  # sample every 5th subsequent chunk
            sample_parts.append(chunk.head(100))

    combined = pd.concat(sample_parts, ignore_index=True)
    rows, cols = combined.shape

    # Column labels may be non-strings (e.g. integer headers); str() them so
    # the join cannot raise TypeError.
    column_list = ", ".join(str(label) for label in combined.columns)

    sections = [
        f"## 文件: {file_name}\n\n",
        "- **注意**: 此画像基于抽样数据生成(首块 + 每5块采样100行)\n",
        f"- **样本维度**: {rows} 行 x {cols} 列\n",
        f"- **列名**: `{column_list}`\n\n",
        "### 列详细分布:\n",
    ]
    sections.extend(
        _format_sampled_column_profile(col, series, rows)
        for col, series in combined.items()
    )
    return "".join(sections)
+ + Args: + file_paths: List of file paths to profile. + max_file_size_mb: Size threshold in MB. Files larger than this use + chunked profiling. Defaults to ``app_config.max_file_size_mb``. + + Returns: + A markdown string containing the combined data profile. + """ + if max_file_size_mb is None: + max_file_size_mb = app_config.max_file_size_mb + + profile_summary = "# 数据画像报告 (Data Profile)\n\n" + + if not file_paths: + return profile_summary + "未提供数据文件。" + + for file_path in file_paths: + if not os.path.exists(file_path): + profile_summary += f"## 文件: {os.path.basename(file_path)}\n\n" + profile_summary += f"[WARN] 文件不存在: {file_path}\n\n" + continue + + try: + file_size_mb = os.path.getsize(file_path) / (1024 * 1024) + if file_size_mb > max_file_size_mb: + profile_summary += _profile_chunked(file_path) + else: + # Use existing full-load profiling for this single file + profile_summary += load_and_profile_data([file_path]).replace( + "# 数据画像报告 (Data Profile)\n\n", "" + ) + except Exception as e: + profile_summary += f"## 文件: {os.path.basename(file_path)}\n\n" + profile_summary += f"[ERROR] 读取或分析文件失败: {str(e)}\n\n" + + return profile_summary + + def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional[pd.DataFrame]: """ 带缓存的数据加载 diff --git a/utils/data_privacy.py b/utils/data_privacy.py index 258016a..485cbbe 100644 --- a/utils/data_privacy.py +++ b/utils/data_privacy.py @@ -154,6 +154,82 @@ def sanitize_execution_feedback(feedback: str, max_lines: int = 30) -> str: return "\n".join(safe_lines) +def _extract_column_from_error(error_message: str) -> Optional[str]: + """Extract column name from error message patterns like KeyError: 'col_name'. + + Supports: + - KeyError: 'column_name' or KeyError: "column_name" + - column 'column_name' or column "column_name" (case-insensitive) + + Returns: + The extracted column name, or None if no column reference is found. 
+ """ + match = re.search(r"KeyError:\s*['\"](.+?)['\"]", error_message) + if match: + return match.group(1) + match = re.search(r"column\s+['\"](.+?)['\"]", error_message, re.IGNORECASE) + if match: + return match.group(1) + return None + + +def _lookup_column_in_profile(column_name: Optional[str], safe_profile: str) -> Optional[dict]: + """Look up column metadata in the safe profile markdown table. + + Parses the markdown table rows produced by build_safe_profile() and returns + a dict with keys: dtype, null_rate, unique_count, description. + + Args: + column_name: The column name to look up (may be None). + safe_profile: The safe profile markdown string. + + Returns: + A dict of column metadata, or None if not found. + """ + if not column_name: + return None + for line in safe_profile.split("\n"): + if line.startswith("|") and column_name in line: + parts = [p.strip() for p in line.split("|") if p.strip()] + if len(parts) >= 5 and parts[0] == column_name: + return { + "dtype": parts[1], + "null_rate": parts[2], + "unique_count": parts[3], + "description": parts[4], + } + return None + + +def generate_enriched_hint(error_message: str, safe_profile: str) -> str: + """Generate an enriched hint from the safe profile for a data-context error. + + Extracts the referenced column name from the error, looks it up in the safe + profile markdown table, and returns a hint string containing only schema-level + metadata — no real data values. + + Args: + error_message: The error message from code execution. + safe_profile: The safe profile markdown string. + + Returns: + A hint string with retry context and column metadata (if found). 
+ """ + column_name = _extract_column_from_error(error_message) + column_meta = _lookup_column_in_profile(column_name, safe_profile) + + hint = "[RETRY CONTEXT] 上一次代码执行因数据上下文错误失败。\n" + hint += f"错误信息: {error_message}\n" + if column_meta: + hint += f"相关列 '{column_name}' 的结构信息:\n" + hint += f" - 数据类型: {column_meta['dtype']}\n" + hint += f" - 唯一值数量: {column_meta['unique_count']}\n" + hint += f" - 空值率: {column_meta['null_rate']}\n" + hint += f" - 特征描述: {column_meta['description']}\n" + hint += "请根据以上结构信息修正代码,不要假设具体的数据值。" + return hint + + def _load_dataframe(file_path: str): """加载 DataFrame,支持多种格式和编码""" import os diff --git a/web/main.py b/web/main.py index 42114de..4bd23ef 100644 --- a/web/main.py +++ b/web/main.py @@ -5,6 +5,7 @@ import threading import glob import uuid import json +import re from datetime import datetime from typing import Optional, Dict, List from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Query @@ -12,6 +13,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse, JSONResponse from pydantic import BaseModel +import pandas as pd # Add parent directory to path to import agent modules sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -48,6 +50,8 @@ class SessionData: self.generated_report: Optional[str] = None self.log_file: Optional[str] = None self.analysis_results: List[Dict] = [] # Store analysis results for gallery + self.rounds: List[Dict] = [] # Structured Round_Data objects + self.data_files: List[Dict] = [] # File metadata dicts self.agent: Optional[DataAnalysisAgent] = None # Store the agent instance for follow-up # 新增:进度跟踪 @@ -128,7 +132,15 @@ class SessionManager: if os.path.exists(results_json): try: with open(results_json, "r") as f: - session.analysis_results = json.load(f) + data = json.load(f) + # Support both old format (plain list) and new format (dict with rounds/data_files) + if 
isinstance(data, dict): + session.analysis_results = data.get("analysis_results", []) + session.rounds = data.get("rounds", []) + session.data_files = data.get("data_files", []) + else: + # Legacy format: data is the analysis_results list directly + session.analysis_results = data except: pass @@ -190,7 +202,7 @@ app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs") # --- Helper Functions --- -def run_analysis_task(session_id: str, files: list, user_requirement: str, is_followup: bool = False): +def run_analysis_task(session_id: str, files: list, user_requirement: str, is_followup: bool = False, template_name: str = None): """在后台线程中运行分析任务""" session = session_manager.get_session(session_id) if not session: @@ -220,11 +232,22 @@ def run_analysis_task(session_id: str, files: list, user_requirement: str, is_fo agent = DataAnalysisAgent(llm_config, force_max_rounds=False, output_dir=base_output_dir) session.agent = agent + # Wire progress callback to update session progress fields + def progress_cb(current, total, message): + session.current_round = current + session.max_rounds = total + session.progress_percentage = round((current / total) * 100, 1) if total > 0 else 0 + session.status_message = message + + agent.set_progress_callback(progress_cb) + agent.set_session_ref(session) + result = agent.analyze( user_input=user_requirement, files=files, session_output_dir=session_output_dir, reset_session=True, + template_name=template_name, ) else: agent = session.agent @@ -232,6 +255,16 @@ def run_analysis_task(session_id: str, files: list, user_requirement: str, is_fo print("Error: Agent not initialized for follow-up.") return + # Wire progress callback for follow-up sessions + def progress_cb_followup(current, total, message): + session.current_round = current + session.max_rounds = total + session.progress_percentage = round((current / total) * 100, 1) if total > 0 else 0 + session.status_message = message + + 
agent.set_progress_callback(progress_cb_followup) + agent.set_session_ref(session) + result = agent.analyze( user_input=user_requirement, files=None, @@ -246,7 +279,11 @@ def run_analysis_task(session_id: str, files: list, user_requirement: str, is_fo # 持久化结果 with open(os.path.join(session_output_dir, "results.json"), "w") as f: - json.dump(session.analysis_results, f, default=str) + json.dump({ + "analysis_results": session.analysis_results, + "rounds": session.rounds, + "data_files": session.data_files, + }, f, default=str) except Exception as e: print(f"Error during analysis: {e}") @@ -260,6 +297,7 @@ def run_analysis_task(session_id: str, files: list, user_requirement: str, is_fo class StartRequest(BaseModel): requirement: str + template: Optional[str] = None class ChatRequest(BaseModel): session_id: str @@ -294,7 +332,7 @@ async def start_analysis(request: StartRequest, background_tasks: BackgroundTask files = [os.path.abspath(f) for f in files] # Only use absolute paths - background_tasks.add_task(run_analysis_task, session_id, files, request.requirement, is_followup=False) + background_tasks.add_task(run_analysis_task, session_id, files, request.requirement, is_followup=False, template_name=request.template) return {"status": "started", "session_id": session_id} @app.post("/api/chat") @@ -309,6 +347,19 @@ async def chat_analysis(request: ChatRequest, background_tasks: BackgroundTasks) background_tasks.add_task(run_analysis_task, request.session_id, [], request.message, is_followup=True) return {"status": "started"} +import math as _math + +def _sanitize_value(v): + """Replace NaN/inf with None for JSON safety.""" + if isinstance(v, float) and (_math.isnan(v) or _math.isinf(v)): + return None + if isinstance(v, dict): + return {k: _sanitize_value(val) for k, val in v.items()} + if isinstance(v, list): + return [_sanitize_value(item) for item in v] + return v + + @app.get("/api/status") async def get_status(session_id: str = Query(..., description="Session 
ID")): session = session_manager.get_session(session_id) @@ -320,13 +371,19 @@ async def get_status(session_id: str = Query(..., description="Session ID")): with open(session.log_file, "r", encoding="utf-8") as f: log_content = f.read() - return { + response_data = { "is_running": session.is_running, "log": log_content, "has_report": session.generated_report is not None, "report_path": session.generated_report, - "script_path": session.reusable_script # 新增:返回脚本路径 + "script_path": session.reusable_script, + "current_round": session.current_round, + "max_rounds": session.max_rounds, + "progress_percentage": session.progress_percentage, + "status_message": session.status_message, + "rounds": _sanitize_value(session.rounds), } + return JSONResponse(content=response_data) @app.get("/api/export") async def export_session(session_id: str = Query(..., description="Session ID")): @@ -394,8 +451,11 @@ async def get_report(session_id: str = Query(..., description="Session ID")): # 将报告按段落拆分,为前端润色功能提供结构化数据 paragraphs = _split_report_to_paragraphs(content) + + # Extract evidence annotations and build supporting_data mapping + supporting_data = _extract_evidence_annotations(paragraphs, session) - return {"content": content, "base_path": web_base_path, "paragraphs": paragraphs} + return {"content": content, "base_path": web_base_path, "paragraphs": paragraphs, "supporting_data": supporting_data} @app.get("/api/figures") async def get_figures(session_id: str = Query(..., description="Session ID")): @@ -467,6 +527,120 @@ async def download_script(session_id: str = Query(..., description="Session ID") # --- Tools API --- +@app.get("/api/templates") +async def list_available_templates(): + from utils.analysis_templates import list_templates + return {"templates": list_templates()} + + +# --- Data Files API --- + +@app.get("/api/data-files") +async def list_data_files(session_id: str = Query(..., description="Session ID")): + """Return session.data_files merged with fallback directory 
def _resolve_session_data_file(session, filename: str) -> str:
    """Resolve ``filename`` to a real file inside the session output directory.

    ``filename`` comes straight from the query string, so it is untrusted:
    ``../`` segments, absolute paths and symlinks could otherwise escape the
    session directory. The resolved path must stay below the resolved
    ``session.output_dir`` and must be a regular file.

    Raises:
        HTTPException: 404 when the session has no output directory, the path
            escapes it, or the file does not exist. The same status is used
            for all cases so the response does not reveal which files exist
            outside the session directory.
    """
    not_found = HTTPException(status_code=404, detail=f"File not found: {filename}")
    if not session.output_dir:
        raise not_found

    base_dir = os.path.realpath(session.output_dir)
    candidate = os.path.realpath(os.path.join(base_dir, filename))
    try:
        inside = os.path.commonpath([base_dir, candidate]) == base_dir
    except ValueError:
        # commonpath raises for paths on different drives (Windows).
        inside = False

    if not inside or not os.path.isfile(candidate):
        raise not_found
    return candidate


@app.get("/api/data-files/preview")
async def preview_data_file(
    session_id: str = Query(..., description="Session ID"),
    filename: str = Query(..., description="File name"),
):
    """Read a session CSV/XLSX via pandas and return ``{columns, rows}``.

    Only the first 5 rows are read. NaN/inf cell values are replaced with
    ``None`` so the payload is valid JSON.

    Raises:
        HTTPException: 404 for an unknown session or a file that is missing
            or outside the session output directory; 500 when the file cannot
            be parsed.
    """
    import math

    session = session_manager.get_session(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    file_path = _resolve_session_data_file(session, filename)

    try:
        if filename.lower().endswith(".xlsx"):
            df = pd.read_excel(file_path, nrows=5)
        else:
            # utf-8-sig also decodes plain UTF-8 (the BOM is optional), so a
            # separate utf-8 attempt is redundant; gbk covers legacy Chinese
            # exports. A failure on the last encoding propagates to the 500
            # handler below.
            encodings = ("utf-8-sig", "gbk")
            for position, encoding in enumerate(encodings):
                try:
                    df = pd.read_csv(file_path, nrows=5, encoding=encoding)
                    break
                except UnicodeDecodeError:
                    if position == len(encodings) - 1:
                        raise

        columns = list(df.columns)
        # Sanitize NaN/inf for JSON serialization
        rows = [
            {
                k: (None if isinstance(v, float) and not math.isfinite(v) else v)
                for k, v in row.items()
            }
            for row in df.head(5).to_dict(orient="records")
        ]
        return {"columns": columns, "rows": rows}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to read file: {str(e)}") from e


@app.get("/api/data-files/download")
async def download_data_file(
    session_id: str = Query(..., description="Session ID"),
    filename: str = Query(..., description="File name"),
):
    """Return a session data file as a download with a matching MIME type.

    Raises:
        HTTPException: 404 for an unknown session or a file that is missing
            or outside the session output directory.
    """
    session = session_manager.get_session(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    file_path = _resolve_session_data_file(session, filename)

    if filename.lower().endswith(".xlsx"):
        media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:
        media_type = "text/csv"

    # Use the basename so no path separators end up in Content-Disposition.
    return FileResponse(
        path=file_path,
        filename=os.path.basename(file_path),
        media_type=media_type,
    )
+ """ + supporting_data = {} + evidence_pattern = re.compile(r"") + + for para in paragraphs: + content = para.get("content", "") + match = evidence_pattern.search(content) + if match: + round_num = int(match.group(1)) + # rounds are 1-indexed, list is 0-indexed + idx = round_num - 1 + if 0 <= idx < len(session.rounds): + evidence_rows = session.rounds[idx].get("evidence_rows", []) + if evidence_rows: + supporting_data[para["id"]] = evidence_rows + return supporting_data + + class PolishRequest(BaseModel): session_id: str paragraph_id: str diff --git a/web/static/clean_style.css b/web/static/clean_style.css index cea4780..6506574 100644 --- a/web/static/clean_style.css +++ b/web/static/clean_style.css @@ -322,6 +322,51 @@ body { font-size: 0.875rem; } +/* Template Cards */ +.template-cards { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; +} + +.template-card { + flex: 1 1 calc(50% - 0.35rem); + min-width: 100px; + padding: 0.35rem 0.5rem; + border: 1px solid var(--border-color); + border-radius: 0.25rem; + cursor: pointer; + transition: all 0.15s; + background: #FFFFFF; +} + +.template-card:hover { + border-color: var(--primary-color); + background: #F0F7FF; +} + +.template-card.selected { + border-color: var(--primary-color); + background: #EFF6FF; + box-shadow: 0 0 0 1px rgba(37, 99, 235, 0.15); +} + +.template-card-title { + font-size: 0.78rem; + font-weight: 600; + color: var(--text-primary); + line-height: 1.2; +} + +.template-card-desc { + font-size: 0.68rem; + color: var(--text-secondary); + line-height: 1.2; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + /* Upload Area */ .upload-area { border: 2px dashed var(--border-color); @@ -404,15 +449,14 @@ body { flex-direction: column; } -#logsTab { - background-color: #1a1b26; - color: #a9b1d6; - font-family: 'JetBrains Mono', 'Menlo', 'Monaco', 'Courier New', monospace; - padding: 1.5rem; +/* Execution Tab */ +#executionTab { + background-color: #F9FAFB; + padding: 0.5rem; } 
.log-content { - font-family: inherit; + font-family: 'JetBrains Mono', 'Menlo', 'Monaco', 'Courier New', monospace; font-size: 0.85rem; white-space: pre-wrap; line-height: 1.6; @@ -444,96 +488,340 @@ body { display: none !important; } -/* Gallery Carousel */ -.carousel-container { - position: relative; - width: 100%; - flex: 1; - display: flex; - align-items: center; - justify-content: center; - background: #F3F4F6; - border-radius: 0.5rem; - overflow: hidden; - margin-bottom: 1rem; -} +/* ===== Round Card Styles ===== */ -.carousel-slide { - width: 100%; - height: 100%; +.round-cards-wrapper { display: flex; flex-direction: column; - align-items: center; - justify-content: center; - padding: 2rem; + gap: 0.75rem; + padding: 0.5rem; } -.carousel-slide img { - max-width: 100%; - max-height: 500px; - object-fit: contain; - border-radius: 0.25rem; - box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1); - transition: transform 0.2s; - background: white; -} - -.carousel-btn { - position: absolute; - top: 50%; - transform: translateY(-50%); - background: rgba(255, 255, 255, 0.9); +.round-card { border: 1px solid var(--border-color); - border-radius: 50%; - width: 44px; - height: 44px; + border-radius: 0.5rem; + background: #FFFFFF; + overflow: hidden; + transition: box-shadow 0.2s; +} + +.round-card:hover { + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08); +} + +.round-card.expanded { + border-color: var(--primary-color); + box-shadow: 0 2px 12px rgba(37, 99, 235, 0.1); +} + +.round-card-header { display: flex; align-items: center; - justify-content: center; + gap: 0.75rem; + padding: 0.75rem 1rem; cursor: pointer; - z-index: 10; - color: var(--text-primary); - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); - transition: all 0.2s; + background: #F9FAFB; + transition: background 0.2s; } -.carousel-btn:hover { - background: var(--primary-color); - color: white; - border-color: var(--primary-color); - transform: translateY(-50%) scale(1.1); +.round-card-header:hover { + background: 
#F0F7FF; } -.carousel-btn.prev { - left: 1rem; -} - -.carousel-btn.next { - right: 1rem; -} - -.image-info { - width: 100%; - text-align: center; - color: var(--text-primary); - background: white; - padding: 1rem; - border-radius: 0.5rem; - border: 1px solid var(--border-color); -} - -.image-title { +.round-number { font-weight: 600; - font-size: 1.1rem; - margin-bottom: 0.5rem; + font-size: 0.85rem; + color: var(--primary-color); + white-space: nowrap; +} + +.round-summary { + flex: 1; + font-size: 0.85rem; + color: var(--text-secondary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.round-toggle-icon { + color: var(--text-secondary); + font-size: 0.75rem; + transition: transform 0.2s; +} + +.round-card-body { + padding: 1rem; + border-top: 1px solid var(--border-color); + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.round-section { + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.round-section-title { + font-size: 0.8rem; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.03em; +} + +.round-reasoning { + font-size: 0.9rem; + color: var(--text-primary); + line-height: 1.6; + white-space: pre-wrap; +} + +.round-result { + font-size: 0.9rem; + color: var(--text-primary); +} + +.round-details { + border: 1px solid var(--border-color); + border-radius: 0.375rem; + overflow: hidden; +} + +.round-details summary { + padding: 0.5rem 0.75rem; + font-size: 0.85rem; + font-weight: 500; + color: var(--text-secondary); + cursor: pointer; + background: #F9FAFB; + transition: background 0.2s; +} + +.round-details summary:hover { + background: #F0F7FF; color: var(--primary-color); } -.image-desc { - font-size: 0.9rem; +.round-code, +.round-raw-log { + margin: 0; + padding: 0.75rem; + font-family: 'JetBrains Mono', 'Menlo', 'Monaco', 'Courier New', monospace; + font-size: 0.8rem; + line-height: 1.5; + white-space: pre-wrap; + word-break: break-all; + background: 
#1a1b26; + color: #a9b1d6; + max-height: 300px; + overflow-y: auto; +} + +.evidence-table { + width: 100%; + border-collapse: collapse; + font-size: 0.8rem; + margin-top: 0.25rem; +} + +.evidence-table th, +.evidence-table td { + border: 1px solid var(--border-color); + padding: 0.35rem 0.5rem; + text-align: left; +} + +.evidence-table th { + background: #F3F4F6; + font-weight: 600; + color: var(--text-primary); + white-space: nowrap; +} + +.evidence-table td { color: var(--text-secondary); } +/* ===== Data File Card Styles ===== */ + +.file-cards-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); + gap: 0.75rem; + padding: 0.5rem; +} + +.data-file-card { + display: flex; + align-items: center; + gap: 0.75rem; + padding: 0.75rem 1rem; + border: 1px solid var(--border-color); + border-radius: 0.5rem; + background: #FFFFFF; + cursor: pointer; + transition: all 0.2s; +} + +.data-file-card:hover { + border-color: var(--primary-color); + background: #F0F7FF; + box-shadow: 0 2px 8px rgba(37, 99, 235, 0.08); +} + +.data-file-icon { + font-size: 1.5rem; + color: var(--primary-color); + width: 2rem; + text-align: center; + flex-shrink: 0; +} + +.data-file-info { + flex: 1; + min-width: 0; +} + +.data-file-name { + font-size: 0.9rem; + font-weight: 500; + color: var(--text-primary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.data-file-desc { + font-size: 0.8rem; + color: var(--text-secondary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.data-preview-panel { + margin: 0.75rem 0.5rem; + border: 1px solid var(--border-color); + border-radius: 0.5rem; + background: #FFFFFF; + overflow: hidden; +} + +.data-preview-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.5rem 0.75rem; + background: #F9FAFB; + border-bottom: 1px solid var(--border-color); + font-size: 0.9rem; + font-weight: 500; +} + +.data-preview-table { + width: 100%; + 
border-collapse: collapse; + font-size: 0.8rem; +} + +.data-preview-table th, +.data-preview-table td { + border: 1px solid var(--border-color); + padding: 0.4rem 0.6rem; + text-align: left; +} + +.data-preview-table th { + background: #F3F4F6; + font-weight: 600; + white-space: nowrap; +} + +/* ===== Supporting Data Styles ===== */ + +.supporting-data-btn { + display: inline-flex; + align-items: center; + gap: 0.35rem; + padding: 0.25rem 0.6rem; + font-size: 0.78rem; + border: 1px solid #BFDBFE; + border-radius: 1rem; + background: #EFF6FF; + color: var(--primary-color); + cursor: pointer; + transition: all 0.2s; + margin-top: 0.35rem; +} + +.supporting-data-btn:hover { + background: var(--primary-color); + color: white; + border-color: var(--primary-color); +} + +.supporting-data-modal { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.4); + display: flex; + align-items: center; + justify-content: center; + z-index: 1000; +} + +.supporting-data-content { + background: #FFFFFF; + border-radius: 0.5rem; + box-shadow: 0 20px 60px rgba(0, 0, 0, 0.15); + max-width: 700px; + width: 90%; + max-height: 80vh; + display: flex; + flex-direction: column; + overflow: hidden; +} + +.supporting-data-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.75rem 1rem; + border-bottom: 1px solid var(--border-color); + font-weight: 600; + font-size: 1rem; +} + +.supporting-data-header button { + background: none; + border: none; + font-size: 1.25rem; + cursor: pointer; + color: var(--text-secondary); + padding: 0.25rem; + line-height: 1; +} + +.supporting-data-header button:hover { + color: var(--text-primary); +} + +.supporting-data-body { + padding: 1rem; + overflow-y: auto; + flex: 1; +} + +.supporting-data-body .evidence-table { + font-size: 0.85rem; +} + /* ===== Report Paragraph Polishing ===== */ @@ -765,3 +1053,56 @@ body { grid-template-columns: 1fr; } } + + +/* ===== Progress Bar 
===== */ + +.progress-bar-container { + padding: 0.5rem 0.75rem; + background: #F9FAFB; + border: 1px solid var(--border-color); + border-radius: 0.375rem; + margin-bottom: 0.5rem; +} + +.progress-bar-info { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 0.35rem; +} + +.progress-label { + font-size: 0.8rem; + font-weight: 600; + color: var(--text-primary); +} + +.progress-percent { + font-size: 0.8rem; + font-weight: 600; + color: var(--primary-color); +} + +.progress-bar-track { + width: 100%; + height: 8px; + background: #E5E7EB; + border-radius: 4px; + overflow: hidden; +} + +.progress-bar-fill { + height: 100%; + background: linear-gradient(90deg, var(--primary-color), #60A5FA); + border-radius: 4px; + transition: width 0.5s ease; + min-width: 0; +} + +.progress-message { + font-size: 0.75rem; + color: var(--text-secondary); + margin-top: 0.25rem; + min-height: 1em; +} diff --git a/web/static/index.html b/web/static/index.html index c24e979..167b781 100644 --- a/web/static/index.html +++ b/web/static/index.html @@ -76,8 +76,19 @@ +
Waiting to start...+ +
Waiting to start...
+