Add web session analysis platform with follow-up topics

This commit is contained in:
2026-03-09 22:23:00 +08:00
commit 17ce711e49
30 changed files with 10681 additions and 0 deletions

242
webapp/api.py Normal file
View File

@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
"""
FastAPI application for the data analysis platform.
"""
import os
import re
import uuid
from typing import List, Optional
from urllib.parse import quote

from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field

from webapp.session_manager import SessionManager
from webapp.storage import Storage, utcnow_iso
from webapp.task_runner import TaskRunner
# --- Filesystem layout -------------------------------------------------------
# The project root is the parent of the directory that contains this module.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")
UPLOADS_DIR = os.path.join(RUNTIME_DIR, "uploads")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
DB_PATH = os.path.join(RUNTIME_DIR, "analysis_platform.db")

# Creating UPLOADS_DIR also creates its parent RUNTIME_DIR, where DB_PATH
# lives, so this must run before Storage is constructed.
os.makedirs(UPLOADS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)

# --- Shared service objects --------------------------------------------------
# Created once at import time and used by every endpoint in this module.
storage = Storage(DB_PATH)
session_manager = SessionManager(OUTPUTS_DIR)
task_runner = TaskRunner(
    storage=storage,
    uploads_dir=UPLOADS_DIR,
    outputs_dir=OUTPUTS_DIR,
    session_manager=session_manager,
    max_workers=2,
)

# --- ASGI application --------------------------------------------------------
app = FastAPI(title="Data Analysis Platform API")
# Front-end assets sit in the "static" directory next to this module and are
# served under /static.
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
class CreateSessionRequest(BaseModel):
    """Request body for ``POST /sessions``."""

    # Owner of the new session; required and non-empty.
    user_id: str = Field(default=..., min_length=1)
    # Title stored on the session; required and non-empty.
    title: str = Field(default=..., min_length=1)
    # Query for the first task created in the session; required and non-empty.
    query: str = Field(default=..., min_length=1)
    # Ids of previously uploaded files to attach to the session.
    file_ids: List[str]
    # Optional id of an uploaded file passed along as the template.
    template_file_id: Optional[str] = None
class CreateTopicRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/topics``."""

    # Caller identity; the endpoint checks it against the session.
    user_id: str = Field(default=..., min_length=1)
    # Follow-up query that becomes a new task in the session.
    query: str = Field(default=..., min_length=1)
def sanitize_filename(filename: str, max_length: int = 200) -> str:
    """Return a filesystem-safe version of a client-supplied file name.

    Every run of characters outside ``A-Z a-z 0-9 . _ -`` is collapsed into a
    single underscore, then leading and trailing dots/underscores are removed.
    If nothing is left, ``"upload.bin"`` is returned.

    Args:
        filename: The name as sent by the client.
        max_length: Upper bound on the length of the result. The default
            leaves room for the ``<uuid4>_`` prefix the upload endpoint adds
            while staying under the common 255-character file name limit.

    Returns:
        The sanitized name, at most ``max_length`` characters long. When the
        name has to be shortened the extension is kept if it fits.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", filename).strip("._")
    if len(cleaned) > max_length:
        stem, ext = os.path.splitext(cleaned)
        if len(ext) < max_length:
            # Shorten the stem so the extension (and thus the file type)
            # survives truncation.
            cleaned = stem[: max_length - len(ext)] + ext
        else:
            # Pathologically long "extension": fall back to a plain cut.
            cleaned = cleaned[:max_length]
        # The cut may have exposed a trailing dot or underscore.
        cleaned = cleaned.strip("._")
    return cleaned or "upload.bin"
def ensure_session_access(session_id: str, user_id: str) -> dict:
    """Return the session looked up for ``session_id`` and ``user_id``.

    Raises:
        HTTPException: 404 when ``storage.get_session`` returns nothing for
            this id/user pair.
    """
    found = storage.get_session(session_id, user_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Session not found")
def ensure_task_access(task_id: str, user_id: str) -> dict:
    """Return the task looked up for ``task_id`` and ``user_id``.

    Raises:
        HTTPException: 404 when ``storage.get_task`` returns nothing for this
            id/user pair.
    """
    found = storage.get_task(task_id, user_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Task not found")
@app.get("/health")
def health():
    """Liveness probe; always answers with a fixed "ok" status."""
    return dict(status="ok")
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve ``index.html`` from the static directory as the landing page."""
    with open(os.path.join(STATIC_DIR, "index.html"), encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(markup)
@app.post("/files/upload")
async def upload_files(
    user_id: str = Form(...),
    files: List[UploadFile] = File(...),
):
    """Save uploaded files under the user's upload directory and register them.

    Each file is streamed in 1 MiB chunks to
    ``UPLOADS_DIR/<user_id>/<uuid4>_<sanitized name>`` and then recorded via
    ``storage.create_uploaded_file`` together with its original file name.

    Returns:
        ``{"files": [...]}`` with one storage record per uploaded file, in
        upload order.

    Raises:
        HTTPException: 400 when ``user_id`` does not resolve to a directory
            strictly inside ``UPLOADS_DIR``.
    """
    user_dir = os.path.join(UPLOADS_DIR, user_id)
    # user_id is client-supplied form data. Without this check a value such as
    # "../../x" or an absolute path would let the caller create files outside
    # the uploads area (os.path.join discards UPLOADS_DIR for absolute paths).
    uploads_root = os.path.realpath(UPLOADS_DIR)
    if not os.path.realpath(user_dir).startswith(uploads_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid user_id")
    os.makedirs(user_dir, exist_ok=True)

    saved = []
    for upload in files:
        safe_name = sanitize_filename(upload.filename or "upload.bin")
        # The uuid prefix keeps same-named uploads from overwriting each other.
        stored_path = os.path.join(user_dir, f"{uuid.uuid4()}_{safe_name}")
        with open(stored_path, "wb") as f:
            while True:
                chunk = await upload.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
        saved.append(
            storage.create_uploaded_file(user_id, upload.filename or safe_name, stored_path)
        )
    return {"files": saved}
@app.get("/files")
def list_files(user_id: str = Query(...)):
    """List all uploaded files recorded for ``user_id``."""
    uploaded = storage.list_all_uploaded_files(user_id)
    return {"files": uploaded}
@app.post("/sessions")
def create_session(request: CreateSessionRequest):
    """Create a session, create its first task, and queue that task.

    Returns:
        ``{"session": ..., "task": ...}`` with the newly created records.

    Raises:
        HTTPException: 400 when ``storage.list_uploaded_files`` finds nothing
            for the requested file ids and user.
    """
    owner = request.user_id
    file_ids = request.file_ids
    template_id = request.template_file_id

    # NOTE(review): this only requires a non-empty lookup result; the full,
    # unfiltered id list is what gets stored below. Whether unknown ids are
    # rejected elsewhere is not visible from this module.
    known_files = storage.list_uploaded_files(file_ids, owner)
    if not known_files:
        raise HTTPException(status_code=400, detail="No valid files found for session")

    new_session = storage.create_session(
        user_id=owner,
        title=request.title,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    first_task = storage.create_task(
        session_id=new_session["id"],
        user_id=owner,
        query=request.query,
        uploaded_file_ids=file_ids,
        template_file_id=template_id,
    )
    task_runner.submit(first_task["id"], owner)
    return {"session": new_session, "task": first_task}
@app.get("/sessions")
def list_sessions(user_id: str = Query(...)):
    """List the sessions recorded for ``user_id``."""
    owned = storage.list_sessions(user_id)
    return {"sessions": owned}
@app.get("/sessions/{session_id}")
def get_session(session_id: str, user_id: str = Query(...)):
    """Return one session together with its tasks.

    Raises:
        HTTPException: 404 (from ``ensure_session_access``) when the session
            is not found for this user.
    """
    # Access is checked first so the task lookup never runs for a session the
    # caller cannot see.
    payload = {"session": ensure_session_access(session_id, user_id)}
    payload["tasks"] = storage.list_session_tasks(session_id, user_id)
    return payload
@app.post("/sessions/{session_id}/topics")
def create_followup_topic(session_id: str, request: CreateTopicRequest):
    """Add a follow-up task to an open session and queue it.

    The new task reuses the session's uploaded file ids and template file id.

    Returns:
        ``{"session": ..., "task": ...}`` with the session and the new task.

    Raises:
        HTTPException: 404 when the session is not found for this user, 400
            when the session status is ``"closed"``.
    """
    owner = request.user_id
    parent = ensure_session_access(session_id, owner)

    already_closed = parent["status"] == "closed"
    if already_closed:
        raise HTTPException(status_code=400, detail="Session is closed")

    followup = storage.create_task(
        session_id=session_id,
        user_id=owner,
        query=request.query,
        uploaded_file_ids=parent["uploaded_file_ids"],
        template_file_id=parent.get("template_file_id"),
    )
    task_runner.submit(followup["id"], owner)
    return {"session": parent, "task": followup}
@app.post("/sessions/{session_id}/close")
def close_session(session_id: str, user_id: str = Query(...)):
    """Mark a session as closed and release its session-manager state.

    Returns:
        ``{"session": ...}`` with the session re-read from storage after the
        update.

    Raises:
        HTTPException: 404 (from ``ensure_session_access``) when the session
            is not found for this user.
    """
    session = ensure_session_access(session_id, user_id)
    # Only stamp closed_at on the first close, so repeating the request does
    # not rewrite the time the session was actually closed.
    if session["status"] != "closed":
        storage.update_session(session_id, status="closed", closed_at=utcnow_iso())
    # Still forwarded on repeat calls, matching the original behaviour.
    session_manager.close(session_id)
    return {"session": storage.get_session(session_id, user_id)}
@app.get("/tasks")
def list_tasks(user_id: str = Query(...)):
    """List the tasks recorded for ``user_id``."""
    owned = storage.list_tasks(user_id)
    return {"tasks": owned}
@app.get("/tasks/{task_id}")
def get_task(task_id: str, user_id: str = Query(...)):
    """Return a single task.

    Raises:
        HTTPException: 404 (from ``ensure_task_access``) when the task is not
            found for this user.
    """
    return {"task": ensure_task_access(task_id, user_id)}
@app.get("/tasks/{task_id}/report")
def get_task_report(task_id: str, user_id: str = Query(...)):
    """Download a task's report file as ``text/markdown``.

    Raises:
        HTTPException: 404 when the task is not found for this user, when it
            has no ``report_file_path``, or when that path is not a regular
            file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    # isfile rather than exists: a directory at this path cannot be served by
    # FileResponse and would surface as a server error instead of a 404.
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    return FileResponse(
        report_path,
        media_type="text/markdown",
        filename=os.path.basename(report_path),
    )
@app.get("/tasks/{task_id}/report/content")
def get_task_report_content(task_id: str, user_id: str = Query(...)):
    """Return a task's report as JSON: its UTF-8 text and its file name.

    Returns:
        ``{"content": <file text>, "filename": <basename of the report path>}``.

    Raises:
        HTTPException: 404 when the task is not found for this user, when it
            has no ``report_file_path``, or when that path is not a readable
            regular file.
    """
    task = ensure_task_access(task_id, user_id)
    report_path = task.get("report_file_path")
    # isfile rather than exists so a directory at this path is a 404, not an
    # unhandled error from open().
    if not report_path or not os.path.isfile(report_path):
        raise HTTPException(status_code=404, detail="Report not available")
    try:
        with open(report_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError as exc:
        # The file can disappear between the check above and the open.
        raise HTTPException(status_code=404, detail="Report not available") from exc
    return {"content": content, "filename": os.path.basename(report_path)}
@app.get("/tasks/{task_id}/artifacts")
def list_task_artifacts(task_id: str, user_id: str = Query(...)):
    """List the regular files in a task's session output directory.

    Returns:
        ``{"artifacts": [...]}`` sorted by file name. Each entry has ``name``,
        ``size`` (bytes), ``is_image`` (decided by file extension) and ``url``
        (the download link for the artifact). The list is empty when the task
        has no output directory or the directory does not exist.

    Raises:
        HTTPException: 404 (from ``ensure_task_access``) when the task is not
            found for this user.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir or not os.path.isdir(session_output_dir):
        return {"artifacts": []}

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
    # Percent-encode every dynamic URL component: file names may contain
    # spaces, "#", "?" or "%", and user_id may contain "&" or "=", any of
    # which would otherwise produce a link that resolves to the wrong resource.
    encoded_task_id = quote(task_id, safe="")
    encoded_user_id = quote(user_id, safe="")

    artifacts = []
    for name in sorted(os.listdir(session_output_dir)):
        path = os.path.join(session_output_dir, name)
        if not os.path.isfile(path):
            continue
        encoded_name = quote(name, safe="")
        artifacts.append(
            {
                "name": name,
                "size": os.path.getsize(path),
                "is_image": name.lower().endswith(image_suffixes),
                "url": f"/tasks/{encoded_task_id}/artifacts/{encoded_name}?user_id={encoded_user_id}",
            }
        )
    return {"artifacts": artifacts}
@app.get("/tasks/{task_id}/artifacts/{artifact_name}")
def get_artifact(task_id: str, artifact_name: str, user_id: str = Query(...)):
    """Download one artifact from a task's session output directory.

    Raises:
        HTTPException: 404 when the task is not found for this user, when it
            has no output directory, or when the artifact is not a regular
            file; 400 when ``artifact_name`` resolves outside the output
            directory.
    """
    task = ensure_task_access(task_id, user_id)
    session_output_dir = task.get("session_output_dir")
    if not session_output_dir:
        raise HTTPException(status_code=404, detail="Artifact directory not available")

    # Resolve symlinks and ".." on both sides before comparing, so the
    # containment check cannot be bypassed by a crafted name.
    artifact_path = os.path.realpath(os.path.join(session_output_dir, artifact_name))
    session_root = os.path.realpath(session_output_dir)
    if artifact_path != session_root and not artifact_path.startswith(session_root + os.sep):
        raise HTTPException(status_code=400, detail="Invalid artifact path")

    # isfile rather than exists: the check above lets the directory itself
    # through (e.g. name "."), and FileResponse cannot serve a directory.
    if not os.path.isfile(artifact_path):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return FileResponse(artifact_path, filename=os.path.basename(artifact_path))