This commit is contained in:
2026-03-09 14:05:00 +08:00
commit 754e720ba7
105 changed files with 5890 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
# Weibo-HotSign Task Scheduler Service Dockerfile
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
default-libmysqlclient-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app/ ./app/
# Create non-root user for security
RUN groupadd -r appuser && useradd -r -g appuser appuser
USER appuser
# Expose port (optional, as scheduler doesn't need external access)
# EXPOSE 8000
# Start Celery Beat scheduler
CMD ["celery", "-A", "app.celery_app", "beat", "--loglevel=info"]

View File

@@ -0,0 +1,97 @@
"""
Weibo-HotSign Task Scheduler Service
Celery Beat configuration for scheduled sign-in tasks
"""
import os
from celery import Celery
from celery.schedules import crontab
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import asyncio
from datetime import datetime
from ..config import settings
# Create Celery app
celery_app = Celery(
"weibo_hot_sign_scheduler",
broker=settings.CELERY_BROKER_URL,
backend=settings.CELERY_RESULT_BACKEND,
include=["app.tasks.signin_tasks"]
)
# Celery configuration
celery_app.conf.update(
task_serializer="json",
accept_content=["json"],
result_serializer="json",
timezone="Asia/Shanghai",
enable_utc=True,
beat_schedule_filename="celerybeat-schedule",
beat_max_loop_interval=5,
)
# Database configuration for task scheduler
engine = create_async_engine(
settings.DATABASE_URL,
echo=settings.DEBUG,
pool_size=10,
max_overflow=20
)
AsyncSessionLocal = sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False
)
async def get_db():
"""Get database session for task scheduler"""
async with AsyncSessionLocal() as session:
try:
yield session
finally:
await session.close()
class TaskSchedulerService:
"""Service to manage scheduled tasks from database"""
def __init__(self):
self.engine = engine
async def load_scheduled_tasks(self):
"""Load enabled tasks from database and schedule them"""
from app.models.task_models import Task
try:
async with AsyncSessionLocal() as session:
# Query all enabled tasks
stmt = select(Task).where(Task.is_enabled == True)
result = await session.execute(stmt)
tasks = result.scalars().all()
print(f"📅 Loaded {len(tasks)} enabled tasks from database")
# Here we would dynamically add tasks to Celery Beat
# For now, we'll use static configuration in celery_config.py
return tasks
except Exception as e:
print(f"❌ Error loading tasks from database: {e}")
return []
# Synchronous wrapper for async function
def sync_load_tasks():
"""Synchronous wrapper to load tasks"""
service = TaskSchedulerService()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(service.load_scheduled_tasks())
finally:
loop.close()
# Import task modules to register them
from app.tasks import signin_tasks

View File

@@ -0,0 +1,47 @@
"""
Configuration for Task Scheduler Service
"""
import os
from pydantic_settings import BaseSettings
from typing import List
class Settings(BaseSettings):
"""Task Scheduler settings"""
# Database settings
DATABASE_URL: str = os.getenv(
"DATABASE_URL",
"mysql+aiomysql://weibo:123456789@43.134.68.207/weibo"
)
# Celery settings
CELERY_BROKER_URL: str = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0")
CELERY_RESULT_BACKEND: str = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/0")
# Task execution settings
MAX_CONCURRENT_TASKS: int = int(os.getenv("MAX_CONCURRENT_TASKS", "10"))
TASK_TIMEOUT_SECONDS: int = int(os.getenv("TASK_TIMEOUT_SECONDS", "300"))
# Scheduler settings
SCHEDULER_TIMEZONE: str = os.getenv("SCHEDULER_TIMEZONE", "Asia/Shanghai")
BEAT_SCHEDULE_FILE: str = os.getenv("BEAT_SCHEDULE_FILE", "/tmp/celerybeat-schedule")
# Retry settings
MAX_RETRY_ATTEMPTS: int = int(os.getenv("MAX_RETRY_ATTEMPTS", "3"))
RETRY_DELAY_SECONDS: int = int(os.getenv("RETRY_DELAY_SECONDS", "60"))
# Logging
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
DEBUG: bool = os.getenv("DEBUG", "False").lower() == "true"
# Service URLs
SIGNIN_EXECUTOR_URL: str = os.getenv("SIGNIN_EXECUTOR_URL", "http://signin-executor:8000")
PROXY_POOL_URL: str = os.getenv("PROXY_POOL_URL", "http://proxy-pool:8080")
BROWSER_AUTOMATION_URL: str = os.getenv("BROWSER_AUTOMATION_URL", "http://browser-automation:3001")
class Config:
case_sensitive = True
env_file = ".env"
settings = Settings()

View File

@@ -0,0 +1,196 @@
"""
Weibo-HotSign Sign-in Task Definitions
Celery tasks for scheduled sign-in operations
"""
import asyncio
import httpx
import json
import logging
from datetime import datetime
from typing import Dict, Any, Optional
from celery import current_task
from ..celery_app import celery_app
from ..config import settings
# Configure logger
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, max_retries=3, default_retry_delay=60)
def execute_signin_task(self, task_id: str, account_id: str, cron_expression: str):
"""
Execute scheduled sign-in task for a specific account
This task is triggered by Celery Beat based on cron schedule
"""
logger.info(f"🎯 Starting sign-in task {task_id} for account {account_id}")
try:
# Update task status
current_task.update_state(
state="PROGRESS",
meta={
"current": 10,
"total": 100,
"status": "Initializing sign-in process...",
"account_id": account_id
}
)
# Call signin executor service
result = _call_signin_executor(account_id, task_id)
# Update task status
current_task.update_state(
state="SUCCESS",
meta={
"current": 100,
"total": 100,
"status": "Sign-in completed successfully",
"result": result,
"account_id": account_id
}
)
logger.info(f"✅ Sign-in task {task_id} completed successfully for account {account_id}")
return result
except Exception as exc:
logger.error(f"❌ Sign-in task {task_id} failed for account {account_id}: {exc}")
# Retry logic
if self.request.retries < settings.MAX_RETRY_ATTEMPTS:
logger.info(f"🔄 Retrying task {task_id} (attempt {self.request.retries + 1})")
raise self.retry(exc=exc, countdown=settings.RETRY_DELAY_SECONDS)
# Final failure
current_task.update_state(
state="FAILURE",
meta={
"current": 100,
"total": 100,
"status": f"Task failed after {settings.MAX_RETRY_ATTEMPTS} attempts",
"error": str(exc),
"account_id": account_id
}
)
raise exc
@celery_app.task
def schedule_daily_signin():
"""
Daily sign-in task - example of scheduled task
Can be configured in Celery Beat schedule
"""
logger.info("📅 Executing daily sign-in schedule")
# This would typically query database for accounts that need daily sign-in
# For demo purposes, we'll simulate processing multiple accounts
accounts = ["account_1", "account_2", "account_3"] # Mock account IDs
results = []
for account_id in accounts:
try:
# Submit individual sign-in task for each account
task = execute_signin_task.delay(
task_id=f"daily_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
account_id=account_id,
cron_expression="0 8 * * *" # Daily at 8 AM
)
results.append({
"account_id": account_id,
"task_id": task.id,
"status": "submitted"
})
except Exception as e:
logger.error(f"Failed to submit task for account {account_id}: {e}")
results.append({
"account_id": account_id,
"status": "failed",
"error": str(e)
})
return {
"scheduled_date": datetime.now().isoformat(),
"accounts_processed": len(accounts),
"results": results
}
@celery_app.task
def process_pending_tasks():
"""
Process pending sign-in tasks from database
This can be called manually or via external trigger
"""
logger.info("🔄 Processing pending sign-in tasks from database")
# In real implementation, this would:
# 1. Query database for tasks that need to be executed
# 2. Check if they're due based on cron expressions
# 3. Submit them to Celery for execution
try:
# Mock implementation - query enabled tasks
result = {
"processed_at": datetime.now().isoformat(),
"tasks_found": 5, # Mock number
"tasks_submitted": 3,
"tasks_skipped": 2,
"status": "completed"
}
logger.info(f"✅ Processed pending tasks: {result}")
return result
except Exception as e:
logger.error(f"❌ Failed to process pending tasks: {e}")
raise
def _call_signin_executor(account_id: str, task_id: str) -> Dict[str, Any]:
"""
Call the signin executor service to perform actual sign-in
"""
try:
signin_data = {
"task_id": task_id,
"account_id": account_id,
"timestamp": datetime.now().isoformat(),
"requested_by": "task_scheduler"
}
# Call signin executor service
with httpx.Client(timeout=30.0) as client:
response = client.post(
f"{settings.SIGNIN_EXECUTOR_URL}/api/v1/signin/execute",
json=signin_data
)
if response.status_code == 200:
result = response.json()
logger.info(f"Sign-in executor response: {result}")
return result
else:
raise Exception(f"Sign-in executor returned error: {response.status_code} - {response.text}")
except httpx.RequestError as e:
logger.error(f"Network error calling signin executor: {e}")
raise Exception(f"Failed to connect to signin executor: {e}")
except Exception as e:
logger.error(f"Error calling signin executor: {e}")
raise
# Periodic task definitions for Celery Beat
celery_app.conf.beat_schedule = {
"daily-signin-at-8am": {
"task": "app.tasks.signin_tasks.schedule_daily_signin",
"schedule": {
"hour": 8,
"minute": 0,
},
},
"process-pending-every-15-minutes": {
"task": "app.tasks.signin_tasks.process_pending_tasks",
"schedule": 900.0, # Every 15 minutes
},
}

View File

@@ -0,0 +1,18 @@
# Weibo-HotSign Task Scheduler Service Requirements
# Task Queue
celery==5.3.6
redis==5.0.1
# Database
sqlalchemy==2.0.23
aiomysql==0.2.0
PyMySQL==1.1.0
# Configuration
pydantic-settings==2.0.3
# HTTP Client
httpx==0.25.2
# Utilities
python-dotenv==1.0.0