Scan QR code to log in and fetch cookies

2026-03-09 16:10:29 +08:00
parent 754e720ba7
commit 8229208165
7775 changed files with 1150053 additions and 208 deletions

View File

@@ -4,22 +4,35 @@ Celery Beat configuration for scheduled sign-in tasks
"""
import os
import sys
import asyncio
import logging
from typing import Dict, List, Optional
from datetime import datetime
from celery import Celery
from celery.schedules import crontab
from croniter import croniter
from sqlalchemy import select
# Add parent directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
from shared.models.base import AsyncSessionLocal
from shared.models.task import Task
from shared.models.account import Account
from shared.config import shared_settings
from .config import settings
logger = logging.getLogger(__name__)
# Create Celery app
celery_app = Celery(
"weibo_hot_sign_scheduler",
broker=settings.CELERY_BROKER_URL,
backend=settings.CELERY_RESULT_BACKEND,
include=["app.tasks.signin_tasks"]
include=["task_scheduler.app.tasks.signin_tasks"]
)
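# NOTE: the fully qualified include path assumes the worker and beat processes
# are started with the repository root on PYTHONPATH so that "task_scheduler"
# is importable (see the sys.path tweak above).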
# Celery configuration
@@ -33,65 +46,168 @@ celery_app.conf.update(
beat_max_loop_interval=5,
)
async def get_db():
"""Get database session for task scheduler"""
async with AsyncSessionLocal() as session:
try:
yield session
finally:
await session.close()
class TaskSchedulerService:
"""Service to manage scheduled tasks from database"""
def __init__(self):
self.scheduled_tasks: Dict[str, dict] = {}
async def load_scheduled_tasks(self) -> List[Task]:
"""
Load enabled tasks from database and register them to Celery Beat.
Returns list of loaded tasks.
"""
try:
async with AsyncSessionLocal() as session:
# Query all enabled tasks with their accounts
stmt = (
select(Task, Account)
.join(Account, Task.account_id == Account.id)
.where(Task.is_enabled == True)
)
result = await session.execute(stmt)
task_account_pairs = result.all()
logger.info(f"📅 Loaded {len(task_account_pairs)} enabled tasks from database")
# Register tasks to Celery Beat dynamically
beat_schedule = {}
for task, account in task_account_pairs:
try:
# Validate cron expression
if not croniter.is_valid(task.cron_expression):
logger.warning(f"Invalid cron expression for task {task.id}: {task.cron_expression}")
continue
# Create schedule entry
schedule_name = f"task_{task.id}"
beat_schedule[schedule_name] = {
"task": "task_scheduler.app.tasks.signin_tasks.execute_signin_task",
"schedule": self._parse_cron_to_celery(task.cron_expression),
"args": (task.id, task.account_id, task.cron_expression),
}
self.scheduled_tasks[task.id] = {
"task_id": task.id,
"account_id": task.account_id,
"cron_expression": task.cron_expression,
"account_status": account.status,
}
logger.info(f"✅ Registered task {task.id} for account {account.weibo_user_id} with cron: {task.cron_expression}")
except Exception as e:
logger.error(f"Failed to register task {task.id}: {e}")
continue
# Update Celery Beat schedule
celery_app.conf.beat_schedule.update(beat_schedule)
return [task for task, _ in task_account_pairs]
except Exception as e:
print(f"❌ Error loading tasks from database: {e}")
logger.error(f"❌ Error loading tasks from database: {e}")
return []
def _parse_cron_to_celery(self, cron_expression: str) -> crontab:
"""
Parse cron expression string to Celery crontab schedule.
Format: minute hour day month day_of_week
"""
parts = cron_expression.split()
if len(parts) != 5:
raise ValueError(f"Invalid cron expression: {cron_expression}")
return crontab(
minute=parts[0],
hour=parts[1],
day_of_month=parts[2],
month_of_year=parts[3],
day_of_week=parts[4],
)
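# Illustrative example (not part of the module): the expression "30 8 * * 1-5"
# would be parsed to crontab(minute="30", hour="8", day_of_month="*",
# month_of_year="*", day_of_week="1-5"), i.e. 08:30 Monday through Friday.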
async def add_task(self, task_id: str, account_id: str, cron_expression: str):
"""Dynamically add a new task to the schedule"""
try:
if not croniter.is_valid(cron_expression):
raise ValueError(f"Invalid cron expression: {cron_expression}")
schedule_name = f"task_{task_id}"
celery_app.conf.beat_schedule[schedule_name] = {
"task": "task_scheduler.app.tasks.signin_tasks.execute_signin_task",
"schedule": self._parse_cron_to_celery(cron_expression),
"args": (task_id, account_id, cron_expression),
}
self.scheduled_tasks[task_id] = {
"task_id": task_id,
"account_id": account_id,
"cron_expression": cron_expression,
}
logger.info(f"✅ Added task {task_id} to schedule")
except Exception as e:
logger.error(f"Failed to add task {task_id}: {e}")
raise
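# Caveat: mutating celery_app.conf.beat_schedule at runtime only affects the
# process executing this code. A separately running Beat process loads its
# schedule at startup, so tasks added here become effective only after a Beat
# restart or with a dynamic scheduler (e.g. RedBeat or django-celery-beat).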
async def remove_task(self, task_id: str):
"""Dynamically remove a task from the schedule"""
try:
schedule_name = f"task_{task_id}"
if schedule_name in celery_app.conf.beat_schedule:
del celery_app.conf.beat_schedule[schedule_name]
logger.info(f"✅ Removed task {task_id} from schedule")
if task_id in self.scheduled_tasks:
del self.scheduled_tasks[task_id]
except Exception as e:
logger.error(f"Failed to remove task {task_id}: {e}")
raise
async def update_task(self, task_id: str, is_enabled: bool, cron_expression: Optional[str] = None):
"""Update an existing task in the schedule"""
try:
if is_enabled:
# Re-add or update the task
async with AsyncSessionLocal() as session:
stmt = select(Task).where(Task.id == task_id)
result = await session.execute(stmt)
task = result.scalar_one_or_none()
if task:
await self.add_task(
task_id,
task.account_id,
cron_expression or task.cron_expression
)
else:
# Remove the task
await self.remove_task(task_id)
logger.info(f"✅ Updated task {task_id}, enabled={is_enabled}")
except Exception as e:
logger.error(f"Failed to update task {task_id}: {e}")
raise
# Global scheduler service instance
scheduler_service = TaskSchedulerService()
# Synchronous wrapper for async function
def sync_load_tasks():
"""Synchronous wrapper to load tasks on startup"""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(scheduler_service.load_scheduled_tasks())
finally:
loop.close()
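# Equivalent, slightly simpler form on Python 3.7+ (illustrative):
#
#     def sync_load_tasks():
#         return asyncio.run(scheduler_service.load_scheduled_tasks())
#
# asyncio.run() creates a fresh event loop and closes it when the coroutine returns.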
# Import task modules to register them
from .tasks import signin_tasks
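# One possible way to trigger the initial load when Beat starts (an assumption;
# this wiring is not shown in the commit) is Celery's beat_init signal:
#
#     from celery.signals import beat_init
#
#     @beat_init.connect
#     def load_schedule_on_beat_start(sender=None, **kwargs):
#         sync_load_tasks()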

View File

@@ -9,15 +9,9 @@ from typing import List
class Settings(BaseSettings):
"""Task Scheduler settings"""
# Celery settings
CELERY_BROKER_URL: str = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
CELERY_RESULT_BACKEND: str = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
# Task execution settings
MAX_CONCURRENT_TASKS: int = int(os.getenv("MAX_CONCURRENT_TASKS", "10"))
@@ -40,6 +34,9 @@ class Settings(BaseSettings):
PROXY_POOL_URL: str = os.getenv("PROXY_POOL_URL", "http://proxy-pool:8080")
BROWSER_AUTOMATION_URL: str = os.getenv("BROWSER_AUTOMATION_URL", "http://browser-automation:3001")
# Redis pub/sub settings for task updates
REDIS_TASK_CHANNEL: str = os.getenv("REDIS_TASK_CHANNEL", "task_updates")
class Config:
case_sensitive = True
env_file = ".env"
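# Example environment overrides (illustrative values only):
#
#     CELERY_BROKER_URL=redis://localhost:6379/0
#     MAX_CONCURRENT_TASKS=20
#     REDIS_TASK_CHANNEL=task_updates
#
# A minimal sketch of a consumer for REDIS_TASK_CHANNEL (assumed wiring, not
# part of this commit), using redis-py's asyncio support:
#
#     import json
#     import redis.asyncio as aioredis
#     from task_scheduler.app.celery_app import scheduler_service
#
#     async def listen_for_task_updates():
#         client = aioredis.from_url(settings.CELERY_BROKER_URL)
#         pubsub = client.pubsub()
#         await pubsub.subscribe(settings.REDIS_TASK_CHANNEL)
#         async for message in pubsub.listen():
#             if message["type"] != "message":
#                 continue
#             payload = json.loads(message["data"])
#             await scheduler_service.update_task(
#                 payload["task_id"],
#                 payload["is_enabled"],
#                 payload.get("cron_expression"),
#             )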

View File

@@ -3,29 +3,143 @@ Weibo-HotSign Sign-in Task Definitions
Celery tasks for scheduled sign-in operations
"""
import os
import sys
import asyncio
import httpx
import json
import logging
import redis
from datetime import datetime
from typing import Dict, Any, Optional
from celery import current_task
from sqlalchemy import select
# Add parent directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from shared.models.base import AsyncSessionLocal
from shared.models.task import Task
from shared.models.account import Account
from shared.config import shared_settings
from ..celery_app import celery_app
from ..config import settings
# Configure logger
logger = logging.getLogger(__name__)
# Redis client for distributed locks (optional)
_redis_client = None
def get_redis_client():
"""Get the Redis client; return None if Redis is not enabled"""
global _redis_client
if not shared_settings.USE_REDIS:
return None
if _redis_client is None:
try:
_redis_client = redis.from_url(shared_settings.REDIS_URL, decode_responses=True)
except Exception as e:
logger.warning(f"Redis 连接失败: {e},分布式锁将被禁用")
return None
return _redis_client
# In-memory locks (fallback when Redis is unavailable)
_memory_locks = {}
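# NOTE: the in-memory fallback only deduplicates within a single process. Under
# Celery's default prefork pool each worker child has its own _memory_locks dict,
# so cross-worker duplicate runs are only really prevented when Redis is enabled.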
class DistributedLock:
"""分布式锁(支持 Redis 或内存模式)"""
def __init__(self, lock_key: str, timeout: int = 300):
"""
Initialize distributed lock
Args:
lock_key: Unique key for the lock
timeout: Lock timeout in seconds (default 5 minutes)
"""
self.lock_key = f"lock:{lock_key}"
self.timeout = timeout
self.acquired = False
self.redis_client = get_redis_client()
def acquire(self) -> bool:
"""
Acquire the lock using Redis SETNX or memory dict
Returns True if lock acquired, False otherwise
"""
try:
if self.redis_client:
# Use Redis
result = self.redis_client.set(self.lock_key, "1", nx=True, ex=self.timeout)
self.acquired = bool(result)
else:
# Use in-memory lock (local development)
if self.lock_key not in _memory_locks:
_memory_locks[self.lock_key] = True
self.acquired = True
else:
self.acquired = False
return self.acquired
except Exception as e:
logger.error(f"Failed to acquire lock {self.lock_key}: {e}")
return False
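# Design note: set(..., nx=True, ex=timeout) is the atomic "SET key value NX EX"
# command, so acquisition and expiry are applied together, avoiding the
# SETNX-then-EXPIRE race where a crash could leave a lock that never expires.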
def release(self):
"""Release the lock"""
if self.acquired:
try:
if self.redis_client:
# Use Redis
self.redis_client.delete(self.lock_key)
else:
# Use in-memory lock
if self.lock_key in _memory_locks:
del _memory_locks[self.lock_key]
self.acquired = False
except Exception as e:
logger.error(f"Failed to release lock {self.lock_key}: {e}")
def __enter__(self):
"""Context manager entry"""
if not self.acquire():
raise Exception(f"Failed to acquire lock: {self.lock_key}")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit"""
self.release()
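# Usage (illustrative):
#
#     with DistributedLock(f"signin_task:{task_id}:{account_id}", timeout=60):
#         ...  # critical section
#
# Known limitation: release() deletes the key unconditionally. If the lock times
# out mid-task and another worker acquires it, the first worker's release() will
# remove the second worker's lock. The usual hardening is to store a unique token
# as the value and delete only if it still matches (typically via a Lua script).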
@celery_app.task(bind=True, max_retries=3, default_retry_delay=60)
def execute_signin_task(self, task_id: str, account_id: str, cron_expression: str):
"""
Execute scheduled sign-in task for a specific account
This task is triggered by Celery Beat based on cron schedule
Uses distributed lock to prevent duplicate execution
"""
logger.info(f"🎯 Starting sign-in task {task_id} for account {account_id}")
lock_key = f"signin_task:{task_id}:{account_id}"
lock = DistributedLock(lock_key, timeout=300)
# Try to acquire lock
if not lock.acquire():
logger.warning(f"⚠️ Task {task_id} for account {account_id} is already running, skipping")
return {
"status": "skipped",
"reason": "Task already running (distributed lock)",
"account_id": account_id,
"task_id": task_id
}
try:
logger.info(f"🎯 Starting sign-in task {task_id} for account {account_id}")
# Update task status
current_task.update_state(
state="PROGRESS",
@@ -37,6 +151,20 @@ def execute_signin_task(self, task_id: str, account_id: str, cron_expression: st
}
)
# Get account info from database
account_info = _get_account_from_db(account_id)
if not account_info:
raise Exception(f"Account {account_id} not found in database")
# Check if account is active
if account_info["status"] not in ["pending", "active"]:
logger.warning(f"Account {account_id} status is {account_info['status']}, skipping sign-in")
return {
"status": "skipped",
"reason": f"Account status is {account_info['status']}",
"account_id": account_id
}
# Call signin executor service
result = _call_signin_executor(account_id, task_id)
@@ -58,10 +186,15 @@ def execute_signin_task(self, task_id: str, account_id: str, cron_expression: st
except Exception as exc:
logger.error(f"❌ Sign-in task {task_id} failed for account {account_id}: {exc}")
# Retry logic with exponential backoff
if self.request.retries < settings.MAX_RETRY_ATTEMPTS:
logger.info(f"🔄 Retrying task {task_id} (attempt {self.request.retries + 1})")
raise self.retry(exc=exc, countdown=settings.RETRY_DELAY_SECONDS)
retry_delay = settings.RETRY_DELAY_SECONDS * (2 ** self.request.retries)
logger.info(f"🔄 Retrying task {task_id} (attempt {self.request.retries + 1}) in {retry_delay}s")
# Release lock before retry
lock.release()
raise self.retry(exc=exc, countdown=retry_delay)
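# Illustrative arithmetic: with RETRY_DELAY_SECONDS=60 (an assumed value; the
# actual default comes from settings/env), successive retries wait 60s, 120s,
# then 240s before giving up.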
# Final failure
current_task.update_state(
@@ -75,74 +208,166 @@ def execute_signin_task(self, task_id: str, account_id: str, cron_expression: st
}
)
raise exc
finally:
# Always release lock
lock.release()
def _get_account_from_db(account_id: str) -> Optional[Dict[str, Any]]:
"""
Query account information from database (replaces mock data).
Returns account dict or None if not found.
"""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(_async_get_account(account_id))
finally:
loop.close()
async def _async_get_account(account_id: str) -> Optional[Dict[str, Any]]:
"""Async helper to query account from database"""
try:
async with AsyncSessionLocal() as session:
stmt = select(Account).where(Account.id == account_id)
result = await session.execute(stmt)
account = result.scalar_one_or_none()
if not account:
return None
return {
"id": account.id,
"user_id": account.user_id,
"weibo_user_id": account.weibo_user_id,
"remark": account.remark,
"status": account.status,
"encrypted_cookies": account.encrypted_cookies,
"iv": account.iv,
}
except Exception as e:
logger.error(f"Error querying account {account_id}: {e}")
return None
@celery_app.task
def schedule_daily_signin():
"""
Daily sign-in task - queries database for enabled tasks
"""
logger.info("📅 Executing daily sign-in schedule")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(_async_schedule_daily_signin())
finally:
loop.close()
async def _async_schedule_daily_signin():
"""Async helper to query and schedule tasks"""
try:
async with AsyncSessionLocal() as session:
# Query all enabled tasks
stmt = (
select(Task, Account)
.join(Account, Task.account_id == Account.id)
.where(Task.is_enabled == True)
.where(Account.status.in_(["pending", "active"]))
)
results.append({
"account_id": account_id,
"task_id": task.id,
"status": "submitted"
})
except Exception as e:
logger.error(f"Failed to submit task for account {account_id}: {e}")
results.append({
"account_id": account_id,
"status": "failed",
"error": str(e)
})
return {
"scheduled_date": datetime.now().isoformat(),
"accounts_processed": len(accounts),
"results": results
}
result = await session.execute(stmt)
task_account_pairs = result.all()
results = []
for task, account in task_account_pairs:
try:
# Submit individual sign-in task for each account
celery_task = execute_signin_task.delay(
task_id=task.id,
account_id=account.id,
cron_expression=task.cron_expression
)
results.append({
"account_id": account.id,
"task_id": celery_task.id,
"status": "submitted"
})
except Exception as e:
logger.error(f"Failed to submit task for account {account.id}: {e}")
results.append({
"account_id": account.id,
"status": "failed",
"error": str(e)
})
return {
"scheduled_date": datetime.now().isoformat(),
"accounts_processed": len(task_account_pairs),
"results": results
}
except Exception as e:
logger.error(f"Error in daily signin schedule: {e}")
raise
@celery_app.task
def process_pending_tasks():
"""
Process pending sign-in tasks from database
Queries database for enabled tasks and submits them for execution
"""
logger.info("🔄 Processing pending sign-in tasks from database")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(_async_process_pending_tasks())
finally:
loop.close()
async def _async_process_pending_tasks():
"""Async helper to process pending tasks"""
try:
async with AsyncSessionLocal() as session:
# Query enabled tasks that are due for execution
stmt = (
select(Task, Account)
.join(Account, Task.account_id == Account.id)
.where(Task.is_enabled == True)
.where(Account.status.in_(["pending", "active"]))
)
result = await session.execute(stmt)
task_account_pairs = result.all()
tasks_submitted = 0
tasks_skipped = 0
for task, account in task_account_pairs:
try:
# Submit task for execution
execute_signin_task.delay(
task_id=task.id,
account_id=account.id,
cron_expression=task.cron_expression
)
tasks_submitted += 1
except Exception as e:
logger.error(f"Failed to submit task {task.id}: {e}")
tasks_skipped += 1
result = {
"processed_at": datetime.now().isoformat(),
"tasks_found": len(task_account_pairs),
"tasks_submitted": tasks_submitted,
"tasks_skipped": tasks_skipped,
"status": "completed"
}
logger.info(f"✅ Processed pending tasks: {result}")
return result
except Exception as e:
logger.error(f"❌ Failed to process pending tasks: {e}")
raise
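# To exercise these tasks locally, something along these lines should work,
# assuming the repository root is on PYTHONPATH (module path inferred from the
# "include" setting in celery_app.py):
#
#     celery -A task_scheduler.app.celery_app worker --loglevel=info
#     celery -A task_scheduler.app.celery_app beat --loglevel=info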