diff --git a/backend/task_scheduler/app/main.py b/backend/task_scheduler/app/main.py index ca5bca8..4d7b137 100644 --- a/backend/task_scheduler/app/main.py +++ b/backend/task_scheduler/app/main.py @@ -412,9 +412,8 @@ async def _async_do_signin(account_id: str, cron_expr: str = ""): log_entries = [] async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client: - # 预获取 XSRF token - await client.get("https://weibo.com/", headers=WEIBO_HEADERS, cookies=cookies) - xsrf = client.cookies.get("XSRF-TOKEN", "") + # 直接从 Cookie 获取 XSRF token,不访问首页 + xsrf = cookies.get("XSRF-TOKEN", "") logger.info(f"📦 预加载完成: account={account_id}, topics={len(topics)}, xsrf={'有' if xsrf else '无'}") @@ -478,8 +477,8 @@ async def _async_do_signin(account_id: str, cron_expr: str = ""): async def _fetch_topics(cookies: dict) -> list: """ 获取关注的超话列表。 - 带重试机制:微博可能触发风控,重试 3 次。 - 返回空列表表示获取失败(Cookie 失效或风控)。 + 直接请求 AJAX API,不访问首页(避免 SSO 重定向导致误判 Cookie 失效)。 + 带重试机制,重试 3 次。 """ import httpx @@ -487,35 +486,14 @@ async def _fetch_topics(cookies: dict) -> list: for attempt in range(max_retries): topics = [] try: - async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client: - resp = await client.get("https://weibo.com/", headers=WEIBO_HEADERS, cookies=cookies) - final_url = str(resp.url) - - # 最终落地在登录页 = Cookie 失效或风控 - if "login.sina.com.cn" in final_url or "passport.weibo.com" in final_url: - if attempt < max_retries - 1: - wait = (attempt + 1) * 3 - logger.warning(f"被重定向到登录页(尝试 {attempt+1}/{max_retries}),{wait}秒后重试... url={final_url[:120]}") - await asyncio.sleep(wait) - continue - else: - logger.warning(f"多次重试仍被重定向到登录页,Cookie 已失效: {final_url[:120]}") - return [] - - # 检查响应内容是否是通行证验证页(HTML 而非正常页面) - content_type = resp.headers.get("content-type", "") - if "text/html" in content_type and "通行证" in resp.text[:500]: - if attempt < max_retries - 1: - wait = (attempt + 1) * 3 - logger.warning(f"触发通行证验证(尝试 {attempt+1}/{max_retries}),{wait}秒后重试...") - await asyncio.sleep(wait) - continue - else: - logger.warning("多次重试仍触发通行证验证") - return [] - - xsrf = client.cookies.get("XSRF-TOKEN", "") - headers = {**WEIBO_HEADERS, "X-Requested-With": "XMLHttpRequest"} + async with httpx.AsyncClient(timeout=20, follow_redirects=False) as client: + # 直接请求超话 API,不经过首页 + # XSRF-TOKEN 从 Cookie 中的 XSRF-TOKEN 或通过 SUB 推导 + xsrf = cookies.get("XSRF-TOKEN", "") + headers = { + **WEIBO_HEADERS, + "X-Requested-With": "XMLHttpRequest", + } if xsrf: headers["X-XSRF-TOKEN"] = xsrf @@ -527,27 +505,33 @@ async def _fetch_topics(cookies: dict) -> list: headers=headers, cookies=cookies, ) - # 超话接口被重定向到登录页 - final_url = str(resp.url) - if "login.sina.com.cn" in final_url or "passport" in final_url: + # 被 302 重定向 = Cookie 失效 + if resp.status_code in (301, 302): + location = resp.headers.get("location", "") + logger.warning(f"超话API被重定向(尝试 {attempt+1}/{max_retries}): {location[:120]}") if attempt < max_retries - 1: - logger.warning(f"超话接口被重定向(尝试 {attempt+1}/{max_retries}),重试...") await asyncio.sleep((attempt + 1) * 3) break - else: - return [] + return [] try: data = resp.json() except Exception: - logger.warning(f"超话列表响应非 JSON(尝试 {attempt+1}): {resp.text[:200]}") + logger.warning(f"超话列表响应非 JSON(尝试 {attempt+1}): status={resp.status_code}, body={resp.text[:200]}") if attempt < max_retries - 1: await asyncio.sleep((attempt + 1) * 3) break return [] if data.get("ok") != 1: + # ok != 1 可能是未登录,也可能是没有超话 + if page == 1: + logger.warning(f"超话API返回 ok={data.get('ok')}(尝试 {attempt+1}): {str(data)[:200]}") + if attempt < max_retries - 1: + await asyncio.sleep((attempt + 1) * 3) + break break + tlist = data.get("data", {}).get("list", []) if not tlist: break diff --git a/test_fetch_topics.py b/test_fetch_topics.py index bb9f072..7da59ee 100644 --- a/test_fetch_topics.py +++ b/test_fetch_topics.py @@ -73,25 +73,14 @@ async def main(): await engine.dispose() - # 测试 1: 访问 weibo.com - print("\n--- 测试 1: GET https://weibo.com/ ---") - async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client: - resp = await client.get("https://weibo.com/", headers=WEIBO_HEADERS, cookies=cookies) - final_url = str(resp.url) - print(f" 状态码: {resp.status_code}") - print(f" 最终URL: {final_url}") - print(f" 是否登录页: {'login.sina.com.cn' in final_url or 'passport' in final_url}") - - if "login.sina.com.cn" in final_url or "passport" in final_url: - print("\n❌ Cookie 已失效,被重定向到登录页") - return - - xsrf = client.cookies.get("XSRF-TOKEN", "") - print(f" XSRF-TOKEN: {'有' if xsrf else '无'}") - - # 测试 2: 获取超话列表 - print("\n--- 测试 2: 获取超话列表 ---") - headers = {**WEIBO_HEADERS, "X-Requested-With": "XMLHttpRequest"} + # 测试 1: 直接请求超话 API(不经过首页) + print("\n--- 测试 1: 直接请求超话 API ---") + async with httpx.AsyncClient(timeout=20, follow_redirects=False) as client: + xsrf = cookies.get("XSRF-TOKEN", "") + headers = { + **WEIBO_HEADERS, + "X-Requested-With": "XMLHttpRequest", + } if xsrf: headers["X-XSRF-TOKEN"] = xsrf @@ -101,7 +90,19 @@ async def main(): headers=headers, cookies=cookies, ) print(f" 状态码: {resp.status_code}") - print(f" 最终URL: {resp.url}") + print(f" URL: {resp.url}") + + if resp.status_code in (301, 302): + print(f" 重定向到: {resp.headers.get('location', '')[:120]}") + print("\n❌ Cookie 已失效(API 被重定向)") + + # 对比:访问首页看看 + print("\n--- 对比: GET https://weibo.com/ ---") + async with httpx.AsyncClient(timeout=20, follow_redirects=True) as c2: + r2 = await c2.get("https://weibo.com/", headers=WEIBO_HEADERS, cookies=cookies) + print(f" 最终URL: {r2.url}") + print(f" 是否登录页: {'login.sina' in str(r2.url) or 'passport' in str(r2.url)}") + return try: data = resp.json() @@ -112,9 +113,11 @@ async def main(): title = t.get("topic_name", "") or t.get("title", "") print(f" - {title}") if topics: - print("\n✅ Cookie 有效,超话获取正常") + print("\n✅ Cookie 有效,超话获取正常(直接 API 模式)") + elif data.get("ok") == 1: + print("\n⚠️ Cookie 有效但没有关注超话") else: - print("\n⚠️ Cookie 可能有效但没有关注超话") + print(f"\n❌ API 返回异常: {str(data)[:200]}") except Exception as e: print(f" ❌ 响应非 JSON: {resp.text[:300]}") print(f"\n❌ 获取超话失败: {e}")