Files
assist/.kiro/skills/kb-audit/scripts/kb_audit.py

90 lines
2.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Knowledge base health-check script.
Computes simple statistics over KnowledgeEntry; invoked by the kb-audit Skill.
"""
import sys
from datetime import datetime, timedelta
from pathlib import Path
def add_project_root_to_path():
    """Put the project root at the front of ``sys.path`` unless already listed.

    The root is taken to be ``parents[4]`` of this file's resolved path, i.e.
    the script is expected to live four directories below the project root
    (for example ``<root>/.claude/skills/kb-audit/scripts/``).

    Raises:
        IndexError: if the resolved script path has fewer than five ancestors.
    """
    root_dir = str(Path(__file__).resolve().parents[4])
    if root_dir in sys.path:
        return
    # Index 0 places the root ahead of every existing search-path entry.
    sys.path.insert(0, root_dir)
def main():
    """Print a health report for the knowledge base to stdout.

    Adds the project root to ``sys.path``, opens a session obtained from
    ``db_manager.get_session()`` and prints, for ``KnowledgeEntry``:

    * the total number of entries;
    * the number with ``confidence_score`` below 0.7 (NULL scores excluded);
    * the number with ``usage_count`` below 3 or NULL;
    * the number whose ``updated_at`` is set and older than 90 days;
    * a one-line preview of the 5 most recently created entries (question
      text limited to 40 characters; answers are not printed).

    A short list of review hints is printed at the end. Returns ``None``.
    """
    add_project_root_to_path()

    # Project imports are deferred until the project root is on sys.path.
    from src.core.database import db_manager
    from src.core.models import KnowledgeEntry

    preview_len = 40  # maximum number of question characters shown per entry

    print("=== 知识库健康检查 ===\n")
    with db_manager.get_session() as session:
        total = session.query(KnowledgeEntry).count()
        print(f"知识条目总数: {total}")

        # Low confidence (< 0.7); entries without a score are not counted.
        low_conf = (
            session.query(KnowledgeEntry)
            .filter(KnowledgeEntry.confidence_score.isnot(None))
            .filter(KnowledgeEntry.confidence_score < 0.7)
            .count()
        )
        print(f"低置信度条目数 (confidence_score < 0.7): {low_conf}")

        # Very low usage: usage_count < 3, or usage_count is NULL.
        low_usage = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.usage_count.is_(None))
                | (KnowledgeEntry.usage_count < 3)
            )
            .count()
        )
        print(f"使用次数极低条目数 (usage_count < 3 或空): {low_usage}")

        # Not updated for a long time (> 90 days).
        # NOTE(review): datetime.now() is naive local time; this presumably
        # matches how updated_at is stored -- confirm against the model.
        cutoff = datetime.now() - timedelta(days=90)
        old_entries = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.updated_at.isnot(None))
                & (KnowledgeEntry.updated_at < cutoff)
            )
            .count()
        )
        print(f"长期未更新条目数 (updated_at > 90 天未更新): {old_entries}")

        print("\n示例问题条目(不含完整答案,仅展示前若干个):")
        # The sample is simply the 5 most recently created entries.
        sample_entries = (
            session.query(KnowledgeEntry)
            .order_by(KnowledgeEntry.created_at.desc())
            .limit(5)
            .all()
        )
        for e in sample_entries:
            question = e.question or ""
            q_preview = question[:preview_len]
            # Append the ellipsis only when text was actually cut off, so a
            # short or empty question is not shown as if it were truncated.
            suffix = "..." if len(question) > preview_len else ""
            print(
                f" ID={e.id}, category={e.category}, "
                f"confidence={e.confidence_score}, usage={e.usage_count}, "
                f"Q='{q_preview}{suffix}'"
            )

    print("\n提示:")
    print(" - 建议优先审查低置信度且 usage_count 较高的条目;")
    print(" - 对长期未更新且 usage_count 较高的条目,可考虑人工复查内容是否过时;")
    print(" - 对 usage_count 极低且从未触发的条目,可考虑合并或归档。")
# Run the health check only when this file is executed as a script.
if __name__ == "__main__":
    main()