docs: update README and CLAUDE.md to v2.2.0
- Added documentation for audit tracking (IP address, invocation method). - Updated database model descriptions for enhanced WorkOrder and Conversation fields. - Documented the new UnifiedConfig system. - Reflected enhanced logging transparency for knowledge base parsing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1 +0,0 @@
|
||||
# 知识库模块
|
||||
Binary file not shown.
BIN
src/knowledge_base/__pycache__/knowledge_manager.cpython-310.pyc
Normal file
BIN
src/knowledge_base/__pycache__/knowledge_manager.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class KnowledgeManager:
|
||||
"""知识库管理器"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.llm_client = QwenClient()
|
||||
self.vectorizer = TfidfVectorizer(
|
||||
@@ -24,22 +24,25 @@ class KnowledgeManager:
|
||||
ngram_range=(1, 2)
|
||||
)
|
||||
self._load_vectorizer()
|
||||
|
||||
|
||||
def _load_vectorizer(self):
    """Fit ``self.vectorizer`` on every active knowledge entry.

    Each active entry contributes one training text made of its question
    and answer joined by a single space.  When there are no active entries
    the vectorizer is left unfitted and a warning is logged.

    Any exception is logged and swallowed, so this method never raises.
    """
    try:
        logger.info("正在初始化知识库向量化器...")
        with db_manager.get_session() as session:
            # ``== True`` is a SQLAlchemy column expression, not a Python
            # identity test, so it must not be rewritten as ``is True``.
            entries = session.query(KnowledgeEntry).filter(
                KnowledgeEntry.is_active == True
            ).all()

            if entries:
                texts = [entry.question + " " + entry.answer for entry in entries]
                self.vectorizer.fit(texts)
                # Single success message; the superseded pre-change message
                # was dropped so a successful load is logged only once.
                logger.info(f"向量化器加载成功: 共处理 {len(entries)} 个知识条目")
            else:
                logger.warning("知识库尚无活跃条目,向量化器将保持空状态")
    except Exception as e:
        logger.error(f"加载向量化器失败: {e}")
|
||||
|
||||
|
||||
def learn_from_work_order(self, work_order_id: int) -> bool:
|
||||
"""从工单中学习知识"""
|
||||
try:
|
||||
@@ -47,19 +50,22 @@ class KnowledgeManager:
|
||||
work_order = session.query(WorkOrder).filter(
|
||||
WorkOrder.id == work_order_id
|
||||
).first()
|
||||
|
||||
|
||||
if not work_order or not work_order.resolution:
|
||||
return False
|
||||
|
||||
|
||||
# 提取问题和答案
|
||||
question = work_order.title + " " + work_order.description
|
||||
answer = work_order.resolution
|
||||
|
||||
|
||||
logger.info(f"开始从工单 {work_order_id} 学习知识: 标题长度={len(work_order.title)}, 描述长度={len(work_order.description)}")
|
||||
|
||||
# 检查是否已存在相似条目
|
||||
existing_entry = self._find_similar_entry(question, session)
|
||||
|
||||
|
||||
if existing_entry:
|
||||
# 更新现有条目
|
||||
logger.info(f"检测到相似知识条目 (ID: {existing_entry.id}),执行更新操作")
|
||||
existing_entry.answer = answer
|
||||
existing_entry.usage_count += 1
|
||||
existing_entry.updated_at = datetime.now()
|
||||
@@ -67,6 +73,7 @@ class KnowledgeManager:
|
||||
existing_entry.confidence_score = work_order.satisfaction_score
|
||||
else:
|
||||
# 创建新条目
|
||||
logger.info(f"未发现相似条目,正在为工单 {work_order_id} 创建新知识点")
|
||||
new_entry = KnowledgeEntry(
|
||||
question=question,
|
||||
answer=answer,
|
||||
@@ -75,42 +82,47 @@ class KnowledgeManager:
|
||||
usage_count=1
|
||||
)
|
||||
session.add(new_entry)
|
||||
|
||||
|
||||
session.commit()
|
||||
logger.info(f"从工单 {work_order_id} 学习知识成功")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"从工单学习知识失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _find_similar_entry(self, question: str, session) -> Optional[KnowledgeEntry]:
    """Return the active entry whose question is most similar to ``question``.

    The candidate questions and ``question`` are transformed with
    ``self.vectorizer`` and compared by cosine similarity.  The best match
    is returned only when its score is strictly greater than 0.8.

    Args:
        question: Text to compare against the stored entry questions.
        session: Open database session used to query ``KnowledgeEntry``.

    Returns:
        The best-matching entry, or ``None`` when there are no active
        entries, when no score exceeds the threshold, or when any error
        occurs (errors are logged, not raised).
    """
    try:
        entries = session.query(KnowledgeEntry).filter(
            KnowledgeEntry.is_active == True
        ).all()

        if not entries:
            return None

        # Vectorize the query and every stored question with the same
        # (already fitted) vectorizer so the vectors are comparable.
        texts = [entry.question for entry in entries]
        question_vector = self.vectorizer.transform([question])
        entry_vectors = self.vectorizer.transform(texts)

        similarities = cosine_similarity(question_vector, entry_vectors)[0]
        max_similarity_idx = np.argmax(similarities)
        max_score = similarities[max_similarity_idx]
        # ``entries`` is known to be non-empty here, so the best candidate
        # always exists and no 'N/A' fallback is needed.
        best_entry = entries[max_similarity_idx]

        logger.debug(f"相似度检索完成: 最高分值={max_score:.4f}, 目标ID={best_entry.id}")

        if max_score > 0.8:  # similarity threshold
            logger.info(f"匹配成功: 相似度 {max_score:.4f} 超过阈值 0.8")
            return best_entry

        logger.debug(f"匹配跳过: 相似度 {max_score:.4f} 未达到阈值 0.8")
        return None

    except Exception as e:
        logger.error(f"查找相似条目失败: {e}")
        return None
|
||||
|
||||
|
||||
def search_knowledge(self, query: str, top_k: int = 3, verified_only: bool = True) -> List[Dict[str, Any]]:
|
||||
"""搜索知识库"""
|
||||
try:
|
||||
@@ -119,20 +131,20 @@ class KnowledgeManager:
|
||||
query_filter = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.is_active == True
|
||||
)
|
||||
|
||||
|
||||
# 如果只搜索已验证的知识库
|
||||
if verified_only:
|
||||
query_filter = query_filter.filter(KnowledgeEntry.is_verified == True)
|
||||
|
||||
|
||||
entries = query_filter.all()
|
||||
# 若已验证为空,则回退到全部活跃条目
|
||||
if not entries and verified_only:
|
||||
entries = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True).all()
|
||||
|
||||
|
||||
if not entries:
|
||||
logger.warning("知识库中没有活跃条目")
|
||||
return []
|
||||
|
||||
|
||||
# 如果查询为空,返回所有条目
|
||||
if not query.strip():
|
||||
logger.info("查询为空,返回所有条目")
|
||||
@@ -146,18 +158,18 @@ class KnowledgeManager:
|
||||
"usage_count": entry.usage_count,
|
||||
"is_verified": entry.is_verified
|
||||
} for entry in entries[:top_k]]
|
||||
|
||||
|
||||
# 使用简化的关键词匹配搜索
|
||||
q = query.strip().lower()
|
||||
results = []
|
||||
|
||||
|
||||
for entry in entries:
|
||||
# 组合问题和答案进行搜索
|
||||
search_text = (entry.question + " " + entry.answer).lower()
|
||||
|
||||
|
||||
# 计算匹配分数
|
||||
score = 0.0
|
||||
|
||||
|
||||
# 完全匹配
|
||||
if q in search_text:
|
||||
score = 1.0
|
||||
@@ -165,16 +177,16 @@ class KnowledgeManager:
|
||||
# 分词匹配
|
||||
query_words = q.split()
|
||||
text_words = search_text.split()
|
||||
|
||||
|
||||
# 计算单词匹配度
|
||||
matched_words = 0
|
||||
for word in query_words:
|
||||
if word in text_words:
|
||||
matched_words += 1
|
||||
|
||||
|
||||
if matched_words > 0:
|
||||
score = matched_words / len(query_words) * 0.8
|
||||
|
||||
|
||||
# 如果分数大于0,添加到结果中
|
||||
if score > 0:
|
||||
results.append({
|
||||
@@ -187,18 +199,18 @@ class KnowledgeManager:
|
||||
"usage_count": entry.usage_count,
|
||||
"is_verified": entry.is_verified
|
||||
})
|
||||
|
||||
|
||||
# 按相似度排序并返回top_k个结果
|
||||
results.sort(key=lambda x: x['similarity_score'], reverse=True)
|
||||
results = results[:top_k]
|
||||
|
||||
|
||||
logger.info(f"搜索查询 '{query}' 返回 {len(results)} 个结果")
|
||||
return results
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"搜索知识库失败: {e}")
|
||||
return []
|
||||
|
||||
|
||||
def add_knowledge_entry(
|
||||
self,
|
||||
question: str,
|
||||
@@ -220,17 +232,17 @@ class KnowledgeManager:
|
||||
)
|
||||
session.add(entry)
|
||||
session.commit()
|
||||
|
||||
|
||||
# 重新训练向量化器
|
||||
self._load_vectorizer()
|
||||
|
||||
|
||||
logger.info(f"添加知识库条目成功: {question[:50]}...")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"添加知识库条目失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def update_knowledge_entry(
|
||||
self,
|
||||
entry_id: int,
|
||||
@@ -245,10 +257,10 @@ class KnowledgeManager:
|
||||
entry = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.id == entry_id
|
||||
).first()
|
||||
|
||||
|
||||
if not entry:
|
||||
return False
|
||||
|
||||
|
||||
if question:
|
||||
entry.question = question
|
||||
if answer:
|
||||
@@ -257,34 +269,34 @@ class KnowledgeManager:
|
||||
entry.category = category
|
||||
if confidence_score is not None:
|
||||
entry.confidence_score = confidence_score
|
||||
|
||||
|
||||
entry.updated_at = datetime.now()
|
||||
session.commit()
|
||||
|
||||
|
||||
logger.info(f"更新知识库条目成功: {entry_id}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"更新知识库条目失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_knowledge_entries(self, page: int = 1, per_page: int = 10) -> Dict[str, Any]:
|
||||
"""获取知识库条目(分页)"""
|
||||
try:
|
||||
with db_manager.get_session() as session:
|
||||
# 计算偏移量
|
||||
offset = (page - 1) * per_page
|
||||
|
||||
|
||||
# 获取总数
|
||||
total = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.is_active == True
|
||||
).count()
|
||||
|
||||
|
||||
# 获取分页数据
|
||||
entries = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.is_active == True
|
||||
).order_by(KnowledgeEntry.created_at.desc()).offset(offset).limit(per_page).all()
|
||||
|
||||
|
||||
# 转换为字典格式
|
||||
knowledge_list = []
|
||||
for entry in entries:
|
||||
@@ -298,7 +310,7 @@ class KnowledgeManager:
|
||||
"created_at": entry.created_at.isoformat() if entry.created_at else None,
|
||||
"is_verified": getattr(entry, 'is_verified', False) # 添加验证状态
|
||||
})
|
||||
|
||||
|
||||
return {
|
||||
"knowledge": knowledge_list,
|
||||
"total": total,
|
||||
@@ -309,7 +321,7 @@ class KnowledgeManager:
|
||||
except Exception as e:
|
||||
logger.error(f"获取知识库条目失败: {e}")
|
||||
return {"knowledge": [], "total": 0, "page": 1, "per_page": per_page, "total_pages": 0}
|
||||
|
||||
|
||||
def verify_knowledge_entry(self, entry_id: int, verified_by: str = "admin") -> bool:
|
||||
"""验证知识库条目"""
|
||||
try:
|
||||
@@ -317,22 +329,22 @@ class KnowledgeManager:
|
||||
entry = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.id == entry_id
|
||||
).first()
|
||||
|
||||
|
||||
if not entry:
|
||||
return False
|
||||
|
||||
|
||||
entry.is_verified = True
|
||||
entry.verified_by = verified_by
|
||||
entry.verified_at = datetime.now()
|
||||
|
||||
|
||||
session.commit()
|
||||
logger.info(f"知识库条目验证成功: {entry_id}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"验证知识库条目失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def unverify_knowledge_entry(self, entry_id: int) -> bool:
|
||||
"""取消验证知识库条目"""
|
||||
try:
|
||||
@@ -340,22 +352,22 @@ class KnowledgeManager:
|
||||
entry = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.id == entry_id
|
||||
).first()
|
||||
|
||||
|
||||
if not entry:
|
||||
return False
|
||||
|
||||
|
||||
entry.is_verified = False
|
||||
entry.verified_by = None
|
||||
entry.verified_at = None
|
||||
|
||||
|
||||
session.commit()
|
||||
logger.info(f"知识库条目取消验证成功: {entry_id}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"取消验证知识库条目失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def delete_knowledge_entry(self, entry_id: int) -> bool:
|
||||
"""删除知识库条目(软删除)"""
|
||||
try:
|
||||
@@ -363,28 +375,28 @@ class KnowledgeManager:
|
||||
entry = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.id == entry_id
|
||||
).first()
|
||||
|
||||
|
||||
if not entry:
|
||||
logger.warning(f"知识库条目不存在: {entry_id}")
|
||||
return False
|
||||
|
||||
|
||||
entry.is_active = False
|
||||
session.commit()
|
||||
|
||||
|
||||
# 重新训练向量化器(如果还有活跃条目)
|
||||
try:
|
||||
self._load_vectorizer()
|
||||
except Exception as vectorizer_error:
|
||||
logger.warning(f"重新加载向量化器失败: {vectorizer_error}")
|
||||
# 即使向量化器加载失败,删除操作仍然成功
|
||||
|
||||
|
||||
logger.info(f"删除知识库条目成功: {entry_id}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"删除知识库条目失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_knowledge_stats(self) -> Dict[str, Any]:
|
||||
"""获取知识库统计信息"""
|
||||
try:
|
||||
@@ -393,7 +405,7 @@ class KnowledgeManager:
|
||||
active_entries = session.query(KnowledgeEntry).filter(
|
||||
KnowledgeEntry.is_active == True
|
||||
).count()
|
||||
|
||||
|
||||
# 按类别统计
|
||||
category_stats = session.query(
|
||||
KnowledgeEntry.category,
|
||||
@@ -401,19 +413,19 @@ class KnowledgeManager:
|
||||
KnowledgeEntry.category == KnowledgeEntry.category
|
||||
).count()
|
||||
).group_by(KnowledgeEntry.category).all()
|
||||
|
||||
|
||||
# 平均置信度
|
||||
avg_confidence = session.query(
|
||||
func.avg(KnowledgeEntry.confidence_score)
|
||||
).scalar() or 0.0
|
||||
|
||||
|
||||
return {
|
||||
"total_entries": total_entries,
|
||||
"active_entries": active_entries,
|
||||
"category_distribution": dict(category_stats),
|
||||
"average_confidence": float(avg_confidence)
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取知识库统计失败: {e}")
|
||||
return {}
|
||||
@@ -428,7 +440,7 @@ class KnowledgeManager:
|
||||
).update({
|
||||
"usage_count": KnowledgeEntry.usage_count + 1,
|
||||
"updated_at": datetime.now()
|
||||
})
|
||||
}, synchronize_session=False)
|
||||
session.commit()
|
||||
|
||||
logger.info(f"成功更新 {len(entry_ids)} 个知识库条目的使用次数")
|
||||
@@ -436,4 +448,59 @@ class KnowledgeManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"更新知识库使用次数失败: {e}")
|
||||
return False
|
||||
return False
|
||||
|
||||
def get_knowledge_paginated(self, page: int = 1, per_page: int = 10, category_filter: str = '', verified_filter: str = '') -> Dict[str, Any]:
    """Return one page of active knowledge entries, optionally filtered.

    Args:
        page: 1-based page number.
        per_page: Number of entries per page.
        category_filter: When non-empty, keep only entries of this category.
        verified_filter: ``'true'`` keeps verified entries, ``'false'`` keeps
            unverified ones; any other value applies no verification filter.

    Returns:
        A dict with the keys ``knowledge`` (list of entry dicts), ``page``,
        ``per_page``, ``total`` and ``total_pages``.  On any error the
        failure is logged and the same structure is returned with an empty
        list and zero totals.
    """
    try:
        with db_manager.get_session() as session:
            # ``== True`` / ``== False`` below are SQLAlchemy column
            # expressions and must stay as equality comparisons.
            selection = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True)

            if category_filter:
                selection = selection.filter(KnowledgeEntry.category == category_filter)

            if verified_filter == 'true':
                selection = selection.filter(KnowledgeEntry.is_verified == True)
            elif verified_filter == 'false':
                selection = selection.filter(KnowledgeEntry.is_verified == False)

            # Newest entries first.
            selection = selection.order_by(KnowledgeEntry.created_at.desc())

            total = selection.count()

            start = (page - 1) * per_page
            rows = selection.offset(start).limit(per_page).all()

            def _iso(stamp):
                # Timestamps may be unset; serialize those as None.
                return stamp.isoformat() if stamp else None

            knowledge_data = [
                {
                    'id': row.id,
                    'question': row.question,
                    'answer': row.answer,
                    'category': row.category,
                    'confidence_score': row.confidence_score,
                    'usage_count': row.usage_count,
                    'is_verified': row.is_verified,
                    'is_active': row.is_active,
                    'created_at': _iso(row.created_at),
                    'updated_at': _iso(row.updated_at),
                }
                for row in rows
            ]

            # Ceiling division: a partially filled last page still counts.
            total_pages = (total + per_page - 1) // per_page

            return {
                'knowledge': knowledge_data,
                'page': page,
                'per_page': per_page,
                'total': total,
                'total_pages': total_pages,
            }
    except Exception as e:
        logger.error(f"获取分页知识库失败: {e}")
        # Hand back an empty structure so callers do not fail on missing keys.
        return {
            'knowledge': [],
            'page': page,
            'per_page': per_page,
            'total': 0,
            'total_pages': 0,
        }
|
||||
|
||||
Reference in New Issue
Block a user