refactor: 清理不需要的代码文件,添加.gitignore,优化项目结构
This commit is contained in:
@@ -125,16 +125,36 @@ class KnowledgeManager:
|
||||
query_filter = query_filter.filter(KnowledgeEntry.is_verified == True)
|
||||
|
||||
entries = query_filter.all()
|
||||
# 若已验证为空,则回退到全部活跃条目
|
||||
if not entries and verified_only:
|
||||
entries = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True).all()
|
||||
|
||||
if not entries:
|
||||
return []
|
||||
|
||||
# 计算相似度
|
||||
texts = [entry.question + " " + entry.answer for entry in entries]
|
||||
query_vector = self.vectorizer.transform([query])
|
||||
entry_vectors = self.vectorizer.transform(texts)
|
||||
|
||||
similarities = cosine_similarity(query_vector, entry_vectors)[0]
|
||||
|
||||
# 确保向量器已训练
|
||||
try:
|
||||
vocab_ok = hasattr(self.vectorizer, 'vocabulary_') and bool(self.vectorizer.vocabulary_)
|
||||
if not vocab_ok:
|
||||
self.vectorizer.fit(texts)
|
||||
query_vector = self.vectorizer.transform([query])
|
||||
entry_vectors = self.vectorizer.transform(texts)
|
||||
similarities = cosine_similarity(query_vector, entry_vectors)[0]
|
||||
except Exception as vec_err:
|
||||
logger.warning(f"TF-IDF搜索失败,回退到子串匹配: {vec_err}")
|
||||
# 回退:子串匹配评分
|
||||
similarities = []
|
||||
q = query.strip()
|
||||
for t in texts:
|
||||
if not q:
|
||||
similarities.append(0.0)
|
||||
else:
|
||||
score = 1.0 if q in t else 0.0
|
||||
similarities.append(score)
|
||||
similarities = np.array(similarities, dtype=float)
|
||||
|
||||
# 获取top_k个最相似的条目
|
||||
top_indices = np.argsort(similarities)[-top_k:][::-1]
|
||||
|
||||
Reference in New Issue
Block a user