feat: rag 서비스 Event Hub 연동 및 연관 회의록 API 추가

This commit is contained in:
djeon
2025-10-29 15:29:40 +09:00
parent 5859b1c498
commit ad7975efbd
20 changed files with 2855 additions and 22 deletions
+20 -10
View File
@@ -92,6 +92,8 @@ class RagMinutesDB:
if field in minutes_dict and minutes_dict[field]:
if isinstance(minutes_dict[field], datetime):
minutes_dict[field] = minutes_dict[field].isoformat()
minutes_dict.pop("embedding")
return RagMinutes(**minutes_dict)
@@ -189,7 +191,8 @@ class RagMinutesDB:
self,
query_embedding: List[float],
top_k: int = 5,
similarity_threshold: float = 0.7
similarity_threshold: float = 0.7,
exclude_minutes_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
벡터 유사도 검색
@@ -198,27 +201,34 @@ class RagMinutesDB:
query_embedding: 쿼리 임베딩 벡터
top_k: 반환할 최대 결과 수
similarity_threshold: 최소 유사도 임계값
exclude_minutes_id: 제외할 회의록 ID (연관 회의록 검색 시 자기 자신 제외)
Returns:
검색 결과 리스트
"""
with self.get_connection() as conn:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
# 제외 조건 추가
exclude_condition = ""
params = [query_embedding, query_embedding, similarity_threshold, query_embedding, top_k]
if exclude_minutes_id:
exclude_condition = "AND minutes_id != %s"
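# Parameter order must match the %s placeholders: embedding (SELECT), embedding (WHERE), similarity_threshold, exclude_minutes_id (4th), embedding (ORDER BY, 5th), top_k (LIMIT, 6th)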
params = [query_embedding, query_embedding, similarity_threshold, exclude_minutes_id, query_embedding, top_k]
query = f"""
SELECT *,
1 - (embedding <=> %s::vector) as similarity_score
FROM rag_minutes
WHERE embedding IS NOT NULL
AND 1 - (embedding <=> %s::vector) >= %s
{exclude_condition}
ORDER BY embedding <=> %s::vector
LIMIT %s
""", (
query_embedding,
query_embedding,
similarity_threshold,
query_embedding,
top_k
))
"""
cur.execute(query, params)
results = []
for row in cur.fetchall():
@@ -229,7 +239,7 @@ class RagMinutesDB:
"similarity_score": float(similarity_score)
})
logger.info(f"벡터 검색 완료: {len(results)}개 결과")
logger.info(f"벡터 검색 완료: {len(results)}개 결과 (exclude: {exclude_minutes_id})")
return results
def search_by_keyword(