feat: init rag service

2026-07-28 12:39:09 +00:00 · 2025-10-29 05:54:08 +09:00
parent 44ae9c546f
commit 5d897cb845
54 changed files with 6425 additions and 0 deletions
@@ -0,0 +1,359 @@
+"""
+Azure AI Search 관련자료 DB
+"""
+from azure.core.credentials import AzureKeyCredential
+from azure.search.documents import SearchClient
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
+    SearchIndex,
+    SimpleField,
+    SearchableField,
+    SearchField,
+    VectorSearch,
+    HnswAlgorithmConfiguration,
+    VectorSearchProfile,
+    SemanticConfiguration,
+    SemanticField,
+    SemanticPrioritizedFields,
+    SemanticSearch,
+    SearchFieldDataType
+)
+from azure.search.documents.models import (
+    VectorizedQuery,
+    QueryType,
+    QueryCaptionType,
+    QueryAnswerType
+)
+from typing import List, Dict, Any, Optional
+import logging
+
+from ..models.document import DocumentChunk
+
+logger = logging.getLogger(__name__)
+
+
+class AzureAISearchDB:
+    """Azure AI Search 관련자료 데이터베이스"""
+
+    def __init__(
+        self,
+        endpoint: str,
+        api_key: str,
+        index_name: str = "meeting-minutes-index",
+        api_version: str = "2023-11-01"
+    ):
+        """
+        초기화
+
+        Args:
+            endpoint: Azure AI Search 엔드포인트
+            api_key: API 키
+            index_name: 인덱스 이름
+            api_version: API 버전
+        """
+        self.endpoint = endpoint
+        self.api_key = api_key
+        self.index_name = index_name
+
+        credential = AzureKeyCredential(api_key)
+        self.search_client = SearchClient(
+            endpoint=endpoint,
+            index_name=index_name,
+            credential=credential
+        )
+        self.index_client = SearchIndexClient(
+            endpoint=endpoint,
+            credential=credential
+        )
+
+    def create_index(self):
+        """
+        인덱스 생성 (스키마 정의)
+        """
+        # 필드 정의
+        fields = [
+            SimpleField(name="id", type=SearchFieldDataType.String, key=True),
+            SimpleField(name="documentId", type=SearchFieldDataType.String, filterable=True),
+            SimpleField(name="documentType", type=SearchFieldDataType.String, filterable=True, facetable=True),
+            SearchableField(name="title", type=SearchFieldDataType.String, analyzer_name="ko.lucene"),
+            SimpleField(name="folder", type=SearchFieldDataType.String, filterable=True, facetable=True),
+            SimpleField(name="createdDate", type=SearchFieldDataType.DateTimeOffset, filterable=True, sortable=True),
+            SearchField(
+                name="participants",
+                type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+                searchable=True,
+                filterable=True,
+                facetable=True
+            ),
+            SearchField(
+                name="keywords",
+                type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+                searchable=True,
+                facetable=True
+            ),
+            SimpleField(name="agendaId", type=SearchFieldDataType.String, filterable=True),
+            SearchableField(name="agendaTitle", type=SearchFieldDataType.String, analyzer_name="ko.lucene"),
+            SimpleField(name="chunkIndex", type=SearchFieldDataType.Int32, filterable=True, sortable=True),
+            SearchableField(name="content", type=SearchFieldDataType.String, analyzer_name="ko.lucene"),
+            SearchField(
+                name="contentVector",
+                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+                searchable=True,
+                vector_search_dimensions=1536,
+                vector_search_profile_name="meeting-vector-profile"
+            ),
+            SimpleField(name="tokenCount", type=SearchFieldDataType.Int32, filterable=True)
+        ]
+
+        # 벡터 검색 설정
+        vector_search = VectorSearch(
+            profiles=[
+                VectorSearchProfile(
+                    name="meeting-vector-profile",
+                    algorithm_configuration_name="meeting-hnsw"
+                )
+            ],
+            algorithms=[
+                HnswAlgorithmConfiguration(
+                    name="meeting-hnsw",
+                    parameters={
+                        "m": 4,
+                        "efConstruction": 400,
+                        "efSearch": 500,
+                        "metric": "cosine"
+                    }
+                )
+            ]
+        )
+
+        # Semantic Search 설정
+        semantic_config = SemanticConfiguration(
+            name="meeting-semantic-config",
+            prioritized_fields=SemanticPrioritizedFields(
+                title_field=SemanticField(field_name="title"),
+                content_fields=[SemanticField(field_name="content")],
+                keywords_fields=[SemanticField(field_name="keywords")]
+            )
+        )
+
+        semantic_search = SemanticSearch(
+            configurations=[semantic_config]
+        )
+
+        # 인덱스 생성
+        index = SearchIndex(
+            name=self.index_name,
+            fields=fields,
+            vector_search=vector_search,
+            semantic_search=semantic_search
+        )
+
+        try:
+            self.index_client.create_or_update_index(index)
+            logger.info(f"Azure AI Search 인덱스 생성 완료: {self.index_name}")
+        except Exception as e:
+            logger.error(f"인덱스 생성 실패: {str(e)}")
+            raise
+
+    def upload_documents(self, chunks: List[DocumentChunk]) -> bool:
+        """
+        문서 업로드 (배치)
+
+        Args:
+            chunks: 문서 청크 리스트
+
+        Returns:
+            성공 여부
+        """
+        if not chunks:
+            return True
+
+        try:
+            # Pydantic 모델을 딕셔너리로 변환
+            documents = [chunk.dict() for chunk in chunks]
+
+            # 배치 업로드 (최대 1000개씩)
+            batch_size = 1000
+            for i in range(0, len(documents), batch_size):
+                batch = documents[i:i + batch_size]
+                result = self.search_client.upload_documents(documents=batch)
+
+                logger.info(f"배치 {i//batch_size + 1}: {len(batch)}개 문서 업로드 완료")
+
+            return True
+
+        except Exception as e:
+            logger.error(f"문서 업로드 실패: {str(e)}")
+            return False
+
+    def hybrid_search(
+        self,
+        query: str,
+        query_embedding: List[float],
+        top_k: int = 3,
+        folder: Optional[str] = None,
+        document_type: Optional[str] = None,
+        semantic_ranking: bool = True
+    ) -> List[Dict[str, Any]]:
+        """
+        Hybrid Search (Keyword + Vector + Semantic Ranking)
+
+        Args:
+            query: 검색 쿼리
+            query_embedding: 쿼리 임베딩 벡터
+            top_k: 반환할 최대 결과 수
+            folder: 폴더 필터
+            document_type: 문서 타입 필터
+            semantic_ranking: Semantic Ranking 사용 여부
+
+        Returns:
+            검색 결과 리스트
+        """
+        try:
+            # Vector Query
+            vector_query = VectorizedQuery(
+                vector=query_embedding,
+                k_nearest_neighbors=50,
+                fields="contentVector"
+            )
+
+            # 필터 생성
+            filter_parts = []
+            if folder:
+                filter_parts.append(f"folder eq '{folder}'")
+            if document_type:
+                filter_parts.append(f"documentType eq '{document_type}'")
+
+            filter_expression = " and ".join(filter_parts) if filter_parts else None
+
+            # 검색 옵션 설정
+            search_params = {
+                "search_text": query,
+                "vector_queries": [vector_query],
+                "select": ["documentId", "title", "createdDate", "content", "agendaTitle", "folder"],
+                "top": 50 if semantic_ranking else top_k,
+                "filter": filter_expression
+            }
+
+            # Semantic Ranking 활성화
+            if semantic_ranking:
+                search_params.update({
+                    "query_type": QueryType.SEMANTIC,
+                    "semantic_configuration_name": "meeting-semantic-config",
+                    "query_caption": QueryCaptionType.EXTRACTIVE,
+                    "query_answer": QueryAnswerType.EXTRACTIVE
+                })
+
+            # 검색 실행
+            results = self.search_client.search(**search_params)
+
+            # 결과 처리
+            search_results = []
+            for i, result in enumerate(results):
+                if i >= top_k:
+                    break
+
+                # Reranking Score (Semantic Ranking 또는 BM25 Score)
+                score = result.get("@search.reranker_score", result.get("@search.score", 0.0))
+
+                # 관련도 레벨 결정
+                if score >= 3.0:  # Semantic Ranking 점수 기준
+                    relevance_level = "HIGH"
+                elif score >= 2.0:
+                    relevance_level = "MEDIUM"
+                else:
+                    relevance_level = "LOW"
+
+                # Caption 추출 (Semantic Captions)
+                captions = result.get("@search.captions", [])
+                excerpt = captions[0].text if captions else result["content"][:300]
+
+                search_results.append({
+                    "document_id": result["documentId"],
+                    "title": result["title"],
+                    "document_type": result.get("documentType", "unknown"),
+                    "created_date": result.get("createdDate"),
+                    "relevance_score": min(score / 4.0, 1.0),  # 0~1 정규화
+                    "relevance_level": relevance_level,
+                    "content_excerpt": excerpt,
+                    "folder": result.get("folder")
+                })
+
+            return search_results
+
+        except Exception as e:
+            logger.error(f"Hybrid Search 실패: {str(e)}")
+            return []
+
+    def delete_documents_by_id(self, document_id: str) -> bool:
+        """
+        문서 ID로 모든 청크 삭제
+
+        Args:
+            document_id: 문서 ID
+
+        Returns:
+            성공 여부
+        """
+        try:
+            # 해당 문서의 모든 청크 조회
+            results = self.search_client.search(
+                search_text="*",
+                filter=f"documentId eq '{document_id}'",
+                select=["id"]
+            )
+
+            # 청크 ID 수집
+            chunk_ids = [{"id": result["id"]} for result in results]
+
+            if chunk_ids:
+                # 배치 삭제
+                self.search_client.delete_documents(documents=chunk_ids)
+                logger.info(f"문서 {document_id}의 {len(chunk_ids)}개 청크 삭제 완료")
+
+            return True
+
+        except Exception as e:
+            logger.error(f"문서 삭제 실패 ({document_id}): {str(e)}")
+            return False
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        인덱스 통계 조회
+
+        Returns:
+            통계 정보
+        """
+        try:
+            # 전체 문서 수 (중복 제거)
+            results = self.search_client.search(
+                search_text="*",
+                select=["documentId", "documentType"],
+                top=10000
+            )
+
+            document_ids = set()
+            type_counts = {}
+
+            for result in results:
+                doc_id = result.get("documentId")
+                doc_type = result.get("documentType", "unknown")
+
+                if doc_id:
+                    document_ids.add(doc_id)
+
+                type_counts[doc_type] = type_counts.get(doc_type, 0) + 1
+
+            return {
+                "total_documents": len(document_ids),
+                "total_chunks": sum(type_counts.values()),
+                "by_type": type_counts
+            }
+
+        except Exception as e:
+            logger.error(f"통계 조회 실패: {str(e)}")
+            return {
+                "total_documents": 0,
+                "total_chunks": 0,
+                "by_type": {}
+            }
@@ -0,0 +1,381 @@
+"""
+PostgreSQL + pgvector 용어집 DB
+"""
+import psycopg2
+from psycopg2.extras import RealDictCursor
+from typing import List, Optional, Dict, Any
+from contextlib import contextmanager
+import logging
+import json
+
+from ..models.term import Term
+from ..utils.embedding import cosine_similarity
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresVectorDB:
+    """PostgreSQL + pgvector 용어집 데이터베이스"""
+
+    def __init__(self, connection_string: str):
+        """
+        초기화
+
+        Args:
+            connection_string: PostgreSQL 연결 문자열
+        """
+        self.connection_string = connection_string
+
+    @contextmanager
+    def get_connection(self):
+        """데이터베이스 연결 컨텍스트 매니저"""
+        conn = psycopg2.connect(self.connection_string)
+        try:
+            yield conn
+        finally:
+            conn.close()
+
+    @staticmethod
+    def _parse_embedding(embedding_str: Optional[str]) -> Optional[List[float]]:
+        """
+        PostgreSQL vector 타입을 Python 리스트로 변환
+
+        Args:
+            embedding_str: PostgreSQL에서 반환된 vector 문자열 (예: "[-0.003,0.01,...]")
+
+        Returns:
+            float 리스트 또는 None
+        """
+        if not embedding_str:
+            return None
+
+        try:
+            # vector 타입은 "[1,2,3]" 형태의 문자열로 반환됨
+            if isinstance(embedding_str, str):
+                return json.loads(embedding_str)
+            elif isinstance(embedding_str, list):
+                return embedding_str
+            return None
+        except (json.JSONDecodeError, ValueError) as e:
+            logger.error(f"임베딩 파싱 실패: {str(e)}")
+            return None
+
+    @staticmethod
+    def _row_to_term(row: Dict[str, Any]) -> Term:
+        """
+        데이터베이스 row를 Term 객체로 변환
+
+        Args:
+            row: 데이터베이스 row (dict)
+
+        Returns:
+            Term 객체
+        """
+        term_dict = dict(row)
+
+        # embedding 필드 파싱
+        if "embedding" in term_dict:
+            term_dict["embedding"] = PostgresVectorDB._parse_embedding(term_dict["embedding"])
+
+        term_dict.pop('embedding')
+        return Term(**term_dict)
+
+    def init_database(self):
+        """
+        데이터베이스 초기화 (테이블 및 인덱스 생성)
+        """
+        with self.get_connection() as conn:
+            with conn.cursor() as cur:
+                # pgvector 확장 설치
+                cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+                # terms 테이블 생성
+                cur.execute("""
+                    CREATE TABLE IF NOT EXISTS terms (
+                        term_id VARCHAR(100) PRIMARY KEY,
+                        term_name VARCHAR(200) NOT NULL,
+                        normalized_name VARCHAR(200) NOT NULL,
+                        category VARCHAR(100),
+                        definition TEXT NOT NULL,
+                        context TEXT,
+                        synonyms JSONB DEFAULT '[]',
+                        related_terms JSONB DEFAULT '[]',
+                        document_source JSONB,
+                        confidence_score DECIMAL(3,2) DEFAULT 0.0,
+                        usage_count INTEGER DEFAULT 0,
+                        last_updated VARCHAR(50),
+                        embedding vector(1536),
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                """)
+
+                # 인덱스 생성
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_terms_normalized_name
+                    ON terms(normalized_name)
+                """)
+
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_terms_category
+                    ON terms(category)
+                """)
+
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_terms_confidence
+                    ON terms(confidence_score DESC)
+                """)
+
+                # 벡터 유사도 검색용 인덱스 (IVFFlat)
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_terms_embedding
+                    ON terms USING ivfflat (embedding vector_cosine_ops)
+                    WITH (lists = 100)
+                """)
+
+                # term_usage_logs 테이블 (사용 이력)
+                cur.execute("""
+                    CREATE TABLE IF NOT EXISTS term_usage_logs (
+                        log_id SERIAL PRIMARY KEY,
+                        term_id VARCHAR(100) REFERENCES terms(term_id) ON DELETE CASCADE,
+                        user_id VARCHAR(100),
+                        meeting_id VARCHAR(100),
+                        action VARCHAR(20),
+                        feedback_rating INTEGER,
+                        feedback_comment TEXT,
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                """)
+
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_usage_term_id
+                    ON term_usage_logs(term_id, created_at DESC)
+                """)
+
+                conn.commit()
+                logger.info("PostgreSQL 데이터베이스 초기화 완료")
+
+    def insert_term(self, term: Term) -> bool:
+        """
+        용어 삽입
+
+        Args:
+            term: 용어 객체
+
+        Returns:
+            성공 여부
+        """
+        try:
+            with self.get_connection() as conn:
+                with conn.cursor() as cur:
+                    cur.execute("""
+                        INSERT INTO terms (
+                            term_id, term_name, normalized_name, category,
+                            definition, context, synonyms, related_terms,
+                            document_source, confidence_score, usage_count,
+                            last_updated, embedding
+                        ) VALUES (
+                            %s, %s, %s, %s, %s, %s, %s::jsonb, %s::jsonb,
+                            %s::jsonb, %s, %s, %s, %s::vector
+                        )
+                        ON CONFLICT (term_id) DO UPDATE SET
+                            term_name = EXCLUDED.term_name,
+                            normalized_name = EXCLUDED.normalized_name,
+                            category = EXCLUDED.category,
+                            definition = EXCLUDED.definition,
+                            context = EXCLUDED.context,
+                            synonyms = EXCLUDED.synonyms,
+                            related_terms = EXCLUDED.related_terms,
+                            document_source = EXCLUDED.document_source,
+                            confidence_score = EXCLUDED.confidence_score,
+                            usage_count = EXCLUDED.usage_count,
+                            last_updated = EXCLUDED.last_updated,
+                            embedding = EXCLUDED.embedding,
+                            updated_at = CURRENT_TIMESTAMP
+                    """, (
+                        term.term_id,
+                        term.term_name,
+                        term.normalized_name,
+                        term.category,
+                        term.definition,
+                        term.context,
+                        psycopg2.extras.Json(term.synonyms),
+                        psycopg2.extras.Json(term.related_terms),
+                        psycopg2.extras.Json(term.document_source.dict() if term.document_source else None),
+                        term.confidence_score,
+                        term.usage_count,
+                        term.last_updated,
+                        term.embedding
+                    ))
+
+                conn.commit()
+                return True
+
+        except Exception as e:
+            logger.error(f"용어 삽입 실패 ({term.term_id}): {str(e)}")
+            return False
+
+    def get_term_by_id(self, term_id: str) -> Optional[Term]:
+        """
+        ID로 용어 조회
+
+        Args:
+            term_id: 용어 ID
+
+        Returns:
+            용어 객체 또는 None
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT * FROM terms WHERE term_id = %s
+                """, (term_id,))
+
+                row = cur.fetchone()
+                if row:
+                    return self._row_to_term(row)
+                return None
+
+    def search_by_keyword(
+        self,
+        query: str,
+        top_k: int = 5,
+        confidence_threshold: float = 0.7
+    ) -> List[Dict[str, Any]]:
+        """
+        키워드 검색
+
+        Args:
+            query: 검색 쿼리
+            top_k: 반환할 최대 결과 수
+            confidence_threshold: 최소 신뢰도 임계값
+
+        Returns:
+            검색 결과 리스트
+        """
+        normalized_query = query.lower().strip()
+
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT *,
+                           CASE
+                               WHEN normalized_name = %s THEN 1.0
+                               WHEN normalized_name LIKE %s THEN 0.9
+                               WHEN term_name ILIKE %s THEN 0.8
+                               WHEN synonyms::text ILIKE %s THEN 0.7
+                               ELSE 0.5
+                           END as match_score
+                    FROM terms
+                    WHERE (
+                        normalized_name LIKE %s
+                        OR term_name ILIKE %s
+                        OR synonyms::text ILIKE %s
+                        OR definition ILIKE %s
+                    )
+                    AND confidence_score >= %s
+                    ORDER BY match_score DESC, confidence_score DESC, usage_count DESC
+                    LIMIT %s
+                """, (
+                    normalized_query,
+                    f"%{normalized_query}%",
+                    f"%{query}%",
+                    f"%{query}%",
+                    f"%{normalized_query}%",
+                    f"%{query}%",
+                    f"%{query}%",
+                    f"%{query}%",
+                    confidence_threshold,
+                    top_k
+                ))
+
+                results = []
+                for row in cur.fetchall():
+                    term_dict = dict(row)
+                    match_score = term_dict.pop("match_score")
+                    results.append({
+                        "term": self._row_to_term(term_dict),
+                        "relevance_score": float(match_score),
+                        "match_type": "keyword"
+                    })
+
+                return results
+
+    def search_by_vector(
+        self,
+        query_embedding: List[float],
+        top_k: int = 5,
+        confidence_threshold: float = 0.7
+    ) -> List[Dict[str, Any]]:
+        """
+        벡터 유사도 검색
+
+        Args:
+            query_embedding: 쿼리 임베딩 벡터
+            top_k: 반환할 최대 결과 수
+            confidence_threshold: 최소 신뢰도 임계값
+
+        Returns:
+            검색 결과 리스트
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT *,
+                           1 - (embedding <=> %s::vector) as similarity_score
+                    FROM terms
+                    WHERE confidence_score >= %s
+                      AND embedding IS NOT NULL
+                    ORDER BY embedding <=> %s::vector
+                    LIMIT %s
+                """, (
+                    query_embedding,
+                    confidence_threshold,
+                    query_embedding,
+                    top_k
+                ))
+
+                results = []
+                for row in cur.fetchall():
+                    term_dict = dict(row)
+                    similarity_score = term_dict.pop("similarity_score")
+                    results.append({
+                        "term": self._row_to_term(term_dict),
+                        "relevance_score": float(similarity_score),
+                        "match_type": "vector"
+                    })
+
+                return results
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        용어 통계 조회
+
+        Returns:
+            통계 정보
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                # 전체 통계
+                cur.execute("""
+                    SELECT
+                        COUNT(*) as total_terms,
+                        AVG(confidence_score) as avg_confidence
+                    FROM terms
+                """)
+                overall = cur.fetchone()
+
+                # 카테고리별 통계
+                cur.execute("""
+                    SELECT category, COUNT(*) as count
+                    FROM terms
+                    GROUP BY category
+                    ORDER BY count DESC
+                """)
+                by_category = {row["category"]: row["count"] for row in cur.fetchall()}
+
+                return {
+                    "total_terms": overall["total_terms"],
+                    "avg_confidence": float(overall["avg_confidence"]) if overall["avg_confidence"] else 0.0,
+                    "by_category": by_category
+                }
@@ -0,0 +1,338 @@
+"""
+RAG 회의록 데이터베이스
+"""
+import psycopg2
+from psycopg2.extras import RealDictCursor
+from typing import List, Optional, Dict, Any
+from contextlib import contextmanager
+import logging
+import json
+from datetime import datetime
+
+from ..models.minutes import RagMinutes, MinutesSection
+from ..utils.embedding import cosine_similarity
+
+logger = logging.getLogger(__name__)
+
+
+class RagMinutesDB:
+    """RAG 회의록 PostgreSQL + pgvector 데이터베이스"""
+
+    def __init__(self, connection_string: str):
+        """
+        초기화
+
+        Args:
+            connection_string: PostgreSQL 연결 문자열
+        """
+        self.connection_string = connection_string
+
+    @contextmanager
+    def get_connection(self):
+        """데이터베이스 연결 컨텍스트 매니저"""
+        conn = psycopg2.connect(self.connection_string)
+        try:
+            yield conn
+        finally:
+            conn.close()
+
+    @staticmethod
+    def _parse_embedding(embedding_str: Optional[str]) -> Optional[List[float]]:
+        """
+        PostgreSQL vector 타입을 Python 리스트로 변환
+
+        Args:
+            embedding_str: PostgreSQL에서 반환된 vector 문자열 (예: "[-0.003,0.01,...]")
+
+        Returns:
+            float 리스트 또는 None
+        """
+        if not embedding_str:
+            return None
+
+        try:
+            # vector 타입은 "[1,2,3]" 형태의 문자열로 반환됨
+            if isinstance(embedding_str, str):
+                return json.loads(embedding_str)
+            elif isinstance(embedding_str, list):
+                return embedding_str
+            return None
+        except (json.JSONDecodeError, ValueError) as e:
+            logger.error(f"임베딩 파싱 실패: {str(e)}")
+            return None
+
+    @staticmethod
+    def _row_to_minutes(row: Dict[str, Any]) -> RagMinutes:
+        """
+        데이터베이스 row를 RagMinutes 객체로 변환
+
+        Args:
+            row: 데이터베이스 row (dict)
+
+        Returns:
+            RagMinutes 객체
+        """
+        minutes_dict = dict(row)
+
+        # embedding 필드 파싱
+        if "embedding" in minutes_dict:
+            minutes_dict["embedding"] = RagMinutesDB._parse_embedding(minutes_dict["embedding"])
+
+        # sections 필드 파싱
+        if "sections" in minutes_dict and minutes_dict["sections"]:
+            sections_data = minutes_dict["sections"]
+            if isinstance(sections_data, str):
+                sections_data = json.loads(sections_data)
+            minutes_dict["sections"] = [MinutesSection(**section) for section in sections_data]
+        else:
+            minutes_dict["sections"] = []
+
+        # datetime 필드를 문자열로 변환
+        for field in ['scheduled_at', 'finalized_at', 'created_at', 'updated_at']:
+            if field in minutes_dict and minutes_dict[field]:
+                if isinstance(minutes_dict[field], datetime):
+                    minutes_dict[field] = minutes_dict[field].isoformat()
+
+        return RagMinutes(**minutes_dict)
+
+    def insert_minutes(self, minutes: RagMinutes) -> bool:
+        """
+        회의록 삽입 또는 업데이트
+
+        Args:
+            minutes: 회의록 객체
+
+        Returns:
+            성공 여부
+        """
+        try:
+            with self.get_connection() as conn:
+                with conn.cursor() as cur:
+                    # sections를 JSON으로 변환
+                    sections_json = [section.dict() for section in minutes.sections]
+
+                    cur.execute("""
+                        INSERT INTO rag_minutes (
+                            meeting_id, title, purpose, description, scheduled_at,
+                            location, organizer_id, minutes_id, minutes_status,
+                            minutes_version, created_by, finalized_by, finalized_at,
+                            sections, full_content, embedding
+                        ) VALUES (
+                            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
+                            %s::jsonb, %s, %s::vector
+                        )
+                        ON CONFLICT (minutes_id) DO UPDATE SET
+                            meeting_id = EXCLUDED.meeting_id,
+                            title = EXCLUDED.title,
+                            purpose = EXCLUDED.purpose,
+                            description = EXCLUDED.description,
+                            scheduled_at = EXCLUDED.scheduled_at,
+                            location = EXCLUDED.location,
+                            organizer_id = EXCLUDED.organizer_id,
+                            minutes_status = EXCLUDED.minutes_status,
+                            minutes_version = EXCLUDED.minutes_version,
+                            finalized_by = EXCLUDED.finalized_by,
+                            finalized_at = EXCLUDED.finalized_at,
+                            sections = EXCLUDED.sections,
+                            full_content = EXCLUDED.full_content,
+                            embedding = EXCLUDED.embedding,
+                            updated_at = CURRENT_TIMESTAMP
+                    """, (
+                        minutes.meeting_id,
+                        minutes.title,
+                        minutes.purpose,
+                        minutes.description,
+                        minutes.scheduled_at,
+                        minutes.location,
+                        minutes.organizer_id,
+                        minutes.minutes_id,
+                        minutes.minutes_status,
+                        minutes.minutes_version,
+                        minutes.created_by,
+                        minutes.finalized_by,
+                        minutes.finalized_at,
+                        psycopg2.extras.Json(sections_json),
+                        minutes.full_content,
+                        minutes.embedding
+                    ))
+
+                conn.commit()
+                logger.info(f"회의록 저장 성공: {minutes.minutes_id}")
+                return True
+
+        except Exception as e:
+            logger.error(f"회의록 저장 실패 ({minutes.minutes_id}): {str(e)}")
+            return False
+
+    def get_minutes_by_id(self, minutes_id: str) -> Optional[RagMinutes]:
+        """
+        ID로 회의록 조회
+
+        Args:
+            minutes_id: 회의록 ID
+
+        Returns:
+            회의록 객체 또는 None
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT * FROM rag_minutes WHERE minutes_id = %s
+                """, (minutes_id,))
+
+                row = cur.fetchone()
+                if row:
+                    return self._row_to_minutes(row)
+                return None
+
+    def search_by_vector(
+        self,
+        query_embedding: List[float],
+        top_k: int = 5,
+        similarity_threshold: float = 0.7
+    ) -> List[Dict[str, Any]]:
+        """
+        벡터 유사도 검색
+
+        Args:
+            query_embedding: 쿼리 임베딩 벡터
+            top_k: 반환할 최대 결과 수
+            similarity_threshold: 최소 유사도 임계값
+
+        Returns:
+            검색 결과 리스트
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT *,
+                           1 - (embedding <=> %s::vector) as similarity_score
+                    FROM rag_minutes
+                    WHERE embedding IS NOT NULL
+                      AND 1 - (embedding <=> %s::vector) >= %s
+                    ORDER BY embedding <=> %s::vector
+                    LIMIT %s
+                """, (
+                    query_embedding,
+                    query_embedding,
+                    similarity_threshold,
+                    query_embedding,
+                    top_k
+                ))
+
+                results = []
+                for row in cur.fetchall():
+                    minutes_dict = dict(row)
+                    similarity_score = minutes_dict.pop("similarity_score")
+                    results.append({
+                        "minutes": self._row_to_minutes(minutes_dict),
+                        "similarity_score": float(similarity_score)
+                    })
+
+                logger.info(f"벡터 검색 완료: {len(results)}개 결과")
+                return results
+
+    def search_by_keyword(
+        self,
+        query: str,
+        top_k: int = 5
+    ) -> List[Dict[str, Any]]:
+        """
+        키워드 검색
+
+        Args:
+            query: 검색 쿼리
+            top_k: 반환할 최대 결과 수
+
+        Returns:
+            검색 결과 리스트
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""
+                    SELECT *,
+                           ts_rank(to_tsvector('simple', full_content), plainto_tsquery('simple', %s)) as rank_score
+                    FROM rag_minutes
+                    WHERE to_tsvector('simple', full_content) @@ plainto_tsquery('simple', %s)
+                       OR title ILIKE %s
+                    ORDER BY rank_score DESC, finalized_at DESC
+                    LIMIT %s
+                """, (
+                    query,
+                    query,
+                    f"%{query}%",
+                    top_k
+                ))
+
+                results = []
+                for row in cur.fetchall():
+                    minutes_dict = dict(row)
+                    rank_score = minutes_dict.pop("rank_score", 0.0)
+                    results.append({
+                        "minutes": self._row_to_minutes(minutes_dict),
+                        "similarity_score": float(rank_score) if rank_score else 0.0
+                    })
+
+                logger.info(f"키워드 검색 완료: {len(results)}개 결과")
+                return results
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        통계 조회
+
+        Returns:
+            통계 정보
+        """
+        with self.get_connection() as conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                # 전체 통계
+                cur.execute("""
+                    SELECT
+                        COUNT(*) as total_minutes,
+                        COUNT(DISTINCT meeting_id) as total_meetings,
+                        COUNT(DISTINCT created_by) as total_authors
+                    FROM rag_minutes
+                """)
+                overall = cur.fetchone()
+
+                # 최근 회의록
+                cur.execute("""
+                    SELECT finalized_at
+                    FROM rag_minutes
+                    WHERE finalized_at IS NOT NULL
+                    ORDER BY finalized_at DESC
+                    LIMIT 1
+                """)
+                latest = cur.fetchone()
+
+                return {
+                    "total_minutes": overall["total_minutes"],
+                    "total_meetings": overall["total_meetings"],
+                    "total_authors": overall["total_authors"],
+                    "latest_finalized_at": latest["finalized_at"].isoformat() if latest and latest["finalized_at"] else None
+                }
+
+    def delete_minutes(self, minutes_id: str) -> bool:
+        """
+        회의록 삭제
+
+        Args:
+            minutes_id: 회의록 ID
+
+        Returns:
+            성공 여부
+        """
+        try:
+            with self.get_connection() as conn:
+                with conn.cursor() as cur:
+                    cur.execute("""
+                        DELETE FROM rag_minutes WHERE minutes_id = %s
+                    """, (minutes_id,))
+
+                conn.commit()
+                logger.info(f"회의록 삭제 성공: {minutes_id}")
+                return True
+
+        except Exception as e:
+            logger.error(f"회의록 삭제 실패 ({minutes_id}): {str(e)}")
+            return False