# ai-review/vector/app/utils/category_utils.py

# app/utils/category_utils.py
import re
from typing import Optional

def extract_food_category(category_name: str) -> str:
    """
    Extract the food type from a '>'-delimited category path.

    The second-to-last segment is returned, e.g. '한식' for
    '음식점 > 한식 > 육류,고기'. Note that for a two-segment path this is the
    first segment, and a path without any '>' is returned as-is (trimmed).

    Args:
        category_name: Full category path.

    Returns:
        The extracted food type with surrounding whitespace removed, or an
        empty string when ``category_name`` is empty.
    """
    if not category_name:
        return ""

    segments = [segment.strip() for segment in category_name.split('>')]

    # A path without a separator has nothing "before the last" segment.
    if len(segments) == 1:
        return segments[0]

    # Otherwise take the segment immediately before the last one.
    return segments[-2]

def normalize_category(category: str) -> str:
    """
    Normalize a category string.

    The text is lower-cased, every character other than word characters,
    Korean syllables, commas, slashes and whitespace is removed, and
    leading/trailing whitespace is trimmed.

    Args:
        category: Original category.

    Returns:
        The normalized category, or an empty string when ``category`` is
        empty.
    """
    if not category:
        return ""

    lowered = category.lower()

    # Drop special characters (commas, slashes and whitespace are kept).
    cleaned = re.sub(r'[^\w가-힣,/\s]', '', lowered)

    # Trim only after the removal: stripping first would leave whitespace
    # that sat next to a removed leading/trailing symbol (e.g. "한식 !").
    return cleaned.strip()

def is_similar_category(category1: str, category2: str) -> bool:
    """
    Decide whether two categories are similar.

    Both inputs are normalized with ``normalize_category``. They count as
    similar when the normalized strings are equal, or when they share at
    least one keyword after splitting on commas, slashes and whitespace.

    Args:
        category1: First category.
        category2: Second category.

    Returns:
        True when the categories are similar; False otherwise, including
        when either input is empty or normalizes to an empty string.
    """
    if not category1 or not category2:
        return False

    norm1 = normalize_category(category1)
    norm2 = normalize_category(category2)

    # Inputs made only of whitespace or special characters normalize to "".
    # Without this guard two such inputs would hit the exact-match branch
    # below and be reported as similar, contradicting the empty-input rule.
    if not norm1 or not norm2:
        return False

    # Exact match after normalization.
    if norm1 == norm2:
        return True

    # Keyword-based check: commas and slashes act as keyword separators.
    keywords1 = set(norm1.replace(',', ' ').replace('/', ' ').split())
    keywords2 = set(norm2.replace(',', ' ').replace('/', ' ').split())

    # Similar as soon as the two keyword sets share at least one keyword.
    return not keywords1.isdisjoint(keywords2)

def extract_main_category(category_name: str) -> str:
    """
    Extract the main category from a '>'-delimited category path.

    The second segment is returned, e.g. '한식' for '음식점 > 한식'. A path
    without any '>' is returned as-is (trimmed).

    Args:
        category_name: Full category path.

    Returns:
        The main category with surrounding whitespace removed, or an empty
        string when ``category_name`` is empty.
    """
    if not category_name:
        return ""

    segments = category_name.split('>')

    # Prefer the second segment; fall back to the only one when there is
    # no separator in the path.
    chosen = segments[1] if len(segments) > 1 else segments[0]
    return chosen.strip()

def clean_food_category_for_search(food_category: str) -> str:
    """
    Turn a food category into a space-separated search keyword string.

    Commas and slashes are treated as separators, and generic words
    ('음식점', '요리', '전문점', '맛집') are dropped. When nothing remains, the
    default search term '음식점' is used.

    Args:
        food_category: Original food category.

    Returns:
        The cleaned search keywords joined by single spaces, or '음식점'
        when the input is empty or contains only generic words.
    """
    default_term = "음식점"
    if not food_category:
        return default_term

    # Generic words that add no value to a search query.
    ignored = ('음식점', '요리', '전문점', '맛집')

    # Map commas and slashes to spaces so split() sees them as separators.
    separators_to_space = str.maketrans(',/', '  ')
    tokens = food_category.translate(separators_to_space).split()

    kept = [token for token in tokens if token not in ignored]

    # Fall back to the default term when every token was filtered out.
    return ' '.join(kept) if kept else default_term