# hgzero/tools/optimize_interview.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
고객경험 인터뷰 결과 최적안 도출 스크립트
유사한 내용을 통합하여 핵심 인사이트 추출
"""

import re
from collections import defaultdict

def read_interview_file(filepath):
    """Return the entire contents of the interview file as a single string.

    Args:
        filepath: Path of the interview file; it is opened in text mode and
            decoded as UTF-8.

    Returns:
        The full text of the file.
    """
    with open(filepath, mode='r', encoding='utf-8') as interview_file:
        text = interview_file.read()
    return text

# Field labels as they appear in the interview Markdown (e.g. "**행동:**"),
# mapped to the key each field is stored under in the per-stage result.
_FIELD_KEYS = {
    "행동": 'actions',
    "생각": 'thoughts',
    "긍정적 느낌": 'positive_feelings',
    "부정적 느낌": 'negative_feelings',
}

# A field's text ends at the next field label, at a "---" rule, or at the end
# of the stage section, whichever comes first. Every field shares the same
# terminator so that a missing or reordered neighbour cannot make one field
# swallow the text of the next.
_FIELD_END = (
    r"(?=\*\*(?:" + "|".join(re.escape(label) for label in _FIELD_KEYS)
    + r"):|---|\Z)"
)

# Compiled once at import time instead of on every loop iteration.
_FIELD_PATTERNS = {
    key: re.compile(
        r"\*\*" + re.escape(label) + r":\*\*(.*?)" + _FIELD_END, re.DOTALL
    )
    for label, key in _FIELD_KEYS.items()
}

# Straight and typographic double quotes that may wrap a quoted thought.
# Written as escapes so the curly quotes survive editors/tools that
# normalise them to plain ASCII quotes.
_QUOTE_CHARS = '"\u201c\u201d'


def extract_interviews(content):
    """Extract the per-stage fields from the interview Markdown text.

    Every heading of the form ``### <N>단계: <stage name>`` opens a section
    that runs until the next stage heading, the next ``## 인터뷰`` heading, or
    the end of the text. Inside each section the first occurrence of each of
    the fields ``**행동:**``, ``**생각:**``, ``**긍정적 느낌:**`` and
    ``**부정적 느낌:**`` is collected.

    Args:
        content: Full text of the interview Markdown file.

    Returns:
        A tuple ``(stages, stage_data)``. ``stages`` is the fixed, ordered list
        of stage names. ``stage_data`` is a ``defaultdict`` keyed by stage name
        whose values are dicts with the keys ``'actions'``,
        ``'positive_feelings'``, ``'negative_feelings'`` and ``'thoughts'``,
        each holding a list of stripped, non-empty strings in order of
        appearance. Looking up a stage with no data yields empty lists.
    """
    stages = [
        "문제 인식",
        "솔루션 탐색",
        "도입 및 준비",
        "회의 참여",
        "회의록 작성",
        "검토 및 보완",
        "공유",
        "활용 및 추적",
        "성과 평가 및 개선"
    ]

    stage_data = defaultdict(lambda: {
        'actions': [],
        'positive_feelings': [],
        'negative_feelings': [],
        'thoughts': []
    })

    for stage in stages:
        section_pattern = re.compile(
            rf"### \d+단계: {re.escape(stage)}(.*?)(?=### \d+단계:|## 인터뷰|\Z)",
            re.DOTALL,
        )

        for section in section_pattern.findall(content):
            for key, field_pattern in _FIELD_PATTERNS.items():
                found = field_pattern.search(section)
                if not found:
                    continue

                text = found.group(1).strip()
                if key == 'thoughts':
                    # Only thoughts have surrounding quotation marks removed;
                    # the other fields are kept verbatim.
                    text = text.strip(_QUOTE_CHARS)

                # Fields that are present but blank are not recorded.
                if text:
                    stage_data[stage][key].append(text)

    return stages, stage_data

# Ordered table of (group name, trigger substrings). An item joins every group
# for which at least one trigger occurs in its text, so one item can belong to
# several groups (e.g. '검토' triggers both '사전준비' and '검토보완').
_KEYWORD_GROUPS = (
    ('회의록작성', ('회의록', '기록', '작성', '누락', '피드백')),
    ('지식부족', ('지식', '용어', '이해', '모르', '전문')),
    ('시간소요', ('시간', '오래', '효율', '빠르')),
    ('사전준비', ('준비', '사전', '검토', '정리')),
    ('검토보완', ('검토', '확인', '검증', '수정')),
    ('정보공유', ('공유', '전달', '배포')),
    ('활용추적', ('활용', '참고', '추적')),
    ('개선성장', ('개선', '향상', '성장', '발전')),
    ('불안감', ('불안', '걱정', '두려움', '스트레스')),
    ('긍정감', ('자신감', '안도', '희망', '기대')),
    ('도구활용', ('템플릿', '도구', '자동화', 'AI')),
    ('정확성', ('정확', '오류', '실수', '틀리')),
    ('협업', ('협업', '동료', '팀', '도움')),
)


def group_similar_items(items):
    """Collapse similar items into one representative per keyword group.

    Each item has its whitespace runs collapsed to single spaces and is then
    assigned to every keyword group whose trigger substrings it contains;
    items matching no group go to the fallback group ``'기타'``. The longest
    item of each group is chosen as that group's representative.

    Args:
        items: Iterable of strings to consolidate.

    Returns:
        A list of distinct, non-empty representative strings, ordered by the
        first appearance of their group. Empty input yields ``[]``.
    """
    if not items:
        return []

    groups = defaultdict(list)
    for item in items:
        cleaned = ' '.join(item.split())
        matched = [
            group
            for group, triggers in _KEYWORD_GROUPS
            if any(trigger in cleaned for trigger in triggers)
        ]
        for group in matched or ['기타']:
            groups[group].append(cleaned)

    result = []
    for members in groups.values():
        # max() returns the first maximal element, so length ties resolve to
        # the earliest item in input order. De-duplicating through a set first
        # would make the winner depend on set iteration order, which varies
        # between runs because of string hash randomization.
        representative = max(members, key=len)
        # The same item may represent several groups; list it only once, and
        # never emit an empty string (an all-whitespace item cleans to '').
        if representative and representative not in result:
            result.append(representative)

    return result

def generate_optimized_markdown(stages, stage_data):
    """Render the consolidated interview results as Markdown tables.

    For every stage a two-column table is emitted with four sections (actions,
    positive feelings, negative feelings, overall opinions). Each section's
    items are first consolidated with ``group_similar_items``; the first row
    of a section carries the bold section label, the following rows leave the
    label column empty, and a section without items shows ``-``.

    Args:
        stages: Ordered iterable of stage names; one table is written per
            stage, in this order.
        stage_data: Mapping from stage name to a dict with the keys
            ``'actions'``, ``'positive_feelings'``, ``'negative_feelings'``
            and ``'thoughts'``, each a list of strings.

    Returns:
        The complete Markdown document as a single string.
    """
    # (label shown in the table, key in the per-stage dict), in output order.
    sections = (
        ("행동", 'actions'),
        ("긍정적 느낌", 'positive_feelings'),
        ("부정적 느낌", 'negative_feelings'),
        ("전반적 의견", 'thoughts'),
    )

    lines = [
        "# 고객경험 인터뷰 결과 취합 (최적안)",
        "",
        "> 10명의 인터뷰 결과를 분석하여 유사한 내용을 통합하고 핵심 인사이트를 추출했습니다.",
        "",
    ]

    for stage in stages:
        lines += [f"## {stage}", "", "| 구분 | 내용 |", "|------|------|"]
        data = stage_data[stage]

        for label, key in sections:
            # An empty section still gets one row, with "-" as its content.
            entries = group_similar_items(data[key]) or ["-"]
            for position, entry in enumerate(entries):
                first_column = f"| **{label}** |" if position == 0 else "| |"
                # A raw "|" inside a cell would be read as a column separator
                # and break the table, so it is backslash-escaped.
                cell = entry.replace("|", "\\|")
                lines.append(f"{first_column} {cell} |")

        # Blank line separating this table from the next stage heading.
        lines.append("")

    return "\n".join(lines) + "\n"

def main(input_file=None, output_file=None):
    """Run the whole pipeline: read, extract, consolidate, write, report.

    Reads the interview Markdown file, extracts the per-stage fields, renders
    the consolidated Markdown and writes it to the output file, printing
    progress messages along the way. Afterwards prints, for every stage and
    field, how many items there were before and after consolidation.

    Args:
        input_file: Path of the interview Markdown file to read. When omitted,
            the original hard-coded project path is used.
        output_file: Path the consolidated Markdown is written to (UTF-8,
            overwriting any existing file). When omitted, the original
            hard-coded project path is used.
    """
    # The defaults keep the no-argument call working exactly as before, while
    # callers on other machines can pass their own locations.
    if input_file is None:
        input_file = '/Users/adela/home/workspace/HGZero/define/고객경험인터뷰결과.md'
    if output_file is None:
        output_file = '/Users/adela/home/workspace/HGZero/define/고객경험인터뷰결과취합.md'

    print("파일 읽는 중...")
    content = read_interview_file(input_file)

    print("인터뷰 내용 추출 중...")
    stages, stage_data = extract_interviews(content)

    print("유사 항목 통합 및 최적화 중...")
    md_output = generate_optimized_markdown(stages, stage_data)

    print(f"결과 파일 저장 중: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(md_output)

    print("완료!")

    # Consolidation statistics: item counts before -> after grouping.
    report_fields = (
        ("행동", 'actions'),
        ("긍정적 느낌", 'positive_feelings'),
        ("부정적 느낌", 'negative_feelings'),
        ("전반적 의견", 'thoughts'),
    )

    print("\n=== 최적화 통계 ===")
    for stage in stages:
        data = stage_data[stage]
        print(f"{stage}:")
        for label, key in report_fields:
            consolidated = group_similar_items(data[key])
            print(f"  - {label}: {len(data[key])}개 → {len(consolidated)}개")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()