hgzero/design/backend/api/stt-service-api.yaml

# Document header: OpenAPI spec version and descriptive metadata for the STT service.
# Both version values are quoted so every parser reads them as strings.
openapi: '3.0.3'
info:
  title: STT Service API
  version: '1.0.0'
  contact:
    name: STT Service Team
    email: stt-team@example.com
  # Markdown overview: core features (recording start/stop, real-time streaming
  # speech-to-text, Azure Speech Service integration) and positioning notes.
  description: |
    회의록 작성 및 공유 개선 서비스 - STT Service API 명세

    **핵심 기능:**
    - 음성 녹음 시작/중지 관리
    - 실시간 음성-텍스트 변환 (스트리밍)
    - Azure Speech Service 통합

    **차별화 포인트:**
    - 기본 기능 (Hygiene Factor) - 경쟁사 대부분 제공
    - 실시간 스트리밍 처리로 즉각적인 자막 제공
    - **단순화**: 배치 처리 및 화자 식별 제거, 실시간 전용 기능

# Base URLs per deployment stage. The local entry uses port 8084 and an /api/v1
# base path, while the hosted entries use /stt/v1.
servers:
  - description: Production Server
    url: 'https://api.example.com/stt/v1'
  - description: Development Server
    url: 'https://dev-api.example.com/stt/v1'
  - description: Local Development Server
    url: 'http://localhost:8084/api/v1'

# Tag catalogue: Recording (recording management) and Transcription (speech-to-text).
tags:
  - {name: Recording, description: '음성 녹음 관리 API'}
  - {name: Transcription, description: '음성-텍스트 변환 API'}

paths:
  # POST /recordings/prepare — prepares a recording session when a meeting starts.
  # Documented flow: duplicate-session check, DB record creation, Azure Speech
  # recognizer initialisation, Blob Storage path creation, RecordingStarted event (Kafka).
  /recordings/prepare:
    post:
      tags: [Recording]
      operationId: prepareRecording
      summary: 회의 녹음 준비
      description: |
        회의 시작 시 녹음 세션을 준비하고 Azure Speech Service 초기화

        **처리 흐름:**
        1. 녹음 세션 검증 (중복 방지)
        2. DB에 녹음 정보 생성
        3. Azure Speech 인식기 초기화
        4. Blob Storage 저장 경로 생성
        5. RecordingStarted 이벤트 발행 (Kafka)
      x-user-story: UFR-STT-010
      x-controller: RecordingController
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PrepareRecordingRequest'
            examples:
              normal:
                summary: 일반 회의 녹음 준비
                value:
                  meetingId: 'MTG-2025-001'
                  sessionId: 'SESSION-12345'
                  language: 'ko-KR'
                  attendeeCount: 5
      responses:
        '200':
          description: 녹음 준비 성공
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PrepareRecordingResponse'
              examples:
                success:
                  summary: 준비 성공
                  value:
                    recordingId: 'REC-20250123-001'
                    sessionId: 'SESSION-12345'
                    status: 'READY'
                    streamUrl: 'wss://api.example.com/stt/v1/ws/stt/SESSION-12345'
                    storagePath: 'recordings/MTG-2025-001/SESSION-12345.wav'
                    estimatedInitTime: 1100
        '400':
          $ref: '#/components/responses/BadRequest'
        # Duplicate session: a recording is already in progress.
        '409':
          description: 녹음 세션 중복
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                code: 'RECORDING_ALREADY_EXISTS'
                message: '이미 진행 중인 녹음 세션이 있습니다'
                timestamp: '2025-01-23T10:30:00Z'
        '500':
          $ref: '#/components/responses/InternalServerError'

  # POST /recordings/{recordingId}/start — starts recording on a prepared session.
  # Documented flow: status becomes RECORDING, the start time is stored and the
  # WebSocket connection is activated.
  /recordings/{recordingId}/start:
    post:
      tags:
        - Recording
      summary: 음성 녹음 시작
      description: |
        준비된 녹음 세션의 실제 녹음을 시작

        **처리 흐름:**
        1. 녹음 상태를 'RECORDING'으로 업데이트
        2. 시작 시간 기록
        3. WebSocket 연결 활성화
      operationId: startRecording
      x-user-story: UFR-STT-010
      x-controller: RecordingController
      parameters:
        - $ref: '#/components/parameters/RecordingIdParam'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartRecordingRequest'
            example:
              startedBy: "USER-123"
              recordingMode: "REAL_TIME"
      responses:
        '200':
          description: 녹음 시작 성공
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RecordingStatusResponse'
              example:
                recordingId: "REC-20250123-001"
                status: "RECORDING"
                startTime: "2025-01-23T10:30:00Z"
                duration: 0
        # The body is mandatory and StartRecordingRequest requires `startedBy`, so a
        # validation failure needs a documented response, as /recordings/prepare and
        # /transcripts/stream already declare.
        '400':
          $ref: '#/components/responses/BadRequest'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'
      security:
        - BearerAuth: []

  # POST /recordings/{recordingId}/stop — stops an active recording and stores the file.
  # Documented flow: status becomes STOPPED, end time and total duration are stored,
  # the final file is saved to Azure Blob and a RecordingStopped event is published.
  /recordings/{recordingId}/stop:
    post:
      tags:
        - Recording
      summary: 음성 녹음 중지
      description: |
        진행 중인 녹음을 중지하고 최종 파일 저장

        **처리 흐름:**
        1. 녹음 상태를 'STOPPED'으로 업데이트
        2. 종료 시간 및 총 시간 기록
        3. Azure Blob에 최종 파일 저장
        4. RecordingStopped 이벤트 발행
      operationId: stopRecording
      x-user-story: UFR-STT-010
      x-controller: RecordingController
      parameters:
        - $ref: '#/components/parameters/RecordingIdParam'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StopRecordingRequest'
            example:
              stoppedBy: "USER-123"
              reason: "MEETING_END"
      responses:
        '200':
          description: 녹음 중지 성공
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RecordingStatusResponse'
              example:
                recordingId: "REC-20250123-001"
                status: "STOPPED"
                startTime: "2025-01-23T10:30:00Z"
                endTime: "2025-01-23T11:00:00Z"
                duration: 1800
                fileSize: 172800000
                storagePath: "recordings/MTG-2025-001/SESSION-12345.wav"
        # The body is mandatory and StopRecordingRequest requires `stoppedBy`, so a
        # validation failure needs a documented response, as /recordings/prepare and
        # /transcripts/stream already declare.
        '400':
          $ref: '#/components/responses/BadRequest'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'
      security:
        - BearerAuth: []

  # GET /recordings/{recordingId} — returns the details of one recording session.
  /recordings/{recordingId}:
    get:
      tags:
        - Recording
      summary: 녹음 정보 조회
      description: 특정 녹음 세션의 상세 정보 조회
      operationId: getRecording
      x-user-story: UFR-STT-010
      x-controller: RecordingController
      parameters:
        - $ref: '#/components/parameters/RecordingIdParam'
      responses:
        '200':
          description: 녹음 정보 조회 성공
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RecordingDetailResponse'
              # The example lists only properties that RecordingDetailResponse defines;
              # the schema has no speaker-related field.
              example:
                recordingId: "REC-20250123-001"
                meetingId: "MTG-2025-001"
                sessionId: "SESSION-12345"
                status: "RECORDING"
                startTime: "2025-01-23T10:30:00Z"
                duration: 300
                segmentCount: 45
                storagePath: "recordings/MTG-2025-001/SESSION-12345.wav"
                language: "ko-KR"
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'
      security:
        - BearerAuth: []

  # POST /transcripts/stream — submits one audio chunk and returns the recognised segment.
  # Documented flow: receive audio, recognise via Azure Speech Service, check confidence,
  # store the segment, publish TranscriptSegmentReady (Kafka), push captions over WebSocket.
  # NOTE(review): the description states a 70% confidence threshold, while
  # TranscriptionSegmentResponse.warningFlag is documented as "< 60%" — confirm which applies.
  /transcripts/stream:
    post:
      tags:
        - Transcription
      summary: 실시간 음성-텍스트 변환 (스트리밍)
      description: |
        WebSocket을 통한 실시간 음성 스트림 변환

        **처리 흐름:**
        1. 음성 데이터 스트림 수신
        2. Azure Speech Service 실시간 인식
        3. 신뢰도 검증 (70% threshold)
        4. DB에 세그먼트 저장
        5. TranscriptSegmentReady 이벤트 발행 (Kafka)
        6. WebSocket으로 실시간 자막 전송

        **성능:**
        - 실시간 인식 지연: < 1초
        - 처리 시간: 1-3초
      operationId: streamTranscription
      x-user-story: UFR-STT-020
      x-controller: TranscriptController
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StreamTranscriptionRequest'
            example:
              recordingId: "REC-20250123-001"
              # audioData is `format: byte`, so the example must be real Base64; this value
              # is the first 24 bytes of a WAV header. Placeholder text with underscores
              # is outside the Base64 alphabet.
              audioData: "UklGRiQAAABXQVZFZm10IBAAAAABAAEA"
              timestamp: 1234567890
              chunkIndex: 42
      responses:
        '200':
          description: 변환 성공 (부분 결과)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TranscriptionSegmentResponse'
              example:
                transcriptId: "TRS-SEG-001"
                recordingId: "REC-20250123-001"
                text: "안녕하세요, 오늘 회의를 시작하겠습니다."
                timestamp: 1234567890
                duration: 3.5
                confidence: 0.92
                warningFlag: false
        '400':
          $ref: '#/components/responses/BadRequest'
        '500':
          $ref: '#/components/responses/InternalServerError'
      security:
        - BearerAuth: []


  # GET /transcripts/{recordingId} — returns the complete transcript of one recording:
  # full text, segment list, timestamps and confidence scores.
  /transcripts/{recordingId}:
    get:
      tags: [Transcription]
      operationId: getTranscription
      summary: 변환 텍스트 전체 조회
      description: |
        특정 녹음의 전체 변환 텍스트 조회

        **응답 데이터:**
        - 전체 텍스트
        - 세그먼트 목록
        - 타임스탬프 정보
        - 신뢰도 점수
      x-user-story: UFR-STT-020
      x-controller: TranscriptController
      security:
        - BearerAuth: []
      parameters:
        - $ref: '#/components/parameters/RecordingIdParam'
        # Optional switch for per-segment detail; defaults to false.
        - in: query
          name: includeSegments
          required: false
          description: 세그먼트 상세 정보 포함 여부
          schema:
            type: boolean
            default: false
      responses:
        '200':
          description: 변환 텍스트 조회 성공
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TranscriptionResponse'
              example:
                recordingId: 'REC-20250123-001'
                fullText: '안녕하세요, 오늘 회의를 시작하겠습니다...'
                segmentCount: 120
                totalDuration: 1800
                averageConfidence: 0.88
                segments:
                  - transcriptId: 'TRS-SEG-001'
                    text: '안녕하세요, 오늘 회의를 시작하겠습니다.'
                    timestamp: 0
                    duration: 3.5
                    confidence: 0.92
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'

components:
  # Authentication schemes referenced from each operation's `security` list.
  securitySchemes:
    # HTTP bearer authentication carrying a JWT.
    BearerAuth:
      description: JWT 토큰 기반 인증
      type: http
      scheme: bearer
      bearerFormat: JWT

  parameters:
    # Shared path parameter carrying the recording ID; referenced by every
    # path in this document that contains {recordingId}.
    RecordingIdParam:
      in: path
      name: recordingId
      required: true
      description: 녹음 ID
      schema:
        type: string
        example: 'REC-20250123-001'

  schemas:
    # Request body of POST /recordings/prepare. Only meetingId and sessionId are mandatory.
    PrepareRecordingRequest:
      type: object
      required: [meetingId, sessionId]
      properties:
        meetingId:
          description: 회의 ID
          type: string
          example: 'MTG-2025-001'
        sessionId:
          description: 세션 ID
          type: string
          example: 'SESSION-12345'
        # Recognition language; one of three locales, Korean by default.
        language:
          description: 음성 인식 언어
          type: string
          enum: [ko-KR, en-US, ja-JP]
          default: 'ko-KR'
          example: 'ko-KR'
        # Attendee count, constrained to 1..50.
        attendeeCount:
          description: 참석자 수
          type: integer
          minimum: 1
          maximum: 50
          example: 5

    # Success body of POST /recordings/prepare: identifiers, state, streaming URL
    # and storage location of the newly prepared recording.
    PrepareRecordingResponse:
      type: object
      properties:
        recordingId:
          description: 생성된 녹음 ID
          type: string
          example: 'REC-20250123-001'
        sessionId:
          description: 세션 ID
          type: string
          example: 'SESSION-12345'
        status:
          description: 녹음 상태
          type: string
          enum: [READY, RECORDING, STOPPED, ERROR]
          example: 'READY'
        # WebSocket endpoint for streaming.
        streamUrl:
          description: WebSocket 스트리밍 URL
          type: string
          example: 'wss://api.example.com/stt/v1/ws/stt/SESSION-12345'
        storagePath:
          description: Azure Blob Storage 저장 경로
          type: string
          example: 'recordings/MTG-2025-001/SESSION-12345.wav'
        # Estimated initialisation time in milliseconds.
        estimatedInitTime:
          description: 예상 초기화 시간 (ms)
          type: integer
          example: 1100

    # Request body of POST /recordings/{recordingId}/start. Only startedBy is mandatory.
    StartRecordingRequest:
      type: object
      required: [startedBy]
      properties:
        # User ID of whoever starts the recording.
        startedBy:
          description: 녹음 시작자 사용자 ID
          type: string
          example: 'USER-123'
        # NOTE(review): BATCH is still accepted here although the API description says
        # batch processing was removed — confirm whether this value should remain.
        recordingMode:
          description: 녹음 모드
          type: string
          enum: [REAL_TIME, BATCH]
          default: 'REAL_TIME'
          example: 'REAL_TIME'

    # Request body of POST /recordings/{recordingId}/stop. Only stoppedBy is mandatory.
    StopRecordingRequest:
      type: object
      required: [stoppedBy]
      properties:
        # User ID of whoever stops the recording.
        stoppedBy:
          description: 녹음 중지자 사용자 ID
          type: string
          example: 'USER-123'
        # Why the recording was stopped.
        reason:
          description: 중지 사유
          type: string
          enum: [MEETING_END, USER_REQUEST, ERROR, TIMEOUT]
          example: 'MEETING_END'

    # Success body of the start and stop recording operations: current state plus
    # timing and, once available, file information.
    RecordingStatusResponse:
      type: object
      properties:
        recordingId:
          type: string
          description: 녹음 ID
          example: "REC-20250123-001"
        status:
          type: string
          description: 녹음 상태
          enum:
            - READY
            - RECORDING
            - STOPPED
            - ERROR
          example: "RECORDING"
        startTime:
          type: string
          format: date-time
          description: 녹음 시작 시간
          example: "2025-01-23T10:30:00Z"
        endTime:
          type: string
          format: date-time
          description: 녹음 종료 시간
          example: "2025-01-23T11:00:00Z"
        # Recording length in seconds.
        duration:
          type: integer
          description: 녹음 시간 (초)
          example: 1800
        # Size in bytes. Declared int64 because code generators commonly map an
        # unformatted integer to a 32-bit type, which cannot hold files over 2 GiB.
        fileSize:
          type: integer
          format: int64
          description: 파일 크기 (bytes)
          example: 172800000
        storagePath:
          type: string
          description: 저장 경로
          example: "recordings/MTG-2025-001/SESSION-12345.wav"

    # Success body of GET /recordings/{recordingId}: full details of one recording session.
    RecordingDetailResponse:
      type: object
      properties:
        recordingId:
          description: 녹음 ID
          type: string
          example: 'REC-20250123-001'
        meetingId:
          description: 회의 ID
          type: string
          example: 'MTG-2025-001'
        sessionId:
          description: 세션 ID
          type: string
          example: 'SESSION-12345'
        status:
          description: 녹음 상태
          type: string
          enum: [READY, RECORDING, STOPPED, ERROR]
          example: 'RECORDING'
        startTime:
          description: 시작 시간
          type: string
          format: date-time
          example: '2025-01-23T10:30:00Z'
        endTime:
          description: 종료 시간
          type: string
          format: date-time
          example: '2025-01-23T11:00:00Z'
        # Recording length in seconds.
        duration:
          description: 녹음 시간 (초)
          type: integer
          example: 300
        # Number of transcript segments.
        segmentCount:
          description: 세그먼트 수
          type: integer
          example: 45
        storagePath:
          description: 저장 경로
          type: string
          example: 'recordings/MTG-2025-001/SESSION-12345.wav'
        language:
          description: 음성 인식 언어
          type: string
          example: 'ko-KR'

    # Request body of POST /transcripts/stream: one audio chunk of a recording.
    # recordingId, audioData and timestamp are mandatory; chunkIndex is optional.
    StreamTranscriptionRequest:
      type: object
      required:
        - recordingId
        - audioData
        - timestamp
      properties:
        recordingId:
          type: string
          description: 녹음 ID
          example: "REC-20250123-001"
        # Base64-encoded audio chunk. The example is real Base64 (the first 24 bytes
        # of a WAV header) so it satisfies `format: byte`; an ellipsis is not Base64.
        audioData:
          type: string
          format: byte
          description: Base64 인코딩된 오디오 청크
          example: "UklGRiQAAABXQVZFZm10IBAAAAABAAEA"
        # Timestamp in milliseconds.
        timestamp:
          type: integer
          description: 타임스탬프 (ms)
          example: 1234567890
        # Sequence number of the chunk.
        chunkIndex:
          type: integer
          description: 청크 순서 번호
          example: 42

    # Success body of POST /transcripts/stream: one recognised transcript segment.
    TranscriptionSegmentResponse:
      type: object
      properties:
        transcriptId:
          description: 변환 텍스트 세그먼트 ID
          type: string
          example: 'TRS-SEG-001'
        recordingId:
          description: 녹음 ID
          type: string
          example: 'REC-20250123-001'
        text:
          description: 변환된 텍스트
          type: string
          example: '안녕하세요, 오늘 회의를 시작하겠습니다.'
        # Timestamp in milliseconds.
        timestamp:
          description: 타임스탬프 (ms)
          type: integer
          example: 1234567890
        # Utterance length in seconds.
        duration:
          description: 발언 시간 (초)
          type: number
          format: float
          example: 3.5
        # Confidence score, bounded to 0..1.
        confidence:
          description: 신뢰도 점수 (0-1)
          type: number
          format: float
          minimum: 0
          maximum: 1
          example: 0.92
        # NOTE(review): documented here as "< 60%", while the /transcripts/stream
        # description states a 70% confidence threshold — confirm which applies.
        warningFlag:
          description: 낮은 신뢰도 경고 플래그 (< 60%)
          type: boolean
          example: false


    # A transcript segment without an ID.
    # NOTE(review): no path or schema in this document references this schema —
    # confirm whether anything outside this file still uses it.
    TranscriptionSegment:
      type: object
      properties:
        text:
          description: 변환된 텍스트
          type: string
          example: '안녕하세요, 오늘 회의를 시작하겠습니다.'
        # Start timestamp in milliseconds.
        timestamp:
          description: 시작 타임스탬프 (ms)
          type: integer
          example: 1234567890
        # Utterance length in seconds.
        duration:
          description: 발언 시간 (초)
          type: number
          format: float
          example: 3.5
        confidence:
          description: 신뢰도 점수 (0-1)
          type: number
          format: float
          example: 0.92

    # Result summary of a batch transcription job.
    # NOTE(review): no path or schema in this document references this schema, and the
    # API description says batch processing was removed — confirm whether it is still needed.
    TranscriptionCompleteResponse:
      type: object
      properties:
        # Batch job ID.
        jobId:
          description: 배치 작업 ID
          type: string
          example: 'JOB-20250123-001'
        recordingId:
          description: 녹음 ID
          type: string
          example: 'REC-20250123-001'
        # Job state.
        status:
          description: 작업 상태
          type: string
          enum: [COMPLETED, FAILED]
          example: 'COMPLETED'
        segmentCount:
          description: 총 세그먼트 수
          type: integer
          example: 120
        # Total length in seconds.
        totalDuration:
          description: 총 시간 (초)
          type: integer
          example: 1800
        averageConfidence:
          description: 평균 신뢰도 점수
          type: number
          format: float
          example: 0.88

    # Success body of GET /transcripts/{recordingId}: the full transcript of a recording
    # with aggregate statistics and the list of segments.
    TranscriptionResponse:
      type: object
      properties:
        recordingId:
          type: string
          description: 녹음 ID
          example: "REC-20250123-001"
        # Full transcript text. The example is plain text without speaker labels,
        # because the API description states that speaker identification was removed.
        fullText:
          type: string
          description: 전체 변환 텍스트
          example: "안녕하세요, 오늘 회의를 시작하겠습니다..."
        segmentCount:
          type: integer
          description: 총 세그먼트 수
          example: 120
        # Total length in seconds.
        totalDuration:
          type: integer
          description: 총 시간 (초)
          example: 1800
        averageConfidence:
          type: number
          format: float
          description: 평균 신뢰도 점수
          example: 0.88
        segments:
          type: array
          description: 세그먼트 목록
          items:
            $ref: '#/components/schemas/TranscriptionSegmentDetail'

    # Item type of TranscriptionResponse.segments: one transcript segment with its ID.
    TranscriptionSegmentDetail:
      type: object
      properties:
        transcriptId:
          type: string
          description: 세그먼트 ID
          example: "TRS-SEG-001"
        text:
          type: string
          description: 변환된 텍스트
          example: "안녕하세요, 오늘 회의를 시작하겠습니다."
        # Timestamp in milliseconds.
        timestamp:
          type: integer
          description: 타임스탬프 (ms)
          example: 0
        # Utterance length in seconds.
        duration:
          type: number
          format: float
          description: 발언 시간 (초)
          example: 3.5
        # Bounded to 0..1, matching the same field in TranscriptionSegmentResponse.
        confidence:
          type: number
          format: float
          description: 신뢰도 점수
          minimum: 0
          maximum: 1
          example: 0.92

    # Error payload used by every error response defined in this document.
    ErrorResponse:
      type: object
      properties:
        # Error code.
        code:
          description: 오류 코드
          type: string
          example: 'RECORDING_NOT_FOUND'
        # Error message.
        message:
          description: 오류 메시지
          type: string
          example: '녹음을 찾을 수 없습니다'
        # When the error occurred.
        timestamp:
          description: 오류 발생 시간
          type: string
          format: date-time
          example: '2025-01-23T10:30:00Z'
        # Request path that produced the error.
        path:
          description: 요청 경로
          type: string
          example: '/api/v1/recordings/REC-999'

  responses:
    # Reusable "bad request" response carrying an ErrorResponse body.
    BadRequest:
      description: 잘못된 요청
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            code: 'INVALID_REQUEST'
            message: '요청 파라미터가 올바르지 않습니다'
            timestamp: '2025-01-23T10:30:00Z'

    # Reusable "resource not found" response; the example shows a missing recording.
    NotFound:
      description: 리소스를 찾을 수 없음
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            code: 'RECORDING_NOT_FOUND'
            message: '녹음을 찾을 수 없습니다'
            timestamp: '2025-01-23T10:30:00Z'

    # Reusable "internal server error" response carrying an ErrorResponse body.
    InternalServerError:
      description: 서버 내부 오류
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            code: 'INTERNAL_SERVER_ERROR'
            message: '서버 오류가 발생했습니다'
            timestamp: '2025-01-23T10:30:00Z'