STT 서비스 음성 인식 및 AI 제안사항 표시 기능 구현

- PCM 16kHz 포맷 지원으로 Azure Speech 인식 성공
- WebSocket 실시간 전송 기능 추가
- DB 저장 로직 제거 (AI 서비스에서 제안사항 저장)
- AI SSE 기반 제안사항 표시 테스트 페이지 추가

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-07-28 06:49:09 +00:00 · 2025-10-28 10:12:55 +09:00
parent 7e30f6b82e
commit 2c3bc432b3
16 changed files with 1610 additions and 76 deletions
@@ -33,7 +33,7 @@
          <entry key="CORS_ALLOWED_ORIGINS" value="http://localhost:*" />

          <!-- Azure Speech Service Configuration -->
-          <entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" />
+          <entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" /> <!-- Never commit the real key; supply it from local, untracked configuration or a secret store. -->
          <entry key="AZURE_SPEECH_REGION" value="eastus" />
          <entry key="AZURE_SPEECH_LANGUAGE" value="ko-KR" />

@@ -34,8 +34,8 @@
          <entry key="CORS_ALLOWED_ORIGINS" value="http://localhost:3000,http://localhost:8080,http://localhost:8084" />
          
          <!-- Azure Speech Services 설정 -->
-          <entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" />
-          <entry key="AZURE_SPEECH_REGION" value="koreacentral" />
+          <entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" /> <!-- Never commit the real key; supply it from local, untracked configuration or a secret store. -->
+          <entry key="AZURE_SPEECH_REGION" value="eastus" />
          <entry key="AZURE_SPEECH_LANGUAGE" value="ko-KR" />
          
          <!-- Azure Blob Storage 설정 -->
@@ -15,8 +15,14 @@ dependencies {
    // Database
    runtimeOnly 'org.postgresql:postgresql'

-    // Azure Speech SDK
-    implementation "com.microsoft.cognitiveservices.speech:client-sdk:${azureSpeechVersion}"
+    // Azure Speech SDK (macOS/Linux/Windows용)
+    implementation("com.microsoft.cognitiveservices.speech:client-sdk:${azureSpeechVersion}") {
+        artifact {
+            name = 'client-sdk'
+            extension = 'jar'
+            type = 'jar'
+        }
+    }

    // Azure Blob Storage
    implementation "com.azure:azure-storage-blob:${azureBlobVersion}"
@@ -42,4 +42,24 @@ public class RedisStreamConfig {
    public StringRedisTemplate stringRedisTemplate(RedisConnectionFactory connectionFactory) {
        return new StringRedisTemplate(connectionFactory);
    }
+
+    /**
+     * General-purpose {@link RedisTemplate} for storing arbitrary objects.
+     *
+     * <p>Keys and hash keys use {@link StringRedisSerializer}; values and hash values use
+     * {@link GenericJackson2JsonRedisSerializer}.
+     *
+     * @param connectionFactory connection factory the template is bound to
+     * @return the initialized template
+     */
+    @Bean
+    public RedisTemplate<String, Object> redisTemplate(RedisConnectionFactory connectionFactory) {
+        // One serializer instance per slot, exactly as many as the template has slots
+        StringRedisSerializer keySerializer = new StringRedisSerializer();
+        StringRedisSerializer hashKeySerializer = new StringRedisSerializer();
+        GenericJackson2JsonRedisSerializer valueSerializer = new GenericJackson2JsonRedisSerializer();
+        GenericJackson2JsonRedisSerializer hashValueSerializer = new GenericJackson2JsonRedisSerializer();
+
+        RedisTemplate<String, Object> objectTemplate = new RedisTemplate<>();
+        objectTemplate.setConnectionFactory(connectionFactory);
+        objectTemplate.setKeySerializer(keySerializer);
+        objectTemplate.setHashKeySerializer(hashKeySerializer);
+        objectTemplate.setValueSerializer(valueSerializer);
+        objectTemplate.setHashValueSerializer(hashValueSerializer);
+        objectTemplate.afterPropertiesSet();
+
+        return objectTemplate;
+    }
 }
@@ -2,10 +2,12 @@ package com.unicorn.hgzero.stt.config;

 import com.unicorn.hgzero.stt.controller.AudioWebSocketHandler;
 import lombok.RequiredArgsConstructor;
+import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.web.socket.config.annotation.EnableWebSocket;
 import org.springframework.web.socket.config.annotation.WebSocketConfigurer;
 import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry;
+import org.springframework.web.socket.server.standard.ServletServerContainerFactoryBean;

 /**
 * WebSocket 설정
@@ -24,4 +26,16 @@ public class WebSocketConfig implements WebSocketConfigurer {
        registry.addHandler(audioWebSocketHandler, "/ws/audio")
                .setAllowedOrigins("*"); // 실제 운영 환경에서는 특정 도메인으로 제한
    }
+
+    /**
+     * Sets the WebSocket message buffer limits.
+     *
+     * <p>Both the text and the binary buffer are set to 10 MB so that audio chunks fit.
+     *
+     * @return the container factory bean with both buffer sizes applied
+     */
+    @Bean
+    public ServletServerContainerFactoryBean createWebSocketContainer() {
+        final int bufferSizeBytes = 10 * 1024 * 1024; // 10MB
+
+        ServletServerContainerFactoryBean factoryBean = new ServletServerContainerFactoryBean();
+        factoryBean.setMaxBinaryMessageBufferSize(bufferSizeBytes);
+        factoryBean.setMaxTextMessageBufferSize(bufferSizeBytes);
+        return factoryBean;
+    }
 }
@@ -12,13 +12,12 @@ import org.springframework.web.socket.TextMessage;
 import org.springframework.web.socket.WebSocketSession;
 import org.springframework.web.socket.handler.AbstractWebSocketHandler;

-import java.util.Base64;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;

 /**
 * 오디오 WebSocket 핸들러
- * 프론트엔드에서 실시간 오디오 스트림을 수신
+ * 프론트엔드에서 실시간 오디오 스트림을 수신하고 STT 결과를 전송
 */
@Slf4j
@Component
@@ -31,6 +30,9 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
    // 세션별 회의 ID 매핑
    private final Map<String, String> sessionMeetingMap = new ConcurrentHashMap<>();

+    // 회의 ID별 세션 목록 (결과 브로드캐스트용)
+    private final Map<String, Set<WebSocketSession>> meetingSessionsMap = new ConcurrentHashMap<>();
+
    @Override
    public void afterConnectionEstablished(WebSocketSession session) throws Exception {
        log.info("WebSocket 연결 성공 - sessionId: {}", session.getId());
@@ -50,6 +52,11 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
                // 녹음 시작
                String meetingId = (String) data.get("meetingId");
                sessionMeetingMap.put(session.getId(), meetingId);
+
+                // 세션을 회의별 목록에 추가
+                meetingSessionsMap.computeIfAbsent(meetingId, k -> ConcurrentHashMap.newKeySet())
+                        .add(session);
+
                log.info("녹음 시작 - sessionId: {}, meetingId: {}", session.getId(), meetingId);

                // 응답 전송
@@ -147,9 +154,66 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
        }
    }

+    /**
+     * STT 결과를 특정 회의의 모든 클라이언트에게 전송
+     */
+    public void sendTranscriptToMeeting(String meetingId, String text, double confidence) {
+        Set<WebSocketSession> sessions = meetingSessionsMap.get(meetingId);
+
+        if (sessions == null || sessions.isEmpty()) {
+            log.debug("전송할 세션 없음 - meetingId: {}", meetingId);
+            return;
+        }
+
+        try {
+            Map<String, Object> result = new HashMap<>();
+            result.put("transcript", text);
+            result.put("confidence", confidence);
+            result.put("timestamp", System.currentTimeMillis());
+            result.put("speaker", "참석자");
+
+            String jsonMessage = objectMapper.writeValueAsString(result);
+            TextMessage message = new TextMessage(jsonMessage);
+
+            // 모든 세션에 브로드캐스트
+            Iterator<WebSocketSession> iterator = sessions.iterator();
+            while (iterator.hasNext()) {
+                WebSocketSession session = iterator.next();
+                try {
+                    if (session.isOpen()) {
+                        session.sendMessage(message);
+                    } else {
+                        iterator.remove();
+                    }
+                } catch (Exception e) {
+                    log.error("메시지 전송 실패 - sessionId: {}", session.getId(), e);
+                    iterator.remove();
+                }
+            }
+
+            log.info("STT 결과 전송 완료 - meetingId: {}, sessions: {}개, text: {}",
+                    meetingId, sessions.size(), text);
+
+        } catch (Exception e) {
+            log.error("STT 결과 전송 실패 - meetingId: {}", meetingId, e);
+        }
+    }
+
    @Override
    public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception {
        String meetingId = sessionMeetingMap.remove(session.getId());
+
+        // Detach the session from its meeting's broadcast set; drop the set once it is empty
+        Set<WebSocketSession> meetingSessions =
+                (meetingId == null) ? null : meetingSessionsMap.get(meetingId);
+        if (meetingSessions != null) {
+            meetingSessions.remove(session);
+            if (meetingSessions.isEmpty()) {
+                meetingSessionsMap.remove(meetingId);
+            }
+        }
+
        log.info("WebSocket 연결 종료 - sessionId: {}, meetingId: {}, status: {}",
                session.getId(), meetingId, status);
    }
@@ -1,15 +1,13 @@
 package com.unicorn.hgzero.stt.service;

+import com.unicorn.hgzero.stt.controller.AudioWebSocketHandler;
 import com.unicorn.hgzero.stt.dto.AudioChunkDto;
 import com.unicorn.hgzero.stt.event.TranscriptionEvent;
 import com.unicorn.hgzero.stt.event.publisher.EventPublisher;
-import com.unicorn.hgzero.stt.repository.entity.TranscriptSegmentEntity;
-import com.unicorn.hgzero.stt.repository.jpa.TranscriptSegmentRepository;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;

 import java.time.LocalDateTime;
 import java.util.List;
@@ -19,6 +17,9 @@ import java.util.UUID;
 /**
 * 오디오 배치 프로세서
 * 5초마다 Redis에 축적된 오디오를 처리하여 텍스트로 변환
+ *
+ * Note: STT 결과는 DB에 저장하지 않고, Event Hub와 WebSocket으로만 전송
+ *       최종 회의록은 AI 서비스에서 저장
 */
@Slf4j
@Service
@@ -27,18 +28,17 @@ public class AudioBatchProcessor {

    private final AudioBufferService audioBufferService;
    private final AzureSpeechService azureSpeechService;
-    private final TranscriptSegmentRepository segmentRepository;
    private final EventPublisher eventPublisher;
+    private final AudioWebSocketHandler webSocketHandler;

    /**
     * 5초마다 오디오 배치 처리
     * - Redis에서 오디오 청크 조회
     * - Azure Speech로 텍스트 변환
-     * - DB 저장
-     * - Event Hub 이벤트 발행
+     * - Event Hub 이벤트 발행 (AI 서비스로 전송)
+     * - WebSocket 실시간 전송 (클라이언트 표시)
     */
    @Scheduled(fixedDelay = 5000, initialDelay = 10000) // 5초마다 실행, 최초 10초 후 시작
-    @Transactional
    public void processAudioBatch() {
        try {
            // 활성 회의 목록 조회
@@ -96,12 +96,12 @@ public class AudioBatchProcessor {
                return;
            }

-            // 텍스트 세그먼트 DB 저장
-            saveTranscriptSegment(meetingId, result);
-
-            // Event Hub 이벤트 발행
+            // Event Hub 이벤트 발행 (AI 서비스로 전송)
            publishTranscriptionEvent(meetingId, result);

+            // WebSocket으로 실시간 결과 전송 (클라이언트 표시)
+            sendTranscriptToClients(meetingId, result);
+
            // Redis 정리
            audioBufferService.clearProcessedChunks(meetingId);

@@ -112,35 +112,9 @@ public class AudioBatchProcessor {
        }
    }

-    /**
-     * 텍스트 세그먼트 DB 저장
-     */
-    private void saveTranscriptSegment(String meetingId, AzureSpeechService.RecognitionResult result) {
-        String segmentId = UUID.randomUUID().toString();
-        long timestamp = System.currentTimeMillis();
-        boolean warningFlag = result.getConfidence() < 0.6;
-
-        TranscriptSegmentEntity segment = TranscriptSegmentEntity.builder()
-                .segmentId(segmentId)
-                .recordingId(meetingId) // 간소화: recordingId = meetingId
-                .text(result.getText())
-                .speakerId("UNKNOWN") // 화자 식별 제거
-                .speakerName("참석자")
-                .timestamp(timestamp)
-                .duration(5.0) // 5초 분량
-                .confidence(result.getConfidence())
-                .warningFlag(warningFlag)
-                .chunkIndex(0)
-                .build();
-
-        segmentRepository.save(segment);
-
-        log.debug("텍스트 세그먼트 저장 완료 - segmentId: {}, text: {}",
-                segmentId, result.getText());
-    }
-
    /**
     * Event Hub 이벤트 발행 (AI 서비스로 전송)
+     * AI 서비스에서 Claude API로 제안사항 분석 후 처리
     */
    private void publishTranscriptionEvent(String meetingId, AzureSpeechService.RecognitionResult result) {
        try {
@@ -167,4 +141,16 @@ public class AudioBatchProcessor {
            log.error("Event Hub 이벤트 발행 실패 - meetingId: {}", meetingId, e);
        }
    }
+
+    /**
+     * Pushes a recognition result to the meeting's WebSocket clients.
+     *
+     * <p>Any {@link Exception} raised while doing so is logged and not rethrown.
+     *
+     * @param meetingId meeting whose clients should receive the text
+     * @param result    recognition result supplying the text and confidence
+     */
+    private void sendTranscriptToClients(String meetingId, AzureSpeechService.RecognitionResult result) {
+        try {
+            String recognizedText = result.getText();
+            double confidence = result.getConfidence();
+
+            webSocketHandler.sendTranscriptToMeeting(meetingId, recognizedText, confidence);
+            log.debug("WebSocket 결과 전송 완료 - meetingId: {}, text: {}", meetingId, recognizedText);
+        } catch (Exception e) {
+            log.error("WebSocket 결과 전송 실패 - meetingId: {}", meetingId, e);
+        }
+    }
 }
@@ -8,6 +8,7 @@ import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.stereotype.Service;

 import java.util.ArrayList;
+import java.util.Base64;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -38,9 +39,12 @@ public class AudioBufferService {
        try {
            String streamKey = getStreamKey(chunk.getMeetingId());

+            // 바이트 배열을 Base64로 인코딩
+            String encodedAudioData = Base64.getEncoder().encodeToString(chunk.getAudioData());
+
            // Hash 형태로 저장
            Map<String, Object> data = Map.of(
-                    "audioData", chunk.getAudioData(),
+                    "audioData", encodedAudioData,
                    "timestamp", chunk.getTimestamp(),
                    "chunkIndex", chunk.getChunkIndex(),
                    "format", chunk.getFormat() != null ? chunk.getFormat() : "audio/webm",
@@ -87,9 +91,13 @@ public class AudioBufferService {
            for (MapRecord<String, Object, Object> record : records) {
                Map<Object, Object> value = record.getValue();

+                // Base64로 인코딩된 문자열을 바이트 배열로 디코딩
+                String encodedAudioData = (String) value.get("audioData");
+                byte[] audioData = Base64.getDecoder().decode(encodedAudioData);
+
                AudioChunkDto chunk = AudioChunkDto.builder()
                        .meetingId(meetingId)
-                        .audioData((byte[]) value.get("audioData"))
+                        .audioData(audioData)
                        .timestamp(Long.valueOf(value.get("timestamp").toString()))
                        .chunkIndex(Integer.valueOf(value.get("chunkIndex").toString()))
                        .format((String) value.get("format"))