STT 서비스 음성 인식 및 AI 제안사항 표시 기능 구현

- PCM 16kHz 포맷 지원으로 Azure Speech 인식 성공
- WebSocket 실시간 전송 기능 추가
- DB 저장 로직 제거 (AI 서비스에서 제안사항 저장)
- AI SSE 기반 제안사항 표시 테스트 페이지 추가

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Minseo-Jo 2025-10-28 10:12:55 +09:00
parent 7e30f6b82e
commit 2c3bc432b3
16 changed files with 1610 additions and 76 deletions

View File

@ -3,7 +3,6 @@ import asyncio
import logging
import json
from azure.eventhub.aio import EventHubConsumerClient
from azure.eventhub.extensions.checkpointstoreblobaio import BlobCheckpointStore
from app.config import get_settings
from app.services.redis_service import RedisService
@ -87,8 +86,8 @@ class EventHubService:
logger.debug(f"Redis 저장 완료 - meetingId: {meeting_id}")
# 체크포인트 업데이트
await partition_context.update_checkpoint(event)
# MVP 개발: checkpoint 업데이트 제거 (InMemory 모드)
# await partition_context.update_checkpoint(event)
except Exception as e:
logger.error(f"이벤트 처리 오류: {e}", exc_info=True)

View File

@ -1,5 +1,6 @@
"""AI Service - FastAPI 애플리케이션"""
import logging
import asyncio
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
@ -7,6 +8,7 @@ from contextlib import asynccontextmanager
from app.config import get_settings
from app.api.v1 import suggestions
from app.services.eventhub_service import start_eventhub_listener
# 로깅 설정
logging.basicConfig(
@ -27,8 +29,9 @@ async def lifespan(app: FastAPI):
logger.info(f"Redis: {settings.redis_host}:{settings.redis_port}")
logger.info("=" * 60)
# TODO: Event Hub 리스너 시작 (별도 백그라운드 태스크)
# asyncio.create_task(start_eventhub_listener())
# Event Hub 리스너 시작 (백그라운드 태스크)
logger.info("Event Hub 리스너 백그라운드 시작...")
asyncio.create_task(start_eventhub_listener())
yield
@ -43,11 +46,11 @@ app = FastAPI(
lifespan=lifespan
)
# CORS 설정
# CORS 설정 (개발 환경: 모든 origin 허용)
app.add_middleware(
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_origins=["*"], # 개발 환경에서 모든 origin 허용
allow_credentials=False, # allow_origins=["*"]일 때는 False여야 함
allow_methods=["*"],
allow_headers=["*"],
)

View File

@ -57,7 +57,7 @@
<entry key="AZURE_AI_SEARCH_INDEX" value="meeting-transcripts" />
<!-- Azure Event Hubs Configuration -->
<entry key="AZURE_EVENTHUB_CONNECTION_STRING" value="Endpoint=sb://hgzero-eventhub-ns.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=VUqZ9vFgu35E3c6RiUzoOGVUP8IZpFvlV+AEhC6sUpo=" />
<entry key="AZURE_EVENTHUB_CONNECTION_STRING" value="Endpoint=sb://hgzero-eventhub-ns.servicebus.windows.net/;SharedAccessKeyName=ai-listen-policy;SharedAccessKey=wqcbVIXlOMyn/C562lx6DD75AyjHQ87xo+AEhJ7js9Q=;EntityPath=hgzero-eventhub-name" />
<entry key="AZURE_EVENTHUB_NAMESPACE" value="hgzero-eventhub-ns" />
<entry key="AZURE_EVENTHUB_NAME" value="hgzero-eventhub-name" />
<entry key="AZURE_CHECKPOINT_STORAGE_CONNECTION_STRING" value="" />

View File

@ -50,31 +50,29 @@ public class EventHubConfig {
@PostConstruct
public void startEventProcessor() {
// Checkpoint Storage가 설정되지 않은 경우 Event Hub 기능 비활성화
if (checkpointStorageConnectionString == null || checkpointStorageConnectionString.isEmpty()) {
log.warn("Event Hub Processor 비활성화 - checkpoint storage 설정이 없습니다. " +
"개발 환경에서는 Event Hub 없이 실행 가능하며, 운영 환경에서는 AZURE_CHECKPOINT_STORAGE_CONNECTION_STRING 환경 변수를 설정해야 합니다.");
return;
}
log.info("Event Hub Processor 시작 - eventhub: {}, consumerGroup: {}",
eventHubName, consumerGroup);
// Blob Checkpoint Store 생성 (체크포인트 저장소)
BlobContainerAsyncClient blobContainerAsyncClient = new BlobContainerClientBuilder()
.connectionString(checkpointStorageConnectionString)
.containerName(checkpointContainer)
.buildAsyncClient();
// Event Processor Client 빌드
eventProcessorClient = new EventProcessorClientBuilder()
EventProcessorClientBuilder builder = new EventProcessorClientBuilder()
.connectionString(connectionString, eventHubName)
.consumerGroup(consumerGroup)
.checkpointStore(new BlobCheckpointStore(blobContainerAsyncClient))
.processEvent(this::processEvent)
.processError(this::processError)
.buildEventProcessorClient();
.processError(this::processError);
// Checkpoint Storage 설정
if (checkpointStorageConnectionString != null && !checkpointStorageConnectionString.isEmpty()) {
log.info("Checkpoint Storage 활성화 (Azure Blob) - container: {}", checkpointContainer);
BlobContainerAsyncClient blobContainerAsyncClient = new BlobContainerClientBuilder()
.connectionString(checkpointStorageConnectionString)
.containerName(checkpointContainer)
.buildAsyncClient();
builder.checkpointStore(new BlobCheckpointStore(blobContainerAsyncClient));
} else {
log.warn("Checkpoint Storage 미설정 - InMemory 모드 사용 (MVP 개발용, 재시작 시 처음부터 읽음)");
builder.checkpointStore(new InMemoryCheckpointStore());
}
eventProcessorClient = builder.buildEventProcessorClient();
eventProcessorClient.start();
log.info("Event Hub Processor 시작 완료");

View File

@ -44,7 +44,7 @@ subprojects {
hypersistenceVersion = '3.7.3'
openaiVersion = '0.18.2'
feignJacksonVersion = '13.1'
azureSpeechVersion = '1.37.0'
azureSpeechVersion = '1.44.0'
azureBlobVersion = '12.25.3'
azureEventHubsVersion = '5.18.2'
azureEventHubsCheckpointVersion = '1.19.2'

View File

@ -33,7 +33,7 @@
<entry key="CORS_ALLOWED_ORIGINS" value="http://localhost:*" />
<!-- Azure Speech Service Configuration -->
<entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" />
<entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="DubvGv3uV28knr8xlONVBzNvQADh1wW1dGTMRx4x3U5CLy8D1DgEJQQJ99BJACYeBjFXJ3w3AAAYACOGBVa7" />
<entry key="AZURE_SPEECH_REGION" value="eastus" />
<entry key="AZURE_SPEECH_LANGUAGE" value="ko-KR" />

View File

@ -34,8 +34,8 @@
<entry key="CORS_ALLOWED_ORIGINS" value="http://localhost:3000,http://localhost:8080,http://localhost:8084" />
<!-- Azure Speech Services 설정 -->
<entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="" />
<entry key="AZURE_SPEECH_REGION" value="koreacentral" />
<entry key="AZURE_SPEECH_SUBSCRIPTION_KEY" value="DubvGv3uV28knr8xlONVBzNvQADh1wW1dGTMRx4x3U5CLy8D1DgEJQQJ99BJACYeBjFXJ3w3AAAYACOGBVa7" />
<entry key="AZURE_SPEECH_REGION" value="eastus" />
<entry key="AZURE_SPEECH_LANGUAGE" value="ko-KR" />
<!-- Azure Blob Storage 설정 -->

View File

@ -15,8 +15,14 @@ dependencies {
// Database
runtimeOnly 'org.postgresql:postgresql'
// Azure Speech SDK
implementation "com.microsoft.cognitiveservices.speech:client-sdk:${azureSpeechVersion}"
// Azure Speech SDK (macOS/Linux/Windows용)
implementation("com.microsoft.cognitiveservices.speech:client-sdk:${azureSpeechVersion}") {
artifact {
name = 'client-sdk'
extension = 'jar'
type = 'jar'
}
}
// Azure Blob Storage
implementation "com.azure:azure-storage-blob:${azureBlobVersion}"

View File

@ -42,4 +42,24 @@ public class RedisStreamConfig {
public StringRedisTemplate stringRedisTemplate(RedisConnectionFactory connectionFactory) {
return new StringRedisTemplate(connectionFactory);
}
/**
 * General-purpose RedisTemplate for storing arbitrary objects.
 *
 * Plain and hash keys are serialized as strings; plain and hash values
 * are serialized as JSON via GenericJackson2JsonRedisSerializer.
 */
@Bean
public RedisTemplate<String, Object> redisTemplate(RedisConnectionFactory connectionFactory) {
RedisTemplate<String, Object> template = new RedisTemplate<>();
template.setConnectionFactory(connectionFactory);
// Key serializers: plain strings, human-readable in redis-cli.
template.setKeySerializer(new StringRedisSerializer());
template.setHashKeySerializer(new StringRedisSerializer());
// Value serializers: JSON for arbitrary object payloads.
template.setValueSerializer(new GenericJackson2JsonRedisSerializer());
template.setHashValueSerializer(new GenericJackson2JsonRedisSerializer());
template.afterPropertiesSet();
return template;
}
}

View File

@ -2,10 +2,12 @@ package com.unicorn.hgzero.stt.config;
import com.unicorn.hgzero.stt.controller.AudioWebSocketHandler;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.socket.config.annotation.EnableWebSocket;
import org.springframework.web.socket.config.annotation.WebSocketConfigurer;
import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry;
import org.springframework.web.socket.server.standard.ServletServerContainerFactoryBean;
/**
* WebSocket 설정
@ -24,4 +26,16 @@ public class WebSocketConfig implements WebSocketConfigurer {
registry.addHandler(audioWebSocketHandler, "/ws/audio")
.setAllowedOrigins("*"); // 실제 운영 환경에서는 특정 도메인으로 제한
}
/**
 * WebSocket container tuned for audio streaming.
 *
 * Raises both the text and binary message buffer limits to 10MB so that
 * large base64-encoded audio chunks are not rejected by the defaults.
 */
@Bean
public ServletServerContainerFactoryBean createWebSocketContainer() {
ServletServerContainerFactoryBean container = new ServletServerContainerFactoryBean();
container.setMaxTextMessageBufferSize(10 * 1024 * 1024); // 10MB
container.setMaxBinaryMessageBufferSize(10 * 1024 * 1024); // 10MB
return container;
}
}

View File

@ -12,13 +12,12 @@ import org.springframework.web.socket.TextMessage;
import org.springframework.web.socket.WebSocketSession;
import org.springframework.web.socket.handler.AbstractWebSocketHandler;
import java.util.Base64;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
* 오디오 WebSocket 핸들러
* 프론트엔드에서 실시간 오디오 스트림을 수신
* 프론트엔드에서 실시간 오디오 스트림을 수신하고 STT 결과를 전송
*/
@Slf4j
@Component
@ -31,6 +30,9 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
// 세션별 회의 ID 매핑
private final Map<String, String> sessionMeetingMap = new ConcurrentHashMap<>();
// 회의 ID별 세션 목록 (결과 브로드캐스트용)
private final Map<String, Set<WebSocketSession>> meetingSessionsMap = new ConcurrentHashMap<>();
@Override
public void afterConnectionEstablished(WebSocketSession session) throws Exception {
log.info("WebSocket 연결 성공 - sessionId: {}", session.getId());
@ -50,6 +52,11 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
// 녹음 시작
String meetingId = (String) data.get("meetingId");
sessionMeetingMap.put(session.getId(), meetingId);
// 세션을 회의별 목록에 추가
meetingSessionsMap.computeIfAbsent(meetingId, k -> ConcurrentHashMap.newKeySet())
.add(session);
log.info("녹음 시작 - sessionId: {}, meetingId: {}", session.getId(), meetingId);
// 응답 전송
@ -147,9 +154,66 @@ public class AudioWebSocketHandler extends AbstractWebSocketHandler {
}
}
/**
 * Broadcast an STT result to every WebSocket client joined to a meeting.
 *
 * Builds a JSON payload {transcript, confidence, timestamp, speaker} and
 * sends it to each open session registered in meetingSessionsMap; sessions
 * that are closed or fail on send are pruned from the set. All failures
 * are logged and swallowed so a delivery problem never propagates upward.
 *
 * @param meetingId  meeting whose sessions receive the message
 * @param text       recognized transcript text
 * @param confidence recognition confidence reported by the STT engine
 */
public void sendTranscriptToMeeting(String meetingId, String text, double confidence) {
Set<WebSocketSession> sessions = meetingSessionsMap.get(meetingId);
if (sessions == null || sessions.isEmpty()) {
log.debug("전송할 세션 없음 - meetingId: {}", meetingId);
return;
}
try {
Map<String, Object> result = new HashMap<>();
result.put("transcript", text);
result.put("confidence", confidence);
result.put("timestamp", System.currentTimeMillis());
// NOTE(review): speaker label is hard-coded — no diarization here.
result.put("speaker", "참석자");
String jsonMessage = objectMapper.writeValueAsString(result);
TextMessage message = new TextMessage(jsonMessage);
// Broadcast to all sessions, removing dead ones via the iterator.
Iterator<WebSocketSession> iterator = sessions.iterator();
while (iterator.hasNext()) {
WebSocketSession session = iterator.next();
try {
if (session.isOpen()) {
session.sendMessage(message);
} else {
// Closed session: drop it from the broadcast set.
iterator.remove();
}
} catch (Exception e) {
log.error("메시지 전송 실패 - sessionId: {}", session.getId(), e);
iterator.remove();
}
}
log.info("STT 결과 전송 완료 - meetingId: {}, sessions: {}개, text: {}",
meetingId, sessions.size(), text);
} catch (Exception e) {
log.error("STT 결과 전송 실패 - meetingId: {}", meetingId, e);
}
}
/**
 * Clean up registry state when a WebSocket connection closes.
 */
@Override
public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception {
// Drop the session -> meeting mapping for the closed connection.
String meetingId = sessionMeetingMap.remove(session.getId());
// Also remove it from the per-meeting broadcast set, discarding the
// set itself once the meeting's last session is gone.
if (meetingId != null) {
Set<WebSocketSession> sessions = meetingSessionsMap.get(meetingId);
if (sessions != null) {
sessions.remove(session);
if (sessions.isEmpty()) {
meetingSessionsMap.remove(meetingId);
}
}
}
log.info("WebSocket 연결 종료 - sessionId: {}, meetingId: {}, status: {}",
session.getId(), meetingId, status);
}

View File

@ -1,15 +1,13 @@
package com.unicorn.hgzero.stt.service;
import com.unicorn.hgzero.stt.controller.AudioWebSocketHandler;
import com.unicorn.hgzero.stt.dto.AudioChunkDto;
import com.unicorn.hgzero.stt.event.TranscriptionEvent;
import com.unicorn.hgzero.stt.event.publisher.EventPublisher;
import com.unicorn.hgzero.stt.repository.entity.TranscriptSegmentEntity;
import com.unicorn.hgzero.stt.repository.jpa.TranscriptSegmentRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.List;
@ -19,6 +17,9 @@ import java.util.UUID;
/**
* 오디오 배치 프로세서
* 5초마다 Redis에 축적된 오디오를 처리하여 텍스트로 변환
*
* Note: STT 결과는 DB에 저장하지 않고, Event Hub와 WebSocket으로만 전송
* 최종 회의록은 AI 서비스에서 저장
*/
@Slf4j
@Service
@ -27,18 +28,17 @@ public class AudioBatchProcessor {
private final AudioBufferService audioBufferService;
private final AzureSpeechService azureSpeechService;
private final TranscriptSegmentRepository segmentRepository;
private final EventPublisher eventPublisher;
private final AudioWebSocketHandler webSocketHandler;
/**
* 5초마다 오디오 배치 처리
* - Redis에서 오디오 청크 조회
* - Azure Speech로 텍스트 변환
* - DB 저장
* - Event Hub 이벤트 발행
* - Event Hub 이벤트 발행 (AI 서비스로 전송)
* - WebSocket 실시간 전송 (클라이언트 표시)
*/
@Scheduled(fixedDelay = 5000, initialDelay = 10000) // 5초마다 실행, 최초 10초 시작
@Transactional
public void processAudioBatch() {
try {
// 활성 회의 목록 조회
@ -96,12 +96,12 @@ public class AudioBatchProcessor {
return;
}
// 텍스트 세그먼트 DB 저장
saveTranscriptSegment(meetingId, result);
// Event Hub 이벤트 발행
// Event Hub 이벤트 발행 (AI 서비스로 전송)
publishTranscriptionEvent(meetingId, result);
// WebSocket으로 실시간 결과 전송 (클라이언트 표시)
sendTranscriptToClients(meetingId, result);
// Redis 정리
audioBufferService.clearProcessedChunks(meetingId);
@ -112,35 +112,9 @@ public class AudioBatchProcessor {
}
}
/**
* 텍스트 세그먼트 DB 저장
*/
private void saveTranscriptSegment(String meetingId, AzureSpeechService.RecognitionResult result) {
String segmentId = UUID.randomUUID().toString();
long timestamp = System.currentTimeMillis();
boolean warningFlag = result.getConfidence() < 0.6;
TranscriptSegmentEntity segment = TranscriptSegmentEntity.builder()
.segmentId(segmentId)
.recordingId(meetingId) // 간소화: recordingId = meetingId
.text(result.getText())
.speakerId("UNKNOWN") // 화자 식별 제거
.speakerName("참석자")
.timestamp(timestamp)
.duration(5.0) // 5초 분량
.confidence(result.getConfidence())
.warningFlag(warningFlag)
.chunkIndex(0)
.build();
segmentRepository.save(segment);
log.debug("텍스트 세그먼트 저장 완료 - segmentId: {}, text: {}",
segmentId, result.getText());
}
/**
* Event Hub 이벤트 발행 (AI 서비스로 전송)
* AI 서비스에서 Claude API로 제안사항 분석 처리
*/
private void publishTranscriptionEvent(String meetingId, AzureSpeechService.RecognitionResult result) {
try {
@ -167,4 +141,16 @@ public class AudioBatchProcessor {
log.error("Event Hub 이벤트 발행 실패 - meetingId: {}", meetingId, e);
}
}
/**
 * Push an STT result to connected WebSocket clients for live display.
 *
 * Delegates the broadcast to AudioWebSocketHandler; errors are logged
 * and swallowed so a delivery failure never aborts the batch cycle.
 */
private void sendTranscriptToClients(String meetingId, AzureSpeechService.RecognitionResult result) {
try {
webSocketHandler.sendTranscriptToMeeting(meetingId, result.getText(), result.getConfidence());
log.debug("WebSocket 결과 전송 완료 - meetingId: {}, text: {}", meetingId, result.getText());
} catch (Exception e) {
log.error("WebSocket 결과 전송 실패 - meetingId: {}", meetingId, e);
}
}
}

View File

@ -8,6 +8,7 @@ import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -38,9 +39,12 @@ public class AudioBufferService {
try {
String streamKey = getStreamKey(chunk.getMeetingId());
// 바이트 배열을 Base64로 인코딩
String encodedAudioData = Base64.getEncoder().encodeToString(chunk.getAudioData());
// Hash 형태로 저장
Map<String, Object> data = Map.of(
"audioData", chunk.getAudioData(),
"audioData", encodedAudioData,
"timestamp", chunk.getTimestamp(),
"chunkIndex", chunk.getChunkIndex(),
"format", chunk.getFormat() != null ? chunk.getFormat() : "audio/webm",
@ -87,9 +91,13 @@ public class AudioBufferService {
for (MapRecord<String, Object, Object> record : records) {
Map<Object, Object> value = record.getValue();
// Base64로 인코딩된 문자열을 바이트 배열로 디코딩
String encodedAudioData = (String) value.get("audioData");
byte[] audioData = Base64.getDecoder().decode(encodedAudioData);
AudioChunkDto chunk = AudioChunkDto.builder()
.meetingId(meetingId)
.audioData((byte[]) value.get("audioData"))
.audioData(audioData)
.timestamp(Long.valueOf(value.get("timestamp").toString()))
.chunkIndex(Integer.valueOf(value.get("chunkIndex").toString()))
.format((String) value.get("format"))

471
test-audio/stt-test-ai.html Normal file
View File

@ -0,0 +1,471 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>HGZero AI 제안사항 실시간 테스트</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
max-width: 900px;
margin: 50px auto;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: #333;
}
.container {
background: white;
border-radius: 15px;
padding: 30px;
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
}
h1 {
color: #667eea;
text-align: center;
margin-bottom: 10px;
}
.subtitle {
text-align: center;
color: #666;
margin-bottom: 30px;
}
.controls {
display: flex;
gap: 15px;
justify-content: center;
margin: 30px 0;
}
button {
padding: 15px 30px;
font-size: 16px;
border: none;
border-radius: 8px;
cursor: pointer;
transition: all 0.3s;
font-weight: bold;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
#startBtn {
background: #48bb78;
color: white;
}
#startBtn:hover:not(:disabled) {
background: #38a169;
transform: translateY(-2px);
}
#stopBtn {
background: #f56565;
color: white;
}
#stopBtn:hover:not(:disabled) {
background: #e53e3e;
transform: translateY(-2px);
}
.status {
text-align: center;
padding: 15px;
margin: 20px 0;
border-radius: 8px;
font-weight: bold;
}
.status.disconnected {
background: #fed7d7;
color: #c53030;
}
.status.connected {
background: #c6f6d5;
color: #276749;
}
.status.recording {
background: #feebc8;
color: #c05621;
}
.info-box {
background: #ebf8ff;
border-left: 4px solid #4299e1;
padding: 15px;
margin: 20px 0;
border-radius: 4px;
}
.info-box h3 {
margin-top: 0;
color: #2c5282;
}
#suggestions {
background: #f7fafc;
border: 2px solid #e2e8f0;
border-radius: 8px;
padding: 20px;
min-height: 300px;
max-height: 500px;
overflow-y: auto;
margin-top: 20px;
}
.suggestion-item {
padding: 15px;
margin: 10px 0;
background: white;
border-radius: 8px;
border-left: 4px solid #48bb78;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
animation: slideIn 0.3s ease-out;
}
@keyframes slideIn {
from {
opacity: 0;
transform: translateX(-20px);
}
to {
opacity: 1;
transform: translateX(0);
}
}
.suggestion-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 8px;
}
.suggestion-time {
color: #718096;
font-size: 0.85em;
}
.suggestion-confidence {
background: #48bb78;
color: white;
padding: 2px 8px;
border-radius: 12px;
font-size: 0.8em;
}
.suggestion-content {
color: #2d3748;
line-height: 1.6;
}
.log {
background: #1a202c;
color: #48bb78;
padding: 15px;
border-radius: 8px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
max-height: 150px;
overflow-y: auto;
margin-top: 20px;
}
.log-item {
margin: 3px 0;
}
.log-error {
color: #fc8181;
}
.log-info {
color: #63b3ed;
}
.empty-state {
text-align: center;
color: #a0aec0;
padding: 40px 20px;
}
</style>
</head>
<body>
<div class="container">
<h1>💡 HGZero AI 제안사항 실시간 테스트</h1>
<p class="subtitle">STT + Claude AI 기반 실시간 회의 제안사항</p>
<div class="info-box">
<h3>📋 테스트 정보</h3>
<p><strong>STT Service:</strong> <code>ws://localhost:8084/ws/audio</code></p>
<p><strong>AI Service:</strong> <code>http://localhost:8086/api/v1/ai/suggestions</code></p>
<p><strong>Meeting ID:</strong> <code id="meetingId">test-meeting-001</code></p>
</div>
<div id="status" class="status disconnected">
🔴 준비 중
</div>
<div class="controls">
<button id="startBtn" onclick="startSession()">
🎤 회의 시작
</button>
<button id="stopBtn" onclick="stopSession()" disabled>
⏹️ 회의 종료
</button>
</div>
<div id="suggestions">
<div class="empty-state">
<p>🎙️ 회의를 시작하면 AI가 분석한 제안사항이 여기에 표시됩니다.</p>
<p style="font-size: 0.9em; margin-top: 10px;">명확하게 회의 내용을 말씀해주세요.</p>
</div>
</div>
<div class="log" id="log">
<div class="log-item">시스템 로그...</div>
</div>
</div>
<script>
let sttWebSocket = null;
let aiEventSource = null;
let audioContext = null;
let micStream = null;
let chunkIndex = 0;
let isRecording = false;
const meetingId = 'test-meeting-001';
// Convert Float32 audio samples to 16-bit little-endian PCM.
// Returns an ArrayBuffer holding one signed 16-bit sample per input value.
function floatTo16BitPCM(float32Array) {
    const out = new ArrayBuffer(float32Array.length * 2);
    const view = new DataView(out);
    float32Array.forEach((sample, index) => {
        const clamped = Math.min(1, Math.max(-1, sample));
        const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
        view.setInt16(index * 2, scaled, true); // explicit little-endian
    });
    return out;
}
/**
 * Start a meeting session.
 * Order matters: the STT WebSocket must be open before the microphone
 * starts producing chunks; the AI SSE stream is attached last.
 * On any failure the error is logged and surfaced via alert().
 */
async function startSession() {
try {
// 1. Connect the STT WebSocket (also sends the 'start' message).
await connectSTTWebSocket();
// 2. Start microphone capture (begins streaming PCM chunks).
await startMicrophone();
// 3. Subscribe to AI suggestions over SSE.
connectAIEventSource();
document.getElementById('startBtn').disabled = true;
document.getElementById('stopBtn').disabled = false;
updateStatus('recording', '🔴 회의 진행 중...');
} catch (error) {
addLog('❌ 회의 시작 실패: ' + error.message, 'error');
alert('회의 시작에 실패했습니다: ' + error.message);
}
}
/**
 * Open the STT WebSocket and send the 'start' control message.
 * Resolves once the socket is open; rejects on a connection error.
 * NOTE(review): if the socket closes without firing open/error the
 * promise never settles — consider adding a connect timeout.
 */
function connectSTTWebSocket() {
return new Promise((resolve, reject) => {
const wsUrl = 'ws://localhost:8084/ws/audio';
addLog('STT WebSocket 연결 시도...', 'info');
sttWebSocket = new WebSocket(wsUrl);
sttWebSocket.onopen = () => {
addLog('✅ STT WebSocket 연결 성공', 'info');
// Announce the meeting so the server registers this session.
sttWebSocket.send(JSON.stringify({
type: 'start',
meetingId: meetingId
}));
resolve();
};
sttWebSocket.onerror = (error) => {
addLog('❌ STT WebSocket 오류', 'error');
reject(error);
};
sttWebSocket.onclose = () => {
addLog('🔴 STT WebSocket 연결 종료', 'error');
};
});
}
/**
 * Request microphone access and stream 16 kHz mono PCM audio to the STT
 * WebSocket as base64-encoded 'chunk' JSON messages.
 * NOTE(review): createScriptProcessor is deprecated (AudioWorklet is the
 * replacement) — acceptable for a test page, but verify browser support.
 */
async function startMicrophone() {
addLog('🎤 마이크 접근 요청...', 'info');
micStream = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
}
});
addLog('✅ 마이크 접근 허용', 'info');
// Create the AudioContext at 16 kHz so no server-side resampling is needed.
audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: 16000
});
const source = audioContext.createMediaStreamSource(micStream);
const scriptNode = audioContext.createScriptProcessor(2048, 1, 1);
scriptNode.onaudioprocess = (audioProcessingEvent) => {
if (!isRecording) return;
const inputBuffer = audioProcessingEvent.inputBuffer;
const inputData = inputBuffer.getChannelData(0);
// Float32 -> Int16 PCM conversion.
const pcmData = floatTo16BitPCM(inputData);
// Base64-encode for the JSON payload.
// NOTE(review): per-byte string concatenation is quadratic; fine for
// 2048-sample chunks, use chunked conversion for larger buffers.
const base64Audio = btoa(
new Uint8Array(pcmData).reduce((data, byte) => data + String.fromCharCode(byte), '')
);
// Ship the chunk over the WebSocket (silently dropped when not OPEN).
if (sttWebSocket && sttWebSocket.readyState === WebSocket.OPEN) {
sttWebSocket.send(JSON.stringify({
type: 'chunk',
meetingId: meetingId,
audioData: base64Audio,
timestamp: Date.now(),
chunkIndex: chunkIndex++,
format: 'audio/pcm',
sampleRate: 16000
}));
}
};
source.connect(scriptNode);
scriptNode.connect(audioContext.destination);
chunkIndex = 0;
isRecording = true;
addLog('✅ 녹음 시작 (PCM 16kHz)', 'info');
}
/**
 * Subscribe to the AI service's SSE stream for this meeting and render
 * incoming 'ai-suggestion' events via displaySuggestions().
 */
function connectAIEventSource() {
const sseUrl = `http://localhost:8086/api/v1/ai/suggestions/meetings/${meetingId}/stream`;
addLog('AI SSE 연결 시도...', 'info');
aiEventSource = new EventSource(sseUrl);
// Server pushes named 'ai-suggestion' events whose data is JSON of
// shape { suggestions: [...] }.
aiEventSource.addEventListener('ai-suggestion', (event) => {
try {
const data = JSON.parse(event.data);
displaySuggestions(data.suggestions);
addLog(`💡 AI 제안사항 수신: ${data.suggestions.length}개`, 'info');
} catch (error) {
addLog('❌ SSE 데이터 파싱 실패: ' + error.message, 'error');
}
});
aiEventSource.onopen = () => {
addLog('✅ AI SSE 연결 성공', 'info');
};
aiEventSource.onerror = (error) => {
// NOTE(review): EventSource retries automatically; only logging here.
addLog('❌ AI SSE 연결 오류', 'error');
};
}
// Render AI suggestions into the #suggestions panel.
// Fixed: the original interpolated suggestion.content / suggestion.timestamp
// into innerHTML, allowing HTML/script injection from the SSE payload (XSS).
// All server-supplied text is now assigned via textContent.
function displaySuggestions(suggestions) {
    const suggestionsDiv = document.getElementById('suggestions');

    // Remove the placeholder once the first suggestion arrives.
    const emptyState = suggestionsDiv.querySelector('.empty-state');
    if (emptyState) {
        emptyState.remove();
    }

    suggestions.forEach(suggestion => {
        const item = document.createElement('div');
        item.className = 'suggestion-item';

        const confidence = Math.round(suggestion.confidence * 100);

        const header = document.createElement('div');
        header.className = 'suggestion-header';

        const time = document.createElement('span');
        time.className = 'suggestion-time';
        time.textContent = suggestion.timestamp; // untrusted: text only

        const badge = document.createElement('span');
        badge.className = 'suggestion-confidence';
        badge.textContent = `${confidence}%`;

        header.appendChild(time);
        header.appendChild(badge);

        const content = document.createElement('div');
        content.className = 'suggestion-content';
        content.textContent = suggestion.content; // untrusted: text only

        item.appendChild(header);
        item.appendChild(content);

        suggestionsDiv.appendChild(item);
        // Keep the newest suggestion visible.
        suggestionsDiv.scrollTop = suggestionsDiv.scrollHeight;
    });
}
// End the meeting: stop capture, notify the STT service, and tear down
// both the WebSocket and the SSE connection, then reset the UI.
function stopSession() {
    isRecording = false;

    // Stop microphone capture first so no further chunks are produced.
    if (audioContext) {
        audioContext.close();
        audioContext = null;
    }
    if (micStream) {
        micStream.getTracks().forEach(track => track.stop());
        micStream = null;
    }

    // Close the STT WebSocket.
    // Fixed: only send 'stop' when the socket is actually OPEN — send() on
    // a CONNECTING socket throws, which would abort the remaining cleanup
    // (SSE close, button state) below.
    if (sttWebSocket) {
        if (sttWebSocket.readyState === WebSocket.OPEN) {
            sttWebSocket.send(JSON.stringify({
                type: 'stop',
                meetingId: meetingId
            }));
        }
        sttWebSocket.close();
        sttWebSocket = null;
    }

    // Close the AI SSE stream.
    if (aiEventSource) {
        aiEventSource.close();
        aiEventSource = null;
    }

    document.getElementById('startBtn').disabled = false;
    document.getElementById('stopBtn').disabled = true;
    updateStatus('disconnected', '🔴 회의 종료');
    addLog('✅ 회의 종료', 'info');
}
// Swap the status banner's CSS state class and replace its message text.
function updateStatus(statusClass, text) {
    const banner = document.getElementById('status');
    banner.className = `status ${statusClass}`;
    banner.textContent = text;
}
// Append a timestamped entry to the on-page log panel and keep it
// scrolled to the bottom. `type` selects the log-<type> colour class.
function addLog(message, type = 'info') {
    const panel = document.getElementById('log');
    const entry = document.createElement('div');
    entry.className = `log-item log-${type}`;
    const now = new Date().toLocaleTimeString('ko-KR', {
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit'
    });
    entry.textContent = `[${now}] ${message}`;
    panel.appendChild(entry);
    panel.scrollTop = panel.scrollHeight;
}
// Best-effort cleanup on page unload: stop the session so the server
// receives the 'stop' message and the microphone is released.
window.onbeforeunload = () => {
if (isRecording) {
stopSession();
}
};
</script>
</body>
</html>

View File

@ -0,0 +1,560 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>HGZero STT 실시간 테스트 (WAV)</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
max-width: 900px;
margin: 50px auto;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: #333;
}
.container {
background: white;
border-radius: 15px;
padding: 30px;
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
}
h1 {
color: #667eea;
text-align: center;
margin-bottom: 10px;
}
.subtitle {
text-align: center;
color: #666;
margin-bottom: 30px;
}
.controls {
display: flex;
gap: 15px;
justify-content: center;
margin: 30px 0;
}
button {
padding: 15px 30px;
font-size: 16px;
border: none;
border-radius: 8px;
cursor: pointer;
transition: all 0.3s;
font-weight: bold;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
#startBtn {
background: #48bb78;
color: white;
}
#startBtn:hover:not(:disabled) {
background: #38a169;
transform: translateY(-2px);
}
#stopBtn {
background: #f56565;
color: white;
}
#stopBtn:hover:not(:disabled) {
background: #e53e3e;
transform: translateY(-2px);
}
.status {
text-align: center;
padding: 15px;
margin: 20px 0;
border-radius: 8px;
font-weight: bold;
}
.status.disconnected {
background: #fed7d7;
color: #c53030;
}
.status.connected {
background: #c6f6d5;
color: #276749;
}
.status.recording {
background: #feebc8;
color: #c05621;
}
.info-box {
background: #ebf8ff;
border-left: 4px solid #4299e1;
padding: 15px;
margin: 20px 0;
border-radius: 4px;
}
.info-box h3 {
margin-top: 0;
color: #2c5282;
}
#transcript {
background: #f7fafc;
border: 2px solid #e2e8f0;
border-radius: 8px;
padding: 20px;
min-height: 200px;
max-height: 400px;
overflow-y: auto;
margin-top: 20px;
font-family: 'Courier New', monospace;
}
.transcript-item {
padding: 10px;
margin: 5px 0;
background: white;
border-radius: 5px;
border-left: 3px solid #667eea;
}
#suggestions {
background: #fffaf0;
border: 2px solid #fbbf24;
border-radius: 8px;
padding: 20px;
min-height: 150px;
max-height: 300px;
overflow-y: auto;
margin-top: 20px;
}
.suggestion-item {
padding: 10px;
margin: 5px 0;
background: white;
border-radius: 5px;
border-left: 3px solid #f59e0b;
}
.suggestion-title {
font-weight: bold;
color: #d97706;
margin-bottom: 5px;
}
.timestamp {
color: #718096;
font-size: 0.85em;
margin-bottom: 5px;
}
.log {
background: #1a202c;
color: #48bb78;
padding: 15px;
border-radius: 8px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
max-height: 150px;
overflow-y: auto;
margin-top: 20px;
}
.log-item {
margin: 3px 0;
}
.log-error {
color: #fc8181;
}
.log-info {
color: #63b3ed;
}
</style>
</head>
<body>
<div class="container">
<h1>🎤 HGZero 실시간 STT 테스트 (WAV)</h1>
<p class="subtitle">WebSocket 기반 실시간 음성-텍스트 변환 (PCM WAV 16kHz)</p>
<div class="info-box">
<h3>📋 테스트 정보</h3>
<p><strong>WebSocket URL:</strong> <code>ws://localhost:8084/ws/audio</code></p>
<p><strong>Meeting ID:</strong> <code>test-meeting-001</code></p>
<p><strong>Audio Format:</strong> PCM WAV, 16kHz, Mono, 16-bit</p>
</div>
<div id="status" class="status disconnected">
🔴 연결 끊김
</div>
<div class="controls">
<button id="startBtn" onclick="startRecording()">
🎤 녹음 시작
</button>
<button id="stopBtn" onclick="stopRecording()" disabled>
⏹️ 녹음 중지
</button>
</div>
<div id="transcript">
<p style="color: #a0aec0; text-align: center;">여기에 실시간 STT 결과가 5초마다 표시됩니다...</p>
</div>
<h3 style="margin-top: 30px; color: #667eea;">💡 실시간 AI 제안사항</h3>
<div id="suggestions">
<p style="color: #a0aec0; text-align: center;">AI 제안사항이 여기에 표시됩니다...</p>
</div>
<div class="log" id="log">
<div class="log-item">시스템 로그...</div>
</div>
</div>
<script>
let ws = null;
let audioContext = null;
let processor = null;
let input = null;
let chunkIndex = 0;
let eventSource = null;
const meetingId = 'test-meeting-001';
const sampleRate = 16000;
const aiServiceUrl = 'http://localhost:8086';
/**
 * Connect to the STT WebSocket and wire UI state transitions.
 * Incoming messages are either control acks ({status: 'started'|'stopped'})
 * or transcript payloads ({transcript, confidence, ...}).
 */
function connectWebSocket() {
const wsUrl = 'ws://localhost:8084/ws/audio';
addLog('WebSocket 연결 시도: ' + wsUrl, 'info');
ws = new WebSocket(wsUrl);
ws.onopen = () => {
addLog('✅ WebSocket 연결 성공', 'info');
updateStatus('connected', '🟢 연결됨');
document.getElementById('startBtn').disabled = false;
};
ws.onmessage = (event) => {
addLog('📩 서버 응답: ' + event.data, 'info');
try {
const data = JSON.parse(event.data);
if (data.status === 'started') {
updateStatus('recording', '🔴 녹음 중... (5초마다 STT 결과 표시)');
} else if (data.status === 'stopped') {
updateStatus('connected', '🟢 연결됨 (녹음 종료)');
} else if (data.transcript) {
displayTranscript(data);
}
} catch (e) {
addLog('서버 응답 파싱 실패: ' + e.message, 'error');
}
};
ws.onerror = (error) => {
addLog('❌ WebSocket 오류', 'error');
};
ws.onclose = () => {
// Disable both buttons until a fresh connection is established.
addLog('🔴 WebSocket 연결 종료', 'error');
updateStatus('disconnected', '🔴 연결 끊김');
document.getElementById('startBtn').disabled = true;
document.getElementById('stopBtn').disabled = true;
};
}
/**
 * Build the canonical 44-byte RIFF/WAVE header for PCM audio data.
 * All multi-byte fields are little-endian, as the WAV format requires.
 */
function createWavHeader(dataLength, sampleRate, numChannels, bitsPerSample) {
    const header = new ArrayBuffer(44);
    const view = new DataView(header);
    const bytesPerFrame = numChannels * bitsPerSample / 8;

    writeString(view, 0, 'RIFF');                         // chunk id
    view.setUint32(4, 36 + dataLength, true);             // file size minus 8
    writeString(view, 8, 'WAVE');                         // RIFF type
    writeString(view, 12, 'fmt ');                        // fmt subchunk id
    view.setUint32(16, 16, true);                         // fmt chunk length
    view.setUint16(20, 1, true);                          // format: PCM
    view.setUint16(22, numChannels, true);                // channel count
    view.setUint32(24, sampleRate, true);                 // sample rate
    view.setUint32(28, sampleRate * bytesPerFrame, true); // byte rate
    view.setUint16(32, bytesPerFrame, true);              // block align
    view.setUint16(34, bitsPerSample, true);              // bits per sample
    writeString(view, 36, 'data');                        // data chunk id
    view.setUint32(40, dataLength, true);                 // data length
    return header;
}
// Write an ASCII string into the DataView, one byte per character, at `offset`.
function writeString(view, offset, string) {
    let index = 0;
    while (index < string.length) {
        view.setUint8(offset + index, string.charCodeAt(index));
        index += 1;
    }
}
// Float32 to Int16 변환
// Convert Float32 samples (clamped to [-1, 1]) to signed 16-bit PCM.
// Scaling is asymmetric on purpose: -1 maps to -32768, +1 maps to +32767.
function floatTo16BitPCM(float32Array) {
    const samples = new Int16Array(float32Array.length);
    float32Array.forEach((value, i) => {
        const clamped = Math.min(1, Math.max(-1, value));
        samples[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
    });
    return samples;
}
// 녹음 시작
// Start microphone capture and stream base64-encoded WAV (PCM 16 kHz mono)
// chunks to the server over the WebSocket. Toggles the record buttons.
async function startRecording() {
    try {
        addLog('🎤 마이크 접근 요청...', 'info');
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: {
                sampleRate: sampleRate,
                channelCount: 1,
                echoCancellation: true,
                noiseSuppression: true,
                autoGainControl: true
            }
        });
        addLog('✅ 마이크 접근 허용', 'info');
        // AudioContext pinned to the target sample rate (16 kHz).
        audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: sampleRate
        });
        input = audioContext.createMediaStreamSource(stream);
        processor = audioContext.createScriptProcessor(4096, 1, 1);
        processor.onaudioprocess = (e) => {
            const inputData = e.inputBuffer.getChannelData(0);
            // Float32 → Int16 PCM conversion
            const pcmData = floatTo16BitPCM(inputData);
            // WAV header + PCM payload
            const wavHeader = createWavHeader(pcmData.length * 2, sampleRate, 1, 16);
            const wavData = new Uint8Array(wavHeader.byteLength + pcmData.length * 2);
            wavData.set(new Uint8Array(wavHeader), 0);
            wavData.set(new Uint8Array(pcmData.buffer), wavHeader.byteLength);
            // BUG FIX: base64-encode in bounded slices. The previous
            // String.fromCharCode.apply(null, wavData) passes every byte as a
            // separate argument and throws RangeError once the buffer exceeds
            // the engine's argument-count limit.
            let binary = '';
            const SLICE = 0x8000;
            for (let i = 0; i < wavData.length; i += SLICE) {
                binary += String.fromCharCode.apply(null, wavData.subarray(i, i + SLICE));
            }
            const base64Audio = btoa(binary);
            const message = JSON.stringify({
                type: 'chunk',
                meetingId: meetingId,
                audioData: base64Audio,
                timestamp: Date.now(),
                chunkIndex: chunkIndex++,
                format: 'audio/wav',
                sampleRate: sampleRate
            });
            if (ws && ws.readyState === WebSocket.OPEN) {
                ws.send(message);
                if (chunkIndex % 10 === 0) { // log roughly every 10 seconds
                    addLog(`📤 청크 전송 중... #${chunkIndex} (${wavData.length} bytes)`, 'info');
                }
            }
        };
        input.connect(processor);
        processor.connect(audioContext.destination);
        // Announce the new session. Guard the socket: an unguarded send on a
        // closed or absent socket throws and aborts the whole setup.
        if (ws && ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({
                type: 'start',
                meetingId: meetingId
            }));
        }
        document.getElementById('startBtn').disabled = true;
        document.getElementById('stopBtn').disabled = false;
        addLog('✅ 녹음 시작 (WAV PCM 16kHz)', 'info');
    } catch (error) {
        addLog('❌ 마이크 접근 실패: ' + error.message, 'error');
        alert('마이크 접근이 거부되었습니다. 브라우저 설정을 확인해주세요.');
    }
}
// 녹음 중지
// Stop recording: tear down the audio graph nodes in reverse order of
// construction, then tell the server the stream for this meeting is finished.
function stopRecording() {
    if (processor) {
        processor.disconnect();
    }
    if (input) {
        input.disconnect();
    }
    if (audioContext) {
        audioContext.close();
    }
    processor = null;
    input = null;
    audioContext = null;
    // Notify the server only when the socket is still usable.
    if (ws && ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ type: 'stop', meetingId: meetingId }));
    }
    document.getElementById('startBtn').disabled = false;
    document.getElementById('stopBtn').disabled = true;
    addLog('✅ 녹음 종료 명령 전송', 'info');
    addLog('🛑 녹음 중지', 'info');
}
// STT 결과 표시
// Append one STT result entry to the transcript panel and scroll to it.
function displayTranscript(data) {
    const panel = document.getElementById('transcript');
    // Drop the placeholder <p> the first time a real result arrives.
    if (panel.querySelector('p')) {
        panel.innerHTML = '';
    }
    const when = new Date(data.timestamp || Date.now()).toLocaleTimeString('ko-KR');
    const entry = document.createElement('div');
    entry.className = 'transcript-item';
    entry.innerHTML = `
        <div class="timestamp">${when} - 화자: ${data.speaker || '알 수 없음'}</div>
        <div>${data.transcript || data.text || '(텍스트 없음)'}</div>
    `;
    panel.appendChild(entry);
    panel.scrollTop = panel.scrollHeight;
    addLog('📝 STT 결과 수신', 'info');
}
// 상태 업데이트
// Reflect the current connection/recording state in the status banner.
function updateStatus(statusClass, text) {
    const banner = document.getElementById('status');
    banner.className = 'status ' + statusClass;
    banner.textContent = text;
}
// 로그 추가
// Append a timestamped entry to the on-page log, capped at 50 entries
// (oldest dropped first) so long sessions don't grow the DOM unboundedly.
function addLog(message, type = 'info') {
    const logDiv = document.getElementById('log');
    const stamp = new Date().toLocaleTimeString('ko-KR', {
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit'
    });
    const entry = document.createElement('div');
    entry.className = 'log-item log-' + type;
    entry.textContent = `[${stamp}] ${message}`;
    logDiv.appendChild(entry);
    logDiv.scrollTop = logDiv.scrollHeight;
    while (logDiv.children.length > 50) {
        logDiv.removeChild(logDiv.firstChild);
    }
}
// AI 제안사항 SSE 연결
// Subscribe to the AI service's SSE stream for this meeting and render each
// incoming 'ai-suggestion' event. Schedules a reconnect 5s after the browser
// permanently closes the stream.
function connectAISuggestions() {
    const sseUrl = `${aiServiceUrl}/api/v1/ai/suggestions/meetings/${meetingId}/stream`;
    addLog('AI 제안사항 SSE 연결 시도: ' + sseUrl, 'info');
    eventSource = new EventSource(sseUrl);

    eventSource.onopen = () => {
        addLog('✅ AI 제안사항 SSE 연결 성공', 'info');
    };

    eventSource.addEventListener('ai-suggestion', (event) => {
        try {
            displaySuggestions(JSON.parse(event.data));
            addLog('✅ AI 제안사항 수신', 'info');
        } catch (parseError) {
            addLog('AI 제안 파싱 실패: ' + parseError.message, 'error');
        }
    });

    eventSource.onerror = () => {
        const state = eventSource.readyState;
        const stateText =
            state === EventSource.CONNECTING ? 'CONNECTING' :
            state === EventSource.OPEN ? 'OPEN' :
            state === EventSource.CLOSED ? 'CLOSED' : 'UNKNOWN';
        addLog(`❌ AI 제안사항 SSE 오류 (State: ${stateText})`, 'error');
        // Only reconnect manually when the browser has given up (CLOSED);
        // while CONNECTING, EventSource retries by itself.
        if (state === EventSource.CLOSED) {
            eventSource.close();
            setTimeout(() => {
                addLog('AI SSE 재연결 시도...', 'info');
                connectAISuggestions();
            }, 5000);
        }
    };
}
// AI 제안사항 표시
// Render an AI suggestion payload ({suggestions: [...]}) into the panel.
function displaySuggestions(data) {
    const panel = document.getElementById('suggestions');
    // Remove the placeholder paragraph on first render.
    if (panel.querySelector('p')) {
        panel.innerHTML = '';
    }
    const suggestions = data.suggestions;
    if (!suggestions || suggestions.length === 0) {
        return;
    }
    for (const suggestion of suggestions) {
        const card = document.createElement('div');
        card.className = 'suggestion-item';
        const stamp = new Date().toLocaleTimeString('ko-KR');
        // NOTE(review): suggestion text is injected via innerHTML — acceptable
        // for a local test page, but an XSS vector if reused in production.
        card.innerHTML = `
            <div class="timestamp">${stamp}</div>
            <div class="suggestion-title">💡 ${suggestion.title || '제안사항'}</div>
            <div>${suggestion.content || suggestion.description || suggestion}</div>
        `;
        panel.appendChild(card);
    }
    panel.scrollTop = panel.scrollHeight;
}
// 페이지 로드 시 WebSocket 및 SSE 연결
// On page load: open the STT WebSocket and the AI-suggestion SSE stream.
window.onload = () => {
    addLog('🚀 HGZero STT 테스트 페이지 로드 (WAV 버전)', 'info');
    connectWebSocket();      // STT audio WebSocket (ws://localhost:8084)
    connectAISuggestions();  // AI suggestion SSE stream (aiServiceUrl)
};
// 페이지 종료 시 정리
// On page unload: stop any active recording and close both server connections.
window.onbeforeunload = () => {
    stopRecording();
    if (eventSource) {
        eventSource.close();
    }
    if (ws) {
        ws.close();
    }
};
</script>
</body>
</html>

405
test-audio/stt-test.html Normal file
View File

@ -0,0 +1,405 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>HGZero STT 실시간 테스트</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
max-width: 900px;
margin: 50px auto;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: #333;
}
.container {
background: white;
border-radius: 15px;
padding: 30px;
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
}
h1 {
color: #667eea;
text-align: center;
margin-bottom: 10px;
}
.subtitle {
text-align: center;
color: #666;
margin-bottom: 30px;
}
.controls {
display: flex;
gap: 15px;
justify-content: center;
margin: 30px 0;
}
button {
padding: 15px 30px;
font-size: 16px;
border: none;
border-radius: 8px;
cursor: pointer;
transition: all 0.3s;
font-weight: bold;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
#startBtn {
background: #48bb78;
color: white;
}
#startBtn:hover:not(:disabled) {
background: #38a169;
transform: translateY(-2px);
}
#stopBtn {
background: #f56565;
color: white;
}
#stopBtn:hover:not(:disabled) {
background: #e53e3e;
transform: translateY(-2px);
}
.status {
text-align: center;
padding: 15px;
margin: 20px 0;
border-radius: 8px;
font-weight: bold;
}
.status.disconnected {
background: #fed7d7;
color: #c53030;
}
.status.connected {
background: #c6f6d5;
color: #276749;
}
.status.recording {
background: #feebc8;
color: #c05621;
}
.info-box {
background: #ebf8ff;
border-left: 4px solid #4299e1;
padding: 15px;
margin: 20px 0;
border-radius: 4px;
}
.info-box h3 {
margin-top: 0;
color: #2c5282;
}
#transcript {
background: #f7fafc;
border: 2px solid #e2e8f0;
border-radius: 8px;
padding: 20px;
min-height: 200px;
max-height: 400px;
overflow-y: auto;
margin-top: 20px;
font-family: 'Courier New', monospace;
}
.transcript-item {
padding: 10px;
margin: 5px 0;
background: white;
border-radius: 5px;
border-left: 3px solid #667eea;
}
.timestamp {
color: #718096;
font-size: 0.85em;
margin-bottom: 5px;
}
.log {
background: #1a202c;
color: #48bb78;
padding: 15px;
border-radius: 8px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
max-height: 150px;
overflow-y: auto;
margin-top: 20px;
}
.log-item {
margin: 3px 0;
}
.log-error {
color: #fc8181;
}
.log-info {
color: #63b3ed;
}
</style>
</head>
<body>
<div class="container">
<h1>🎤 HGZero 실시간 STT 테스트</h1>
<p class="subtitle">WebSocket 기반 실시간 음성-텍스트 변환</p>
<div class="info-box">
<h3>📋 테스트 정보</h3>
<p><strong>WebSocket URL:</strong> <code id="wsUrl">ws://localhost:8084/ws/audio</code></p>
<p><strong>Meeting ID:</strong> <code id="meetingId">test-meeting-001</code></p>
<p><strong>Sample Rate:</strong> 16000 Hz</p>
</div>
<div id="status" class="status disconnected">
🔴 연결 끊김
</div>
<div class="controls">
<button id="startBtn" onclick="startRecording()">
🎤 녹음 시작
</button>
<button id="stopBtn" onclick="stopRecording()" disabled>
⏹️ 녹음 중지
</button>
</div>
<div id="transcript">
<p style="color: #a0aec0; text-align: center;">여기에 실시간 STT 결과가 표시됩니다...</p>
</div>
<div class="log" id="log">
<div class="log-item">시스템 로그...</div>
</div>
</div>
<script>
let ws = null;
let audioContext = null;
let audioWorkletNode = null;
let micStream = null;
let chunkIndex = 0;
let isRecording = false;
const meetingId = 'test-meeting-001';
// WebSocket 연결
// Connect to the STT gateway WebSocket and register lifecycle handlers.
// On success the record button is enabled; on close all controls are disabled.
function connectWebSocket() {
    const wsUrl = 'ws://localhost:8084/ws/audio';
    addLog('WebSocket 연결 시도: ' + wsUrl, 'info');
    ws = new WebSocket(wsUrl);
    ws.onopen = () => {
        addLog('✅ WebSocket 연결 성공', 'info');
        updateStatus('connected', '🟢 연결됨');
        document.getElementById('startBtn').disabled = false;
    };
    ws.onmessage = (event) => {
        addLog('📩 서버 응답: ' + event.data, 'info');
        // BUG FIX: JSON.parse was unguarded, so a single non-JSON frame from
        // the server threw and killed this handler's invocation.
        try {
            const data = JSON.parse(event.data);
            if (data.status === 'started') {
                updateStatus('recording', '🔴 녹음 중...');
            } else if (data.status === 'stopped') {
                updateStatus('connected', '🟢 연결됨 (녹음 종료)');
            } else if (data.transcript) {
                displayTranscript(data);
            }
        } catch (e) {
            addLog('서버 응답 파싱 실패: ' + e.message, 'error');
        }
    };
    ws.onerror = (error) => {
        addLog('❌ WebSocket 오류: ' + error, 'error');
    };
    ws.onclose = () => {
        addLog('🔴 WebSocket 연결 종료', 'error');
        updateStatus('disconnected', '🔴 연결 끊김');
        document.getElementById('startBtn').disabled = true;
        document.getElementById('stopBtn').disabled = true;
    };
}
// PCM 데이터를 16bit로 변환
// Convert Float32 samples (clamped to [-1, 1]) into a new ArrayBuffer of
// little-endian signed 16-bit PCM. -1 maps to -32768, +1 maps to +32767.
function floatTo16BitPCM(float32Array) {
    const out = new DataView(new ArrayBuffer(float32Array.length * 2));
    for (let i = 0; i < float32Array.length; i++) {
        const sample = Math.min(1, Math.max(-1, float32Array[i]));
        out.setInt16(i * 2, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
    }
    return out.buffer;
}
// 녹음 시작
// Start microphone capture and stream raw PCM (16 kHz, 16-bit, mono) chunks
// to the server over the WebSocket as base64 'chunk' messages.
async function startRecording() {
    try {
        addLog('🎤 마이크 접근 요청...', 'info');
        micStream = await navigator.mediaDevices.getUserMedia({
            audio: {
                sampleRate: 16000,
                channelCount: 1,
                echoCancellation: true,
                noiseSuppression: true,
                autoGainControl: true
            }
        });
        addLog('✅ 마이크 접근 허용', 'info');
        // AudioContext pinned to 16 kHz so the server needs no resampling.
        audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: 16000
        });
        const source = audioContext.createMediaStreamSource(micStream);
        // ScriptProcessorNode yields PCM in 2048-sample buffers (~128 ms).
        const scriptNode = audioContext.createScriptProcessor(2048, 1, 1);
        scriptNode.onaudioprocess = (audioProcessingEvent) => {
            if (!isRecording) return;
            const inputBuffer = audioProcessingEvent.inputBuffer;
            const inputData = inputBuffer.getChannelData(0);
            // Float32 -> Int16 PCM conversion
            const pcmData = floatTo16BitPCM(inputData);
            // Base64 encoding
            const base64Audio = btoa(
                new Uint8Array(pcmData).reduce((data, byte) => data + String.fromCharCode(byte), '')
            );
            // BUG FIX: guard the socket — reading ws.readyState when ws is
            // null (connection never established / already torn down) threw
            // inside the audio callback.
            if (ws && ws.readyState === WebSocket.OPEN) {
                const message = JSON.stringify({
                    type: 'chunk',
                    meetingId: meetingId,
                    audioData: base64Audio,
                    timestamp: Date.now(),
                    chunkIndex: chunkIndex++,
                    format: 'audio/pcm',
                    sampleRate: 16000
                });
                ws.send(message);
                if (chunkIndex % 10 === 0) {
                    addLog(`📤 청크 전송 #${chunkIndex} (${pcmData.byteLength} bytes)`, 'info');
                }
            }
        };
        source.connect(scriptNode);
        scriptNode.connect(audioContext.destination);
        chunkIndex = 0;
        isRecording = true;
        // Announce the new session. Guard: an unguarded send on a closed or
        // absent socket throws and aborts the recording setup.
        if (ws && ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({
                type: 'start',
                meetingId: meetingId
            }));
        }
        document.getElementById('startBtn').disabled = true;
        document.getElementById('stopBtn').disabled = false;
        addLog('✅ 녹음 시작 (PCM 16kHz, 16bit, Mono)', 'info');
    } catch (error) {
        addLog('❌ 마이크 접근 실패: ' + error.message, 'error');
        alert('마이크 접근이 거부되었습니다. 브라우저 설정을 확인해주세요.');
    }
}
// 녹음 중지
// Stop capture: flag the processor callback off, close the audio context,
// release the microphone tracks, then notify the server.
function stopRecording() {
    isRecording = false;
    if (audioContext) {
        audioContext.close();
        audioContext = null;
    }
    if (micStream) {
        for (const track of micStream.getTracks()) {
            track.stop();
        }
        micStream = null;
    }
    // Tell the server this meeting's stream is finished.
    if (ws && ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ type: 'stop', meetingId: meetingId }));
    }
    document.getElementById('startBtn').disabled = false;
    document.getElementById('stopBtn').disabled = true;
    addLog('✅ 녹음 종료 명령 전송', 'info');
}
// STT 결과 표시
// Append one STT result entry to the transcript panel and scroll to it.
function displayTranscript(data) {
    const transcriptDiv = document.getElementById('transcript');
    // BUG FIX: remove the initial placeholder <p> on the first real result
    // (previously it stayed above all transcripts forever).
    if (transcriptDiv.querySelector('p')) {
        transcriptDiv.innerHTML = '';
    }
    const item = document.createElement('div');
    item.className = 'transcript-item';
    // BUG FIX: fall back to "now" when the server omits a timestamp —
    // new Date(undefined) otherwise renders as "Invalid Date".
    const timestamp = new Date(data.timestamp || Date.now()).toLocaleTimeString('ko-KR');
    item.innerHTML = `
        <div class="timestamp">${timestamp} - 화자: ${data.speaker || '알 수 없음'}</div>
        <div>${data.transcript}</div>
    `;
    transcriptDiv.appendChild(item);
    transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
}
// 상태 업데이트
// Reflect the current connection/recording state in the status banner.
function updateStatus(statusClass, text) {
    const banner = document.getElementById('status');
    banner.className = 'status ' + statusClass;
    banner.textContent = text;
}
// 로그 추가
// Append a timestamped entry to the on-page log and auto-scroll.
function addLog(message, type = 'info') {
    const logDiv = document.getElementById('log');
    const logItem = document.createElement('div');
    logItem.className = 'log-item log-' + type;
    const timestamp = new Date().toLocaleTimeString('ko-KR', {
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit'
    });
    logItem.textContent = `[${timestamp}] ${message}`;
    logDiv.appendChild(logItem);
    logDiv.scrollTop = logDiv.scrollHeight;
    // BUG FIX: cap the log at 50 entries (oldest dropped first) — previously
    // the DOM grew without bound during long recording sessions.
    while (logDiv.children.length > 50) {
        logDiv.removeChild(logDiv.firstChild);
    }
}
// 페이지 로드 시 WebSocket 연결
// On page load: open the STT audio WebSocket (ws://localhost:8084).
window.onload = () => {
    addLog('🚀 HGZero STT 테스트 페이지 로드', 'info');
    connectWebSocket();
};
// 페이지 종료 시 정리
// On page unload: stop any active recording and close the WebSocket.
window.onbeforeunload = () => {
    // BUG FIX: the old check referenced `mediaRecorder`, which is never
    // defined in this page (it records via AudioContext + getUserMedia),
    // so unloading mid-recording threw a ReferenceError and skipped cleanup.
    if (isRecording || micStream) {
        stopRecording();
    }
    if (ws) {
        ws.close();
    }
};
</script>
</body>
</html>