From cb09ec4fb43c55d9e116c124138d920e411adcd0 Mon Sep 17 00:00:00 2001 From: kanguk Date: Sat, 19 Oct 2024 02:44:20 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=EC=8B=A4=EC=8B=9C=EA=B0=84=20=EC=8A=A4?= =?UTF-8?q?=ED=8A=B8=EB=A6=AC=EB=B0=8D=20API=EB=A5=BC=20=ED=99=9C=EC=9A=A9?= =?UTF-8?q?=ED=95=98=EC=97=AC=20STT=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 기존 CLOVA Speech Recognition 에서 CLOVA Speech 실시간 스트리밍 API를 활용하여 실시간 처리를 가능하도록 수정한다. --- build.gradle | 39 ++++++ .../splanet/config/WebSocketConfig.java | 10 -- .../core/handler/SpeechWebSocketHandler.java | 124 +++++++++++------- .../core/properties/ClovaProperties.java | 4 +- .../SpeechRecognitionController.java | 29 ---- .../stt/service/ClovaSpeechGrpcService.java | 89 +++++++++++++ .../stt/service/ClovaSpeechService.java | 6 +- .../stt/service/ClovaSpeechServiceImpl.java | 54 -------- src/main/proto/nest.proto | 34 +++++ src/main/resources/application.yml | 7 +- 10 files changed, 243 insertions(+), 153 deletions(-) delete mode 100644 src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java create mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java delete mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java create mode 100644 src/main/proto/nest.proto diff --git a/build.gradle b/build.gradle index 2c56e54f..c00aa0c1 100644 --- a/build.gradle +++ b/build.gradle @@ -2,6 +2,8 @@ plugins { id 'java' id 'org.springframework.boot' version '3.3.3' id 'io.spring.dependency-management' version '1.1.6' + id 'com.google.protobuf' version '0.9.4' + } group = 'com.splanet' @@ -39,6 +41,17 @@ dependencies { implementation 'jakarta.validation:jakarta.validation-api:3.0.2' implementation 'org.apache.httpcomponents.client5:httpclient5:5.2.1' implementation 'org.springframework.boot:spring-boot-starter-websocket' + // gRPC 및 Protocol Buffers 의존성 + implementation 'io.grpc:grpc-netty-shaded:1.56.1' + implementation 'io.grpc:grpc-protobuf:1.56.1' + implementation 'io.grpc:grpc-stub:1.56.1' + implementation 'com.google.protobuf:protobuf-java:3.23.4' + + // gRPC 관련 필요한 의존성 + implementation 'javax.annotation:javax.annotation-api:1.3.2' + implementation 'com.google.code.gson:gson:2.8.9' + + compileOnly 'org.projectlombok:lombok' runtimeOnly 'com.mysql:mysql-connector-j' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' @@ -52,3 +65,29 @@ dependencies { tasks.named('test') { useJUnitPlatform() } + +protobuf { + protoc { + artifact = 'com.google.protobuf:protoc:3.23.4' + } + plugins { + grpc { + artifact = 'io.grpc:protoc-gen-grpc-java:1.66.0' + } + } + generateProtoTasks { + all().forEach { task -> + task.plugins { + grpc {} + } + } + } +} + +sourceSets { + main { + java { + srcDirs 'build/generated/source/proto/main/java', 'build/generated/source/proto/main/grpc' + } + } +} \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/config/WebSocketConfig.java b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java index d5688e67..db192738 100644 --- a/src/main/java/com/splanet/splanet/config/WebSocketConfig.java +++ b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java @@ -1,12 +1,10 @@ package com.splanet.splanet.config; import com.splanet.splanet.core.handler.SpeechWebSocketHandler; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.web.socket.config.annotation.EnableWebSocket; import org.springframework.web.socket.config.annotation.WebSocketConfigurer; import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry; -import org.springframework.web.socket.server.standard.ServletServerContainerFactoryBean; @Configuration @EnableWebSocket @@ -23,12 +21,4 @@ public void registerWebSocketHandlers(WebSocketHandlerRegistry registry) { registry.addHandler(speechWebSocketHandler, "/ws/stt") .setAllowedOrigins("*"); } - - @Bean - public ServletServerContainerFactoryBean configureWebSocketContainer() { - ServletServerContainerFactoryBean factory = new ServletServerContainerFactoryBean(); - factory.setMaxBinaryMessageBufferSize(256 * 1024); //바이너리 버퍼 크기 지정 16KB - factory.setMaxTextMessageBufferSize(256 * 1024); //텍스트 버퍼 크기 지정 16KB - return factory; - } } \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java index f5d5ab05..f1710902 100644 --- a/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java +++ b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java @@ -1,78 +1,100 @@ package com.splanet.splanet.core.handler; -import com.splanet.splanet.stt.service.ClovaSpeechService; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.protobuf.ByteString; +import com.nbp.cdncp.nest.grpc.proto.v1.NestResponse; +import com.splanet.splanet.stt.service.ClovaSpeechGrpcService; +import io.grpc.stub.StreamObserver; +import org.springframework.stereotype.Component; import org.springframework.web.socket.BinaryMessage; +import org.springframework.web.socket.CloseStatus; import org.springframework.web.socket.TextMessage; import org.springframework.web.socket.WebSocketSession; import org.springframework.web.socket.handler.BinaryWebSocketHandler; -import org.springframework.stereotype.Component; -import java.util.ArrayList; -import java.util.List; +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.Map; @Component public class SpeechWebSocketHandler extends BinaryWebSocketHandler { - private final ClovaSpeechService clovaSpeechService; - private List audioDataBuffer = new ArrayList<>(); - private static final int MINIMUM_AUDIO_SIZE = 64000; // 최소 데이터 크기를 96KB로 설정 (약 3초 분량) + private final ClovaSpeechGrpcService clovaSpeechGrpcService; + private final Map> clientObservers = new ConcurrentHashMap<>(); - public SpeechWebSocketHandler(ClovaSpeechService clovaSpeechService) { - this.clovaSpeechService = clovaSpeechService; + public SpeechWebSocketHandler(ClovaSpeechGrpcService clovaSpeechGrpcService) { + this.clovaSpeechGrpcService = clovaSpeechGrpcService; } @Override - protected synchronized void handleBinaryMessage(WebSocketSession session, BinaryMessage message) throws Exception { - session.setBinaryMessageSizeLimit(256 * 1024); // 메시지 크기 제한을 256KB로 설정 - byte[] audioData = message.getPayload().array(); + public void afterConnectionEstablished(WebSocketSession session) throws Exception { + // 세션이 열릴 때마다 새로운 gRPC 스트림을 생성 + StreamObserver responseObserver = new StreamObserver() { + @Override + public void onNext(NestResponse value) { + // 서버로부터 받은 응답 처리 + try { + String contents = value.getContents(); // JSON 문자열 - // 오디오 데이터를 버퍼에 추가 - audioDataBuffer.add(audioData); + // JSON 파싱 + JsonParser parser = new JsonParser(); + JsonObject jsonObject = parser.parse(contents).getAsJsonObject(); - // 누적된 오디오 데이터 크기 계산 - int totalSize = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); + if (jsonObject.has("transcription")) { + JsonObject transcription = jsonObject.getAsJsonObject("transcription"); + String text = transcription.get("text").getAsString(); + // 클라이언트로 text 필드만 전송 + session.sendMessage(new TextMessage(text)); + } + } catch (Exception e) { + e.printStackTrace(); + } + } - // 현재 누적된 데이터 크기를 로그로 출력 - System.out.println("현재 누적된 데이터 크기: " + totalSize + " bytes"); + @Override + public void onError(Throwable t) { + t.printStackTrace(); + try { + session.sendMessage(new TextMessage("오류 발생: " + t.getMessage())); + } catch (IOException e) { + e.printStackTrace(); + } + } - // 오디오 데이터가 충분히 쌓였을 때만 CLOVA API로 전송 - if (totalSize >= MINIMUM_AUDIO_SIZE) { - byte[] fullAudioData = mergeAudioData(); - try { - // CLOVA API로 전송 - String transcript = clovaSpeechService.recognize(fullAudioData); - session.sendMessage(new TextMessage(transcript)); - // 인식에 성공했으므로 버퍼를 초기화 - audioDataBuffer.clear(); - System.out.println("인식 성공: 버퍼를 초기화합니다."); - } catch (Exception e) { - e.printStackTrace(); - // STT007 오류 발생 시 버퍼를 유지하고 데이터 수집 계속 - if (e.getMessage().contains("STT007")) { - System.err.println("오류 발생: STT007 - 데이터가 너무 작습니다. 더 많은 데이터를 수집 중..."); - // 버퍼를 유지하여 다음 데이터를 기다립니다. - } else { - // 다른 오류 발생 시 버퍼를 초기화하고 오류 메시지 전송 - audioDataBuffer.clear(); - session.sendMessage(new TextMessage("오류 발생: " + e.getMessage())); - System.err.println("오류 발생: " + e.getMessage() + " - 버퍼를 초기화합니다."); + @Override + public void onCompleted() { + // 스트림 완료 처리 + try { + session.close(); + } catch (IOException e) { + e.printStackTrace(); } } - } else { - // 아직 데이터가 충분하지 않으면 아무 작업도 하지 않음 - System.out.println("데이터가 아직 충분하지 않음"); + }; + + // 오디오 데이터를 전송할 StreamObserver 생성 + StreamObserver requestObserver = clovaSpeechGrpcService.recognize(responseObserver); + clientObservers.put(session.getId(), requestObserver); + } + + @Override + protected void handleBinaryMessage(WebSocketSession session, BinaryMessage message) throws Exception { + // 클라이언트로부터 받은 오디오 데이터를 gRPC 서비스로 전달 + StreamObserver requestObserver = clientObservers.get(session.getId()); + if (requestObserver != null) { + byte[] audioData = message.getPayload().array(); + ByteString audioChunk = ByteString.copyFrom(audioData); + requestObserver.onNext(audioChunk); } } - // 누적된 오디오 데이터를 병합하는 메서드 - private byte[] mergeAudioData() { - int totalLength = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); - byte[] mergedData = new byte[totalLength]; - int currentIndex = 0; - for (byte[] data : audioDataBuffer) { - System.arraycopy(data, 0, mergedData, currentIndex, data.length); - currentIndex += data.length; + @Override + public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception { + // 세션이 종료되면 gRPC 스트림도 종료 + StreamObserver requestObserver = clientObservers.remove(session.getId()); + if (requestObserver != null) { + requestObserver.onCompleted(); } - return mergedData; } } diff --git a/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java index e4421cef..d223958d 100644 --- a/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java +++ b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java @@ -8,10 +8,8 @@ @Getter @Setter @Configuration -@ConfigurationProperties(prefix = "clova") +@ConfigurationProperties(prefix = "clova.speech") public class ClovaProperties { - private String clientId; private String clientSecret; - private String url; private String language; } diff --git a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java deleted file mode 100644 index 0c7d406a..00000000 --- a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java +++ /dev/null @@ -1,29 +0,0 @@ -//package com.splanet.splanet.stt.controller; -// -//import com.splanet.splanet.stt.service.ClovaSpeechService; -//import org.springframework.stereotype.Controller; -//import org.springframework.web.bind.annotation.PostMapping; -//import org.springframework.web.bind.annotation.RequestMapping; -//import org.springframework.web.bind.annotation.RequestParam; -//import org.springframework.web.multipart.MultipartFile; -//import org.springframework.web.bind.annotation.RestController; -// -//@Controller -//@RequestMapping("/api/stt") -//public class SpeechRecognitionController { -// -// private final ClovaSpeechService clovaSpeechService; -// -// public SpeechRecognitionController(ClovaSpeechService clovaSpeechService) { -// this.clovaSpeechService = clovaSpeechService; -// } -// -// @PostMapping(consumes = "multipart/form-data") -// public String recognizeSpeech(@RequestParam("file") MultipartFile file) { -// try { -// return clovaSpeechService.recognize(file.getBytes()); -// } catch (Exception e) { -// throw new RuntimeException("파일 처리 중 오류 발생: " + e.getMessage()); -// } -// } -//} diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java new file mode 100644 index 00000000..47cd0257 --- /dev/null +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java @@ -0,0 +1,89 @@ +package com.splanet.splanet.stt.service; + +import com.google.protobuf.ByteString; +import com.nbp.cdncp.nest.grpc.proto.v1.*; +import com.splanet.splanet.core.properties.ClovaProperties; +import io.grpc.ManagedChannel; +import io.grpc.Metadata; +import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder; +import io.grpc.stub.MetadataUtils; +import io.grpc.stub.StreamObserver; +import org.springframework.stereotype.Service; + +@Service +public class ClovaSpeechGrpcService implements ClovaSpeechService { + + private final NestServiceGrpc.NestServiceStub nestServiceStub; + private final ClovaProperties clovaProperties; + + public ClovaSpeechGrpcService(ClovaProperties clovaProperties) { + this.clovaProperties = clovaProperties; + + // gRPC 채널 생성 + ManagedChannel channel = NettyChannelBuilder + .forAddress("clovaspeech-gw.ncloud.com", 50051) + .useTransportSecurity() + .build(); + + // Stub 생성 및 인증 정보 설정 + NestServiceGrpc.NestServiceStub stub = NestServiceGrpc.newStub(channel); + Metadata metadata = new Metadata(); + metadata.put(Metadata.Key.of("authorization", Metadata.ASCII_STRING_MARSHALLER), "Bearer " + clovaProperties.getClientSecret()); + this.nestServiceStub = MetadataUtils.attachHeaders(stub, metadata); + } + + @Override + public StreamObserver recognize(StreamObserver responseObserver) { + StreamObserver requestObserver = nestServiceStub.recognize(responseObserver); + + // Config 메시지 전송 + requestObserver.onNext(createConfigRequest(clovaProperties.getLanguage())); + + return new StreamObserver() { + private int sequenceId = 0; + + @Override + public void onNext(ByteString audioChunk) { + NestRequest dataRequest = createDataRequest(audioChunk, sequenceId, false); + requestObserver.onNext(dataRequest); + sequenceId++; + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + requestObserver.onError(t); + } + + @Override + public void onCompleted() { + requestObserver.onCompleted(); + } + }; + } + + // Config 설정 + private NestRequest createConfigRequest(String language) { + NestConfig config = NestConfig.newBuilder() + .setConfig("{\"transcription\":{\"language\":\"" + language + "\"}}") + .build(); + + return NestRequest.newBuilder() + .setType(RequestType.CONFIG) + .setConfig(config) + .build(); + } + + // 데이터 구성 + private NestRequest createDataRequest(ByteString audioChunk, int sequenceId, boolean epFlag) { + NestData data = NestData.newBuilder() + .setChunk(audioChunk) + .setExtraContents("{\"seqId\":" + sequenceId + ",\"epFlag\":" + epFlag + "}") + .build(); + + return NestRequest.newBuilder() + .setType(RequestType.DATA) + .setData(data) + .build(); + } +} diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java index af1551d6..ddfb9f04 100644 --- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java @@ -1,7 +1,9 @@ package com.splanet.splanet.stt.service; -import org.springframework.web.multipart.MultipartFile; +import com.google.protobuf.ByteString; +import com.nbp.cdncp.nest.grpc.proto.v1.NestResponse; +import io.grpc.stub.StreamObserver; public interface ClovaSpeechService { - String recognize(byte[] audioBytes); + StreamObserver recognize(StreamObserver responseObserver); } diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java deleted file mode 100644 index f08bf8fe..00000000 --- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.splanet.splanet.stt.service; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.splanet.splanet.core.exception.BusinessException; -import com.splanet.splanet.core.exception.ErrorCode; -import com.splanet.splanet.core.properties.ClovaProperties; -import org.apache.hc.client5.http.classic.methods.HttpPost; -import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; -import org.apache.hc.client5.http.impl.classic.HttpClients; -import org.apache.hc.core5.http.ContentType; -import org.apache.hc.core5.http.io.entity.ByteArrayEntity; -import org.apache.hc.core5.http.io.entity.EntityUtils; -import org.springframework.stereotype.Service; -import org.springframework.web.multipart.MultipartFile; - -@Service -public class ClovaSpeechServiceImpl implements ClovaSpeechService { - - private final ClovaProperties clovaProperties; - - public ClovaSpeechServiceImpl(ClovaProperties clovaProperties) { - this.clovaProperties = clovaProperties; - } - - @Override - public String recognize(byte[] audioBytes) { - String apiURL = clovaProperties.getUrl() + "?lang=" + clovaProperties.getLanguage(); - try (CloseableHttpClient httpClient = HttpClients.createDefault()) { - HttpPost httpPost = new HttpPost(apiURL); - httpPost.addHeader("Content-Type", "application/octet-stream"); - httpPost.addHeader("X-NCP-APIGW-API-KEY-ID", clovaProperties.getClientId()); - httpPost.addHeader("X-NCP-APIGW-API-KEY", clovaProperties.getClientSecret()); - - ByteArrayEntity byteArrayEntity = new ByteArrayEntity(audioBytes, ContentType.APPLICATION_OCTET_STREAM); - httpPost.setEntity(byteArrayEntity); - - return httpClient.execute(httpPost, response -> { - int statusCode = response.getCode(); - String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); - - if (statusCode == 200) { - ObjectMapper objectMapper = new ObjectMapper(); - JsonNode rootNode = objectMapper.readTree(responseBody); - return rootNode.path("text").asText(); - } else { - throw new RuntimeException("CLOVA Speech API 호출 실패: " + responseBody); - } - }); - } catch (Exception e) { - throw new RuntimeException("오디오 파일 처리 중 오류 발생: " + e.getMessage()); - } - } -} \ No newline at end of file diff --git a/src/main/proto/nest.proto b/src/main/proto/nest.proto new file mode 100644 index 00000000..77e3efad --- /dev/null +++ b/src/main/proto/nest.proto @@ -0,0 +1,34 @@ +syntax = "proto3"; +option java_multiple_files = true; + +package com.nbp.cdncp.nest.grpc.proto.v1; + +enum RequestType { + CONFIG = 0; + DATA = 1; +} + +message NestConfig { + string config = 1; +} + +message NestData { + bytes chunk = 1; + string extra_contents = 2; +} + +message NestRequest { + RequestType type = 1; + oneof part { + NestConfig config = 2; + NestData data = 3; + } +} + +message NestResponse { + string contents = 1; +} + +service NestService { + rpc recognize(stream NestRequest) returns (stream NestResponse) {} +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 32782318..97df6275 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -48,7 +48,6 @@ springdoc: path: /swagger clova: - client-id: ${CLOVA_CLIENT_ID} - client-secret: ${CLOVA_CLIENT_SECRET} - url: ${CLOVA_SPEECH_URL} - language: Kor \ No newline at end of file + speech: + client-secret: ${CLOVA_CLIENT_SECRET} + language: ko