From fd2b52b5d56e11a40b860f6d5497591819def5e0 Mon Sep 17 00:00:00 2001 From: kanguk Date: Fri, 18 Oct 2024 19:03:10 +0900 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20STT=20=EC=B4=88=EA=B8=B0=20?= =?UTF-8?q?=EA=B8=B0=EB=8A=A5=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 파일 업로드 -> STT 실행의 기본 기능을 구현한다. --- build.gradle | 2 +- .../core/properties/ClovaProperties.java | 17 ++++++ .../SpeechRecognitionController.java | 24 ++++++++ .../stt/service/ClovaSpeechService.java | 7 +++ .../stt/service/ClovaSpeechServiceImpl.java | 56 +++++++++++++++++++ 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java create mode 100644 src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java create mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java create mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java diff --git a/build.gradle b/build.gradle index 505230dd..f3547be9 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ dependencies { implementation 'io.jsonwebtoken:jjwt:0.9.1' implementation 'org.hibernate.validator:hibernate-validator:8.0.0.Final' implementation 'jakarta.validation:jakarta.validation-api:3.0.2' - implementation 'org.springframework.boot:spring-boot-starter-data-redis' + implementation 'org.apache.httpcomponents.client5:httpclient5:5.2.1' compileOnly 'org.projectlombok:lombok' runtimeOnly 'com.mysql:mysql-connector-j' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' diff --git a/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java new file mode 100644 index 00000000..e4421cef --- /dev/null +++ b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java @@ -0,0 +1,17 @@ +package 
com.splanet.splanet.core.properties; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Configuration; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +@Configuration +@ConfigurationProperties(prefix = "clova") +public class ClovaProperties { + private String clientId; + private String clientSecret; + private String url; + private String language; +} diff --git a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java new file mode 100644 index 00000000..e10f6b0c --- /dev/null +++ b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java @@ -0,0 +1,24 @@ +package com.splanet.splanet.stt.controller; + +import com.splanet.splanet.stt.service.ClovaSpeechService; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/api/stt") +public class SpeechRecognitionController { + + private final ClovaSpeechService clovaSpeechService; + + public SpeechRecognitionController(ClovaSpeechService clovaSpeechService) { + this.clovaSpeechService = clovaSpeechService; + } + + @PostMapping(consumes = "multipart/form-data") + public String recognizeSpeech(@RequestParam("file") MultipartFile file) { + return clovaSpeechService.recognize(file); + } +} diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java new file mode 100644 index 00000000..fd9b2e8f --- /dev/null +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java @@ -0,0 +1,7 @@ +package 
com.splanet.splanet.stt.service; + +import org.springframework.web.multipart.MultipartFile; + +public interface ClovaSpeechService { + String recognize(MultipartFile file); +} diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java new file mode 100644 index 00000000..294d7a92 --- /dev/null +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java @@ -0,0 +1,56 @@ +package com.splanet.splanet.stt.service; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.splanet.splanet.core.exception.BusinessException; +import com.splanet.splanet.core.exception.ErrorCode; +import com.splanet.splanet.core.properties.ClovaProperties; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClients; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.io.entity.ByteArrayEntity; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +@Service +public class ClovaSpeechServiceImpl implements ClovaSpeechService { + + private final ClovaProperties clovaProperties; + + public ClovaSpeechServiceImpl(ClovaProperties clovaProperties) { + this.clovaProperties = clovaProperties; + } + + @Override + public String recognize(MultipartFile file) { + String apiURL = clovaProperties.getUrl() + "?lang=" + clovaProperties.getLanguage(); // 언어 설정 반영 + try (CloseableHttpClient httpClient = HttpClients.createDefault()) { + byte[] audioBytes = file.getBytes(); + + HttpPost httpPost = new HttpPost(apiURL); + httpPost.addHeader("Content-Type", "application/octet-stream"); + httpPost.addHeader("X-NCP-APIGW-API-KEY-ID", clovaProperties.getClientId()); + 
httpPost.addHeader("X-NCP-APIGW-API-KEY", clovaProperties.getClientSecret()); + + ByteArrayEntity byteArrayEntity = new ByteArrayEntity(audioBytes, ContentType.APPLICATION_OCTET_STREAM); + httpPost.setEntity(byteArrayEntity); + + return httpClient.execute(httpPost, response -> { + int statusCode = response.getCode(); + String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); + + if (statusCode == 200) { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode rootNode = objectMapper.readTree(responseBody); + return rootNode.path("text").asText(); + } else { + throw new BusinessException(ErrorCode.INVALID_INPUT_VALUE, "CLOVA Speech API 호출 실패: " + responseBody); + } + }); + } catch (Exception e) { + throw new BusinessException(ErrorCode.INVALID_INPUT_VALUE, "오디오 파일 처리 중 오류 발생: " + e.getMessage()); + } + } +} \ No newline at end of file From 9f16b665dfbbb84c22460badf69bfe36fef4f1ea Mon Sep 17 00:00:00 2001 From: kanguk Date: Fri, 18 Oct 2024 22:57:23 +0900 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20WebSocket=20=EC=9D=84=20=ED=99=9C?= =?UTF-8?q?=EC=9A=A9=ED=95=98=EC=97=AC=20=EC=8B=A4=EC=8B=9C=EA=B0=84?= =?UTF-8?q?=EC=B2=98=EB=9F=BC=20=EB=B3=B4=EC=9D=B4=EB=8F=84=EB=A1=9D=20?= =?UTF-8?q?=ED=95=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 하지만 문제가 있음 (임시커밋) --- build.gradle | 1 + .../com/splanet/splanet/config/WebConfig.java | 16 ++++ .../splanet/config/WebSocketConfig.java | 34 ++++++++ .../core/handler/SpeechWebSocketHandler.java | 78 +++++++++++++++++++ .../splanet/jwt/JwtAuthenticationFilter.java | 2 +- .../SpeechRecognitionController.java | 53 +++++++------ .../stt/service/ClovaSpeechService.java | 2 +- .../stt/service/ClovaSpeechServiceImpl.java | 10 +-- 8 files changed, 164 insertions(+), 32 deletions(-) create mode 100644 src/main/java/com/splanet/splanet/config/WebConfig.java create mode 100644 src/main/java/com/splanet/splanet/config/WebSocketConfig.java create mode 100644 
src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java diff --git a/build.gradle b/build.gradle index f3547be9..646cb796 100644 --- a/build.gradle +++ b/build.gradle @@ -39,6 +39,7 @@ dependencies { implementation 'org.hibernate.validator:hibernate-validator:8.0.0.Final' implementation 'jakarta.validation:jakarta.validation-api:3.0.2' implementation 'org.apache.httpcomponents.client5:httpclient5:5.2.1' + implementation 'org.springframework.boot:spring-boot-starter-websocket' compileOnly 'org.projectlombok:lombok' runtimeOnly 'com.mysql:mysql-connector-j' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' diff --git a/src/main/java/com/splanet/splanet/config/WebConfig.java b/src/main/java/com/splanet/splanet/config/WebConfig.java new file mode 100644 index 00000000..ee84bcee --- /dev/null +++ b/src/main/java/com/splanet/splanet/config/WebConfig.java @@ -0,0 +1,16 @@ +package com.splanet.splanet.config; + +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +@Configuration +public class WebConfig implements WebMvcConfigurer { + + @Override + public void addCorsMappings(CorsRegistry registry) { //인터페이스 WebMvcConfigurer 상속 + registry.addMapping("/**") //모든 경로를 허용해줄것이므로 + .allowedOrigins("*") //리소스 공유 허락할 origin 지정 + .allowedMethods("*"); //모든 메소드를 허용 + } +} \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/config/WebSocketConfig.java b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java new file mode 100644 index 00000000..d5688e67 --- /dev/null +++ b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java @@ -0,0 +1,34 @@ +package com.splanet.splanet.config; + +import com.splanet.splanet.core.handler.SpeechWebSocketHandler; +import org.springframework.context.annotation.Bean; +import 
org.springframework.context.annotation.Configuration; +import org.springframework.web.socket.config.annotation.EnableWebSocket; +import org.springframework.web.socket.config.annotation.WebSocketConfigurer; +import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry; +import org.springframework.web.socket.server.standard.ServletServerContainerFactoryBean; + +@Configuration +@EnableWebSocket +public class WebSocketConfig implements WebSocketConfigurer { + + private final SpeechWebSocketHandler speechWebSocketHandler; + + public WebSocketConfig(SpeechWebSocketHandler speechWebSocketHandler) { + this.speechWebSocketHandler = speechWebSocketHandler; + } + + @Override + public void registerWebSocketHandlers(WebSocketHandlerRegistry registry) { + registry.addHandler(speechWebSocketHandler, "/ws/stt") + .setAllowedOrigins("*"); + } + + @Bean + public ServletServerContainerFactoryBean configureWebSocketContainer() { + ServletServerContainerFactoryBean factory = new ServletServerContainerFactoryBean(); + factory.setMaxBinaryMessageBufferSize(256 * 1024); //바이너리 버퍼 크기 지정 16KB + factory.setMaxTextMessageBufferSize(256 * 1024); //텍스트 버퍼 크기 지정 16KB + return factory; + } +} \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java new file mode 100644 index 00000000..f5d5ab05 --- /dev/null +++ b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java @@ -0,0 +1,78 @@ +package com.splanet.splanet.core.handler; + +import com.splanet.splanet.stt.service.ClovaSpeechService; +import org.springframework.web.socket.BinaryMessage; +import org.springframework.web.socket.TextMessage; +import org.springframework.web.socket.WebSocketSession; +import org.springframework.web.socket.handler.BinaryWebSocketHandler; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; + 
+@Component +public class SpeechWebSocketHandler extends BinaryWebSocketHandler { + + private final ClovaSpeechService clovaSpeechService; + private List audioDataBuffer = new ArrayList<>(); + private static final int MINIMUM_AUDIO_SIZE = 64000; // 최소 데이터 크기를 96KB로 설정 (약 3초 분량) + + public SpeechWebSocketHandler(ClovaSpeechService clovaSpeechService) { + this.clovaSpeechService = clovaSpeechService; + } + + @Override + protected synchronized void handleBinaryMessage(WebSocketSession session, BinaryMessage message) throws Exception { + session.setBinaryMessageSizeLimit(256 * 1024); // 메시지 크기 제한을 256KB로 설정 + byte[] audioData = message.getPayload().array(); + + // 오디오 데이터를 버퍼에 추가 + audioDataBuffer.add(audioData); + + // 누적된 오디오 데이터 크기 계산 + int totalSize = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); + + // 현재 누적된 데이터 크기를 로그로 출력 + System.out.println("현재 누적된 데이터 크기: " + totalSize + " bytes"); + + // 오디오 데이터가 충분히 쌓였을 때만 CLOVA API로 전송 + if (totalSize >= MINIMUM_AUDIO_SIZE) { + byte[] fullAudioData = mergeAudioData(); + try { + // CLOVA API로 전송 + String transcript = clovaSpeechService.recognize(fullAudioData); + session.sendMessage(new TextMessage(transcript)); + // 인식에 성공했으므로 버퍼를 초기화 + audioDataBuffer.clear(); + System.out.println("인식 성공: 버퍼를 초기화합니다."); + } catch (Exception e) { + e.printStackTrace(); + // STT007 오류 발생 시 버퍼를 유지하고 데이터 수집 계속 + if (e.getMessage().contains("STT007")) { + System.err.println("오류 발생: STT007 - 데이터가 너무 작습니다. 더 많은 데이터를 수집 중..."); + // 버퍼를 유지하여 다음 데이터를 기다립니다. 
+ } else { + // 다른 오류 발생 시 버퍼를 초기화하고 오류 메시지 전송 + audioDataBuffer.clear(); + session.sendMessage(new TextMessage("오류 발생: " + e.getMessage())); + System.err.println("오류 발생: " + e.getMessage() + " - 버퍼를 초기화합니다."); + } + } + } else { + // 아직 데이터가 충분하지 않으면 아무 작업도 하지 않음 + System.out.println("데이터가 아직 충분하지 않음"); + } + } + + // 누적된 오디오 데이터를 병합하는 메서드 + private byte[] mergeAudioData() { + int totalLength = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); + byte[] mergedData = new byte[totalLength]; + int currentIndex = 0; + for (byte[] data : audioDataBuffer) { + System.arraycopy(data, 0, mergedData, currentIndex, data.length); + currentIndex += data.length; + } + return mergedData; + } +} diff --git a/src/main/java/com/splanet/splanet/jwt/JwtAuthenticationFilter.java b/src/main/java/com/splanet/splanet/jwt/JwtAuthenticationFilter.java index 68f0a513..26142f8b 100644 --- a/src/main/java/com/splanet/splanet/jwt/JwtAuthenticationFilter.java +++ b/src/main/java/com/splanet/splanet/jwt/JwtAuthenticationFilter.java @@ -76,7 +76,7 @@ private boolean isApiPath(String requestURI) { } private boolean isExemptedPath(String requestURI) { - return requestURI.equals("/api/users/create") || requestURI.startsWith("/api/token"); + return requestURI.equals("/api/users/create") || requestURI.startsWith("/api/token") || requestURI.startsWith("/api/stt"); } private void sendErrorResponse(HttpServletResponse response, int status, String message) throws IOException { diff --git a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java index e10f6b0c..0c7d406a 100644 --- a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java +++ b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java @@ -1,24 +1,29 @@ -package com.splanet.splanet.stt.controller; - -import com.splanet.splanet.stt.service.ClovaSpeechService; -import 
org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; -import org.springframework.web.bind.annotation.RestController; - -@RestController -@RequestMapping("/api/stt") -public class SpeechRecognitionController { - - private final ClovaSpeechService clovaSpeechService; - - public SpeechRecognitionController(ClovaSpeechService clovaSpeechService) { - this.clovaSpeechService = clovaSpeechService; - } - - @PostMapping(consumes = "multipart/form-data") - public String recognizeSpeech(@RequestParam("file") MultipartFile file) { - return clovaSpeechService.recognize(file); - } -} +//package com.splanet.splanet.stt.controller; +// +//import com.splanet.splanet.stt.service.ClovaSpeechService; +//import org.springframework.stereotype.Controller; +//import org.springframework.web.bind.annotation.PostMapping; +//import org.springframework.web.bind.annotation.RequestMapping; +//import org.springframework.web.bind.annotation.RequestParam; +//import org.springframework.web.multipart.MultipartFile; +//import org.springframework.web.bind.annotation.RestController; +// +//@Controller +//@RequestMapping("/api/stt") +//public class SpeechRecognitionController { +// +// private final ClovaSpeechService clovaSpeechService; +// +// public SpeechRecognitionController(ClovaSpeechService clovaSpeechService) { +// this.clovaSpeechService = clovaSpeechService; +// } +// +// @PostMapping(consumes = "multipart/form-data") +// public String recognizeSpeech(@RequestParam("file") MultipartFile file) { +// try { +// return clovaSpeechService.recognize(file.getBytes()); +// } catch (Exception e) { +// throw new RuntimeException("파일 처리 중 오류 발생: " + e.getMessage()); +// } +// } +//} diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java 
b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java index fd9b2e8f..af1551d6 100644 --- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java @@ -3,5 +3,5 @@ import org.springframework.web.multipart.MultipartFile; public interface ClovaSpeechService { - String recognize(MultipartFile file); + String recognize(byte[] audioBytes); } diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java index 294d7a92..f08bf8fe 100644 --- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java +++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java @@ -24,11 +24,9 @@ public ClovaSpeechServiceImpl(ClovaProperties clovaProperties) { } @Override - public String recognize(MultipartFile file) { - String apiURL = clovaProperties.getUrl() + "?lang=" + clovaProperties.getLanguage(); // 언어 설정 반영 + public String recognize(byte[] audioBytes) { + String apiURL = clovaProperties.getUrl() + "?lang=" + clovaProperties.getLanguage(); try (CloseableHttpClient httpClient = HttpClients.createDefault()) { - byte[] audioBytes = file.getBytes(); - HttpPost httpPost = new HttpPost(apiURL); httpPost.addHeader("Content-Type", "application/octet-stream"); httpPost.addHeader("X-NCP-APIGW-API-KEY-ID", clovaProperties.getClientId()); @@ -46,11 +44,11 @@ public String recognize(MultipartFile file) { JsonNode rootNode = objectMapper.readTree(responseBody); return rootNode.path("text").asText(); } else { - throw new BusinessException(ErrorCode.INVALID_INPUT_VALUE, "CLOVA Speech API 호출 실패: " + responseBody); + throw new RuntimeException("CLOVA Speech API 호출 실패: " + responseBody); } }); } catch (Exception e) { - throw new BusinessException(ErrorCode.INVALID_INPUT_VALUE, "오디오 파일 처리 중 오류 발생: " + e.getMessage()); + throw new RuntimeException("오디오 파일 처리 중 오류 
발생: " + e.getMessage()); } } } \ No newline at end of file From dae3889b705a0df1522ac3290f169b174120f3d2 Mon Sep 17 00:00:00 2001 From: kanguk Date: Sat, 19 Oct 2024 02:44:20 +0900 Subject: [PATCH 3/3] =?UTF-8?q?feat:=20=EC=8B=A4=EC=8B=9C=EA=B0=84=20?= =?UTF-8?q?=EC=8A=A4=ED=8A=B8=EB=A6=AC=EB=B0=8D=20API=EB=A5=BC=20=ED=99=9C?= =?UTF-8?q?=EC=9A=A9=ED=95=98=EC=97=AC=20STT=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 기존 CLOVA Speech Recognition 에서 CLOVA Speech 실시간 스트리밍 API를 활용하여 실시간 처리를 가능하도록 수정한다. --- build.gradle | 39 ++++++ .../splanet/config/WebSocketConfig.java | 10 -- .../core/handler/SpeechWebSocketHandler.java | 124 +++++++++++------- .../core/properties/ClovaProperties.java | 4 +- .../SpeechRecognitionController.java | 29 ---- .../stt/service/ClovaSpeechGrpcService.java | 89 +++++++++++++ .../stt/service/ClovaSpeechService.java | 6 +- .../stt/service/ClovaSpeechServiceImpl.java | 54 -------- src/main/proto/nest.proto | 34 +++++ 9 files changed, 240 insertions(+), 149 deletions(-) delete mode 100644 src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java create mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java delete mode 100644 src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java create mode 100644 src/main/proto/nest.proto diff --git a/build.gradle b/build.gradle index 646cb796..f02fed08 100644 --- a/build.gradle +++ b/build.gradle @@ -2,6 +2,8 @@ plugins { id 'java' id 'org.springframework.boot' version '3.3.3' id 'io.spring.dependency-management' version '1.1.6' + id 'com.google.protobuf' version '0.9.4' + } group = 'com.splanet' @@ -40,6 +42,17 @@ dependencies { implementation 'jakarta.validation:jakarta.validation-api:3.0.2' implementation 'org.apache.httpcomponents.client5:httpclient5:5.2.1' implementation 'org.springframework.boot:spring-boot-starter-websocket' + // gRPC 및 Protocol Buffers 의존성 + 
implementation 'io.grpc:grpc-netty-shaded:1.56.1' + implementation 'io.grpc:grpc-protobuf:1.56.1' + implementation 'io.grpc:grpc-stub:1.56.1' + implementation 'com.google.protobuf:protobuf-java:3.23.4' + + // gRPC 관련 필요한 의존성 + implementation 'javax.annotation:javax.annotation-api:1.3.2' + implementation 'com.google.code.gson:gson:2.8.9' + + compileOnly 'org.projectlombok:lombok' runtimeOnly 'com.mysql:mysql-connector-j' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' @@ -53,3 +66,29 @@ dependencies { tasks.named('test') { useJUnitPlatform() } + +protobuf { + protoc { + artifact = 'com.google.protobuf:protoc:3.23.4' + } + plugins { + grpc { + artifact = 'io.grpc:protoc-gen-grpc-java:1.66.0' + } + } + generateProtoTasks { + all().forEach { task -> + task.plugins { + grpc {} + } + } + } +} + +sourceSets { + main { + java { + srcDirs 'build/generated/source/proto/main/java', 'build/generated/source/proto/main/grpc' + } + } +} \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/config/WebSocketConfig.java b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java index d5688e67..db192738 100644 --- a/src/main/java/com/splanet/splanet/config/WebSocketConfig.java +++ b/src/main/java/com/splanet/splanet/config/WebSocketConfig.java @@ -1,12 +1,10 @@ package com.splanet.splanet.config; import com.splanet.splanet.core.handler.SpeechWebSocketHandler; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.web.socket.config.annotation.EnableWebSocket; import org.springframework.web.socket.config.annotation.WebSocketConfigurer; import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry; -import org.springframework.web.socket.server.standard.ServletServerContainerFactoryBean; @Configuration @EnableWebSocket @@ -23,12 +21,4 @@ public void registerWebSocketHandlers(WebSocketHandlerRegistry registry) { 
registry.addHandler(speechWebSocketHandler, "/ws/stt") .setAllowedOrigins("*"); } - - @Bean - public ServletServerContainerFactoryBean configureWebSocketContainer() { - ServletServerContainerFactoryBean factory = new ServletServerContainerFactoryBean(); - factory.setMaxBinaryMessageBufferSize(256 * 1024); //바이너리 버퍼 크기 지정 16KB - factory.setMaxTextMessageBufferSize(256 * 1024); //텍스트 버퍼 크기 지정 16KB - return factory; - } } \ No newline at end of file diff --git a/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java index f5d5ab05..f1710902 100644 --- a/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java +++ b/src/main/java/com/splanet/splanet/core/handler/SpeechWebSocketHandler.java @@ -1,78 +1,100 @@ package com.splanet.splanet.core.handler; -import com.splanet.splanet.stt.service.ClovaSpeechService; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.protobuf.ByteString; +import com.nbp.cdncp.nest.grpc.proto.v1.NestResponse; +import com.splanet.splanet.stt.service.ClovaSpeechGrpcService; +import io.grpc.stub.StreamObserver; +import org.springframework.stereotype.Component; import org.springframework.web.socket.BinaryMessage; +import org.springframework.web.socket.CloseStatus; import org.springframework.web.socket.TextMessage; import org.springframework.web.socket.WebSocketSession; import org.springframework.web.socket.handler.BinaryWebSocketHandler; -import org.springframework.stereotype.Component; -import java.util.ArrayList; -import java.util.List; +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.Map; @Component public class SpeechWebSocketHandler extends BinaryWebSocketHandler { - private final ClovaSpeechService clovaSpeechService; - private List audioDataBuffer = new ArrayList<>(); - private static final int MINIMUM_AUDIO_SIZE = 64000; // 최소 데이터 크기를 96KB로 설정 
(약 3초 분량) + private final ClovaSpeechGrpcService clovaSpeechGrpcService; + private final Map> clientObservers = new ConcurrentHashMap<>(); - public SpeechWebSocketHandler(ClovaSpeechService clovaSpeechService) { - this.clovaSpeechService = clovaSpeechService; + public SpeechWebSocketHandler(ClovaSpeechGrpcService clovaSpeechGrpcService) { + this.clovaSpeechGrpcService = clovaSpeechGrpcService; } @Override - protected synchronized void handleBinaryMessage(WebSocketSession session, BinaryMessage message) throws Exception { - session.setBinaryMessageSizeLimit(256 * 1024); // 메시지 크기 제한을 256KB로 설정 - byte[] audioData = message.getPayload().array(); + public void afterConnectionEstablished(WebSocketSession session) throws Exception { + // 세션이 열릴 때마다 새로운 gRPC 스트림을 생성 + StreamObserver responseObserver = new StreamObserver() { + @Override + public void onNext(NestResponse value) { + // 서버로부터 받은 응답 처리 + try { + String contents = value.getContents(); // JSON 문자열 - // 오디오 데이터를 버퍼에 추가 - audioDataBuffer.add(audioData); + // JSON 파싱 + JsonParser parser = new JsonParser(); + JsonObject jsonObject = parser.parse(contents).getAsJsonObject(); - // 누적된 오디오 데이터 크기 계산 - int totalSize = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); + if (jsonObject.has("transcription")) { + JsonObject transcription = jsonObject.getAsJsonObject("transcription"); + String text = transcription.get("text").getAsString(); + // 클라이언트로 text 필드만 전송 + session.sendMessage(new TextMessage(text)); + } + } catch (Exception e) { + e.printStackTrace(); + } + } - // 현재 누적된 데이터 크기를 로그로 출력 - System.out.println("현재 누적된 데이터 크기: " + totalSize + " bytes"); + @Override + public void onError(Throwable t) { + t.printStackTrace(); + try { + session.sendMessage(new TextMessage("오류 발생: " + t.getMessage())); + } catch (IOException e) { + e.printStackTrace(); + } + } - // 오디오 데이터가 충분히 쌓였을 때만 CLOVA API로 전송 - if (totalSize >= MINIMUM_AUDIO_SIZE) { - byte[] fullAudioData = mergeAudioData(); - try { - // CLOVA API로 전송 - 
String transcript = clovaSpeechService.recognize(fullAudioData); - session.sendMessage(new TextMessage(transcript)); - // 인식에 성공했으므로 버퍼를 초기화 - audioDataBuffer.clear(); - System.out.println("인식 성공: 버퍼를 초기화합니다."); - } catch (Exception e) { - e.printStackTrace(); - // STT007 오류 발생 시 버퍼를 유지하고 데이터 수집 계속 - if (e.getMessage().contains("STT007")) { - System.err.println("오류 발생: STT007 - 데이터가 너무 작습니다. 더 많은 데이터를 수집 중..."); - // 버퍼를 유지하여 다음 데이터를 기다립니다. - } else { - // 다른 오류 발생 시 버퍼를 초기화하고 오류 메시지 전송 - audioDataBuffer.clear(); - session.sendMessage(new TextMessage("오류 발생: " + e.getMessage())); - System.err.println("오류 발생: " + e.getMessage() + " - 버퍼를 초기화합니다."); + @Override + public void onCompleted() { + // 스트림 완료 처리 + try { + session.close(); + } catch (IOException e) { + e.printStackTrace(); } } - } else { - // 아직 데이터가 충분하지 않으면 아무 작업도 하지 않음 - System.out.println("데이터가 아직 충분하지 않음"); + }; + + // 오디오 데이터를 전송할 StreamObserver 생성 + StreamObserver requestObserver = clovaSpeechGrpcService.recognize(responseObserver); + clientObservers.put(session.getId(), requestObserver); + } + + @Override + protected void handleBinaryMessage(WebSocketSession session, BinaryMessage message) throws Exception { + // 클라이언트로부터 받은 오디오 데이터를 gRPC 서비스로 전달 + StreamObserver requestObserver = clientObservers.get(session.getId()); + if (requestObserver != null) { + byte[] audioData = message.getPayload().array(); + ByteString audioChunk = ByteString.copyFrom(audioData); + requestObserver.onNext(audioChunk); } } - // 누적된 오디오 데이터를 병합하는 메서드 - private byte[] mergeAudioData() { - int totalLength = audioDataBuffer.stream().mapToInt(arr -> arr.length).sum(); - byte[] mergedData = new byte[totalLength]; - int currentIndex = 0; - for (byte[] data : audioDataBuffer) { - System.arraycopy(data, 0, mergedData, currentIndex, data.length); - currentIndex += data.length; + @Override + public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception { + // 세션이 종료되면 gRPC 스트림도 종료 + StreamObserver 
requestObserver = clientObservers.remove(session.getId()); + if (requestObserver != null) { + requestObserver.onCompleted(); } - return mergedData; } } diff --git a/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java index e4421cef..d223958d 100644 --- a/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java +++ b/src/main/java/com/splanet/splanet/core/properties/ClovaProperties.java @@ -8,10 +8,8 @@ @Getter @Setter @Configuration -@ConfigurationProperties(prefix = "clova") +@ConfigurationProperties(prefix = "clova.speech") public class ClovaProperties { - private String clientId; private String clientSecret; - private String url; private String language; } diff --git a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java b/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java deleted file mode 100644 index 0c7d406a..00000000 --- a/src/main/java/com/splanet/splanet/stt/controller/SpeechRecognitionController.java +++ /dev/null @@ -1,29 +0,0 @@ -//package com.splanet.splanet.stt.controller; -// -//import com.splanet.splanet.stt.service.ClovaSpeechService; -//import org.springframework.stereotype.Controller; -//import org.springframework.web.bind.annotation.PostMapping; -//import org.springframework.web.bind.annotation.RequestMapping; -//import org.springframework.web.bind.annotation.RequestParam; -//import org.springframework.web.multipart.MultipartFile; -//import org.springframework.web.bind.annotation.RestController; -// -//@Controller -//@RequestMapping("/api/stt") -//public class SpeechRecognitionController { -// -// private final ClovaSpeechService clovaSpeechService; -// -// public SpeechRecognitionController(ClovaSpeechService clovaSpeechService) { -// this.clovaSpeechService = clovaSpeechService; -// } -// -// @PostMapping(consumes = "multipart/form-data") -// public String 
recognizeSpeech(@RequestParam("file") MultipartFile file) { -// try { -// return clovaSpeechService.recognize(file.getBytes()); -// } catch (Exception e) { -// throw new RuntimeException("파일 처리 중 오류 발생: " + e.getMessage()); -// } -// } -//}
diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java
new file mode 100644
index 00000000..47cd0257
--- /dev/null
+++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechGrpcService.java
@@ -0,0 +1,145 @@
+package com.splanet.splanet.stt.service;
+
+import com.google.protobuf.ByteString;
+import com.nbp.cdncp.nest.grpc.proto.v1.*;
+import com.splanet.splanet.core.properties.ClovaProperties;
+import io.grpc.ManagedChannel;
+import io.grpc.Metadata;
+import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
+import io.grpc.stub.MetadataUtils;
+import io.grpc.stub.StreamObserver;
+import java.util.concurrent.TimeUnit;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.springframework.beans.factory.DisposableBean;
+import org.springframework.stereotype.Service;
+
+/**
+ * {@link ClovaSpeechService} implementation that streams audio to the CLOVA Speech gRPC
+ * gateway through the asynchronous {@code NestService} stub.
+ *
+ * <p>A single TLS channel is opened in the constructor and reused for every call; it is shut
+ * down in {@link #destroy()}.
+ */
+@Service
+public class ClovaSpeechGrpcService implements ClovaSpeechService, DisposableBean {
+
+    private static final Logger log = Logger.getLogger(ClovaSpeechGrpcService.class.getName());
+
+    private static final String CLOVA_HOST = "clovaspeech-gw.ncloud.com";
+    private static final int CLOVA_PORT = 50051;
+    private static final long SHUTDOWN_TIMEOUT_SECONDS = 5;
+
+    private final ManagedChannel channel;
+    private final NestServiceGrpc.NestServiceStub nestServiceStub;
+    private final ClovaProperties clovaProperties;
+
+    /**
+     * Opens the channel and prepares a stub that sends the configured client secret as a
+     * bearer token in the {@code authorization} header.
+     *
+     * @param clovaProperties supplies the client secret and the transcription language
+     */
+    public ClovaSpeechGrpcService(ClovaProperties clovaProperties) {
+        this.clovaProperties = clovaProperties;
+
+        // Create the gRPC channel.
+        this.channel = NettyChannelBuilder
+                .forAddress(CLOVA_HOST, CLOVA_PORT)
+                .useTransportSecurity()
+                .build();
+
+        // Create the stub and attach the authentication header. The interceptor form replaces
+        // the deprecated MetadataUtils.attachHeaders.
+        Metadata metadata = new Metadata();
+        metadata.put(
+                Metadata.Key.of("authorization", Metadata.ASCII_STRING_MARSHALLER),
+                "Bearer " + clovaProperties.getClientSecret());
+        this.nestServiceStub = NestServiceGrpc.newStub(channel)
+                .withInterceptors(MetadataUtils.newAttachHeadersInterceptor(metadata));
+    }
+
+    /**
+     * Opens a recognition call, sends the CONFIG request built from the configured language,
+     * and returns an observer through which the caller pushes audio.
+     *
+     * <p>Each chunk passed to {@code onNext} is forwarded as a DATA request whose
+     * {@code seqId} starts at 0 and grows by one per chunk; {@code epFlag} is always
+     * {@code false}. The sequence counter is a plain {@code int}, so calls on the returned
+     * observer must not overlap.
+     *
+     * @param responseObserver receives the responses of the {@code recognize} call
+     * @return observer that forwards audio chunks, errors and completion to the call
+     */
+    @Override
+    public StreamObserver<ByteString> recognize(StreamObserver<NestResponse> responseObserver) {
+        StreamObserver<NestRequest> requestObserver = nestServiceStub.recognize(responseObserver);
+
+        // Send the config message before any audio.
+        requestObserver.onNext(createConfigRequest(clovaProperties.getLanguage()));
+
+        return new StreamObserver<ByteString>() {
+            private int sequenceId = 0;
+
+            @Override
+            public void onNext(ByteString audioChunk) {
+                requestObserver.onNext(createDataRequest(audioChunk, sequenceId, false));
+                sequenceId++;
+            }
+
+            @Override
+            public void onError(Throwable t) {
+                log.log(Level.WARNING, "Audio stream failed", t);
+                requestObserver.onError(t);
+            }
+
+            @Override
+            public void onCompleted() {
+                requestObserver.onCompleted();
+            }
+        };
+    }
+
+    /**
+     * Shuts the channel down, waiting up to {@value #SHUTDOWN_TIMEOUT_SECONDS} seconds before
+     * forcing termination.
+     */
+    @Override
+    public void destroy() {
+        channel.shutdown();
+        try {
+            if (!channel.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                channel.shutdownNow();
+            }
+        } catch (InterruptedException e) {
+            channel.shutdownNow();
+            // Restore the interrupt flag for the thread running the shutdown.
+            Thread.currentThread().interrupt();
+        }
+    }
+
+    // Builds the CONFIG request; the language is embedded in the JSON without escaping.
+    private NestRequest createConfigRequest(String language) {
+        NestConfig config = NestConfig.newBuilder()
+                .setConfig("{\"transcription\":{\"language\":\"" + language + "\"}}")
+                .build();
+
+        return NestRequest.newBuilder()
+                .setType(RequestType.CONFIG)
+                .setConfig(config)
+                .build();
+    }
+
+    // Builds a DATA request holding one audio chunk plus its seqId/epFlag JSON.
+    private NestRequest createDataRequest(ByteString audioChunk, int sequenceId, boolean epFlag) {
+        NestData data = NestData.newBuilder()
+                .setChunk(audioChunk)
+                .setExtraContents("{\"seqId\":" + sequenceId + ",\"epFlag\":" + epFlag + "}")
+                .build();
+
+        return NestRequest.newBuilder()
+                .setType(RequestType.DATA)
+                .setData(data)
+                .build();
+    }
+}
diff --git a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java
index af1551d6..ddfb9f04 100644
--- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java
+++ b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechService.java
@@ -1,7 +1,9 @@
 package com.splanet.splanet.stt.service;
 
-import org.springframework.web.multipart.MultipartFile;
+import com.google.protobuf.ByteString;
+import com.nbp.cdncp.nest.grpc.proto.v1.NestResponse;
+import io.grpc.stub.StreamObserver;
 
 public interface ClovaSpeechService {
-    String recognize(byte[] audioBytes);
+    StreamObserver<ByteString> recognize(StreamObserver<NestResponse> responseObserver);
 }
diff --git
a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java b/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java deleted file mode 100644 index f08bf8fe..00000000 --- a/src/main/java/com/splanet/splanet/stt/service/ClovaSpeechServiceImpl.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.splanet.splanet.stt.service; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.splanet.splanet.core.exception.BusinessException; -import com.splanet.splanet.core.exception.ErrorCode; -import com.splanet.splanet.core.properties.ClovaProperties; -import org.apache.hc.client5.http.classic.methods.HttpPost; -import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; -import org.apache.hc.client5.http.impl.classic.HttpClients; -import org.apache.hc.core5.http.ContentType; -import org.apache.hc.core5.http.io.entity.ByteArrayEntity; -import org.apache.hc.core5.http.io.entity.EntityUtils; -import org.springframework.stereotype.Service; -import org.springframework.web.multipart.MultipartFile; - -@Service -public class ClovaSpeechServiceImpl implements ClovaSpeechService { - - private final ClovaProperties clovaProperties; - - public ClovaSpeechServiceImpl(ClovaProperties clovaProperties) { - this.clovaProperties = clovaProperties; - } - - @Override - public String recognize(byte[] audioBytes) { - String apiURL = clovaProperties.getUrl() + "?lang=" + clovaProperties.getLanguage(); - try (CloseableHttpClient httpClient = HttpClients.createDefault()) { - HttpPost httpPost = new HttpPost(apiURL); - httpPost.addHeader("Content-Type", "application/octet-stream"); - httpPost.addHeader("X-NCP-APIGW-API-KEY-ID", clovaProperties.getClientId()); - httpPost.addHeader("X-NCP-APIGW-API-KEY", clovaProperties.getClientSecret()); - - ByteArrayEntity byteArrayEntity = new ByteArrayEntity(audioBytes, ContentType.APPLICATION_OCTET_STREAM); - httpPost.setEntity(byteArrayEntity); - - return 
httpClient.execute(httpPost, response -> { - int statusCode = response.getCode(); - String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); - - if (statusCode == 200) { - ObjectMapper objectMapper = new ObjectMapper(); - JsonNode rootNode = objectMapper.readTree(responseBody); - return rootNode.path("text").asText(); - } else { - throw new RuntimeException("CLOVA Speech API 호출 실패: " + responseBody); - } - }); - } catch (Exception e) { - throw new RuntimeException("오디오 파일 처리 중 오류 발생: " + e.getMessage()); - } - } -} \ No newline at end of file diff --git a/src/main/proto/nest.proto b/src/main/proto/nest.proto new file mode 100644 index 00000000..77e3efad --- /dev/null +++ b/src/main/proto/nest.proto @@ -0,0 +1,34 @@ +syntax = "proto3"; +option java_multiple_files = true; + +package com.nbp.cdncp.nest.grpc.proto.v1; + +enum RequestType { + CONFIG = 0; + DATA = 1; +} + +message NestConfig { + string config = 1; +} + +message NestData { + bytes chunk = 1; + string extra_contents = 2; +} + +message NestRequest { + RequestType type = 1; + oneof part { + NestConfig config = 2; + NestData data = 3; + } +} + +message NestResponse { + string contents = 1; +} + +service NestService { + rpc recognize(stream NestRequest) returns (stream NestResponse) {} +}