From c923da422bbad374adb66f8e74a73c9e38498273 Mon Sep 17 00:00:00 2001 From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com> Date: Fri, 9 Feb 2024 14:42:28 +0530 Subject: [PATCH 1/5] Add support for OpenAI TTS Engine --- ayushma/migrations/0051_project_tts_engine.py | 19 +++++++ ayushma/models/enums.py | 5 ++ ayushma/models/project.py | 5 +- ayushma/serializers/project.py | 1 + ayushma/utils/language_helpers.py | 52 ++++++++++++------- ayushma/utils/openaiapi.py | 11 +++- utils/pagination.py | 1 + 7 files changed, 72 insertions(+), 22 deletions(-) create mode 100644 ayushma/migrations/0051_project_tts_engine.py diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py new file mode 100644 index 00000000..4febdac7 --- /dev/null +++ b/ayushma/migrations/0051_project_tts_engine.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.6 on 2024-02-09 08:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ayushma", "0050_alter_chat_model_alter_project_model"), + ] + + operations = [ + migrations.AddField( + model_name="project", + name="tts_engine", + field=models.IntegerField( + choices=[(1, "Openai"), (2, "Google")], default=2 + ), + ), + ] diff --git a/ayushma/models/enums.py b/ayushma/models/enums.py index 56611c53..66c1e22e 100644 --- a/ayushma/models/enums.py +++ b/ayushma/models/enums.py @@ -19,6 +19,11 @@ class STTEngine(IntegerChoices): SELF_HOSTED = 3 +class TTSEngine(IntegerChoices): + OPENAI = 1 + GOOGLE = 2 + + class FeedBackRating(IntegerChoices): HALLUCINATING = 1 WRONG = 2 diff --git a/ayushma/models/project.py b/ayushma/models/project.py index e2e53531..03aeb5e4 100644 --- a/ayushma/models/project.py +++ b/ayushma/models/project.py @@ -1,7 +1,7 @@ from django.contrib.postgres.fields import ArrayField from django.db import models -from ayushma.models.enums import ModelType, STTEngine +from ayushma.models.enums import ModelType, STTEngine, TTSEngine from ayushma.models.users import User from utils.models.base import BaseModel @@ -16,6 +16,9 @@ class Project(BaseModel): stt_engine = models.IntegerField( choices=STTEngine.choices, default=STTEngine.WHISPER ) + tts_engine = models.IntegerField( + choices=TTSEngine.choices, default=TTSEngine.GOOGLE + ) model = models.IntegerField(choices=ModelType.choices, default=ModelType.GPT_3_5) preset_questions = ArrayField(models.TextField(), null=True, blank=True) is_default = models.BooleanField(default=False) diff --git a/ayushma/serializers/project.py b/ayushma/serializers/project.py index 4a7d0ddf..5681b43a 100644 --- a/ayushma/serializers/project.py +++ b/ayushma/serializers/project.py @@ -25,6 +25,7 @@ class Meta: "modified_at", "description", "stt_engine", + "tts_engine", "model", "is_default", "display_preset_questions", diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py index c67e6161..886387ad 100644 --- a/ayushma/utils/language_helpers.py +++ b/ayushma/utils/language_helpers.py @@ -1,7 +1,9 @@ import re +from django.conf import settings from google.cloud import texttospeech from google.cloud import translate_v2 as translate +from openai import OpenAI from rest_framework.exceptions import APIException @@ -37,31 +39,43 @@ def sanitize_text(text): return sanitized_text -def text_to_speech(text, language_code): +def text_to_speech(text, language_code, service): try: # in en-IN neural voice is not available if language_code == "en-IN": language_code = "en-US" - client = texttospeech.TextToSpeechClient() - text = sanitize_text(text) - synthesis_input = texttospeech.SynthesisInput(text=text) - - voice = texttospeech.VoiceSelectionParams( - language_code=language_code, name=language_code_voice_map[language_code] - ) - audio_config = texttospeech.AudioConfig( - audio_encoding=texttospeech.AudioEncoding.MP3 - ) - - response = client.synthesize_speech( - input=synthesis_input, - voice=voice, - audio_config=audio_config, - ) - - return response.audio_content + + if service == "google": + client = texttospeech.TextToSpeechClient() + + synthesis_input = texttospeech.SynthesisInput(text=text) + + voice = texttospeech.VoiceSelectionParams( + language_code=language_code, name=language_code_voice_map[language_code] + ) + audio_config = texttospeech.AudioConfig( + audio_encoding=texttospeech.AudioEncoding.MP3 + ) + + response = client.synthesize_speech( + input=synthesis_input, + voice=voice, + audio_config=audio_config, + ) + + return response.audio_content + elif service == "openai": + client = OpenAI(api_key=settings.OPENAI_API_KEY) + response = client.audio.speech.create( + model="tts-1-hd", + voice="nova", + input=text, + ) + return response.read() + else: + raise APIException("Service not supported") except Exception as e: print(e) return None diff --git a/ayushma/utils/openaiapi.py b/ayushma/utils/openaiapi.py index 10f7956c..9c2c13ef 100644 --- a/ayushma/utils/openaiapi.py +++ b/ayushma/utils/openaiapi.py @@ -15,7 +15,7 @@ from ayushma.models import ChatMessage from ayushma.models.chat import Chat from ayushma.models.document import Document -from ayushma.models.enums import ChatMessageType, ModelType +from ayushma.models.enums import ChatMessageType, ModelType, TTSEngine from ayushma.utils.langchain import LangChainHelper from ayushma.utils.language_helpers import text_to_speech, translate_text from core.settings.base import AI_NAME @@ -203,6 +203,7 @@ def handle_post_response( temperature, stats, language, + tts_engine, generate_audio=True, ): chat_message: ChatMessage = ChatMessage.objects.create( @@ -225,7 +226,9 @@ def handle_post_response( ayushma_voice = None if generate_audio: stats["tts_start_time"] = time.time() - ayushma_voice = text_to_speech(translated_chat_response, user_language) + ayushma_voice = text_to_speech( + translated_chat_response, user_language, tts_engine + ) stats["tts_end_time"] = time.time() url = None @@ -324,6 +327,8 @@ def converse( elif message.messageType == ChatMessageType.AYUSHMA: chat_history.append(AIMessage(content=f"Ayushma: {message.message}")) + tts_engine = TTSEngine(chat.project.tts_engine).name.lower() + if not stream: lang_chain_helper = LangChainHelper( stream=False, @@ -347,6 +352,7 @@ def converse( temperature, stats, language, + tts_engine, generate_audio, ) @@ -404,6 +410,7 @@ def converse( temperature, stats, language, + tts_engine, generate_audio, ) diff --git a/utils/pagination.py b/utils/pagination.py index 05509fe4..c54def1c 100644 --- a/utils/pagination.py +++ b/utils/pagination.py @@ -13,5 +13,6 @@ def get_paginated_response(self, data): "has_previous": self.offset > 0, "has_next": self.offset + self.limit < self.count, "results": data, + "offset": self.offset, } ) From 4c0744921e4ea8fce51cedcf7c3d05aecd822b7d Mon Sep 17 00:00:00 2001 From: Ashesh <3626859+Ashesh3@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:23:16 +0530 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: Rithvik Nishad --- ayushma/models/enums.py | 4 ++-- ayushma/utils/language_helpers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ayushma/models/enums.py b/ayushma/models/enums.py index 66c1e22e..a7a7c9cb 100644 --- a/ayushma/models/enums.py +++ b/ayushma/models/enums.py @@ -20,8 +20,8 @@ class STTEngine(IntegerChoices): class TTSEngine(IntegerChoices): - OPENAI = 1 - GOOGLE = 2 + OPENAI = (1, "openai") + GOOGLE = (2, "google") class FeedBackRating(IntegerChoices): diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py index 886387ad..35b5cfd2 100644 --- a/ayushma/utils/language_helpers.py +++ b/ayushma/utils/language_helpers.py @@ -47,7 +47,7 @@ def text_to_speech(text, language_code, service): text = sanitize_text(text) - if service == "google": + if service == TTSEngine.GOOGLE: client = texttospeech.TextToSpeechClient() synthesis_input = texttospeech.SynthesisInput(text=text) @@ -66,7 +66,7 @@ def text_to_speech(text, language_code, service): ) return response.audio_content - elif service == "openai": + elif service == TTSEngine.OPENAI: client = OpenAI(api_key=settings.OPENAI_API_KEY) response = client.audio.speech.create( model="tts-1-hd", From 8b99bff5d9e7069e7a09c0a6d1329586235b915c Mon Sep 17 00:00:00 2001 From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:26:09 +0530 Subject: [PATCH 3/5] Update tts_engine field to use SmallIntegerField --- ayushma/migrations/0051_project_tts_engine.py | 2 +- ayushma/models/project.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py index 4febdac7..79ef57ce 100644 --- a/ayushma/migrations/0051_project_tts_engine.py +++ b/ayushma/migrations/0051_project_tts_engine.py @@ -12,7 +12,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name="project", name="tts_engine", - field=models.IntegerField( + field=models.SmallIntegerField( choices=[(1, "Openai"), (2, "Google")], default=2 ), ), diff --git a/ayushma/models/project.py b/ayushma/models/project.py index 03aeb5e4..5ebcc76d 100644 --- a/ayushma/models/project.py +++ b/ayushma/models/project.py @@ -16,7 +16,7 @@ class Project(BaseModel): stt_engine = models.IntegerField( choices=STTEngine.choices, default=STTEngine.WHISPER ) - tts_engine = models.IntegerField( + tts_engine = models.SmallIntegerField( choices=TTSEngine.choices, default=TTSEngine.GOOGLE ) model = models.IntegerField(choices=ModelType.choices, default=ModelType.GPT_3_5) From 1bb3761da1c7c70824fb1e8ce9173c98061fefa5 Mon Sep 17 00:00:00 2001 From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:34:05 +0530 Subject: [PATCH 4/5] Update settings.json and language_helpers.py files --- ayushma/utils/language_helpers.py | 2 ++ ayushma/utils/openaiapi.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py index 35b5cfd2..460e2093 100644 --- a/ayushma/utils/language_helpers.py +++ b/ayushma/utils/language_helpers.py @@ -6,6 +6,8 @@ from openai import OpenAI from rest_framework.exceptions import APIException +from ayushma.models.enums import TTSEngine + def translate_text(target, text): try: diff --git a/ayushma/utils/openaiapi.py b/ayushma/utils/openaiapi.py index 9c2c13ef..2326d5ff 100644 --- a/ayushma/utils/openaiapi.py +++ b/ayushma/utils/openaiapi.py @@ -15,7 +15,7 @@ from ayushma.models import ChatMessage from ayushma.models.chat import Chat from ayushma.models.document import Document -from ayushma.models.enums import ChatMessageType, ModelType, TTSEngine +from ayushma.models.enums import ChatMessageType, ModelType from ayushma.utils.langchain import LangChainHelper from ayushma.utils.language_helpers import text_to_speech, translate_text from core.settings.base import AI_NAME @@ -327,7 +327,7 @@ def converse( elif message.messageType == ChatMessageType.AYUSHMA: chat_history.append(AIMessage(content=f"Ayushma: {message.message}")) - tts_engine = TTSEngine(chat.project.tts_engine).name.lower() + tts_engine = chat.project.tts_engine if not stream: lang_chain_helper = LangChainHelper( From 6c18a39a86e03268332f449954d89df275416751 Mon Sep 17 00:00:00 2001 From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:54:00 +0530 Subject: [PATCH 5/5] Update project TTS engine choices --- ayushma/migrations/0051_project_tts_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py index 79ef57ce..5ddc3372 100644 --- a/ayushma/migrations/0051_project_tts_engine.py +++ b/ayushma/migrations/0051_project_tts_engine.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.6 on 2024-02-09 08:58 +# Generated by Django 4.2.6 on 2024-02-11 15:23 from django.db import migrations, models @@ -13,7 +13,7 @@ class Migration(migrations.Migration): model_name="project", name="tts_engine", field=models.SmallIntegerField( - choices=[(1, "Openai"), (2, "Google")], default=2 + choices=[(1, "openai"), (2, "google")], default=2 ), ), ]