From c923da422bbad374adb66f8e74a73c9e38498273 Mon Sep 17 00:00:00 2001
From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com>
Date: Fri, 9 Feb 2024 14:42:28 +0530
Subject: [PATCH 1/5] Add support for OpenAI TTS Engine

---
 ayushma/migrations/0051_project_tts_engine.py | 19 +++++++
 ayushma/models/enums.py                       |  5 ++
 ayushma/models/project.py                     |  5 +-
 ayushma/serializers/project.py                |  1 +
 ayushma/utils/language_helpers.py             | 52 ++++++++++++-------
 ayushma/utils/openaiapi.py                    | 11 +++-
 utils/pagination.py                           |  1 +
 7 files changed, 72 insertions(+), 22 deletions(-)
 create mode 100644 ayushma/migrations/0051_project_tts_engine.py

diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py
new file mode 100644
index 00000000..4febdac7
--- /dev/null
+++ b/ayushma/migrations/0051_project_tts_engine.py
@@ -0,0 +1,19 @@
+# Generated by Django 4.2.6 on 2024-02-09 08:58
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("ayushma", "0050_alter_chat_model_alter_project_model"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="project",
+            name="tts_engine",
+            field=models.IntegerField(
+                choices=[(1, "Openai"), (2, "Google")], default=2
+            ),
+        ),
+    ]
diff --git a/ayushma/models/enums.py b/ayushma/models/enums.py
index 56611c53..66c1e22e 100644
--- a/ayushma/models/enums.py
+++ b/ayushma/models/enums.py
@@ -19,6 +19,11 @@ class STTEngine(IntegerChoices):
     SELF_HOSTED = 3
 
 
+class TTSEngine(IntegerChoices):
+    OPENAI = 1
+    GOOGLE = 2
+
+
 class FeedBackRating(IntegerChoices):
     HALLUCINATING = 1
     WRONG = 2
diff --git a/ayushma/models/project.py b/ayushma/models/project.py
index e2e53531..03aeb5e4 100644
--- a/ayushma/models/project.py
+++ b/ayushma/models/project.py
@@ -1,7 +1,7 @@
 from django.contrib.postgres.fields import ArrayField
 from django.db import models
 
-from ayushma.models.enums import ModelType, STTEngine
+from ayushma.models.enums import ModelType, STTEngine, TTSEngine
 from ayushma.models.users import User
 from utils.models.base import BaseModel
 
@@ -16,6 +16,9 @@ class Project(BaseModel):
     stt_engine = models.IntegerField(
         choices=STTEngine.choices, default=STTEngine.WHISPER
     )
+    tts_engine = models.IntegerField(
+        choices=TTSEngine.choices, default=TTSEngine.GOOGLE
+    )
     model = models.IntegerField(choices=ModelType.choices, default=ModelType.GPT_3_5)
     preset_questions = ArrayField(models.TextField(), null=True, blank=True)
     is_default = models.BooleanField(default=False)
diff --git a/ayushma/serializers/project.py b/ayushma/serializers/project.py
index 4a7d0ddf..5681b43a 100644
--- a/ayushma/serializers/project.py
+++ b/ayushma/serializers/project.py
@@ -25,6 +25,7 @@ class Meta:
             "modified_at",
             "description",
             "stt_engine",
+            "tts_engine",
             "model",
             "is_default",
             "display_preset_questions",
diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py
index c67e6161..886387ad 100644
--- a/ayushma/utils/language_helpers.py
+++ b/ayushma/utils/language_helpers.py
@@ -1,7 +1,9 @@
 import re
 
+from django.conf import settings
 from google.cloud import texttospeech
 from google.cloud import translate_v2 as translate
+from openai import OpenAI
 from rest_framework.exceptions import APIException
 
 
@@ -37,31 +39,43 @@ def sanitize_text(text):
     return sanitized_text
 
 
-def text_to_speech(text, language_code):
+def text_to_speech(text, language_code, service):
     try:
         # in en-IN neural voice is not available
         if language_code == "en-IN":
             language_code = "en-US"
 
-        client = texttospeech.TextToSpeechClient()
-
         text = sanitize_text(text)
-        synthesis_input = texttospeech.SynthesisInput(text=text)
-
-        voice = texttospeech.VoiceSelectionParams(
-            language_code=language_code, name=language_code_voice_map[language_code]
-        )
-        audio_config = texttospeech.AudioConfig(
-            audio_encoding=texttospeech.AudioEncoding.MP3
-        )
-
-        response = client.synthesize_speech(
-            input=synthesis_input,
-            voice=voice,
-            audio_config=audio_config,
-        )
-
-        return response.audio_content
+
+        if service == "google":
+            client = texttospeech.TextToSpeechClient()
+
+            synthesis_input = texttospeech.SynthesisInput(text=text)
+
+            voice = texttospeech.VoiceSelectionParams(
+                language_code=language_code, name=language_code_voice_map[language_code]
+            )
+            audio_config = texttospeech.AudioConfig(
+                audio_encoding=texttospeech.AudioEncoding.MP3
+            )
+
+            response = client.synthesize_speech(
+                input=synthesis_input,
+                voice=voice,
+                audio_config=audio_config,
+            )
+
+            return response.audio_content
+        elif service == "openai":
+            client = OpenAI(api_key=settings.OPENAI_API_KEY)
+            response = client.audio.speech.create(
+                model="tts-1-hd",
+                voice="nova",
+                input=text,
+            )
+            return response.read()
+        else:
+            raise APIException("Service not supported")
     except Exception as e:
         print(e)
         return None
diff --git a/ayushma/utils/openaiapi.py b/ayushma/utils/openaiapi.py
index 10f7956c..9c2c13ef 100644
--- a/ayushma/utils/openaiapi.py
+++ b/ayushma/utils/openaiapi.py
@@ -15,7 +15,7 @@
 from ayushma.models import ChatMessage
 from ayushma.models.chat import Chat
 from ayushma.models.document import Document
-from ayushma.models.enums import ChatMessageType, ModelType
+from ayushma.models.enums import ChatMessageType, ModelType, TTSEngine
 from ayushma.utils.langchain import LangChainHelper
 from ayushma.utils.language_helpers import text_to_speech, translate_text
 from core.settings.base import AI_NAME
@@ -203,6 +203,7 @@ def handle_post_response(
     temperature,
     stats,
     language,
+    tts_engine,
     generate_audio=True,
 ):
     chat_message: ChatMessage = ChatMessage.objects.create(
@@ -225,7 +226,9 @@ def handle_post_response(
     ayushma_voice = None
     if generate_audio:
         stats["tts_start_time"] = time.time()
-        ayushma_voice = text_to_speech(translated_chat_response, user_language)
+        ayushma_voice = text_to_speech(
+            translated_chat_response, user_language, tts_engine
+        )
         stats["tts_end_time"] = time.time()
 
     url = None
@@ -324,6 +327,8 @@ def converse(
         elif message.messageType == ChatMessageType.AYUSHMA:
             chat_history.append(AIMessage(content=f"Ayushma: {message.message}"))
 
+    tts_engine = TTSEngine(chat.project.tts_engine).name.lower()
+
     if not stream:
         lang_chain_helper = LangChainHelper(
             stream=False,
@@ -347,6 +352,7 @@ def converse(
             temperature,
             stats,
             language,
+            tts_engine,
             generate_audio,
         )
 
@@ -404,6 +410,7 @@ def converse(
                             temperature,
                             stats,
                             language,
+                            tts_engine,
                             generate_audio,
                         )
 
diff --git a/utils/pagination.py b/utils/pagination.py
index 05509fe4..c54def1c 100644
--- a/utils/pagination.py
+++ b/utils/pagination.py
@@ -13,5 +13,6 @@ def get_paginated_response(self, data):
                 "has_previous": self.offset > 0,
                 "has_next": self.offset + self.limit < self.count,
                 "results": data,
+                "offset": self.offset,
             }
         )

From 4c0744921e4ea8fce51cedcf7c3d05aecd822b7d Mon Sep 17 00:00:00 2001
From: Ashesh <3626859+Ashesh3@users.noreply.github.com>
Date: Sun, 11 Feb 2024 20:23:16 +0530
Subject: [PATCH 2/5] Apply suggestions from code review

Co-authored-by: Rithvik Nishad <mail@rithviknishad.dev>
---
 ayushma/models/enums.py           | 4 ++--
 ayushma/utils/language_helpers.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ayushma/models/enums.py b/ayushma/models/enums.py
index 66c1e22e..a7a7c9cb 100644
--- a/ayushma/models/enums.py
+++ b/ayushma/models/enums.py
@@ -20,8 +20,8 @@ class STTEngine(IntegerChoices):
 
 
 class TTSEngine(IntegerChoices):
-    OPENAI = 1
-    GOOGLE = 2
+    OPENAI = (1, "openai")
+    GOOGLE = (2, "google")
 
 
 class FeedBackRating(IntegerChoices):
diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py
index 886387ad..35b5cfd2 100644
--- a/ayushma/utils/language_helpers.py
+++ b/ayushma/utils/language_helpers.py
@@ -47,7 +47,7 @@ def text_to_speech(text, language_code, service):
 
         text = sanitize_text(text)
 
-        if service == "google":
+        if service == TTSEngine.GOOGLE:
             client = texttospeech.TextToSpeechClient()
 
             synthesis_input = texttospeech.SynthesisInput(text=text)
@@ -66,7 +66,7 @@ def text_to_speech(text, language_code, service):
             )
 
             return response.audio_content
-        elif service == "openai":
+        elif service == TTSEngine.OPENAI:
             client = OpenAI(api_key=settings.OPENAI_API_KEY)
             response = client.audio.speech.create(
                 model="tts-1-hd",

From 8b99bff5d9e7069e7a09c0a6d1329586235b915c Mon Sep 17 00:00:00 2001
From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com>
Date: Sun, 11 Feb 2024 20:26:09 +0530
Subject: [PATCH 3/5] Update tts_engine field to use SmallIntegerField

---
 ayushma/migrations/0051_project_tts_engine.py | 2 +-
 ayushma/models/project.py                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py
index 4febdac7..79ef57ce 100644
--- a/ayushma/migrations/0051_project_tts_engine.py
+++ b/ayushma/migrations/0051_project_tts_engine.py
@@ -12,7 +12,7 @@ class Migration(migrations.Migration):
         migrations.AddField(
             model_name="project",
             name="tts_engine",
-            field=models.IntegerField(
+            field=models.SmallIntegerField(
                 choices=[(1, "Openai"), (2, "Google")], default=2
             ),
         ),
diff --git a/ayushma/models/project.py b/ayushma/models/project.py
index 03aeb5e4..5ebcc76d 100644
--- a/ayushma/models/project.py
+++ b/ayushma/models/project.py
@@ -16,7 +16,7 @@ class Project(BaseModel):
     stt_engine = models.IntegerField(
         choices=STTEngine.choices, default=STTEngine.WHISPER
     )
-    tts_engine = models.IntegerField(
+    tts_engine = models.SmallIntegerField(
         choices=TTSEngine.choices, default=TTSEngine.GOOGLE
     )
     model = models.IntegerField(choices=ModelType.choices, default=ModelType.GPT_3_5)

From 1bb3761da1c7c70824fb1e8ce9173c98061fefa5 Mon Sep 17 00:00:00 2001
From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com>
Date: Sun, 11 Feb 2024 20:34:05 +0530
Subject: [PATCH 4/5] Update language_helpers.py and openaiapi.py files

---
 ayushma/utils/language_helpers.py | 2 ++
 ayushma/utils/openaiapi.py        | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ayushma/utils/language_helpers.py b/ayushma/utils/language_helpers.py
index 35b5cfd2..460e2093 100644
--- a/ayushma/utils/language_helpers.py
+++ b/ayushma/utils/language_helpers.py
@@ -6,6 +6,8 @@
 from openai import OpenAI
 from rest_framework.exceptions import APIException
 
+from ayushma.models.enums import TTSEngine
+
 
 def translate_text(target, text):
     try:
diff --git a/ayushma/utils/openaiapi.py b/ayushma/utils/openaiapi.py
index 9c2c13ef..2326d5ff 100644
--- a/ayushma/utils/openaiapi.py
+++ b/ayushma/utils/openaiapi.py
@@ -15,7 +15,7 @@
 from ayushma.models import ChatMessage
 from ayushma.models.chat import Chat
 from ayushma.models.document import Document
-from ayushma.models.enums import ChatMessageType, ModelType, TTSEngine
+from ayushma.models.enums import ChatMessageType, ModelType
 from ayushma.utils.langchain import LangChainHelper
 from ayushma.utils.language_helpers import text_to_speech, translate_text
 from core.settings.base import AI_NAME
@@ -327,7 +327,7 @@ def converse(
         elif message.messageType == ChatMessageType.AYUSHMA:
             chat_history.append(AIMessage(content=f"Ayushma: {message.message}"))
 
-    tts_engine = TTSEngine(chat.project.tts_engine).name.lower()
+    tts_engine = chat.project.tts_engine
 
     if not stream:
         lang_chain_helper = LangChainHelper(

From 6c18a39a86e03268332f449954d89df275416751 Mon Sep 17 00:00:00 2001
From: Ashesh3 <3626859+Ashesh3@users.noreply.github.com>
Date: Sun, 11 Feb 2024 20:54:00 +0530
Subject: [PATCH 5/5] Update project TTS engine choices

---
 ayushma/migrations/0051_project_tts_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ayushma/migrations/0051_project_tts_engine.py b/ayushma/migrations/0051_project_tts_engine.py
index 79ef57ce..5ddc3372 100644
--- a/ayushma/migrations/0051_project_tts_engine.py
+++ b/ayushma/migrations/0051_project_tts_engine.py
@@ -1,4 +1,4 @@
-# Generated by Django 4.2.6 on 2024-02-09 08:58
+# Generated by Django 4.2.6 on 2024-02-11 15:23
 
 from django.db import migrations, models
 
@@ -13,7 +13,7 @@ class Migration(migrations.Migration):
             model_name="project",
             name="tts_engine",
             field=models.SmallIntegerField(
-                choices=[(1, "Openai"), (2, "Google")], default=2
+                choices=[(1, "openai"), (2, "google")], default=2
             ),
         ),
     ]