class Migration(migrations.Migration):
    """Alter ``Project.stt_engine`` so its choices include 'Self Hosted' (3)."""

    # Must be applied after the merge migration that precedes it.
    dependencies = [
        ("ayushma", "0043_merge_20230905_1530"),
    ]

    operations = [
        migrations.AlterField(
            model_name="project",
            name="stt_engine",
            field=models.IntegerField(
                choices=[
                    (1, "Whisper"),
                    (2, "Google"),
                    (3, "Self Hosted"),
                ],
                default=1,
            ),
        ),
    ]
class SelfHostedEngine:
    """Speech-to-text engine backed by a self hosted transcription service.

    The service URL is read from ``settings.SELF_HOSTED_ENDPOINT``. The audio
    is uploaded as a multipart form together with the model name and language.
    """

    # (connect, read) timeout in seconds. Without a timeout ``requests`` waits
    # forever, so a stalled transcription service would block the caller.
    DEFAULT_TIMEOUT = (10, 300)

    def __init__(self, api_key, language_code, model="small", timeout=DEFAULT_TIMEOUT):
        """Store the request parameters.

        Args:
            api_key: Accepted for signature parity with the other engines;
                this engine does not use it.
            language_code: Language code of the audio, e.g. ``"hi-IN"``.
            model: Model name sent to the service. Change this model to get
                faster results, see: https://github.com/coronasafe/care-whisper
            timeout: Value passed to ``requests.post(timeout=...)``.
        """
        self.language_code = language_code
        self.model = model
        self.timeout = timeout

    def recognize(self, audio):
        """Send ``audio`` to the self hosted endpoint and return the transcript.

        Args:
            audio: The audio payload, uploaded as the ``audio`` form file.

        Returns:
            The stripped transcription text, or ``""`` when the service
            answers with a non-success HTTP status.

        Raises:
            requests.RequestException: On connection errors or timeouts.
            KeyError: If the JSON body lacks ``data.transcription``.
        """
        import logging

        response = requests.post(
            settings.SELF_HOSTED_ENDPOINT,
            files={"audio": audio},
            data={
                "model": self.model,
                # The "-IN" region suffix is removed before sending.
                "language": self.language_code.replace("-IN", ""),
            },
            timeout=self.timeout,
        )

        if not response.ok:
            logging.getLogger(__name__).error(
                "Failed to recognize speech with self hosted engine (HTTP %s)",
                response.status_code,
            )
            return ""

        payload = response.json()
        return payload["data"]["transcription"].strip()