Code push

gregorgabrovsek · May 15, 2022 · de99499 · de99499
1 parent 1a02278
commit de99499
Show file tree

Hide file tree

Showing 29 changed files with 25,650 additions and 0 deletions.
diff --git a/hearsay/__init__.py b/hearsay/__init__.py
diff --git a/hearsay/assembly/__init__.py b/hearsay/assembly/__init__.py
diff --git a/hearsay/assembly/manager.py b/hearsay/assembly/manager.py
@@ -0,0 +1,67 @@
+from pydantic import BaseModel
+import requests
+
+
+# Base URL shared by every AssemblyAI v2 request made in this module.
+ASSEMBLYAI_BASE_URL = "https://api.assemblyai.com/v2"
+# Value sent in the "authorization" header of each request. This is a placeholder
+# and has to be replaced with a real token before the module can be used.
+# NOTE(review): consider loading the token from the environment instead of source.
+ASSEMBLYAI_API_TOKEN = "INSERT_TOKEN_HERE"
+
+
+class UploadResult(BaseModel):
+    """Parsed JSON body returned by the ``/upload`` endpoint."""
+
+    # URL of the uploaded audio; callers pass it on when submitting a transcription job.
+    upload_url: str
+
+
+class TranscriptionJob(BaseModel):
+    """Parsed JSON body returned when a transcription job is submitted."""
+
+    # Job identifier; used afterwards to fetch the transcription result.
+    id: str
+    # Job status string as reported in the submission response.
+    status: str
+
+
+class Word(BaseModel):
+    """A single entry of the ``words`` list of a transcription result."""
+
+    # Confidence reported for this word (presumably in 0..1 — TODO confirm with API docs).
+    confidence: float
+    # End and start offsets of the word (presumably milliseconds — TODO confirm).
+    end: int
+    start: int
+    # Text of the word.
+    text: str
+
+
+class TranscriptionResult(BaseModel):
+    """Parsed JSON body returned when a transcription job is looked up by id."""
+
+    id: str
+    punctuate: bool
+    # The polling code in this project treats "completed" and "error" as terminal.
+    status: str
+    # Both fields are optional and default to None when missing from the response.
+    text: str | None = None
+    words: list[Word] | None = None
+
+
+def upload_audio_to_assembly(audio_path: str) -> UploadResult:
+    def read_file(filename, chunk_size=5242880):
+        with open(filename, "rb") as _file:
+            while True:
+                data = _file.read(chunk_size)
+                if not data:
+                    break
+                yield data
+
+    headers = {"authorization": ASSEMBLYAI_API_TOKEN}
+    response = requests.post(
+        f"{ASSEMBLYAI_BASE_URL}/upload",
+        headers=headers,
+        data=read_file(audio_path),
+    )
+
+    return UploadResult(**response.json())
+
+
+def submit_for_transcription(audio_url: str) -> TranscriptionJob:
+    endpoint = f"{ASSEMBLYAI_BASE_URL}/transcript"
+    json = {"audio_url": audio_url}
+    headers = {
+        "authorization": ASSEMBLYAI_API_TOKEN,
+        "content-type": "application/json",
+    }
+    response = requests.post(endpoint, json=json, headers=headers)
+    return TranscriptionJob(**response.json())
+
+
+def get_transcription_result(job_id: str) -> TranscriptionResult:
+    endpoint = f"{ASSEMBLYAI_BASE_URL}/transcript/{job_id}"
+    headers = {"authorization": ASSEMBLYAI_API_TOKEN}
+    response = requests.get(endpoint, headers=headers)
+    return TranscriptionResult(**response.json())
diff --git a/hearsay/av_management/__init__.py b/hearsay/av_management/__init__.py
diff --git a/hearsay/av_management/video_mgmt.py b/hearsay/av_management/video_mgmt.py
@@ -0,0 +1,18 @@
+import subprocess
+import uuid
+import os
+
+
+def extract_audio_from_mp4(video_name: str, temporary_folder_location: str) -> str:
+    """
+    From the given mp4 file, extract the audio file and return the name of the audio.
+
+    :param video_name: name of the mp4 file
+    :param temporary_folder_location: location of the temporary folder
+    :return: name of the wav file
+    """
+    audio_name = f"{str(uuid.uuid4())}.wav"
+    command = f"ffmpeg -i \"{os.path.join(temporary_folder_location, video_name)}\" -ab 160k -ac 2 -ar 44100 -vn \"{os.path.join(temporary_folder_location, audio_name)}\""
+    print(command)
+    subprocess.call(command, shell=True)
+    return audio_name
diff --git a/hearsay/cloud_translation/__init__.py b/hearsay/cloud_translation/__init__.py
diff --git a/hearsay/cloud_translation/google.py b/hearsay/cloud_translation/google.py
@@ -0,0 +1,34 @@
+# Imports the Google Cloud Translation library
+from google.cloud import translate
+
+
+# Initialize Translation client
+def translate_text(text="I wish you all the best with your presentations.", project_id="hearsay-python"):
+    """Translating Text."""
+
+    client = translate.TranslationServiceClient()
+
+    location = "global"
+
+    parent = f"projects/{project_id}/locations/{location}"
+
+    # Translate text from English to French
+    # Detail on supported types can be found here:
+    # https://cloud.google.com/translate/docs/supported-formats
+    response = client.translate_text(
+        request={
+            "parent": parent,
+            "contents": [text],
+            "mime_type": "text/plain",  # mime types: text/plain, text/html
+            "source_language_code": "en-US",
+            "target_language_code": "fr",
+        }
+    )
+
+    # Display the translation for each input text provided
+    for translation in response.translations:
+        print("Translated text: {}".format(translation.translated_text))
+
+
+# When executed as a script, run one translation with the default arguments.
+if __name__ == '__main__':
+    translate_text()
diff --git a/hearsay/main.py b/hearsay/main.py
@@ -0,0 +1,159 @@
+import uuid
+
+import uvicorn
+from fastapi import Depends, File, UploadFile, FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import os
+import time
+
+import tempfile
+from starlette.responses import FileResponse
+from fastapi.responses import PlainTextResponse
+from fastapi.staticfiles import StaticFiles
+
+from hearsay.av_management.video_mgmt import extract_audio_from_mp4
+from hearsay.assembly.manager import upload_audio_to_assembly, submit_for_transcription, get_transcription_result
+from hearsay.subtitles.srtGen import srt_from_transcription, write_to_file, srt_from_transcription_and_srt, \
+    srt_from_transcription_and_text
+from hearsay.subtitles.vtt import convert_file_to_vtt
+
+# Scratch directory created at import time; the handlers below store uploaded
+# videos, extracted audio and generated subtitles in it.
+temp = tempfile.TemporaryDirectory(prefix="hearsay_")
+app = FastAPI()
+# NOTE(review): every origin, method and header is allowed and credentials are
+# enabled — confirm this is intended outside of local development.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+async def file_location():
+    """Dependency that provides the path of the shared temporary directory."""
+    return temp.name
+
+
+app.mount("/web_page", StaticFiles(directory="web_page"), name="static")
+
+
+@app.post("/upload")
+async def upload_video(path: str = Depends(file_location), file: UploadFile = File(...)):
+    full_name = os.path.join(path, file.filename)
+    if not file.filename.endswith(".mp4"):
+        return {"message": "File must be an MP4 file."}
+    try:
+        contents = await file.read()
+        with open(full_name, 'wb+') as f:
+            f.write(contents)
+    except Exception as e:
+        return {"message": "There was an error uploading the file"}
+    finally:
+        await file.close()
+
+    return {"message": "Success", "file_name": file.filename}
+
+
+@app.get("/uploaded/{file_name}", response_class=FileResponse)
+async def download_video(file_name: str, path: str = Depends(file_location)):
+    full_name = os.path.join(path, file_name)
+    return full_name
+
+
+@app.get("/subtitle/{file_name}", response_class=PlainTextResponse)
+async def subtitle(
+    file_name: str,
+    path: str = Depends(file_location),
+):
+    with open(os.path.join(path, file_name), "r") as f:
+        return f.read()
+
+
+@app.post("/transcribe/{file_name}")
+async def transcribe(
+        file_name: str,
+        script: str | None = None,
+        path: str = Depends(file_location),
+        subtitle_file: UploadFile | None = File(None)
+):
+    print(file_name)
+    print(script)
+    print(subtitle_file)
+
+    for_real = True
+    if for_real:
+        audio_location = extract_audio_from_mp4(video_name=file_name, temporary_folder_location=path)
+        upload_result = upload_audio_to_assembly(audio_path=os.path.join(path, audio_location))
+        upload_id = submit_for_transcription(audio_url=upload_result.upload_url)
+        i = 0
+        while True:
+            try:
+                transcription_result = get_transcription_result(job_id=upload_id.id)
+                if transcription_result.status in ["completed", "error"]:
+                    break
+            except:
+                pass
+
+            time.sleep(2 ** i)
+            i += 1
+
+        if script is None and subtitle_file is None:
+            generated_srt = srt_from_transcription(transcription=transcription_result)
+        elif subtitle_file is None:
+            generated_srt = srt_from_transcription_and_text(
+                transcription=transcription_result,
+                real_text=script,
+            )
+        else:
+            generated_srt = srt_from_transcription_and_text(
+                transcription=transcription_result,
+                real_text=script,
+            )
+            # generated_srt = srt_from_transcription_and_srt(
+            #     transcription=transcription_result,
+            #     srt_str=str(await subtitle_file.read()),
+            # )
+    else:
+        generated_srt = """1
+00:00:01,450 --> 00:00:05,986
+Slovenia, officially the Republic of Slovenia,
+is a country in Central Europe. 
+
+2
+00:00:05,986 --> 00:00:09,178
+It is bordered by Italy to the
+west, Austria to the north, 
+
+3
+00:00:09,178 --> 00:00:13,666
+Hungary to the northeast, Croatia to
+the southeast, and the Adriatic Sea 
+
+4
+00:00:13,666 --> 00:00:15,110
+to the southwest.
+
+
+5
+00:00:15,110 --> 00:00:18,022
+Slovenia is mostly mountainous
+and forested. 
+
+6
+00:00:18,022 --> 00:00:22,070
+Slovenes constitute over 80% of
+the country's population. 
+
+7
+00:00:22.010 --> 00:00:25.110
+Slovene, a South Slavic language,
+is the official language. 
+"""
+
+    subtitle_name = f"{str(uuid.uuid4())}.srt"
+    full_subtitle_name = os.path.join(path, subtitle_name)
+    write_to_file(full_subtitle_name, generated_srt)
+    convert_file_to_vtt(full_subtitle_name)
+    return {"subtitle_name": subtitle_name.replace(".srt", ".vtt")}
+
+
+# When executed as a script, serve the app with uvicorn on all interfaces, port 8000.
+if __name__ == '__main__':
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/hearsay/subtitles/__init__.py b/hearsay/subtitles/__init__.py