From 48b6f6b6c40c556a02502c4e727d6b9788ff1498 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sat, 30 Nov 2024 16:13:01 +0700 Subject: [PATCH 1/9] Migrate speech models to GCP Compute engine --- .github/workflows/gcp_models.yml | 27 ++++++++++++++++----------- backend/modal/Dockerfile | 22 +++++++++++++--------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml index 05b02b084..34faa8a98 100644 --- a/.github/workflows/gcp_models.yml +++ b/.github/workflows/gcp_models.yml @@ -4,7 +4,8 @@ name: Deploy Speech Models to Cloud RUN on: push: - branches: [ "main", "development" ] + # branches: [ "main", "development" ] + branches: "migrate-speed-models-to-gcp" paths: - 'backend/modal/**' @@ -14,7 +15,8 @@ env: jobs: deploy: - environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }} + # environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }} + environment: 'development' permissions: contents: 'read' id-token: 'write' @@ -22,25 +24,28 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Google Auth id: auth - uses: 'google-github-actions/auth@v0' + uses: 'google-github-actions/auth@v2' with: credentials_json: ${{ secrets.GCP_CREDENTIALS }} + - run: gcloud auth configure-docker + - name: Build and Push Docker image run: | docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile . docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} - - name: Deploy to Cloud Run - id: deploy - uses: google-github-actions/deploy-cloudrun@v0 - with: - service: ${{ env.SERVICE }} - region: ${{ env.REGION }} - image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} + + # - name: Deploy to Cloud Run + # id: deploy + # uses: google-github-actions/deploy-cloudrun@v2 + # with: + # service: ${{ env.SERVICE }} + # region: ${{ env.REGION }} + # image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} # If required, use the Cloud Run url output in later steps - name: Show Output diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile index d45213648..0a6314cb4 100644 --- a/backend/modal/Dockerfile +++ b/backend/modal/Dockerfile @@ -1,16 +1,20 @@ -FROM tiangolo/uvicorn-gunicorn:python3.11 +FROM python:3.11 AS builder -RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl -RUN apt-get install unzip -RUN apt-get -y install python3 -RUN apt-get -y install python3-pip -RUN apt-get -y install git -RUN apt-get -y install ffmpeg +ENV PATH="/opt/venv/bin:$PATH" +RUN python -m venv /opt/venv COPY backend/requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt -COPY backend/modal/ /app +FROM python:3.11-slim + +WORKDIR /app +ENV PATH="/opt/venv/bin:$PATH" + +RUN apt-get update && apt-get -y install ffmpeg curl unzip && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /opt/venv /opt/venv +COPY backend/modal/ . EXPOSE 8080 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] From 6e3ba325588c8dc909b48b9610152c1328330fa6 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Wed, 4 Dec 2024 21:24:01 +0700 Subject: [PATCH 2/9] Create models image on prod --- .github/workflows/gcp_models.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml index 34faa8a98..23e42b3f2 100644 --- a/.github/workflows/gcp_models.yml +++ b/.github/workflows/gcp_models.yml @@ -6,8 +6,8 @@ on: push: # branches: [ "main", "development" ] branches: "migrate-speed-models-to-gcp" - paths: - - 'backend/modal/**' + # paths: + # - 'backend/modal/**' env: SERVICE: models @@ -16,7 +16,7 @@ env: jobs: deploy: # environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }} - environment: 'development' + environment: 'prod' permissions: contents: 'read' id-token: 'write' From 7c736f276ea479dfa2c5a2e8ba409c2d6772964b Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sat, 7 Dec 2024 15:57:53 +0700 Subject: [PATCH 3/9] Install CUDA toolkit in docker image --- .github/workflows/gcp_models.yml | 4 ++++ backend/modal/Dockerfile | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml index 23e42b3f2..0e7328c5f 100644 --- a/.github/workflows/gcp_models.yml +++ b/.github/workflows/gcp_models.yml @@ -23,6 +23,10 @@ jobs: runs-on: ubuntu-latest steps: + # To workaround "no space left on device" issue of GitHub-hosted runner + - name: Delete huge unnecessary tools folder + run: rm -rf /opt/hostedtoolcache + - name: Checkout uses: actions/checkout@v4 diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile index 0a6314cb4..985639c70 100644 --- a/backend/modal/Dockerfile +++ b/backend/modal/Dockerfile @@ -11,7 +11,14 @@ FROM python:3.11-slim WORKDIR /app ENV PATH="/opt/venv/bin:$PATH" -RUN apt-get update && apt-get -y install ffmpeg curl unzip && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get -y install build-essential ffmpeg curl unzip wget software-properties-common && \ +wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \ +dpkg -i cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \ +cp /var/cuda-repo-debian11-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \ +add-apt-repository contrib && \ +apt-get update && \ +apt-get -y install cuda-toolkit-12-6 && \ +rm -rf /var/lib/apt/lists/* cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb COPY --from=builder /opt/venv /opt/venv COPY backend/modal/ . From 8fa3b111de28a0f033c217d430a7a371194fa37a Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sun, 8 Dec 2024 08:17:22 +0700 Subject: [PATCH 4/9] Fix Dockerfile --- backend/modal/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile index 985639c70..4b6d43148 100644 --- a/backend/modal/Dockerfile +++ b/backend/modal/Dockerfile @@ -21,6 +21,8 @@ apt-get -y install cuda-toolkit-12-6 && \ rm -rf /var/lib/apt/lists/* cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb COPY --from=builder /opt/venv /opt/venv +COPY backend/database /app/database +COPY backend/utils /app/utils COPY backend/modal/ . EXPOSE 8080 From 330617acf443352aa64b1a54fe11e53028891bb3 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 9 Dec 2024 12:27:56 +0700 Subject: [PATCH 5/9] Fix error in VAD --- backend/modal/main.py | 2 +- backend/modal/vad_modal.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/modal/main.py b/backend/modal/main.py index 47ff560b7..51d2c0d4d 100644 --- a/backend/modal/main.py +++ b/backend/modal/main.py @@ -17,6 +17,6 @@ def speaker_identification( @app.post('/v1/vad') -def vad(audio_file: UploadFile = File): +def vad(audio_file: UploadFile = File(...)): print('vad') return vad_endpoint(audio_file) diff --git a/backend/modal/vad_modal.py b/backend/modal/vad_modal.py index 82353a51e..0577ec43e 100644 --- a/backend/modal/vad_modal.py +++ b/backend/modal/vad_modal.py @@ -24,14 +24,14 @@ os.makedirs('_temp', exist_ok=True) -@app.function( - image=image, - keep_warm=1, - memory=(1024, 2048), - cpu=4, - secrets=[Secret.from_name('huggingface-token')], -) -@web_endpoint(method='POST') +# @app.function( +# image=image, +# keep_warm=1, +# memory=(1024, 2048), +# cpu=4, +# secrets=[Secret.from_name('huggingface-token')], +# ) +# @web_endpoint(method='POST') def endpoint(file: UploadFile): upload_id = str(uuid.uuid4()) file_path = f"_temp/{upload_id}_{file.filename}" From cdeba6d13ef7f702069ebd3ff1fe6b617f988c01 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sun, 15 Dec 2024 20:19:57 +0700 Subject: [PATCH 6/9] Fix VAD issue --- backend/modal/main.py | 7 +++---- backend/modal/vad_modal.py | 27 +++++++++------------------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/backend/modal/main.py b/backend/modal/main.py index 51d2c0d4d..2bc4ef876 100644 --- a/backend/modal/main.py +++ b/backend/modal/main.py @@ -3,11 +3,10 @@ from fastapi import FastAPI, UploadFile, File, Form from speech_profile_modal import ResponseItem, endpoint as speaker_identification_endpoint -from vad_modal import endpoint as vad_endpoint +from vad_modal import vad_endpoint app = FastAPI() - @app.post('/v1/speaker-identification') def speaker_identification( uid: str, audio_file: UploadFile = File, segments: str = Form(...) @@ -15,8 +14,8 @@ def speaker_identification( print('speaker_identification') return speaker_identification_endpoint(uid, audio_file, segments) - @app.post('/v1/vad') -def vad(audio_file: UploadFile = File(...)): +def vad(audio_file: UploadFile = File): print('vad') + print(vad_endpoint) return vad_endpoint(audio_file) diff --git a/backend/modal/vad_modal.py b/backend/modal/vad_modal.py index 0577ec43e..4d5b82150 100644 --- a/backend/modal/vad_modal.py +++ b/backend/modal/vad_modal.py @@ -3,7 +3,6 @@ import torch from fastapi import UploadFile -from modal import App, web_endpoint, Secret, Image from pyannote.audio import Pipeline # Instantiate pretrained voice activity detection pipeline @@ -13,26 +12,18 @@ use_auth_token=os.getenv('HUGGINGFACE_TOKEN') ).to(device) -app = App(name='vad') -image = ( - Image.debian_slim() - .pip_install("pyannote.audio") - .pip_install("torch") - .pip_install("torchaudio") -) +# app = App(name='vad') +# image = ( +# Image.debian_slim() +# .pip_install("pyannote.audio") +# .pip_install("torch") +# .pip_install("torchaudio") +# ) os.makedirs('_temp', exist_ok=True) -# @app.function( -# image=image, -# keep_warm=1, -# memory=(1024, 2048), -# cpu=4, -# secrets=[Secret.from_name('huggingface-token')], -# ) -# @web_endpoint(method='POST') -def endpoint(file: UploadFile): +def vad_endpoint(file: UploadFile): upload_id = str(uuid.uuid4()) file_path = f"_temp/{upload_id}_{file.filename}" with open(file_path, 'wb') as f: @@ -47,4 +38,4 @@ def endpoint(file: UploadFile): 'end': segment.end, 'duration': segment.duration, }) - return data + return data \ No newline at end of file From 95b51cc223e6874ea3414a3e9e903cd1fc830997 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 16 Dec 2024 15:43:27 +0700 Subject: [PATCH 7/9] Fix VAD issue on main.py --- backend/modal/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/modal/main.py b/backend/modal/main.py index 2bc4ef876..5a1ee2ea6 100644 --- a/backend/modal/main.py +++ b/backend/modal/main.py @@ -15,7 +15,7 @@ def speaker_identification( return speaker_identification_endpoint(uid, audio_file, segments) @app.post('/v1/vad') -def vad(audio_file: UploadFile = File): +def vad(file: UploadFile = File): print('vad') print(vad_endpoint) - return vad_endpoint(audio_file) + return vad_endpoint(file) From 95bf0e5b7dd836629c2d503f2ef6268e344edad7 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 17 Dec 2024 14:38:58 +0700 Subject: [PATCH 8/9] Add PATH of nvidia bin and lib --- backend/modal/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/modal/Dockerfile b/backend/modal/Dockerfile index 4b6d43148..c22f79020 100644 --- a/backend/modal/Dockerfile +++ b/backend/modal/Dockerfile @@ -9,7 +9,8 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt FROM python:3.11-slim WORKDIR /app -ENV PATH="/opt/venv/bin:$PATH" +ENV PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/venv/bin:$PATH" +ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64" RUN apt-get update && apt-get -y install build-essential ffmpeg curl unzip wget software-properties-common && \ wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \ From 24702ca6a6d1b77dc8ec622900b16c4656dd38ca Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Thu, 19 Dec 2024 18:59:20 +0700 Subject: [PATCH 9/9] Write CI/CD pipeline for speech models service --- .github/workflows/gcp_models.yml | 48 ++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/.github/workflows/gcp_models.yml b/.github/workflows/gcp_models.yml index 0e7328c5f..df368f1f6 100644 --- a/.github/workflows/gcp_models.yml +++ b/.github/workflows/gcp_models.yml @@ -4,19 +4,18 @@ name: Deploy Speech Models to Cloud RUN on: push: - # branches: [ "main", "development" ] - branches: "migrate-speed-models-to-gcp" - # paths: - # - 'backend/modal/**' + branches: [ "main", "development" ] + paths: + - 'backend/modal/**' env: SERVICE: models REGION: us-central1 + CONTAINER_NAME: speech-models jobs: deploy: - # environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }} - environment: 'prod' + environment: ${{ (github.ref == 'refs/heads/development' && 'development') || (github.ref == 'refs/heads/main' && 'prod') }} permissions: contents: 'read' id-token: 'write' @@ -40,9 +39,36 @@ jobs: - name: Build and Push Docker image run: | - docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile . - docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} - + docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7} -f backend/modal/Dockerfile . + docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7} + + - name: Create SSH Key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SPEECH_MODELS_SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 + + - name: Deploy Docker image + run: | + ssh -o StrictHostKeyChecking=no \ + ${{ secrets.SPEECH_MODELS_SSH_USERNAME }}@${{ secrets.SPEECH_MODELS_SSH_HOST }} \ + "set -x; \ + echo '[+] Pull latest Speech Models image...'; \ + docker pull gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}; \ + echo '[+] Remove current Speech Models container...'; \ + docker rm -f ${{ env.CONTAINER_NAME }}; \ + echo '[+ + ] Start new Speech Models container...'; \ + docker run -d --name ${{ env.CONTAINER_NAME }} -p 8080:8080 \ + --volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \ + --volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \ + --device /dev/nvidia0:/dev/nvidia0 \ + --device /dev/nvidia-uvm:/dev/nvidia-uvm \ + --device /dev/nvidiactl:/dev/nvidiactl \ + -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \ + -e HUGGINGFACE_TOKEN=${{ secrets.HUGGINGFACE_TOKEN }} \ + gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}" + # - name: Deploy to Cloud Run # id: deploy # uses: google-github-actions/deploy-cloudrun@v2 @@ -52,5 +78,5 @@ jobs: # image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} # If required, use the Cloud Run url output in later steps - - name: Show Output - run: echo ${{ steps.deploy.outputs.url }} \ No newline at end of file + # - name: Show Output + # run: echo ${{ steps.deploy.outputs.url }} \ No newline at end of file