
Commit

Migrate speech models to GCP (#1562)
beastoin authored Dec 20, 2024
2 parents 3fd9680 + 24702ca commit b549739
Showing 4 changed files with 84 additions and 45 deletions.
61 changes: 48 additions & 13 deletions .github/workflows/gcp_models.yml
@@ -11,6 +11,7 @@ on:
env:
SERVICE: models
REGION: us-central1
CONTAINER_NAME: speech-models

jobs:
deploy:
@@ -21,27 +22,61 @@ jobs:

runs-on: ubuntu-latest
steps:
# To work around the "no space left on device" issue on GitHub-hosted runners
- name: Delete huge unnecessary tools folder
run: rm -rf /opt/hostedtoolcache

- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Google Auth
id: auth
uses: 'google-github-actions/auth@v0'
uses: 'google-github-actions/auth@v2'
with:
credentials_json: ${{ secrets.GCP_CREDENTIALS }}

- run: gcloud auth configure-docker

- name: Build and Push Docker image
run: |
docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }} -f backend/modal/Dockerfile .
docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
- name: Deploy to Cloud Run
id: deploy
uses: google-github-actions/deploy-cloudrun@v0
with:
service: ${{ env.SERVICE }}
region: ${{ env.REGION }}
image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}
docker build -t gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7} -f backend/modal/Dockerfile .
docker push gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}
- name: Create SSH Key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SPEECH_MODELS_SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
- name: Deploy Docker image
run: |
ssh -o StrictHostKeyChecking=no \
${{ secrets.SPEECH_MODELS_SSH_USERNAME }}@${{ secrets.SPEECH_MODELS_SSH_HOST }} \
"set -x; \
echo '[+] Pull latest Speech Models image...'; \
docker pull gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}; \
echo '[+] Remove current Speech Models container...'; \
docker rm -f ${{ env.CONTAINER_NAME }}; \
echo '[+] Start new Speech Models container...'; \
docker run -d --name ${{ env.CONTAINER_NAME }} -p 8080:8080 \
--volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64 \
--volume /var/lib/nvidia/bin:/usr/local/nvidia/bin \
--device /dev/nvidia0:/dev/nvidia0 \
--device /dev/nvidia-uvm:/dev/nvidia-uvm \
--device /dev/nvidiactl:/dev/nvidiactl \
-e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
-e HUGGINGFACE_TOKEN=${{ secrets.HUGGINGFACE_TOKEN }} \
gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}:${GITHUB_SHA::7}"
# - name: Deploy to Cloud Run
# id: deploy
# uses: google-github-actions/deploy-cloudrun@v2
# with:
# service: ${{ env.SERVICE }}
# region: ${{ env.REGION }}
# image: gcr.io/${{ vars.GCP_PROJECT_ID }}/${{ env.SERVICE }}

# If required, use the Cloud Run url output in later steps
- name: Show Output
run: echo ${{ steps.deploy.outputs.url }}
# - name: Show Output
# run: echo ${{ steps.deploy.outputs.url }}
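
The new deploy step binds the host's NVIDIA driver into the container through the --volume and --device flags above. A minimal sanity check one might run inside the deployed speech-models container (a sketch; torch is available because vad_modal.py already depends on it):

import torch

if torch.cuda.is_available():
    # The driver libraries mounted from /var/lib/nvidia on the host make the GPU visible here.
    print("CUDA device:", torch.cuda.get_device_name(0))
else:
    print("No GPU visible - check the --device and --volume flags passed to docker run")
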
32 changes: 23 additions & 9 deletions backend/modal/Dockerfile
@@ -1,16 +1,30 @@
FROM tiangolo/uvicorn-gunicorn:python3.11
FROM python:3.11 AS builder

RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
RUN apt-get install unzip
RUN apt-get -y install python3
RUN apt-get -y install python3-pip
RUN apt-get -y install git
RUN apt-get -y install ffmpeg
ENV PATH="/opt/venv/bin:$PATH"
RUN python -m venv /opt/venv

COPY backend/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

COPY backend/modal/ /app
FROM python:3.11-slim

WORKDIR /app
ENV PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/venv/bin:$PATH"
ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64"

RUN apt-get update && apt-get -y install build-essential ffmpeg curl unzip wget software-properties-common && \
wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
dpkg -i cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb && \
cp /var/cuda-repo-debian11-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
add-apt-repository contrib && \
apt-get update && \
apt-get -y install cuda-toolkit-12-6 && \
rm -rf /var/lib/apt/lists/* cuda-repo-debian11-12-6-local_12.6.3-560.35.05-1_amd64.deb

COPY --from=builder /opt/venv /opt/venv
COPY backend/database /app/database
COPY backend/utils /app/utils
COPY backend/modal/ .

EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
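
The Dockerfile now uses a two-stage build: the builder stage installs the Python requirements into /opt/venv, and the slim runtime stage copies that virtualenv and adds the CUDA 12.6 toolkit plus ffmpeg. A minimal in-image sanity check (a sketch, assuming only the PATH and venv locations set by the ENV lines above):

import shutil
import sys

print("python prefix:", sys.prefix)       # expected: /opt/venv, copied from the builder stage
print("ffmpeg:", shutil.which("ffmpeg"))  # installed via apt-get in the runtime stage
print("nvcc:", shutil.which("nvcc"))      # provided by cuda-toolkit-12-6 on /usr/local/cuda/bin
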
9 changes: 4 additions & 5 deletions backend/modal/main.py
@@ -3,20 +3,19 @@
from fastapi import FastAPI, UploadFile, File, Form

from speech_profile_modal import ResponseItem, endpoint as speaker_identification_endpoint
from vad_modal import endpoint as vad_endpoint
from vad_modal import vad_endpoint

app = FastAPI()


@app.post('/v1/speaker-identification')
def speaker_identification(
uid: str, audio_file: UploadFile = File, segments: str = Form(...)
) -> List[ResponseItem]:
print('speaker_identification')
return speaker_identification_endpoint(uid, audio_file, segments)


@app.post('/v1/vad')
def vad(audio_file: UploadFile = File):
def vad(file: UploadFile = File):
print('vad')
return vad_endpoint(audio_file)
print(vad_endpoint)
return vad_endpoint(file)
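
With the rename above, the /v1/vad route forwards its `file` upload to vad_endpoint. A hypothetical smoke test against the running container (the base URL and sample.wav are assumptions, and requests is not one of the service's own dependencies):

import requests

BASE_URL = "http://localhost:8080"  # assumed address of the deployed speech-models container

with open("sample.wav", "rb") as f:
    resp = requests.post(f"{BASE_URL}/v1/vad", files={"file": ("sample.wav", f, "audio/wav")})

resp.raise_for_status()
print(resp.json())  # expected: a list of {"start": ..., "end": ..., "duration": ...} segments
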
27 changes: 9 additions & 18 deletions backend/modal/vad_modal.py
@@ -3,7 +3,6 @@

import torch
from fastapi import UploadFile
from modal import App, web_endpoint, Secret, Image
from pyannote.audio import Pipeline

# Instantiate pretrained voice activity detection pipeline
@@ -13,26 +12,18 @@
use_auth_token=os.getenv('HUGGINGFACE_TOKEN')
).to(device)

app = App(name='vad')
image = (
Image.debian_slim()
.pip_install("pyannote.audio")
.pip_install("torch")
.pip_install("torchaudio")
)
# app = App(name='vad')
# image = (
# Image.debian_slim()
# .pip_install("pyannote.audio")
# .pip_install("torch")
# .pip_install("torchaudio")
# )

os.makedirs('_temp', exist_ok=True)


@app.function(
image=image,
keep_warm=1,
memory=(1024, 2048),
cpu=4,
secrets=[Secret.from_name('huggingface-token')],
)
@web_endpoint(method='POST')
def endpoint(file: UploadFile):
def vad_endpoint(file: UploadFile):
upload_id = str(uuid.uuid4())
file_path = f"_temp/{upload_id}_{file.filename}"
with open(file_path, 'wb') as f:
@@ -47,4 +38,4 @@ def endpoint(file: UploadFile):
'end': segment.end,
'duration': segment.duration,
})
return data
return data
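
The collapsed hunk hides the body that applies the pipeline to the uploaded file. A hedged sketch of how that part typically looks with the standard pyannote.audio voice-activity-detection API (an illustration, not necessarily the exact hidden code; the model name and token variable mirror the lines shown above):

import os

import torch
from pyannote.audio import Pipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = Pipeline.from_pretrained(
    "pyannote/voice-activity-detection",
    use_auth_token=os.getenv("HUGGINGFACE_TOKEN"),
).to(device)

def speech_segments(file_path: str) -> list[dict]:
    # Run VAD on an audio file on disk and collect start/end/duration per speech turn.
    annotation = pipeline(file_path)
    return [
        {"start": s.start, "end": s.end, "duration": s.duration}
        for s in annotation.get_timeline().support()
    ]
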
