Feat/cortex enterprise #1538

Draft · wants to merge 20 commits into base: dev
Commits (20)
cb8f25f
chore: change update to patch
vansangpfiev Oct 7, 2024
7f0349c
fix: swagger
vansangpfiev Oct 7, 2024
8d8846d
fix: pull api
vansangpfiev Oct 7, 2024
1a97362
Merge branch 'dev' into fix/pull-api
vansangpfiev Oct 7, 2024
dda27e1
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 14, 2024
b32fac7
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 14, 2024
b3fd797
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 16, 2024
3ece86d
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 16, 2024
1888d6b
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 16, 2024
cfe1c61
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 17, 2024
e1bee28
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 20, 2024
d5e9a14
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 22, 2024
c68acde
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
nguyenhoangthuan99 Oct 22, 2024
8423a15
feat: init cortex enterprise
nguyenhoangthuan99 Oct 23, 2024
ecd2420
Merge branch 'dev' into feat/cortex-enterprise
nguyenhoangthuan99 Oct 23, 2024
9e6936d
add docker
nguyenhoangthuan99 Oct 23, 2024
f0d654b
Merge branch 'feat/cortex-enterprise' of github.com:janhq/cortex.cpp …
nguyenhoangthuan99 Oct 23, 2024
c4fd267
add build docker compose and support multiple audio format
nguyenhoangthuan99 Oct 23, 2024
3582c67
Integrate TTS services and add openai api test
nguyenhoangthuan99 Oct 24, 2024
bc6938f
Merge branch 'dev' into feat/cortex-enterprise
nguyenhoangthuan99 Oct 31, 2024
6 changes: 5 additions & 1 deletion .gitignore
@@ -21,4 +21,8 @@ platform/command
platform/src/infrastructure/commanders/test/test_data
**/vcpkg_installed
engine/test.db
!docs/yarn.lock
!docs/yarn.lock
__pycache__
*.log
*.db
.pytest_cache/
11 changes: 11 additions & 0 deletions enterprises/Dockerfile
@@ -0,0 +1,11 @@
FROM python:3.12

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY ./app /app/app
COPY config.yml /app/config.yml

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
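
A minimal build-and-run sketch for this image, assuming the build context is the enterprises/ directory that contains requirements.txt, app/ and config.yml (the image tag is illustrative):

# Build from the directory holding this Dockerfile's COPY sources
docker build -t cortex-enterprise-api enterprises/
# uvicorn listens on port 8000 inside the container
docker run --rm -p 8000:8000 cortex-enterprise-api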
83 changes: 83 additions & 0 deletions enterprises/ai-services/cortex/Dockerfile
@@ -0,0 +1,83 @@
FROM ubuntu:22.04 as base

FROM base as build

ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
wget \
jq \
tar \
openmpi-bin \
libopenmpi-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN apt-get update && apt-get install -y --no-install-recommends \
git \
uuid-dev \
lsb-release \
software-properties-common \
gpg \
zip \
unzip \
gcc \
g++ \
ninja-build \
pkg-config \
openssl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \
apt-get update && \
apt-get install -y cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY . /app/
RUN git clone --recurse https://github.com/janhq/cortex.cpp.git
RUN cd cortex.cpp && git submodule update --init && cd engine && make configure-vcpkg && make build CMAKE_EXTRA_FLAGS="-DCORTEX_CPP_VERSION=$(git rev-parse HEAD) -DCMAKE_BUILD_TEST=OFF -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"

FROM base as runtime

ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
wget \
jq \
tar \
openmpi-bin \
libopenmpi-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ARG CORTEX_LLAMACPP_VERSION=latest

COPY --from=build /app/cortex.cpp/engine/build/cortex /usr/local/bin/cortex
COPY --from=build /app/cortex.cpp/engine/build/cortex-server /usr/local/bin/cortex-server

COPY ./download-cortex.llamacpp.sh /tmp/download-cortex.llamacpp.sh

# Download the requested cortex.llamacpp engine release (latest by default)
RUN chmod +x /tmp/download-cortex.llamacpp.sh && /bin/bash /tmp/download-cortex.llamacpp.sh ${CORTEX_LLAMACPP_VERSION}

# Copy the entrypoint script
COPY ./entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
EXPOSE 39281
RUN cortex start
RUN cortex pull ichigo:3b-gguf-q8-0
HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \
CMD curl -f http://127.0.0.1:39281/healthz || exit 1

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
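
For reference, a hedged build-and-run sketch for this image (the tag and container name are illustrative; port 39281 and the /healthz probe come from the EXPOSE and HEALTHCHECK directives above):

docker build -t cortex-server enterprises/ai-services/cortex/
docker run -d --name cortex -p 39281:39281 cortex-server
# Same probe the HEALTHCHECK runs
curl -f http://127.0.0.1:39281/healthz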
30 changes: 30 additions & 0 deletions enterprises/ai-services/cortex/download-cortex.llamacpp.sh
@@ -0,0 +1,30 @@
#!/bin/bash

VERSION=${1:-latest}

# Get the latest version of the cortex.llamacpp
if [ "$VERSION" = "latest" ]; then
VERSION=$(curl -s https://api.github.com/repos/janhq/cortex.llamacpp/releases/latest | jq -r '.tag_name' | sed 's/^v//');
fi

# Create the directory to store the cortex.llamacpp
mkdir -p /opt/cortex.llamacpp
cd /opt/cortex.llamacpp

# Download the cortex.llamacpp engines
echo "Downloading cortex.llamacpp version $VERSION"
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-11-7.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-12-0.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-11-7.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-12-0.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-11-7.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-12-0.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-11-7.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-12-0.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-vulkan.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cuda-11-7-linux-amd64.tar.gz
wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cuda-12-0-linux-amd64.tar.gz
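
Usage sketch for the script above (the pinned version is illustrative and must match an existing cortex.llamacpp release tag, without the leading "v"):

# Resolve and download the latest release artifacts
./download-cortex.llamacpp.sh
# Or pin an explicit engine version
./download-cortex.llamacpp.sh 0.1.25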
16 changes: 16 additions & 0 deletions enterprises/ai-services/cortex/entrypoint.sh
@@ -0,0 +1,16 @@
#!/bin/sh

# Install cortex.llamacpp engine

cortex engines install llama-cpp -s /opt/cortex.llamacpp
cortex -v

# Bind the API server to all interfaces so it is reachable from outside the container
sed -i 's/apiServerHost: 127.0.0.1/apiServerHost: 0.0.0.0/' /root/.cortexrc

# Start the cortex server
cortex start
# Keep the container running by tailing the log files
tail -f /root/cortexcpp/logs/cortex.log &
tail -f /root/cortexcpp/logs/cortex-cli.log &
wait
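
Since the sed edit assumes the default apiServerHost value written to /root/.cortexrc, a quick sanity check after the container is up (container name illustrative):

docker exec cortex grep apiServerHost /root/.cortexrc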
31 changes: 31 additions & 0 deletions enterprises/ai-services/tts/Dockerfile
@@ -0,0 +1,31 @@
FROM docker.io/pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime

# Set the MKL_SERVICE_FORCE_INTEL environment variable
ENV MKL_SERVICE_FORCE_INTEL=1

# Install dependencies
RUN apt-get -qq update; \
apt-get install -qqy --no-install-recommends \
gnupg2 wget ca-certificates apt-transport-https \
autoconf automake cmake dpkg-dev file make patch libc6-dev git

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
build-essential \
cmake \
sudo \
unzip \
curl \
wget \
git \
git-lfs \
jq \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get -y update \
&& apt-get install -y software-properties-common \
&& apt-get -y update \
&& add-apt-repository universe

WORKDIR /app
COPY ./requirements.txt /app
RUN pip3 install -r requirements.txt
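
Note that this Dockerfile installs dependencies but defines no CMD or ENTRYPOINT, and only requirements.txt is copied in, so app.py has to be provided and launched explicitly. A sketch under those assumptions (image tag and mount path are illustrative):

docker build -t cortex-tts enterprises/ai-services/tts/
docker run -d --gpus all -p 22312:22312 \
  -v "$(pwd)/enterprises/ai-services/tts:/app" \
  cortex-tts python3 /app/app.py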
163 changes: 163 additions & 0 deletions enterprises/ai-services/tts/app.py
@@ -0,0 +1,163 @@
from fastapi import FastAPI, File, UploadFile, HTTPException
from pydantic import BaseModel
from fastapi.responses import JSONResponse
import uvicorn
import uuid
from enum import Enum
import torch
import torchaudio
import ffmpeg
import soundfile as sf # Ensure `soundfile` is installed
from io import BytesIO
from pathlib import Path
from whisperspeech.pipeline import Pipeline
from speakers import speaker_trump, speaker_5304, default_speaker
import logging
import time
import os
import base64
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


class AudioFormat(str, Enum):
WAV = "wav" # Supported by both backends
MP3 = "mp3" # Supported by ffmpeg
FLAC = "flac" # Supported by both
AAC = "aac" # Supported by ffmpeg
OGG = "ogg" # Supported by ffmpeg
OPUS = "opus" # Supported by ffmpeg
PCM = "pcm" # Raw PCM data


# Format to backend mapping
FORMAT_BACKENDS = {
AudioFormat.WAV: ["soundfile", "ffmpeg"],
AudioFormat.MP3: ["ffmpeg"],
AudioFormat.FLAC: ["soundfile", "ffmpeg"],
AudioFormat.AAC: ["ffmpeg"],
AudioFormat.OGG: ["ffmpeg"],
AudioFormat.OPUS: ["ffmpeg"],
AudioFormat.PCM: ["soundfile"]
}


def encode_audio_to_base64(byte_data: bytes) -> str:
    """Encode raw audio bytes as a UTF-8 base64 string."""
    try:
        return base64.b64encode(byte_data).decode('utf-8')
    except (TypeError, ValueError) as e:
        raise ValueError(f"Error encoding audio bytes: {e}")


class AudioProcessor:
def __init__(self):
self.available_backends = torchaudio.list_audio_backends()
logger.info(f"Available backends: {self.available_backends}")

# Check for FFmpeg support
self.has_ffmpeg = "ffmpeg" in self.available_backends
if not self.has_ffmpeg:
logger.warning(
"FFMPEG backend not available. Some formats may not be supported.")

def _get_best_backend(self, format: AudioFormat) -> str:
"""Determine the best backend for the given format."""
supported_backends = FORMAT_BACKENDS[format]
for backend in supported_backends:
if backend in self.available_backends:
return backend
raise ValueError(f"No available backend supports format {format}")

def get_audio_bytes(self, audio: torch.Tensor, sample_rate: int, output_format: AudioFormat) -> bytes:
"""Return raw bytes of the audio in the specified format."""
backend = self._get_best_backend(output_format)
logger.info(f"Using backend '{backend}' for format '{output_format}'.")

if backend == "soundfile":
return self._get_bytes_with_soundfile(audio, sample_rate, output_format)
elif backend == "ffmpeg":
return self._get_bytes_with_ffmpeg(audio, sample_rate, output_format)

def _get_bytes_with_soundfile(self, audio: torch.Tensor, sample_rate: int, output_format: AudioFormat) -> bytes:
"""Get raw bytes using the soundfile backend."""
audio_np = audio.cpu().numpy().T
buffer = BytesIO()
sf.write(buffer, audio_np, sample_rate, format=output_format.value)
buffer.seek(0)
return buffer.read()

def _get_bytes_with_ffmpeg(self, audio: torch.Tensor, sample_rate: int, output_format: AudioFormat) -> bytes:
"""Get raw bytes using the ffmpeg backend."""
wav_buffer = BytesIO()
torchaudio.save(wav_buffer, audio.cpu(), sample_rate, format="wav")
wav_buffer.seek(0)

process = (
ffmpeg
.input("pipe:0", format="wav")
.output("pipe:1", format=output_format.value)
.run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
)

stdout, stderr = process.communicate(input=wav_buffer.read())
if process.returncode != 0:
raise RuntimeError(f"FFmpeg failed: {stderr.decode()}")

return stdout


device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = Pipeline(
t2s_ref="collabora/whisperspeech:t2s-v1.1-small-en+pl.model",
s2a_ref="collabora/whisperspeech:s2a-q4-tiny-en+pl.model",
device=device,
torch_compile=False,
)
processor = AudioProcessor()


def generate_audio(text, output_format: AudioFormat, **kwargs) -> bytes:
"""Generate audio and return raw bytes."""
global pipe, processor
atoks = pipe.generate_atoks(text, **kwargs)
audio = pipe.vocoder.decode(atoks)
sample_rate = 24000
if audio.dim() == 1:
audio = audio.unsqueeze(0)
return processor.get_audio_bytes(audio, sample_rate, output_format)


class TTSRequest(BaseModel):
text: str
voice: str
format: AudioFormat


app = FastAPI()


@app.post("/tts")
async def text_to_speech(request: TTSRequest):
    try:
        # Generate speech for at most MAX_CHARACTER characters of the request
        # text, then base64-encode the audio bytes for the JSON response
        max_chars = int(os.environ.get("MAX_CHARACTER", 4096))
        data = encode_audio_to_base64(
            generate_audio(request.text[:max_chars], request.format))
        return {
            "id": "audio_" + str(uuid.uuid4()),
            "expires_at": int(time.time()) + int(os.environ.get("EXPIRES_AFTER_SECONDS", 24 * 3600)),
            "data": data,
            "transcript": request.text
        }
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing the audio file: {e}")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=22312)
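
A request sketch against the /tts endpoint (host and port taken from the uvicorn.run call above; the voice value is illustrative, and the handler currently does not forward it to the pipeline):

curl -s -X POST http://127.0.0.1:22312/tts \
  -H 'Content-Type: application/json' \
  -d '{"text": "Hello from Cortex", "voice": "default", "format": "wav"}' \
  | jq -r '.data' | base64 -d > out.wav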
23 changes: 23 additions & 0 deletions enterprises/ai-services/tts/requirements.txt
@@ -0,0 +1,23 @@
uuid
fastapi
uvicorn
pydantic
openai-whisper==20231117
huggingface_hub
IPython
pyarrow
matplotlib
librosa
soundfile
datasets
encodec
boto3
fire
vector_quantize_pytorch
webdataset
whisperspeech
--extra-index-url https://download.pytorch.org/whl/cu121
torch==2.2.0
torchaudio==2.2.0
numpy==1.26.4
ffmpeg-python