From 1379185d82f02676533a7729e0d77a6115686bce Mon Sep 17 00:00:00 2001 From: rchan Date: Wed, 12 Jun 2024 23:19:46 +0100 Subject: [PATCH 1/9] update dockerfiles and add app docker --- docker/app/Dockerfile | 14 ++++++++++++++ docker/create_index/Dockerfile | 2 +- docker/reginald/Dockerfile | 2 +- docker/slack_bot/Dockerfile | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 docker/app/Dockerfile diff --git a/docker/app/Dockerfile b/docker/app/Dockerfile new file mode 100644 index 00000000..05fd4d1b --- /dev/null +++ b/docker/app/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.11.4 + +WORKDIR /app + +# Setup Python prerequisites +RUN pip3 install --upgrade pip poetry setuptools wheel + +# Build Python project +COPY reginald reginald +COPY pyproject.toml . +COPY README.md . +RUN poetry install --extras api_bot + +CMD ["poetry", "run", "reginald", "app"] diff --git a/docker/create_index/Dockerfile b/docker/create_index/Dockerfile index e9de7ee7..8144f917 100644 --- a/docker/create_index/Dockerfile +++ b/docker/create_index/Dockerfile @@ -11,4 +11,4 @@ COPY pyproject.toml . COPY README.md . RUN poetry install -CMD ["poetry", "run", "reginald_create_index"] +CMD ["poetry", "run", "reginald", "create-index"] diff --git a/docker/reginald/Dockerfile b/docker/reginald/Dockerfile index 78471694..e36cb312 100644 --- a/docker/reginald/Dockerfile +++ b/docker/reginald/Dockerfile @@ -11,4 +11,4 @@ COPY pyproject.toml . COPY README.md . RUN poetry install -CMD ["poetry", "run", "reginald_run"] +CMD ["poetry", "run", "reginald", "run-all"] diff --git a/docker/slack_bot/Dockerfile b/docker/slack_bot/Dockerfile index 964c39f7..917e969b 100644 --- a/docker/slack_bot/Dockerfile +++ b/docker/slack_bot/Dockerfile @@ -11,4 +11,4 @@ COPY pyproject.toml . COPY README.md . RUN poetry install --extras api_bot -CMD ["poetry", "run", "reginald_run_api_bot"] +CMD ["poetry", "run", "reginald", "bot"] From f75fb3e5a49bdf21209d58acc96c152355c804b7 Mon Sep 17 00:00:00 2001 From: rchan Date: Wed, 12 Jun 2024 23:20:03 +0100 Subject: [PATCH 2/9] update pulumi main scripts --- azure/hack_week/__main__.py | 22 ++++++------ azure/production/__main__.py | 66 ++++++++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 20 deletions(-) diff --git a/azure/hack_week/__main__.py b/azure/hack_week/__main__.py index ee6c9b2f..64746b5d 100644 --- a/azure/hack_week/__main__.py +++ b/azure/hack_week/__main__.py @@ -7,6 +7,8 @@ storage, ) +from reginald.defaults import DEFAULT_ARGS + # Get some configuration variables stack_name = pulumi.get_stack() config = pulumi.Config() @@ -166,23 +168,23 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=DEFAULT_ARGS["max_input_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=DEFAULT_ARGS["k"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=DEFAULT_ARGS["chunk_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=DEFAULT_ARGS["chunk_overlap_ratio"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=DEFAULT_ARGS["num_output"], ), containerinstance.EnvironmentVariableArgs( name="OPENAI_AZURE_API_BASE", @@ -253,23 +255,23 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=DEFAULT_ARGS["max_input_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=DEFAULT_ARGS["k"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=DEFAULT_ARGS["chunk_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=DEFAULT_ARGS["chunk_overlap_ratio"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=DEFAULT_ARGS["num_output"], ), ], ports=[], diff --git a/azure/production/__main__.py b/azure/production/__main__.py index 8e5c56aa..315d41ed 100644 --- a/azure/production/__main__.py +++ b/azure/production/__main__.py @@ -7,6 +7,8 @@ storage, ) +from reginald.defaults import DEFAULT_ARGS + # Get some configuration variables stack_name = pulumi.get_stack() config = pulumi.Config() @@ -102,10 +104,6 @@ image="ghcr.io/alan-turing-institute/reginald_slackbot:main", name="reginald-production", # maximum of 63 characters environment_variables=[ - containerinstance.EnvironmentVariableArgs( - name="REGINALD_MODEL", - value="llama-index-llama-cpp", - ), containerinstance.EnvironmentVariableArgs( name="SLACK_APP_TOKEN", secure_value=config.get_secret("REGINALD_SLACK_APP_TOKEN"), @@ -154,29 +152,77 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=DEFAULT_ARGS["max_input_size"], + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_K", + value=DEFAULT_ARGS["k"], + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_SIZE", + value=DEFAULT_ARGS["chunk_size"], + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", + value=DEFAULT_ARGS["chunk_overlap_ratio"], + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_NUM_OUTPUT", + value=DEFAULT_ARGS["num_output"], + ), + ], + ports=[], + resources=containerinstance.ResourceRequirementsArgs( + requests=containerinstance.ResourceRequestsArgs( + cpu=2, + memory_in_gb=16, + ), + ), + volume_mounts=[ + containerinstance.VolumeMountArgs( + mount_path="/app/data", + name="llama-data", + ), + ], + ), + # reg index creation container + containerinstance.ContainerArgs( + image="ghcr.io/alan-turing-institute/reginald_create_index:main", + name="reginald-create-index", # maximum of 63 characters + environment_variables=[ + containerinstance.EnvironmentVariableArgs( + name="GITHUB_TOKEN", + secure_value=config.get_secret("GITHUB_TOKEN"), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_WHICH_INDEX", + value="reg", + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_MAX_INPUT_SIZE", + value=DEFAULT_ARGS["max_input_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=DEFAULT_ARGS["k"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=DEFAULT_ARGS["chunk_size"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=DEFAULT_ARGS["chunk_overlap_ratio"], ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=DEFAULT_ARGS["num_output"], ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=4, + cpu=2, memory_in_gb=16, ), ), From 7f6c40290a3dbbac5043c0011f07b1fa96250cdd Mon Sep 17 00:00:00 2001 From: rchan Date: Wed, 12 Jun 2024 23:33:15 +0100 Subject: [PATCH 3/9] rename docker and how they get uploaded to github --- .github/workflows/build_docker_image.yaml | 6 ++++-- azure/hack_week/__main__.py | 4 ++-- docker/{app => reginald_app}/Dockerfile | 0 docker/{reginald => run_all}/Dockerfile | 0 4 files changed, 6 insertions(+), 4 deletions(-) rename docker/{app => reginald_app}/Dockerfile (100%) rename docker/{reginald => run_all}/Dockerfile (100%) diff --git a/.github/workflows/build_docker_image.yaml b/.github/workflows/build_docker_image.yaml index c26b1d48..f30dd748 100644 --- a/.github/workflows/build_docker_image.yaml +++ b/.github/workflows/build_docker_image.yaml @@ -20,12 +20,14 @@ jobs: fail-fast: false matrix: include: - - dockerfile: docker/reginald/Dockerfile - image: ghcr.io/${{ github.repository }}_reginald + - dockerfile: docker/run_all/Dockerfile + image: ghcr.io/${{ github.repository }}_run_all - dockerfile: docker/slack_bot/Dockerfile image: ghcr.io/${{ github.repository }}_slackbot - dockerfile: docker/create_index/Dockerfile image: ghcr.io/${{ github.repository }}_create_index + - dockerfile: docker/reginald_app/Dockerfile + image: ghcr.io/${{ github.repository }}_app permissions: packages: write contents: read diff --git a/azure/hack_week/__main__.py b/azure/hack_week/__main__.py index 64746b5d..f2c6628d 100644 --- a/azure/hack_week/__main__.py +++ b/azure/hack_week/__main__.py @@ -100,7 +100,7 @@ containers=[ # Reginald chat completion container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_reginald:main", + image="ghcr.io/alan-turing-institute/reginald_run_all:main", name="reginald-completion", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -143,7 +143,7 @@ ), # Reginald (public) container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_reginald:main", + image="ghcr.io/alan-turing-institute/reginald_run_all:main", name="reginald-gpt-azure", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( diff --git a/docker/app/Dockerfile b/docker/reginald_app/Dockerfile similarity index 100% rename from docker/app/Dockerfile rename to docker/reginald_app/Dockerfile diff --git a/docker/reginald/Dockerfile b/docker/run_all/Dockerfile similarity index 100% rename from docker/reginald/Dockerfile rename to docker/run_all/Dockerfile From 07178128eff1247f9d46a52e07ad3f81c6d923ea Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 04:11:23 +0100 Subject: [PATCH 4/9] bump action versions --- .github/workflows/build_docker_image.yaml | 11 +++++++---- .github/workflows/lint_code.yaml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_docker_image.yaml b/.github/workflows/build_docker_image.yaml index f30dd748..cef363ba 100644 --- a/.github/workflows/build_docker_image.yaml +++ b/.github/workflows/build_docker_image.yaml @@ -34,10 +34,13 @@ jobs: steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} @@ -45,12 +48,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: ${{ matrix.image }} - name: Build and push Docker images - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: file: ${{ matrix.dockerfile }} push: true diff --git a/.github/workflows/lint_code.yaml b/.github/workflows/lint_code.yaml index bf783801..4d67c065 100644 --- a/.github/workflows/lint_code.yaml +++ b/.github/workflows/lint_code.yaml @@ -13,7 +13,7 @@ jobs: name: Lint with pre-commit runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.11 From c27b3961d43858b9aa1b254bdef330677b860a3b Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 07:21:54 +0100 Subject: [PATCH 5/9] update action versions for uploading docker workflow --- .github/workflows/build_docker_image.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_docker_image.yaml b/.github/workflows/build_docker_image.yaml index cef363ba..c93daf04 100644 --- a/.github/workflows/build_docker_image.yaml +++ b/.github/workflows/build_docker_image.yaml @@ -36,11 +36,8 @@ jobs: - name: Check out the repo uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v3.2.0 with: registry: ghcr.io username: ${{ github.actor }} @@ -48,12 +45,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v5.5.0 with: images: ${{ matrix.image }} - name: Build and push Docker images - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v5.4.0 with: file: ${{ matrix.dockerfile }} push: true From 55c9f3db9c5aac392bb4415111fb0c1e0d97209a Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 08:06:51 +0100 Subject: [PATCH 6/9] hopefully smaller and faster docker builds --- docker/create_index/Dockerfile | 18 +++++++++++++----- docker/reginald_app/Dockerfile | 16 ++++++++++++---- docker/run_all/Dockerfile | 16 ++++++++++++---- docker/slack_bot/Dockerfile | 16 ++++++++++++---- 4 files changed, 49 insertions(+), 17 deletions(-) diff --git a/docker/create_index/Dockerfile b/docker/create_index/Dockerfile index 8144f917..0840e7e8 100644 --- a/docker/create_index/Dockerfile +++ b/docker/create_index/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites -RUN pip3 install --upgrade pip poetry setuptools wheel +RUN pip3 install --upgrade pip poetry==1.8.3 setuptools wheel + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --without dev CMD ["poetry", "run", "reginald", "create-index"] diff --git a/docker/reginald_app/Dockerfile b/docker/reginald_app/Dockerfile index 05fd4d1b..097e58b3 100644 --- a/docker/reginald_app/Dockerfile +++ b/docker/reginald_app/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites RUN pip3 install --upgrade pip poetry setuptools wheel +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install --extras api_bot +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --extras api_bot --without dev CMD ["poetry", "run", "reginald", "app"] diff --git a/docker/run_all/Dockerfile b/docker/run_all/Dockerfile index e36cb312..0bdeaf04 100644 --- a/docker/run_all/Dockerfile +++ b/docker/run_all/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites RUN pip3 install --upgrade pip poetry setuptools wheel +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --without dev CMD ["poetry", "run", "reginald", "run-all"] diff --git a/docker/slack_bot/Dockerfile b/docker/slack_bot/Dockerfile index 917e969b..05c07306 100644 --- a/docker/slack_bot/Dockerfile +++ b/docker/slack_bot/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites RUN pip3 install --upgrade pip poetry setuptools wheel +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install --extras api_bot +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --extras api_bot --without dev CMD ["poetry", "run", "reginald", "bot"] From bfd56ee0d193bcc9eafba445327c0c38a601f3ca Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 12:13:47 +0100 Subject: [PATCH 7/9] draft pulumi scripts --- azure/production/__main__.py | 90 ++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 19 deletions(-) diff --git a/azure/production/__main__.py b/azure/production/__main__.py index 315d41ed..8d133c3a 100644 --- a/azure/production/__main__.py +++ b/azure/production/__main__.py @@ -101,9 +101,13 @@ containers=[ # api-bot container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_slackbot:main", + image="ghcr.io/alan-turing-institute/reginald_slackbot:pulumi-reg-2pt0-update", name="reginald-production", # maximum of 63 characters environment_variables=[ + containerinstance.EnvironmentVariableArgs( + name="REGINALD_MODEL", + value="llama-index-llama-cpp", + ), containerinstance.EnvironmentVariableArgs( name="SLACK_APP_TOKEN", secure_value=config.get_secret("REGINALD_SLACK_APP_TOKEN"), @@ -139,8 +143,8 @@ containers=[ # all_data index creation container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_create_index:main", - name="reginald-create-index", # maximum of 63 characters + image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + name="reginald-create-index-all-data", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( name="GITHUB_TOKEN", @@ -152,30 +156,30 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value=DEFAULT_ARGS["max_input_size"], + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value=DEFAULT_ARGS["k"], + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value=DEFAULT_ARGS["chunk_size"], + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value=DEFAULT_ARGS["chunk_overlap_ratio"], + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value=DEFAULT_ARGS["num_output"], + value=str(DEFAULT_ARGS["num_output"]), ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( cpu=2, - memory_in_gb=16, + memory_in_gb=8, ), ), volume_mounts=[ @@ -187,8 +191,8 @@ ), # reg index creation container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_create_index:main", - name="reginald-create-index", # maximum of 63 characters + image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + name="reginald-create-index-reg", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( name="GITHUB_TOKEN", @@ -200,30 +204,78 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value=DEFAULT_ARGS["max_input_size"], + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value=DEFAULT_ARGS["k"], + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value=DEFAULT_ARGS["chunk_size"], + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value=DEFAULT_ARGS["chunk_overlap_ratio"], + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value=DEFAULT_ARGS["num_output"], + value=str(DEFAULT_ARGS["num_output"]), ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=2, - memory_in_gb=16, + cpu=1, + memory_in_gb=4, + ), + ), + volume_mounts=[ + containerinstance.VolumeMountArgs( + mount_path="/app/data", + name="llama-data", + ), + ], + ), + # public index creation container + containerinstance.ContainerArgs( + image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + name="reginald-create-index-public", # maximum of 63 characters + environment_variables=[ + containerinstance.EnvironmentVariableArgs( + name="GITHUB_TOKEN", + secure_value=config.get_secret("GITHUB_TOKEN"), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_WHICH_INDEX", + value="public", + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_MAX_INPUT_SIZE", + value=str(DEFAULT_ARGS["max_input_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_K", + value=str(DEFAULT_ARGS["k"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_SIZE", + value=str(DEFAULT_ARGS["chunk_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_NUM_OUTPUT", + value=str(DEFAULT_ARGS["num_output"]), + ), + ], + ports=[], + resources=containerinstance.ResourceRequirementsArgs( + requests=containerinstance.ResourceRequestsArgs( + cpu=1, + memory_in_gb=4, ), ), volume_mounts=[ @@ -236,7 +288,7 @@ ], os_type=containerinstance.OperatingSystemTypes.LINUX, resource_group_name=resource_group.name, - restart_policy=containerinstance.ContainerGroupRestartPolicy.NEVER, + restart_policy=containerinstance.ContainerGroupRestartPolicy.ON_FAILURE, sku=containerinstance.ContainerGroupSku.STANDARD, volumes=[ containerinstance.VolumeArgs( From f14616f7886d4a8c098a8e4496df543bc16935d6 Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 12:16:25 +0100 Subject: [PATCH 8/9] reduce batch size of sentence-transformer --- reginald/models/llama_index/llama_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reginald/models/llama_index/llama_utils.py b/reginald/models/llama_index/llama_utils.py index 65e5a593..75209a1b 100644 --- a/reginald/models/llama_index/llama_utils.py +++ b/reginald/models/llama_index/llama_utils.py @@ -91,7 +91,7 @@ def setup_settings( # initialise embedding model to use to create the index vectors embed_model = HuggingFaceEmbedding( model_name="sentence-transformers/all-mpnet-base-v2", - embed_batch_size=128, + embed_batch_size=64, ) # construct the prompt helper From cf48e58c30274e0c42b371e685989e39f539cfec Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 13 Jun 2024 16:47:49 +0100 Subject: [PATCH 9/9] make pulumi pull from main --- azure/hack_week/__main__.py | 28 ++++++++++++++-------------- azure/production/__main__.py | 14 +++++++------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/azure/hack_week/__main__.py b/azure/hack_week/__main__.py index f2c6628d..3b014530 100644 --- a/azure/hack_week/__main__.py +++ b/azure/hack_week/__main__.py @@ -168,23 +168,23 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value=DEFAULT_ARGS["max_input_size"], + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value=DEFAULT_ARGS["k"], + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value=DEFAULT_ARGS["chunk_size"], + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value=DEFAULT_ARGS["chunk_overlap_ratio"], + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value=DEFAULT_ARGS["num_output"], + value=str(DEFAULT_ARGS["num_output"]), ), containerinstance.EnvironmentVariableArgs( name="OPENAI_AZURE_API_BASE", @@ -206,8 +206,8 @@ ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=1, - memory_in_gb=12, + cpu=3, + memory_in_gb=14, ), ), volume_mounts=[ @@ -255,30 +255,30 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value=DEFAULT_ARGS["max_input_size"], + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value=DEFAULT_ARGS["k"], + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value=DEFAULT_ARGS["chunk_size"], + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value=DEFAULT_ARGS["chunk_overlap_ratio"], + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value=DEFAULT_ARGS["num_output"], + value=str(DEFAULT_ARGS["num_output"]), ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=4, - memory_in_gb=16, + cpu=1, + memory_in_gb=8, ), ), volume_mounts=[ diff --git a/azure/production/__main__.py b/azure/production/__main__.py index 8d133c3a..1bf31f94 100644 --- a/azure/production/__main__.py +++ b/azure/production/__main__.py @@ -101,7 +101,7 @@ containers=[ # api-bot container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_slackbot:pulumi-reg-2pt0-update", + image="ghcr.io/alan-turing-institute/reginald_slackbot:main", name="reginald-production", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -143,7 +143,7 @@ containers=[ # all_data index creation container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + image="ghcr.io/alan-turing-institute/reginald_create_index:main", name="reginald-create-index-all-data", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -179,7 +179,7 @@ resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( cpu=2, - memory_in_gb=8, + memory_in_gb=6, ), ), volume_mounts=[ @@ -191,7 +191,7 @@ ), # reg index creation container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + image="ghcr.io/alan-turing-institute/reginald_create_index:main", name="reginald-create-index-reg", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -227,7 +227,7 @@ resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( cpu=1, - memory_in_gb=4, + memory_in_gb=5, ), ), volume_mounts=[ @@ -239,7 +239,7 @@ ), # public index creation container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_create_index:pulumi-reg-2pt0-update", + image="ghcr.io/alan-turing-institute/reginald_create_index:main", name="reginald-create-index-public", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -275,7 +275,7 @@ resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( cpu=1, - memory_in_gb=4, + memory_in_gb=5, ), ), volume_mounts=[