Merge branch 'master' into feat-hf-api-scan

mudler · Jun 19, 2024 · 23ad54b · 23ad54b
2 parents 60c589c + 4f030f9
commit 23ad54b
Show file tree

Hide file tree

Showing 7 changed files with 67 additions and 43 deletions.
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
@@ -46,7 +46,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,12 +119,12 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
+            makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
@@ -64,7 +64,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -232,7 +232,7 @@ jobs:
             grpc-base-image: "ubuntu:22.04"
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
-  
+
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -273,7 +273,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
@@ -284,7 +284,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -295,7 +295,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -306,7 +306,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'

diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
@@ -19,11 +19,11 @@ on:
         type: string
       cuda-major-version:
         description: 'CUDA major version'
-        default: "11"
+        default: "12"
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "7"
+        default: "5"
         type: string
       platforms:
         description: 'Platforms'
@@ -324,7 +324,7 @@ jobs:
           docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
           docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
           docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-  
+
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-4
+          CUDA_VERSION: 12-5
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -167,26 +167,26 @@ jobs:
           ROCM_VERSION: "6.1"
           AMDGPU_VERSION: "6.1"
         run: |
-            set -ex 
+            set -ex
 
             sudo apt-get update
-            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg 
-            
-            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - 
-              
+            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+
+            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
+
             printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
-            
+
             printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
             printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
             sudo apt-get update
 
             sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
                 hipblas-dev rocm-dev \
                 rocblas-dev
-          
+
             sudo apt-get clean
             sudo rm -rf /var/lib/apt/lists/*
-            sudo ldconfig 
+            sudo ldconfig
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -291,7 +291,7 @@ jobs:
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
           export PATH=$PATH:$GOPATH/bin
-          
+
           BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
@@ -309,4 +309,4 @@ jobs:
         with:
           detached: true
           connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          limit-access-to-actor: true
diff --git a/Dockerfile b/Dockerfile
@@ -33,7 +33,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta
 ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
+RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
     go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
@@ -98,8 +98,8 @@ RUN pip install --user grpcio-tools
 FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=8
+ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MINOR_VERSION=5
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
@@ -292,7 +292,7 @@ ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV MAKEFLAGS=${MAKEFLAGS}
 
-ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MAJOR_VERSION=12
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all

diff --git a/Makefile b/Makefile
@@ -400,7 +400,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -810,6 +810,17 @@ docker:
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .
 
+docker-cuda11:
+	docker build \
+		--build-arg CUDA_MAJOR_VERSION=11 \
+		--build-arg CUDA_MINOR_VERSION=8 \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="$(GO_TAGS)" \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
+		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
+		-t $(DOCKER_IMAGE)-cuda11 .
+
 docker-aio:
 	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \

diff --git a/gallery/index.yaml b/gallery/index.yaml
@@ -27,10 +27,7 @@
   icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png
   urls:
     - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf
-  description: |
-    Dolphin 2.9.2 Qwen2 72B 🐬
-
-    Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
+  description: "Dolphin 2.9.2 Qwen2 72B \U0001F42C\n\nCurated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations\n"
   overrides:
     parameters:
       model: dolphin-2.9.2-qwen2-Q4_K_M.gguf
@@ -40,10 +37,7 @@
       uri: huggingface://cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf/qwen2-Q4_K_M.gguf
 - !!merge <<: *qwen2
   name: "dolphin-2.9.2-qwen2-7b"
-  description: |
-    Dolphin 2.9.2 Qwen2 7B 🐬
-
-    Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
+  description: "Dolphin 2.9.2 Qwen2 7B \U0001F42C\n\nCurated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations\n"
   urls:
     - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b
     - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b-gguf
@@ -69,8 +63,8 @@
     - filename: Samantha-Qwen-2-7B-Q4_K_M.gguf
       sha256: 5d1cf1c35a7a46c536a96ba0417d08b9f9e09c24a4e25976f72ad55d4904f6fe
       uri: huggingface://bartowski/Samantha-Qwen-2-7B-GGUF/Samantha-Qwen-2-7B-Q4_K_M.gguf
-## START Mistral
 - &mistral03
+  ## START Mistral
   url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
   name: "mistral-7b-instruct-v0.3"
   icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
@@ -244,9 +238,9 @@
     - https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF
     - https://huggingface.co/google/gemma-1.1-7b-it
   description: |
-      This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
+    This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
 
-      Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
+    Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
   overrides:
     parameters:
       model: gemma-1.1-7b-it-Q4_K_M.gguf
@@ -639,6 +633,25 @@
     - filename: duloxetine-4b-v1-Q4_K_M-imat.gguf
       uri: huggingface://Lewdiculous/duloxetine-4b-v1-GGUF-IQ-Imatrix/duloxetine-4b-v1-Q4_K_M-imat.gguf
       sha256: cd381f31c810ea8db2219e30701b3316085f5904c1ea3b116682518e82768c1a
+- !!merge <<: *llama3
+  name: "l3-umbral-mind-rp-v1.0-8b-iq-imatrix"
+  urls:
+    - https://huggingface.co/Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/fEFozVCpNO9Q3Eb6LAA4i.webp
+  description: |
+    The goal of this merge was to make an RP model better suited for role-plays with heavy themes such as but not limited to:
+
+    Mental illness
+    Self-harm
+    Trauma
+    Suicide
+  overrides:
+    parameters:
+      model: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
+  files:
+    - filename: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
+      sha256: 2262eeba2d9de50884f4e298e4b55f1e4c653c3b33415ae9b3ee81dc3b8ec49a
+      uri: huggingface://Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix/L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
 - !!merge <<: *llama3
   name: "llama-salad-8x8b"
   urls:
@@ -1146,8 +1159,8 @@
       model: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
   files:
     - filename: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
-      sha256: 9efb6ebc00de74012d0fb36134cce07d624a870fc12f38b16b57ce447b86e27e
       uri: huggingface://bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF/Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
+      sha256: 37267609552586bfae6b29bb1b5da7243863b1a8d49e3156229fb82c4407d17d
 - !!merge <<: *llama3
   name: "llama3-iterative-dpo-final"
   urls: