Skip to content

Commit

Permalink
Merge branch 'master' into feat-hf-api-scan
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-gray101 authored Jun 19, 2024
2 parents 60c589c + 4f030f9 commit 23ad54b
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 43 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/image-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
Expand Down Expand Up @@ -119,12 +119,12 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
18 changes: 9 additions & 9 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
Expand All @@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
Expand All @@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
Expand All @@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
Expand Down Expand Up @@ -232,7 +232,7 @@ jobs:
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"

core-image-build:
uses: ./.github/workflows/image_build.yml
with:
Expand Down Expand Up @@ -273,7 +273,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
Expand All @@ -284,7 +284,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
Expand All @@ -295,7 +295,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
Expand All @@ -306,7 +306,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/image_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ on:
type: string
cuda-major-version:
description: 'CUDA major version'
default: "11"
default: "12"
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "7"
default: "5"
type: string
platforms:
description: 'Platforms'
Expand Down Expand Up @@ -324,7 +324,7 @@ jobs:
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
Expand Down
22 changes: 11 additions & 11 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-5
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
Expand Down Expand Up @@ -167,26 +167,26 @@ jobs:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
Expand Down Expand Up @@ -291,7 +291,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
Expand All @@ -309,4 +309,4 @@ jobs:
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
limit-access-to-actor: true
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin

# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/[email protected].1 && \
RUN go install google.golang.org/protobuf/cmd/[email protected].2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
Expand Down Expand Up @@ -98,8 +98,8 @@ RUN pip install --user grpcio-tools
FROM requirements-${IMAGE_TYPE} AS requirements-drivers

ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=8
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5

ENV BUILD_TYPE=${BUILD_TYPE}

Expand Down Expand Up @@ -292,7 +292,7 @@ ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}

ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MAJOR_VERSION=12
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
Expand Down
13 changes: 12 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .

run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
Expand Down Expand Up @@ -810,6 +810,17 @@ docker:
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .

docker-cuda11:
docker build \
--build-arg CUDA_MAJOR_VERSION=11 \
--build-arg CUDA_MINOR_VERSION=8 \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE)-cuda11 .

docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
Expand Down
37 changes: 25 additions & 12 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@
icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png
urls:
- https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf
description: |
Dolphin 2.9.2 Qwen2 72B 🐬
Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
description: "Dolphin 2.9.2 Qwen2 72B \U0001F42C\n\nCurated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations\n"
overrides:
parameters:
model: dolphin-2.9.2-qwen2-Q4_K_M.gguf
Expand All @@ -40,10 +37,7 @@
uri: huggingface://cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf/qwen2-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "dolphin-2.9.2-qwen2-7b"
description: |
Dolphin 2.9.2 Qwen2 7B 🐬
Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
description: "Dolphin 2.9.2 Qwen2 7B \U0001F42C\n\nCurated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations\n"
urls:
- https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b
- https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b-gguf
Expand All @@ -69,8 +63,8 @@
- filename: Samantha-Qwen-2-7B-Q4_K_M.gguf
sha256: 5d1cf1c35a7a46c536a96ba0417d08b9f9e09c24a4e25976f72ad55d4904f6fe
uri: huggingface://bartowski/Samantha-Qwen-2-7B-GGUF/Samantha-Qwen-2-7B-Q4_K_M.gguf
## START Mistral
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
name: "mistral-7b-instruct-v0.3"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
Expand Down Expand Up @@ -244,9 +238,9 @@
- https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF
- https://huggingface.co/google/gemma-1.1-7b-it
description: |
This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
overrides:
parameters:
model: gemma-1.1-7b-it-Q4_K_M.gguf
Expand Down Expand Up @@ -639,6 +633,25 @@
- filename: duloxetine-4b-v1-Q4_K_M-imat.gguf
uri: huggingface://Lewdiculous/duloxetine-4b-v1-GGUF-IQ-Imatrix/duloxetine-4b-v1-Q4_K_M-imat.gguf
sha256: cd381f31c810ea8db2219e30701b3316085f5904c1ea3b116682518e82768c1a
- !!merge <<: *llama3
name: "l3-umbral-mind-rp-v1.0-8b-iq-imatrix"
urls:
- https://huggingface.co/Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/fEFozVCpNO9Q3Eb6LAA4i.webp
description: |
The goal of this merge was to make an RP model better suited for role-plays with heavy themes such as but not limited to:
Mental illness
Self-harm
Trauma
Suicide
overrides:
parameters:
model: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
files:
- filename: L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
sha256: 2262eeba2d9de50884f4e298e4b55f1e4c653c3b33415ae9b3ee81dc3b8ec49a
uri: huggingface://Lewdiculous/L3-Umbral-Mind-RP-v1.0-8B-GGUF-IQ-Imatrix/L3-Umbral-Mind-RP-v1.0-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama-salad-8x8b"
urls:
Expand Down Expand Up @@ -1146,8 +1159,8 @@
model: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
files:
- filename: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
sha256: 9efb6ebc00de74012d0fb36134cce07d624a870fc12f38b16b57ce447b86e27e
uri: huggingface://bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF/Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
sha256: 37267609552586bfae6b29bb1b5da7243863b1a8d49e3156229fb82c4407d17d
- !!merge <<: *llama3
name: "llama3-iterative-dpo-final"
urls:
Expand Down

0 comments on commit 23ad54b

Please sign in to comment.