Commit

Merge branch 'master' into feat-request-middleware

dave-gray101 authored Nov 18, 2024
2 parents 51f861e + faf203e commit d02a4b6
Showing 24 changed files with 136 additions and 35 deletions.
12 changes: 9 additions & 3 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=ae8de6d50a09d49545e0afab2e50cc4acfb280e2
CPPLLAMA_VERSION?=ce2e59ba107cf71ed566040ff20a15d1c58e09c2

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=f19463ece2d43fd0b605dc513d8800eeb4e2315e
WHISPER_CPP_VERSION?=01d3bd7d5ccd1956a7ddf1b57ee92d69f35aad93

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -45,6 +45,7 @@ CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
NATIVE?=false

TEST_DIR=/tmp/test

@@ -83,6 +84,11 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

# If NATIVE is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif

ifeq ($(OS),Darwin)

ifeq ($(OSX_SIGNING_IDENTITY),)
@@ -775,7 +781,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
transformers
2 changes: 1 addition & 1 deletion backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/common/template/requirements.txt
@@ -1,3 +1,3 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
grpcio-tools
2 changes: 1 addition & 1 deletion backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
packaging==24.1
2 changes: 1 addition & 1 deletion backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
setuptools
grpcio==1.67.1
grpcio==1.68.0
pillow
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
wheel
2 changes: 1 addition & 1 deletion backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.67.1
grpcio==1.68.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
librosa
faster-whisper
2 changes: 1 addition & 1 deletion backend/python/parler-tts/requirements.txt
@@ -1,3 +1,3 @@
grpcio==1.67.1
grpcio==1.68.0
certifi
llvmlite==0.43.0
2 changes: 1 addition & 1 deletion backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements-cpu.txt
@@ -2,5 +2,5 @@ torch==2.4.1
accelerate
transformers
bitsandbytes
sentence-transformers==3.3.0
sentence-transformers==3.3.1
transformers
@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
accelerate
sentence-transformers==3.3.0
sentence-transformers==3.3.1
transformers
@@ -1,4 +1,4 @@
torch==2.4.1
accelerate
sentence-transformers==3.3.0
sentence-transformers==3.3.1
transformers
@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
accelerate
sentence-transformers==3.3.0
sentence-transformers==3.3.1
transformers
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements-intel.txt
@@ -4,5 +4,5 @@ torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate
sentence-transformers==3.3.0
sentence-transformers==3.3.1
transformers
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
datasets
2 changes: 1 addition & 1 deletion backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
scipy==1.14.0
certifi
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
2 changes: 1 addition & 1 deletion backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/vllm/install.sh
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.1 protobuf bitsandbytes
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.0 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd
2 changes: 1 addition & 1 deletion backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.67.1
grpcio==1.68.0
protobuf
certifi
setuptools
115 changes: 105 additions & 10 deletions gallery/index.yaml
@@ -602,6 +602,28 @@
- filename: Llama3.2-3B-Enigma.Q4_K_M.gguf
sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4
uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf
- !!merge <<: *llama32
icon: https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/EXX7TKbB-R6arxww2mk0R.jpeg
name: "llama3.2-3b-shiningvaliant2-i1"
urls:
- https://huggingface.co/ValiantLabs/Llama3.2-3B-ShiningValiant2
- https://huggingface.co/mradermacher/Llama3.2-3B-ShiningValiant2-i1-GGUF
description: |
Shining Valiant 2 is a chat model built on Llama 3.2 3b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
Finetuned on meta-llama/Llama-3.2-3B-Instruct for best available general performance
Trained on a variety of high quality data; focused on science, engineering, technical knowledge, and structured reasoning
Also available for Llama 3.1 70b and Llama 3.1 8b!
Version
This is the 2024-09-27 release of Shining Valiant 2 for Llama 3.2 3b.
overrides:
parameters:
model: Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
files:
- filename: Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
sha256: 700521dc6a8a50e2d0bb5ccde12399209004155f9c68751aeac7feccf2cd4957
uri: huggingface://mradermacher/Llama3.2-3B-ShiningValiant2-i1-GGUF/Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
- &qwen25
## Qwen2.5
name: "qwen2.5-14b-instruct"
@@ -1242,8 +1264,8 @@
model: calme-3.2-baguette-3b.Q4_K_M.gguf
files:
- filename: calme-3.2-baguette-3b.Q4_K_M.gguf
sha256: 336f17f88b954ff5ac9afefca348360d0d09639129659d45ab0605631a7c6c7e
uri: huggingface://MaziyarPanahi/calme-3.2-baguette-3b-GGUF/calme-3.2-baguette-3b.Q4_K_M.gguf
sha256: 4e62fe0108643bbfd842add5a1bf199e9b81b0181309b15f483e1f07c2b5fbb2
- !!merge <<: *qwen25
icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png
name: "calme-3.1-baguette-3b"
@@ -1257,8 +1279,8 @@
model: calme-3.1-baguette-3b.Q4_K_M.gguf
files:
- filename: calme-3.1-baguette-3b.Q4_K_M.gguf
sha256: 3839a1a24e0de4e0dc6a720f10c5bfa393fdcc0fc7fb01c3ab5ea311f2d188f2
uri: huggingface://MaziyarPanahi/calme-3.1-baguette-3b-GGUF/calme-3.1-baguette-3b.Q4_K_M.gguf
sha256: 351058680d633749fa64efde205bd5f3d942aacada3204c594d9acfab2fc8774
- !!merge <<: *qwen25
name: "calme-3.3-qwenloi-3b"
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png
@@ -1382,14 +1404,7 @@
urls:
- https://huggingface.co/Nexusflow/Athene-V2-Agent
- https://huggingface.co/bartowski/Athene-V2-Agent-GGUF
description: |
Athene-V2-Agent is an open-source Agent LLM that surpasses the state-of-the-art in function calling and agentic capabilities.
💪 Versatile Agent Capability: Athene-V2-Agent is an agent model, capable of operating in environments with deeply nested dependencies with the environment. It is capable of reasoning and doing planning for trajectories with many tool calls necessary to answer a single query.
📊 Performance Highlights: Athene-V2-Agent surpasses GPT-4o in single FC tasks by 18% in function calling success rates, and by 17% in Agentic success rates.
🔧 Generalization to the Unseen: Athene-V2-Agent has never been trained on the functions or agentic settings used in evaluation.
description: "Athene-V2-Agent is an open-source Agent LLM that surpasses the state-of-the-art in function calling and agentic capabilities.\n\n\U0001F4AA Versatile Agent Capability: Athene-V2-Agent is an agent model, capable of operating in environments with deeply nested dependencies with the environment. It is capable of reasoning and doing planning for trajectories with many tool calls necessary to answer a single query.\n\n\U0001F4CA Performance Highlights: Athene-V2-Agent surpasses GPT-4o in single FC tasks by 18% in function calling success rates, and by 17% in Agentic success rates.\n\n\U0001F527 Generalization to the Unseen: Athene-V2-Agent has never been trained on the functions or agentic settings used in evaluation.\n"
overrides:
parameters:
model: Athene-V2-Agent-Q4_K_M.gguf
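
The description change above swaps a YAML block scalar containing raw emoji for a double-quoted string using \U0001F4AA-style escapes. A minimal sketch of how such escapes decode, assuming PyYAML is available; the snippet and its shortened description text are illustrative, not from the commit:

```python
# Illustrative only: a YAML double-quoted "\UXXXXXXXX" escape decodes to the
# same emoji the removed block scalar contained inline.
import yaml

doc = r'description: "\U0001F4AA Versatile Agent Capability"'
print(yaml.safe_load(doc)["description"])  # 💪 Versatile Agent Capability
```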
@@ -1411,6 +1426,64 @@
- filename: Athene-V2-Chat-Q4_K_M.gguf
sha256: bda8b784ad55982891e5aa69b08ce4030c91a2e28ad9c4c35284d45d3c7aeb16
uri: huggingface://bartowski/Athene-V2-Chat-GGUF/Athene-V2-Chat-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-7b-nerd-uncensored-v1.7"
urls:
- https://huggingface.co/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7
- https://huggingface.co/mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF
description: |
Model created by analyzing and selecting the optimal layers from other Qwen2.5-7B models based on their dimensional utilization efficiency, measured by the Normalized Effective Rank (NER), computed as follows:
Input: Weight matrix for each model layer
Compute singular values σᵢ where σᵢ ≥ 0 # σᵢ represents the importance of each dimension
Filter values above numerical threshold (>1e-12)
Sum all singular values: S = Σσᵢ # S acts as normalization factor
Create probability distribution: pᵢ = σᵢ/S # converts singular values to probabilities summing to 1
Compute Shannon entropy: H = -Σ(pᵢ * log₂(pᵢ)) # measures information content
Calculate maximum possible entropy: H_max = log₂(n)
Final NER score = H/H_max # normalizes score to [0,1] range
Results in value between 0 and 1 for each model layer
overrides:
parameters:
model: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
files:
- filename: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8
uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
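
The NER description above maps directly to a few lines of linear algebra. A minimal sketch, assuming PyTorch; the function name and example weight are illustrative, not taken from jeffmeloy's merge tooling:

```python
import torch

def normalized_effective_rank(weight: torch.Tensor, eps: float = 1e-12) -> float:
    sigma = torch.linalg.svdvals(weight.float())   # singular values, sigma_i >= 0
    sigma = sigma[sigma > eps]                     # filter numerically zero values
    if sigma.numel() < 2:                          # H_max = log2(1) = 0 would divide by zero
        return 0.0
    p = sigma / sigma.sum()                        # probability distribution summing to 1
    h = -(p * torch.log2(p)).sum()                 # Shannon entropy H
    h_max = torch.log2(torch.tensor(float(sigma.numel())))  # maximum possible entropy log2(n)
    return float(h / h_max)                        # NER score in [0, 1]

# Example: score a random 64x64 layer weight
print(normalized_effective_rank(torch.randn(64, 64)))
```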
- !!merge <<: *qwen25
icon: https://i.imgur.com/OxX2Usi.png
name: "evathene-v1.0"
urls:
- https://huggingface.co/sophosympatheia/Evathene-v1.0
- https://huggingface.co/bartowski/Evathene-v1.0-GGUF
description: |
This 72B parameter model is a merge of Nexusflow/Athene-V2-Chat with EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1. See the merge recipe below for details.
This model is uncensored. You are responsible for whatever you do with it.
This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.
overrides:
parameters:
model: Evathene-v1.0-Q4_K_M.gguf
files:
- filename: Evathene-v1.0-Q4_K_M.gguf
sha256: 96401ba9d798faa8a01f579b54523c5f75277e91bf1f0eee93db285f76f61e7e
uri: huggingface://bartowski/Evathene-v1.0-GGUF/Evathene-v1.0-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "miniclaus-qw1.5b-unamgs"
icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png
urls:
- https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS
- https://huggingface.co/bartowski/miniclaus-qw1.5B-UNAMGS-GGUF
description: |
Trained with Magpie-Align/Magpie-Pro-MT-300K-v0.1
Using MGS & UNA (MLP) on this tiny but powerful model.
overrides:
parameters:
model: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
files:
- filename: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
sha256: a0dadd7147cc4a8e8df59659556e4d824ef5c26fd2f39381fe467b2ff9cc1289
uri: huggingface://bartowski/miniclaus-qw1.5B-UNAMGS-GGUF/miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
@@ -2845,6 +2918,28 @@
- filename: magnum-v2-4b.i1-Q4_K_M.gguf
sha256: 692618059fee8870759d67d275ebc59bc0474b18ae3571b3ebdec8f9da786a64
uri: huggingface://mradermacher/magnum-v2-4b-i1-GGUF/magnum-v2-4b.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-nemotron-sunfall-v0.7.0-i1"
urls:
- https://huggingface.co/crestf411/L3.1-nemotron-sunfall-v0.7.0
- https://huggingface.co/mradermacher/L3.1-nemotron-sunfall-v0.7.0-i1-GGUF
description: |
Significant revamping of the dataset metadata generation process, resulting in a higher-quality dataset overall. The "Diamond Law" experiment has been removed, as it didn't seem to affect the model output enough to warrant the added setup complexity.
Recommended starting point:
Temperature: 1
MinP: 0.05~0.1
DRY: 0.8 1.75 2 0
At early context, I recommend keeping XTC disabled. Once you hit higher context sizes (10k+), enabling XTC at 0.1 / 0.5 seems to significantly improve the output, but YMMV. If the output drones on and is uninspiring, XTC can be extremely effective.
General heuristic:
Lots of slop? Temperature is too low. Raise it, or enable XTC. For early context, temp bump is probably preferred.
Is the model making mistakes about subtle or obvious details in the scene? Temperature is too high, OR XTC is enabled and/or XTC settings are too high. Lower temp and/or disable XTC.
overrides:
parameters:
model: L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
files:
- filename: L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
sha256: f9aa88f3b220e35662a2d62d1f615a3b425e348a8f9e2939f05bf57385119f76
uri: huggingface://mradermacher/L3.1-nemotron-sunfall-v0.7.0-i1-GGUF/L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
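
For readers unfamiliar with these samplers, a hedged sketch of the recommended starting point above as a plain settings dict, reading the DRY quadruple in the commonly used multiplier / base / allowed-length / penalty-range order and "XTC at 0.1 / 0.5" as threshold / probability; both orderings are assumptions, not stated in the card:

```python
# Illustrative names only; not a LocalAI or llama.cpp config schema.
sunfall_sampler_defaults = {
    "temperature": 1.0,
    "min_p": 0.05,          # recommended range: 0.05-0.1
    "dry_multiplier": 0.8,
    "dry_base": 1.75,
    "dry_allowed_length": 2,
    "dry_penalty_range": 0,
    "xtc_threshold": 0.0,   # XTC disabled at early context
    "xtc_probability": 0.0,
}

def enable_xtc(settings: dict) -> dict:
    # Past ~10k context, the card suggests enabling XTC at 0.1 / 0.5.
    return {**settings, "xtc_threshold": 0.1, "xtc_probability": 0.5}
```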
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
