Merge branch 'mudler:master' into master
abdulrahman305 authored Oct 16, 2024
2 parents d3d8062 + 5f130fe commit 6d7dbe6
Showing 5 changed files with 41 additions and 23 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d4c19c0f5cdb1e512573e8c86c79e8d0238c73c4
+CPPLLAMA_VERSION?=a89f75e1b7b90cb2d4d4c52ca53ef9e9b466aa45

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
+WHISPER_CPP_VERSION?=b6049060dd2341b7816d2bce7dc7451c1665828e

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
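Note: the `?=` assignments above are Make's conditional defaults, so any pinned commit can be overridden per invocation without editing the Makefile, e.g. `CPPLLAMA_VERSION=<commit-sha> make build` (the `build` target name is assumed here, not taken from this diff).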
1 change: 1 addition & 0 deletions docs/content/docs/integrations.md
@@ -28,5 +28,6 @@ The list below is a list of software that integrates with LocalAI.
- https://github.com/cedriking/spark
- [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
- [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
+- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases

Feel free to open a pull request (by clicking "Edit page" below) to get a page made for your project, or if you see an error on one of the pages!
2 changes: 1 addition & 1 deletion docs/themes/hugo-theme-relearn (submodule pointer bump; hashes not captured in this view)

8 changes: 4 additions & 4 deletions in a Python requirements file (path not captured in this view)
@@ -3,21 +3,21 @@ aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
certifi==2024.8.30
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
colorama==0.4.6
dataclasses-json==0.6.7
debugpy==1.8.7
frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
langchain==0.3.3
-langchain-community==0.3.1
+langchain-community==0.3.2
marshmallow==3.22.0
marshmallow-enum==1.5.1
multidict==6.1.0
mypy-extensions==1.0.0
numexpr==2.10.1
-numpy==2.1.1
+numpy==2.1.2
openai==1.51.2
openapi-schema-pydantic==1.2.4
packaging>=23.2
@@ -30,4 +30,4 @@ tqdm==4.66.5
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.3
-yarl==1.15.1
+yarl==1.15.2
49 changes: 33 additions & 16 deletions pkg/model/initializers.go
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {

	// No GPU found or no specific binaries found, try to load the CPU variant(s)

-	// Select the Fallback by default
-	selectedProcess := backendPath(assetDir, LLamaCPPFallback)
+	// Select a binary based on availability/capability
+	selectedProcess := ""
+
+	// Check if we have the fallback build (llama-cpp-fallback) and use it as the default
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
+		log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
+		selectedProcess = backendPath(assetDir, LLamaCPPFallback)
+	}
+
+	// Check if we have a native build (llama-cpp) and prefer it instead.
+	// As a reminder, if loading fails with the variant selected here,
+	// we ultimately attempt again with the fallback variant.
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
+		log.Debug().Msgf("[%s] attempting to load with native variant", backend)
+		selectedProcess = backendPath(assetDir, LLamaCPP)
+	}

	// If we find any optimized binary, we use that
	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,14 +283,29 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
		}
	}

-	// Check if the binary exists!
+	// Safety measure: check that the binary exists; otherwise return an empty string
	if _, err := os.Stat(selectedProcess); err == nil {
		return selectedProcess
	}

	return ""
}

+func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
+	// XXX: This is too backend-specific (llama-cpp); remove this bit or generalize it further.
+	// We failed somehow to start the binary. For instance, we could be missing
+	// some libraries when running in binary-only mode.
+	// In this case, we attempt to load the model with the fallback variant.
+
+	// If this is not the llama-cpp backend, return the error immediately
+	if backend != LLamaCPP {
+		return nil, err
+	}
+
+	log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
+	return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+}

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {

	model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
	if err != nil {
-		// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
-		// We failed somehow starting the binary. For instance, could be that we are missing
-		// some libraries if running in binary-only mode.
-		// In this case, we attempt to load the model with the fallback variant.
-
-		// If not llama-cpp backend, return error immediately
-		if backend != LLamaCPP {
-			return nil, err
-		}
-
-		// Otherwise attempt with fallback
-		log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
-		model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+		model, err = attemptLoadingOnFailure(backend, ml, o, err)
		if err != nil {
			return nil, err
		}
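For readers skimming the diff, here is a minimal, self-contained sketch of the selection-and-retry flow this commit introduces. The constants, backendPath, loadModel, and selectProcess below are simplified stand-ins for the real pkg/model types and helpers, not the actual API:

package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

// Simplified stand-ins; the real names live in pkg/model.
const (
	llamaCPP         = "llama-cpp"
	llamaCPPFallback = "llama-cpp-fallback"
	llamaCPPAVX2     = "llama-cpp-avx2"
)

func backendPath(assetDir, variant string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", variant)
}

// selectProcess mirrors the new selection order: take the fallback build
// if present, prefer the native build over it, prefer an optimized (AVX2)
// build over both, and finally verify the chosen binary actually exists.
func selectProcess(assetDir string, hasAVX2 bool) string {
	selected := ""
	if _, err := os.Stat(backendPath(assetDir, llamaCPPFallback)); err == nil {
		selected = backendPath(assetDir, llamaCPPFallback)
	}
	if _, err := os.Stat(backendPath(assetDir, llamaCPP)); err == nil {
		selected = backendPath(assetDir, llamaCPP)
	}
	if hasAVX2 {
		if _, err := os.Stat(backendPath(assetDir, llamaCPPAVX2)); err == nil {
			selected = backendPath(assetDir, llamaCPPAVX2)
		}
	}
	if _, err := os.Stat(selected); err == nil {
		return selected
	}
	return "" // safety measure: never hand back a non-existent binary
}

// loadModel stands in for ml.LoadModel; here it only simulates a failure
// of the native variant so the retry path below is exercised.
func loadModel(variant string) error {
	if variant == llamaCPP {
		return errors.New("native variant failed to start")
	}
	return nil
}

// attemptLoadingOnFailure mirrors the helper factored out by this commit:
// only the llama-cpp backend earns a second attempt, with the fallback variant.
func attemptLoadingOnFailure(backend string, err error) error {
	if backend != llamaCPP {
		return err // other backends surface their error unchanged
	}
	fmt.Printf("[%s] failed (%v); retrying with %q\n", backend, err, llamaCPPFallback)
	return loadModel(llamaCPPFallback)
}

func main() {
	fmt.Println("selected:", selectProcess(os.TempDir(), true))
	if err := loadModel(llamaCPP); err != nil {
		fmt.Println("after fallback, err =", attemptLoadingOnFailure(llamaCPP, err))
	}
}

Note the deliberate ordering in selectProcess: later checks overwrite the selection only when their binary is present, so the most capable available variant wins while the fallback remains the retry of last resort.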
