Merge branch 'mudler:master' into master
abdulrahman305 authored Oct 16, 2024
2 parents d3d8062 + 5f130fe commit 6d7dbe6
Showing 5 changed files with 41 additions and 23 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d4c19c0f5cdb1e512573e8c86c79e8d0238c73c4
+CPPLLAMA_VERSION?=a89f75e1b7b90cb2d4d4c52ca53ef9e9b466aa45

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
+WHISPER_CPP_VERSION?=b6049060dd2341b7816d2bce7dc7451c1665828e

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
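Note: the `?=` assignments above are Make's conditional defaults, so any pinned commit can be overridden per invocation without editing the Makefile, e.g. `CPPLLAMA_VERSION=<commit-sha> make build` (the `build` target name is assumed here, not taken from this diff).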
1 change: 1 addition & 0 deletions docs/content/docs/integrations.md
@@ -28,5 +28,6 @@ The list below is a list of software that integrates with LocalAI.
- https://github.com/cedriking/spark
- [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
- [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
+- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases

Feel free to open a pull request (by clicking "Edit page" below) to get a page made for your project, or if you see an error on one of the pages!
2 changes: 1 addition & 1 deletion docs/themes/hugo-theme-relearn (submodule pointer bump; hashes not captured in this view)

8 changes: 4 additions & 4 deletions in a Python requirements file (path not captured in this view)
@@ -3,21 +3,21 @@ aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
certifi==2024.8.30
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
colorama==0.4.6
dataclasses-json==0.6.7
debugpy==1.8.7
frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
langchain==0.3.3
-langchain-community==0.3.1
+langchain-community==0.3.2
marshmallow==3.22.0
marshmallow-enum==1.5.1
multidict==6.1.0
mypy-extensions==1.0.0
numexpr==2.10.1
-numpy==2.1.1
+numpy==2.1.2
openai==1.51.2
openapi-schema-pydantic==1.2.4
packaging>=23.2
@@ -30,4 +30,4 @@ tqdm==4.66.5
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.3
-yarl==1.15.1
+yarl==1.15.2
49 changes: 33 additions & 16 deletions pkg/model/initializers.go
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {

	// No GPU found or no specific binaries found, try to load the CPU variant(s)

-	// Select the Fallback by default
-	selectedProcess := backendPath(assetDir, LLamaCPPFallback)
+	// Select a binary based on availability/capability
+	selectedProcess := ""
+
+	// Check if we have the fallback build (llama-cpp-fallback) and use it as the default
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
+		log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
+		selectedProcess = backendPath(assetDir, LLamaCPPFallback)
+	}
+
+	// Check if we have a native build (llama-cpp) and prefer it instead.
+	// As a reminder, if loading fails with the variant selected here,
+	// we ultimately attempt again with the fallback variant.
+	if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
+		log.Debug().Msgf("[%s] attempting to load with native variant", backend)
+		selectedProcess = backendPath(assetDir, LLamaCPP)
+	}

	// If we find any optimized binary, we use that
	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,14 +283,29 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
		}
	}

-	// Check if the binary exists!
+	// Safety measure: check that the binary exists; otherwise return an empty string
	if _, err := os.Stat(selectedProcess); err == nil {
		return selectedProcess
	}

	return ""
}

+func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
+	// XXX: This is too backend-specific (llama-cpp); remove this bit or generalize it further.
+	// We failed somehow to start the binary. For instance, we could be missing
+	// some libraries when running in binary-only mode.
+	// In this case, we attempt to load the model with the fallback variant.
+
+	// If this is not the llama-cpp backend, return the error immediately
+	if backend != LLamaCPP {
+		return nil, err
+	}
+
+	log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
+	return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+}

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {

	model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
	if err != nil {
-		// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
-		// We failed somehow starting the binary. For instance, could be that we are missing
-		// some libraries if running in binary-only mode.
-		// In this case, we attempt to load the model with the fallback variant.
-
-		// If not llama-cpp backend, return error immediately
-		if backend != LLamaCPP {
-			return nil, err
-		}
-
-		// Otherwise attempt with fallback
-		log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
-		model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
+		model, err = attemptLoadingOnFailure(backend, ml, o, err)
		if err != nil {
			return nil, err
		}
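For readers skimming the diff, here is a minimal, self-contained sketch of the selection-and-retry flow this commit introduces. The constants, backendPath, loadModel, and selectProcess below are simplified stand-ins for the real pkg/model types and helpers, not the actual API:

package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

// Simplified stand-ins; the real names live in pkg/model.
const (
	llamaCPP         = "llama-cpp"
	llamaCPPFallback = "llama-cpp-fallback"
	llamaCPPAVX2     = "llama-cpp-avx2"
)

func backendPath(assetDir, variant string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", variant)
}

// selectProcess mirrors the new selection order: take the fallback build
// if present, prefer the native build over it, prefer an optimized (AVX2)
// build over both, and finally verify the chosen binary actually exists.
func selectProcess(assetDir string, hasAVX2 bool) string {
	selected := ""
	if _, err := os.Stat(backendPath(assetDir, llamaCPPFallback)); err == nil {
		selected = backendPath(assetDir, llamaCPPFallback)
	}
	if _, err := os.Stat(backendPath(assetDir, llamaCPP)); err == nil {
		selected = backendPath(assetDir, llamaCPP)
	}
	if hasAVX2 {
		if _, err := os.Stat(backendPath(assetDir, llamaCPPAVX2)); err == nil {
			selected = backendPath(assetDir, llamaCPPAVX2)
		}
	}
	if _, err := os.Stat(selected); err == nil {
		return selected
	}
	return "" // safety measure: never hand back a non-existent binary
}

// loadModel stands in for ml.LoadModel; here it only simulates a failure
// of the native variant so the retry path below is exercised.
func loadModel(variant string) error {
	if variant == llamaCPP {
		return errors.New("native variant failed to start")
	}
	return nil
}

// attemptLoadingOnFailure mirrors the helper factored out by this commit:
// only the llama-cpp backend earns a second attempt, with the fallback variant.
func attemptLoadingOnFailure(backend string, err error) error {
	if backend != llamaCPP {
		return err // other backends surface their error unchanged
	}
	fmt.Printf("[%s] failed (%v); retrying with %q\n", backend, err, llamaCPPFallback)
	return loadModel(llamaCPPFallback)
}

func main() {
	fmt.Println("selected:", selectProcess(os.TempDir(), true))
	if err := loadModel(llamaCPP); err != nil {
		fmt.Println("after fallback, err =", attemptLoadingOnFailure(llamaCPP, err))
	}
}

Note the deliberate ordering in selectProcess: later checks overwrite the selection only when their binary is present, so the most capable available variant wins while the fallback remains the retry of last resort.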
