diff --git a/.github/ISSUE_TEMPLATE/01-bug-low.yml b/.github/ISSUE_TEMPLATE/01-bug-low.yml
new file mode 100644
index 0000000000..bc9fb29783
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/01-bug-low.yml
@@ -0,0 +1,59 @@
+name: Low Severity Bugs
+description: Used to report low severity bugs in llamafiles (e.g. cosmetic issues, non-critical UI glitches)
+title: "Bug: "
+labels: ["bug", "low severity"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report!
+ Please include information about your system, the steps to reproduce the bug,
+ and the version of llamafiles that you are using.
+ If possible, please provide a minimal code example that reproduces the bug.
+ You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
+ for additional technical logging that may allow us to narrow down where the fault occurred.
+ - type: input
+ id: contact
+ attributes:
+ label: Contact Details
+ description: How can we get in touch with you if we need more info?
+ placeholder: ex. email@example.com
+ validations:
+ required: false
+ - type: textarea
+ id: what-happened
+ attributes:
+ label: What happened?
+ description: Also tell us, what did you expect to happen?
+ placeholder: Tell us what you see!
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Version
+ description: What version of our software are you running? (use `--version` to get a version string)
+ placeholder: "llamafile v0.8.4"
+ validations:
+ required: true
+ - type: dropdown
+ id: operating-system
+ attributes:
+ label: What operating system are you seeing the problem on?
+ multiple: true
+ options:
+ - Linux
+ - Mac
+ - Windows
+ - FreeBSD
+ - OpenBSD
+ - NetBSD
+ - BIOS
+ validations:
+ required: false
+ - type: textarea
+ id: logs
+ attributes:
+ label: Relevant log output
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/02-bug-medium.yml b/.github/ISSUE_TEMPLATE/02-bug-medium.yml
new file mode 100644
index 0000000000..2b8241de9f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/02-bug-medium.yml
@@ -0,0 +1,59 @@
+name: Medium Severity Bug
+description: Used to report medium severity bugs in llamafiles (e.g. malfunctioning features that are generally still usable)
+title: "Bug: "
+labels: ["bug", "medium severity"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report!
+ Please include information about your system, the steps to reproduce the bug,
+ and the version of llamafiles that you are using.
+ If possible, please provide a minimal code example that reproduces the bug.
+ You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
+ for additional technical logging that may allow us to narrow down where the fault occurred.
+ - type: input
+ id: contact
+ attributes:
+ label: Contact Details
+ description: How can we get in touch with you if we need more info?
+ placeholder: ex. email@example.com
+ validations:
+ required: false
+ - type: textarea
+ id: what-happened
+ attributes:
+ label: What happened?
+ description: Also tell us, what did you expect to happen?
+ placeholder: Tell us what you see!
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Version
+ description: What version of our software are you running? (use `--version` to get a version string)
+ placeholder: "llamafile v0.8.4"
+ validations:
+ required: true
+ - type: dropdown
+ id: operating-system
+ attributes:
+ label: What operating system are you seeing the problem on?
+ multiple: true
+ options:
+ - Linux
+ - Mac
+ - Windows
+ - FreeBSD
+ - OpenBSD
+ - NetBSD
+ - BIOS
+ validations:
+ required: false
+ - type: textarea
+ id: logs
+ attributes:
+ label: Relevant log output
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/03-bug-high.yml b/.github/ISSUE_TEMPLATE/03-bug-high.yml
new file mode 100644
index 0000000000..49abc44595
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/03-bug-high.yml
@@ -0,0 +1,59 @@
+name: High Severity Bug
+description: Used to report high severity bugs in llamafiles (e.g. malfunctioning features hindering important common workflows)
+title: "Bug: "
+labels: ["bug", "high severity"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report!
+ Please include information about your system, the steps to reproduce the bug,
+ and the version of llamafiles that you are using.
+ If possible, please provide a minimal code example that reproduces the bug.
+ You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
+ for additional technical logging that may allow us to narrow down where the fault occurred.
+ - type: input
+ id: contact
+ attributes:
+ label: Contact Details
+ description: How can we get in touch with you if we need more info?
+ placeholder: ex. email@example.com
+ validations:
+ required: false
+ - type: textarea
+ id: what-happened
+ attributes:
+ label: What happened?
+ description: Also tell us, what did you expect to happen?
+ placeholder: Tell us what you see!
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Version
+ description: What version of our software are you running? (use `--version` to get a version string)
+ placeholder: "llamafile v0.8.4"
+ validations:
+ required: true
+ - type: dropdown
+ id: operating-system
+ attributes:
+ label: What operating system are you seeing the problem on?
+ multiple: true
+ options:
+ - Linux
+ - Mac
+ - Windows
+ - FreeBSD
+ - OpenBSD
+ - NetBSD
+ - BIOS
+ validations:
+ required: false
+ - type: textarea
+ id: logs
+ attributes:
+ label: Relevant log output
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/04-bug-critical.yml b/.github/ISSUE_TEMPLATE/04-bug-critical.yml
new file mode 100644
index 0000000000..9a96d4a9d6
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/04-bug-critical.yml
@@ -0,0 +1,59 @@
+name: Critical Severity Bug
+description: Used to report critical severity bugs in llamafiles (e.g. crashes, data corruption, data loss)
+title: "Bug: "
+labels: ["bug", "critical severity"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report!
+ Please include information about your system, the steps to reproduce the bug,
+ and the version of llamafiles that you are using.
+ If possible, please provide a minimal code example that reproduces the bug.
+ You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
+ for additional technical logging that may allow us to narrow down where the fault occurred.
+ - type: input
+ id: contact
+ attributes:
+ label: Contact Details
+ description: How can we get in touch with you if we need more info?
+ placeholder: ex. email@example.com
+ validations:
+ required: false
+ - type: textarea
+ id: what-happened
+ attributes:
+ label: What happened?
+ description: Also tell us, what did you expect to happen?
+ placeholder: Tell us what you see!
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Version
+ description: What version of our software are you running? (use `--version` to get a version string)
+ placeholder: "llamafile v0.8.4"
+ validations:
+ required: true
+ - type: dropdown
+ id: operating-system
+ attributes:
+ label: What operating system are you seeing the problem on?
+ multiple: true
+ options:
+ - Linux
+ - Mac
+ - Windows
+ - FreeBSD
+ - OpenBSD
+ - NetBSD
+ - BIOS
+ validations:
+ required: false
+ - type: textarea
+ id: logs
+ attributes:
+ label: Relevant log output
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+ render: shell
diff --git a/.github/ISSUE_TEMPLATE/05-enhancement.yml b/.github/ISSUE_TEMPLATE/05-enhancement.yml
new file mode 100644
index 0000000000..5910564dad
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/05-enhancement.yml
@@ -0,0 +1,51 @@
+name: Enhancement template
+description: Used to request enhancements for llamafiles
+title: "Feature Request: "
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Please post your idea first in the [ideas category of Discussions](https://github.com/Mozilla-Ocho/llamafile/discussions/categories/ideas) if there is not yet a consensus for this enhancement request. This will help keep the issue tracker focused on enhancements that the community has agreed need to be implemented.
+
+ - type: checkboxes
+ id: prerequisites
+ attributes:
+ label: Prerequisites
+ description: Please confirm the following before submitting your enhancement request.
+ options:
+ - label: I am running the latest code. Mention the version if possible as well.
+ required: true
+ - label: I carefully followed the [README.md](https://github.com/Mozilla-Ocho/llamafile/blob/master/README.md).
+ required: true
+ - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
+ required: true
+ - label: I reviewed the [Discussions](https://github.com/Mozilla-Ocho/llamafile/discussions), and have a new and useful enhancement to share.
+ required: true
+
+ - type: textarea
+ id: feature-description
+ attributes:
+ label: Feature Description
+ description: Please provide a detailed written description of what you were trying to do, and what you expected `llamafiles` to do as an enhancement.
+ placeholder: Detailed description of the enhancement
+ validations:
+ required: true
+
+ - type: textarea
+ id: motivation
+ attributes:
+ label: Motivation
+ description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llamafiles` users.
+ placeholder: Explanation of why this feature is needed and its benefits
+ validations:
+ required: true
+
+ - type: textarea
+ id: possible-implementation
+ attributes:
+ label: Possible Implementation
+ description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
+ placeholder: Detailed description of potential implementation
+ validations:
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/06-refactor.yml b/.github/ISSUE_TEMPLATE/06-refactor.yml
new file mode 100644
index 0000000000..4b90cf6e2c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/06-refactor.yml
@@ -0,0 +1,28 @@
+name: Refactor (Maintainers)
+description: Used to track refactoring opportunities
+title: "Refactor: "
+labels: ["refactor"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Don't forget to [check for existing refactor issue tickets](https://github.com/Mozilla-Ocho/llamafile/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+ You may also want to check the [refactor label on pull requests](https://github.com/Mozilla-Ocho/llamafile/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
+
+ - type: textarea
+ id: background-description
+ attributes:
+ label: Background Description
+ description: Please provide a detailed written description of the pain points you are trying to solve.
+ placeholder: Detailed description of your motivation for requesting this refactor
+ validations:
+ required: true
+
+ - type: textarea
+ id: possible-approaches
+ attributes:
+ label: Possible Refactor Approaches
+ description: If you have an idea of possible approaches to solve this problem, please describe it here. You may want to format it as a todo list.
+ placeholder: Your idea of possible refactoring opportunity/approaches
+ validations:
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000..5fca63441f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,13 @@
+blank_issues_enabled: true
+contact_links:
+ - name: Got an idea?
+ url: https://github.com/Mozilla-Ocho/llamafile/discussions/categories/ideas
+ about: Pop it there. It may then become an enhancement ticket.
+ - name: Got a question?
+ url: https://github.com/Mozilla-Ocho/llamafile/discussions/categories/q-a
+ about: Ask a question there!
+ - name: Is your problem more about the underlying llama.cpp engine?
+ url: https://github.com/ggerganov/llama.cpp/issues/new/choose
+ about: Head to the llama.cpp reporting page instead
+
+
diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 0000000000..066eb4847e
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,29 @@
+# https://github.com/actions/labeler
+documentation:
+ - changed-files:
+ - any-glob-to-any-file:
+ - README.md
+ - LICENSE
+ - docs/**
+testing:
+ - changed-files:
+ - any-glob-to-any-file:
+ - tests/**
+build:
+ - changed-files:
+ - any-glob-to-any-file:
+ - cmake/**
+ - CMakeLists.txt
+ - CMakePresets.json
+ - codecov.yml
+llama.cpp:
+ - changed-files:
+ - any-glob-to-any-file: llama.cpp/**
+llamafile:
+ - changed-files:
+ - any-glob-to-any-file: llamafile/**
+devops:
+ - changed-files:
+ - any-glob-to-any-file:
+ - .devops/**
+ - .github/**
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bd291b0e25..575b413aa7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,45 +1,63 @@
name: CI
on:
push:
- branches: [ master, main ]
+ branches: [ master, main, fix ]
pull_request:
- branches: [ master, main ]
+ branches: [ master, main, fix ]
+
jobs:
- Tests:
+ ubuntu-focal-make:
timeout-minutes: 60
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- os: [ macos-latest ] # ubuntu-latest, windows-latest are currently non-functional, requiring adaptation for proper functionality.
+ runs-on: ubuntu-latest
+
steps:
- - name: Checkout Repository
+ - name: Clone
+ id: checkout
uses: actions/checkout@v4
- - name: Build and Install CosmoCC
- shell: bash
+
+ - name: Dependencies
+ id: depends
run: |
- mkdir -p cosmocc
- cd cosmocc
- curl -o cosmocc.zip -L https://cosmo.zip/pub/cosmocc/cosmocc.zip
- unzip cosmocc.zip
- cd ..
- ./cosmocc/bin/make -j8 && ./cosmocc/bin/make install
- - name: Create LLM Executable
- shell: bash
+ sudo apt-get update
+ sudo apt-get install make
+
+ - name: Cache cosmocc toolchain
+ id: cache-cosmocc-toolchain
+ uses: actions/cache@v4
+ env:
+ cache-name: cache-cosmocc-toolchain
+ with:
+ path: |
+ .cosmocc
+ o/depend
+ o/depend.test
+ key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/config.mk') }}
+ restore-keys: |
+ ${{ runner.os }}-build-${{ env.cache-name }}
+
+ - name: Setup cosmocc and ape loader
run: |
- curl -o mistral.gguf -L https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+ sudo make cosmocc-ci PREFIX=/usr
+
+ - name: Build
+ run: |
+ sudo make -j $(nproc)
+
+ - name: Make Llamafile
+ run: |
+ cp ./models/TinyLLama-v0.1-5M-F16.gguf tinyllama.gguf
cat << EoF > .args
-m
- mistral.gguf
+ tinyllama.gguf
...
EoF
- cp /usr/local/bin/llamafile llamafile_exe
- chmod +x llamafile_exe
- zipalign -j0 \
- llamafile_exe \
- mistral.gguf \
+ cp o//llama.cpp/main/main \
+ tinyllama.llamafile
+ o//llamafile/zipalign -j0 \
+ tinyllama.llamafile \
+ tinyllama.gguf \
.args
- - name: Execute LLM CLI
- shell: bash
+
+ - name: Execute LLM CLI CPU # GitHub Actions runners lack "support_simdgroup_reduction" for RMS_NORM :'(
run: |
- ./llamafile_exe --temp 0.7 --n-predict 50 -p '[INST]Write a story about llamas[/INST]'
+ ./tinyllama.llamafile -e -p '## Famous Speech\n\nFour score and seven' -n 50 -ngl 0
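
For readers who want to run these steps outside of GitHub Actions, here is a rough local equivalent (a sketch assuming a Linux machine with GNU make and root access; it mirrors the job above rather than adding anything new):

```sh
# Local reproduction of the ubuntu-focal-make job (sketch, not part of the patch).
sudo make cosmocc-ci PREFIX=/usr    # install cosmocc toolchain and ape loader
make -j$(nproc)                     # build everything under o// (CI runs this via sudo)
cp models/TinyLLama-v0.1-5M-F16.gguf tinyllama.gguf
printf -- '-m\ntinyllama.gguf\n...\n' > .args
cp o//llama.cpp/main/main tinyllama.llamafile
o//llamafile/zipalign -j0 tinyllama.llamafile tinyllama.gguf .args
./tinyllama.llamafile -e -p '## Famous Speech\n\nFour score and seven' -n 50 -ngl 0
```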
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
new file mode 100644
index 0000000000..ae86e99275
--- /dev/null
+++ b/.github/workflows/editorconfig.yml
@@ -0,0 +1,27 @@
+name: EditorConfig Checker
+
+on:
+ workflow_dispatch: # allows manual triggering
+ inputs:
+ create_release:
+ description: 'Create new release'
+ required: true
+ type: boolean
+ push:
+ branches:
+ - master
+ pull_request:
+ branches:
+ - master
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ editorconfig:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: editorconfig-checker/action-editorconfig-checker@main
+ - run: editorconfig-checker
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 0000000000..799e4d574d
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,17 @@
+name: "Pull Request Labeler"
+on:
+- pull_request_target
+
+jobs:
+ labeler:
+ permissions:
+ contents: read
+ pull-requests: write
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ repository: "Mozilla-Ocho/llamafile"
+ - uses: actions/labeler@v5
+ with:
+ configuration-path: '.github/labeler.yml'
diff --git a/Makefile b/Makefile
index 72212ee21c..5c9ddabe98 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ include llama.cpp/BUILD.mk
# the root package is `o//` by default
# building a package also builds its sub-packages
.PHONY: o/$(MODE)/
-o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
+o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile o/$(MODE)/depend.test
# for installing to `make PREFIX=/usr/local`
.PHONY: install
@@ -28,17 +28,22 @@ install: llamafile/zipalign.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
o/$(MODE)/llamafile/zipalign \
+ o/$(MODE)/llamafile/tokenize \
o/$(MODE)/llama.cpp/main/main \
o/$(MODE)/llama.cpp/imatrix/imatrix \
o/$(MODE)/llama.cpp/quantize/quantize \
+ o/$(MODE)/llama.cpp/llama-bench/llama-bench \
o/$(MODE)/llama.cpp/perplexity/perplexity \
o/$(MODE)/llama.cpp/llava/llava-quantize
mkdir -p $(PREFIX)/bin
$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
+ $(INSTALL) o/$(MODE)/llamafile/tokenize $(PREFIX)/bin/llamafile-tokenize
$(INSTALL) o/$(MODE)/llama.cpp/main/main $(PREFIX)/bin/llamafile
$(INSTALL) o/$(MODE)/llama.cpp/imatrix/imatrix $(PREFIX)/bin/llamafile-imatrix
$(INSTALL) o/$(MODE)/llama.cpp/quantize/quantize $(PREFIX)/bin/llamafile-quantize
+ $(INSTALL) o/$(MODE)/llama.cpp/llama-bench/llama-bench $(PREFIX)/bin/llamafile-bench
$(INSTALL) build/llamafile-convert $(PREFIX)/bin/llamafile-convert
+ $(INSTALL) build/llamafile-upgrade-engine $(PREFIX)/bin/llamafile-upgrade-engine
$(INSTALL) o/$(MODE)/llama.cpp/perplexity/perplexity $(PREFIX)/bin/llamafile-perplexity
$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
mkdir -p $(PREFIX)/share/man/man1
@@ -49,5 +54,14 @@ install: llamafile/zipalign.1 \
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
+.PHONY: check
+check: o/$(MODE)/llamafile/check
+
+.PHONY: cosmocc
+cosmocc: $(COSMOCC) # cosmocc toolchain setup
+
+.PHONY: cosmocc-ci
+cosmocc-ci: $(COSMOCC) $(PREFIX)/bin/ape # cosmocc toolchain setup in ci context
+
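+# A typical developer flow with the targets above (a sketch, not prescriptive):
+#
+#     make cosmocc                          # one-time toolchain download
+#     make -j8                              # build the default o// packages
+#     make check                            # run the llamafile test suite
+#     sudo make install PREFIX=/usr/local
+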
include build/deps.mk
include build/tags.mk
diff --git a/README.md b/README.md
index a842731501..7eef3e989a 100644
--- a/README.md
+++ b/README.md
@@ -1,32 +1,38 @@
# llamafile
+[![ci status](https://github.com/Mozilla-Ocho/llamafile/actions/workflows/ci.yml/badge.svg)](https://github.com/Mozilla-Ocho/llamafile/actions/workflows/ci.yml)
+[![](https://dcbadge.vercel.app/api/server/teDuGYVTB2)](https://discord.gg/teDuGYVTB2)
+
**llamafile lets you distribute and run LLMs with a single file. ([announcement blog post](https://hacks.mozilla.org/2023/11/introducing-llamafile/))**
-Our goal is to make open source large language models much more
-accessible to both developers and end users. We're doing that by
-combining [llama.cpp](https://github.com/ggerganov/llama.cpp) with [Cosmopolitan Libc](https://github.com/jart/cosmopolitan) into one
-framework that collapses all the complexity of LLMs down to
+Our goal is to make open LLMs much more
+accessible to both developers and end users. We're doing that by
+combining [llama.cpp](https://github.com/ggerganov/llama.cpp) with [Cosmopolitan Libc](https://github.com/jart/cosmopolitan) into one
+framework that collapses all the complexity of LLMs down to
a single-file executable (called a "llamafile") that runs
-locally on most computers, with no installation.
+locally on most computers, with no installation.
+
+
+llamafile is a Mozilla Builders project.
## Quickstart
-The easiest way to try it for yourself is to download our example
-llamafile for the [LLaVA](https://llava-vl.github.io/) model (license: [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/),
-[OpenAI](https://openai.com/policies/terms-of-use)). LLaVA is a new LLM that can do more
-than just chat; you can also upload images and ask it questions
-about them. With llamafile, this all happens locally; no data
+The easiest way to try it for yourself is to download our example
+llamafile for the [LLaVA](https://llava-vl.github.io/) model (license: [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/),
+[OpenAI](https://openai.com/policies/terms-of-use)). LLaVA is a new LLM that can do more
+than just chat; you can also upload images and ask it questions
+about them. With llamafile, this all happens locally; no data
ever leaves your computer.
-1. Download [llava-v1.5-7b-q4.llamafile](https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) (3.97 GB).
+1. Download [llava-v1.5-7b-q4.llamafile](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) (4.29 GB).
2. Open your computer's terminal.
-3. If you're using macOS, Linux, or BSD, you'll need to grant permission
-for your computer to execute this new file. (You only need to do this
+3. If you're using macOS, Linux, or BSD, you'll need to grant permission
+for your computer to execute this new file. (You only need to do this
once.)
```sh
@@ -38,11 +44,11 @@ chmod +x llava-v1.5-7b-q4.llamafile
5. Run the llamafile. e.g.:
```sh
-./llava-v1.5-7b-q4.llamafile -ngl 9999
+./llava-v1.5-7b-q4.llamafile
```
-6. Your browser should open automatically and display a chat interface.
-(If it doesn't, just open your browser and point it at http://localhost:8080.)
+6. Your browser should open automatically and display a chat interface.
+(If it doesn't, just open your browser and point it at http://localhost:8080)
7. When you're done chatting, return to your terminal and hit
`Control-C` to shut down llamafile.
@@ -51,7 +57,7 @@ chmod +x llava-v1.5-7b-q4.llamafile
### JSON API Quickstart
-When llamafile is started in server mode, in addition to hosting a web
+When llamafile is started, in addition to hosting a web
 UI chat server at http://localhost:8080/, an [OpenAI
API](https://platform.openai.com/docs/api-reference/chat) compatible
chat completions endpoint is provided too. It's designed to support the
@@ -164,33 +170,38 @@ ChatCompletionMessage(content='There once was a programmer named Mike\nWho wrote
We also provide example llamafiles for other models, so you can easily
try out llamafile with different kinds of LLMs.
-| Model | Size | License | llamafile |
-| --- | --- | --- | --- |
-| LLaVA 1.5 | 3.97 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [llava-v1.5-7b-q4.llamafile](https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) |
-| Mistral-7B-Instruct | 5.15 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mistral-7b-instruct-v0.2.Q5\_K\_M.llamafile](https://huggingface.co/jartine/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.llamafile?download=true) |
-| Mixtral-8x7B-Instruct | 30.03 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mixtral-8x7b-instruct-v0.1.Q5\_K\_M.llamafile](https://huggingface.co/jartine/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true) |
-| WizardCoder-Python-34B | 22.23 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [wizardcoder-python-34b-v1.0.Q5\_K\_M.llamafile](https://huggingface.co/jartine/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q5_K_M.llamafile?download=true) |
-| WizardCoder-Python-13B | 7.33 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [wizardcoder-python-13b.llamafile](https://huggingface.co/jartine/wizardcoder-13b-python/resolve/main/wizardcoder-python-13b.llamafile?download=true) |
-| TinyLlama-1.1B | 0.76 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [TinyLlama-1.1B-Chat-v1.0.Q5\_K\_M.llamafile](https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile?download=true) |
-| Rocket-3B | 1.89 GB | [cc-by-sa-4.0](https://creativecommons.org/licenses/by-sa/4.0/deed.en) | [rocket-3b.Q5\_K\_M.llamafile](https://huggingface.co/jartine/rocket-3B-llamafile/resolve/main/rocket-3b.Q5_K_M.llamafile?download=true) |
-| Phi-2 | 1.96 GB | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) | [phi-2.Q5\_K\_M.llamafile](https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q5_K_M.llamafile?download=true) |
+| Model | Size | License | llamafile | other quants |
+| --- | --- | --- | --- | --- |
+| LLaVA 1.5 | 3.97 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [llava-v1.5-7b-q4.llamafile](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile) |
+| TinyLlama-1.1B | 2.05 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [TinyLlama-1.1B-Chat-v1.0.F16.llamafile](https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/main/TinyLlama-1.1B-Chat-v1.0.F16.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile) |
+| Mistral-7B-Instruct | 3.85 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mistral-7b-instruct-v0.2.Q4\_0.llamafile](https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q4_0.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.2-llamafile) |
+| Phi-3-mini-4k-instruct | 7.67 GB | [Apache 2.0](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/blob/main/LICENSE) | [Phi-3-mini-4k-instruct.F16.llamafile](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.F16.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile) |
+| Mixtral-8x7B-Instruct | 30.03 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mixtral-8x7b-instruct-v0.1.Q5\_K\_M.llamafile](https://huggingface.co/Mozilla/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Mixtral-8x7B-Instruct-v0.1-llamafile) |
+| WizardCoder-Python-34B | 22.23 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [wizardcoder-python-34b-v1.0.Q5\_K\_M.llamafile](https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile) |
+| WizardCoder-Python-13B | 7.33 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [wizardcoder-python-13b.llamafile](https://huggingface.co/jartine/wizardcoder-13b-python/resolve/main/wizardcoder-python-13b.llamafile?download=true) | [See HF repo](https://huggingface.co/jartine/wizardcoder-13b-python) |
+| LLaMA-3-Instruct-70B | 37.25 GB | [llama3](https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile/blob/main/Meta-Llama-3-Community-License-Agreement.txt) | [Meta-Llama-3-70B-Instruct.Q4\_0.llamafile](https://huggingface.co/Mozilla/Meta-Llama-3-70B-Instruct-llamafile/resolve/main/Meta-Llama-3-70B-Instruct.Q4_0.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Meta-Llama-3-70B-Instruct-llamafile) |
+| LLaMA-3-Instruct-8B | 5.37 GB | [llama3](https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile/blob/main/Meta-Llama-3-Community-License-Agreement.txt) | [Meta-Llama-3-8B-Instruct.Q5\_K\_M.llamafile](https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile) |
+| Rocket-3B | 1.89 GB | [cc-by-sa-4.0](https://creativecommons.org/licenses/by-sa/4.0/deed.en) | [rocket-3b.Q5\_K\_M.llamafile](https://huggingface.co/Mozilla/rocket-3B-llamafile/resolve/main/rocket-3b.Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/rocket-3B-llamafile) |
+| *Text Embedding Models* | | | | |
+| E5-Mistral-7B-Instruct | 5.16 GB | [MIT](https://choosealicense.com/licenses/mit/) | [e5-mistral-7b-instruct-Q5_K_M.llamafile](https://huggingface.co/Mozilla/e5-mistral-7b-instruct/resolve/main/e5-mistral-7b-instruct-Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/e5-mistral-7b-instruct) |
+| mxbai-embed-large-v1 | 0.7 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mxbai-embed-large-v1-f16.llamafile](https://huggingface.co/Mozilla/mxbai-embed-large-v1-llamafile/resolve/main/mxbai-embed-large-v1-f16.llamafile?download=true) | [See HF Repo](https://huggingface.co/Mozilla/mxbai-embed-large-v1-llamafile) |
Here is an example for the Mistral command-line llamafile:
```sh
-./mistral-7b-instruct-v0.2.Q5_K_M.llamafile -ngl 9999 --temp 0.7 -p '[INST]Write a story about llamas[/INST]'
+./mistral-7b-instruct-v0.2.Q5_K_M.llamafile --temp 0.7 -p '[INST]Write a story about llamas[/INST]'
```
And here is an example for WizardCoder-Python command-line llamafile:
```sh
-./wizardcoder-python-13b.llamafile -ngl 9999 --temp 0 -e -r '```\n' -p '```c\nvoid *memcpy_sse2(char *dst, const char *src, size_t size) {\n'
+./wizardcoder-python-13b.llamafile --temp 0 -e -r '```\n' -p '```c\nvoid *memcpy_sse2(char *dst, const char *src, size_t size) {\n'
```
And here's an example for the LLaVA command-line llamafile:
```sh
-./llava-v1.5-7b-q4.llamafile -ngl 9999 --temp 0.2 --image lemurs.jpg -e -p '### User: What do you see?\n### Assistant:'
+./llava-v1.5-7b-q4.llamafile --temp 0.2 --image lemurs.jpg -e -p '### User: What do you see?\n### Assistant:'
```
As before, macOS, Linux, and BSD users will need to use the "chmod"
@@ -208,13 +219,13 @@ later in this document.
## How llamafile works
-A llamafile is an executable LLM that you can run on your own
-computer. It contains the weights for a given open source LLM, as well
-as everything needed to actually run that model on your computer.
-There's nothing to install or configure (with a few caveats, discussed
+A llamafile is an executable LLM that you can run on your own
+computer. It contains the weights for a given open LLM, as well
+as everything needed to actually run that model on your computer.
+There's nothing to install or configure (with a few caveats, discussed
in subsequent sections of this document).
-This is all accomplished by combining llama.cpp with Cosmopolitan Libc,
+This is all accomplished by combining llama.cpp with Cosmopolitan Libc,
which provides some useful capabilities:
1. llamafiles can run on multiple CPU microarchitectures. We
@@ -228,10 +239,10 @@ and most UNIX shells. It's also able to be easily converted (by either
you or your users) to the platform-native format, whenever required.
3. llamafiles can run on six OSes (macOS, Windows, Linux,
-FreeBSD, OpenBSD, and NetBSD). If you make your own llama files, you'll
-only need to build your code once, using a Linux-style toolchain. The
-GCC-based compiler we provide is itself an Actually Portable Executable,
-so you can build your software for all six OSes from the comfort of
+FreeBSD, OpenBSD, and NetBSD). If you make your own llama files, you'll
+only need to build your code once, using a Linux-style toolchain. The
+GCC-based compiler we provide is itself an Actually Portable Executable,
+so you can build your software for all six OSes from the comfort of
whichever one you prefer most for development.
4. The weights for an LLM can be embedded within the llamafile.
@@ -241,26 +252,26 @@ archive. It enables quantized weights distributed online to be prefixed
with a compatible version of the llama.cpp software, thereby ensuring
its originally observed behaviors can be reproduced indefinitely.
-5. Finally, with the tools included in this project you can create your
-*own* llamafiles, using any compatible model weights you want. You can
-then distribute these llamafiles to other people, who can easily make
+5. Finally, with the tools included in this project you can create your
+*own* llamafiles, using any compatible model weights you want. You can
+then distribute these llamafiles to other people, who can easily make
use of them regardless of what kind of computer they have.
## Using llamafile with external weights
-Even though our example llamafiles have the weights built-in, you don't
-*have* to use llamafile that way. Instead, you can download *just* the
-llamafile software (without any weights included) from our releases page.
-You can then use it alongside any external weights you may have on hand.
-External weights are particularly useful for Windows users because they
-enable you to work around Windows' 4GB executable file size limit.
+Even though our example llamafiles have the weights built-in, you don't
+*have* to use llamafile that way. Instead, you can download *just* the
+llamafile software (without any weights included) from our releases page.
+You can then use it alongside any external weights you may have on hand.
+External weights are particularly useful for Windows users because they
+enable you to work around Windows' 4GB executable file size limit.
For Windows users, here's an example for the Mistral LLM:
```sh
curl -L -o llamafile.exe https://github.com/Mozilla-Ocho/llamafile/releases/download/0.6/llamafile-0.6
curl -L -o mistral.gguf https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-./llamafile.exe -m mistral.gguf -ngl 9999
+./llamafile.exe -m mistral.gguf
```
Windows users may need to change `./llamafile.exe` to `.\llamafile.exe`
@@ -288,13 +299,13 @@ sudo sh -c "echo ':APE:M::MZqFpD::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/regis
sudo sh -c "echo ':APE-jart:M::jartsr::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
```
-As mentioned above, on Windows you may need to rename your llamafile by
-adding `.exe` to the filename.
+As mentioned above, on Windows you may need to rename your llamafile by
+adding `.exe` to the filename.
Also as mentioned above, Windows also has a maximum file size limit of 4GB
for executables. The LLaVA server executable above is just 30MB shy of
that limit, so it'll work on Windows, but with larger models like
-WizardCoder 13B, you need to store the weights in a separate file. An
+WizardCoder 13B, you need to store the weights in a separate file. An
example is provided above; see "Using llamafile with external weights."
On WSL, it's recommended that the WIN32 interop feature be disabled:
@@ -303,6 +314,12 @@ On WSL, it's recommended that the WIN32 interop feature be disabled:
sudo sh -c "echo -1 > /proc/sys/fs/binfmt_misc/WSLInterop"
```
+If you get a `Permission Denied` error when disabling interop through the CLI, you can permanently disable it by adding the following to `/etc/wsl.conf`:
+```sh
+[interop]
+enabled=false
+```
+
On Raspberry Pi, if you get "mmap error 12" then it means your kernel is
configured with fewer than 48 bits of address space. You need to upgrade
to RPI 5. You can still use RPI 4 if you either (1) rebuild your kernel,
@@ -313,7 +330,7 @@ if you have CrowdStrike and then ask to be whitelisted.
## Supported OSes
-llamafile supports the following operating systems, which require a minimum
+llamafile supports the following operating systems, which require a minimum
stock install:
- Linux 2.6.18+ (i.e. every distro since RHEL5 c. 2007)
@@ -339,9 +356,8 @@ llamafile supports the following CPUs:
print an error and refuse to run. This means that if you have an Intel
CPU, it needs to be Intel Sandybridge or newer (circa 2011+), and if
you have an AMD CPU, then it needs to be Bulldozer or newer (circa
- 2011+). Support for AVX2, FMA, F16C, and VNNI are conditionally
- enabled at runtime if you have a newer CPU. There's no support for
- AVX512 runtime dispatching yet.
+ 2011+). Support for AVX512, AVX2, FMA, F16C, and VNNI is
+ conditionally enabled at runtime if you have a newer CPU.
- **ARM64** microprocessors must have ARMv8a+. This means everything
from Apple Silicon to 64-bit Raspberry Pis will work, provided your
@@ -412,7 +428,7 @@ llama.cpp command line interface, utilizing WizardCoder-Python-13B
weights:
```sh
-llamafile -ngl 9999 \
+llamafile \
-m wizardcoder-python-13b-v1.0.Q8_0.gguf \
--temp 0 -r '}\n' -r '```\n' \
-e -p '```c\nvoid *memcpy(void *dst, const void *src, size_t size) {\n'
@@ -563,11 +579,38 @@ that describes the changes, and mention it in your Hugging Face commit.
## Documentation
-There's a man page for each of the llamafile programs installed when you
+There's a manual page for each of the llamafile programs installed when you
run `sudo make install`. The command manuals are also typeset as PDF
files that you can download from our GitHub releases page. Lastly, most
commands will display that information when passing the `--help` flag.
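+
+For example, after `sudo make install` (a minimal illustration of the above):
+
+```sh
+man llamafile       # read the typeset manual page
+llamafile --help    # print the same information from the command itself
+```
+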
+## Running llamafile with models downloaded by third-party applications
+
+This section answers the question *"I already have a model downloaded locally by application X, can I use it with llamafile?"*. The general answer is "yes, as long as those models are stored locally in GGUF format", but the process can be more or less hacky depending on the application. A few examples (tested on a Mac) follow.
+
+### LM Studio
+[LM Studio](https://lmstudio.ai/) stores downloaded models in `~/.cache/lm-studio/models`, in subdirectories named after the models (following HuggingFace's `account_name/model_name` format), with the same filename you saw when you chose to download the file.
+
+So if you have downloaded e.g. the `llama-2-7b.Q2_K.gguf` file for `TheBloke/Llama-2-7B-GGUF`, you can run llamafile as follows:
+
+```
+cd ~/.cache/lm-studio/models/TheBloke/Llama-2-7B-GGUF
+llamafile -m llama-2-7b.Q2_K.gguf
+```
+
+### Ollama
+
+When you download a new model with [ollama](https://ollama.com), all its metadata is stored in a manifest file under `~/.ollama/models/manifests/registry.ollama.ai/library/`. The directory and manifest file name are the model name as returned by `ollama list`. For instance, for `llama3:latest` the manifest file will be named `~/.ollama/models/manifests/registry.ollama.ai/library/llama3/latest`.
+
+The manifest maps each file related to the model (GGUF weights, license, prompt template, etc.) to a sha256 digest. The digest corresponding to the element whose `mediaType` is `application/vnd.ollama.image.model` is the one referring to the model's GGUF file.
+
+Each sha256 digest is also used as a filename in the `~/.ollama/models/blobs` directory (if you look into that directory you'll see *only* those sha256-* filenames). This means you can directly run llamafile by passing the sha256 digest as the model filename. So if e.g. the `llama3:latest` GGUF file digest is `sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29`, you can run llamafile as follows:
+
+```
+cd ~/.ollama/models/blobs
+llamafile -m sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
+```
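+
+If you'd rather not hunt through the manifest by hand, the digest can be extracted programmatically. A minimal sketch, assuming `jq` is installed (it relies only on the manifest layout described above; `tr` converts the manifest's `sha256:` form into the `sha256-` form used for blob filenames):
+
+```sh
+# Print the blob filename of the GGUF weights for llama3:latest.
+jq -r '.layers[] | select(.mediaType == "application/vnd.ollama.image.model") | .digest' \
+  ~/.ollama/models/manifests/registry.ollama.ai/library/llama3/latest | tr ':' '-'
+```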
+
## Technical details
Here is a succinct overview of the tricks we used to create the fattest
@@ -661,8 +704,8 @@ for further details.
## A note about models
-The example llamafiles provided above should not be interpreted as
-endorsements or recommendations of specific models, licenses, or data
+The example llamafiles provided above should not be interpreted as
+endorsements or recommendations of specific models, licenses, or data
sets on the part of Mozilla.
## Security
@@ -687,7 +730,7 @@ Our approach to security has these benefits:
2. The main CLI command won't be able to access the network at all. This
is enforced by the operating system kernel. It also won't be able to
write to the file system. This keeps your computer safe in the event
- that a bug is ever discovered in the the GGUF file format that lets
+ that a bug is ever discovered in the GGUF file format that lets
an attacker craft malicious weights files and post them online. The
only exception to this rule is if you pass the `--prompt-cache` flag
without also specifying `--prompt-cache-ro`. In that case, security
@@ -710,5 +753,5 @@ should that be desired.
The llamafile logo on this page was generated with the assistance of DALL·E 3.
-![star-history-2023123](https://github.com/Mozilla-Ocho/llamafile/assets/42821/978d49be-e383-44df-ae6c-3f542c6130f9)
+[![Star History Chart](https://api.star-history.com/svg?repos=Mozilla-Ocho/llamafile&type=Date)](https://star-history.com/#Mozilla-Ocho/llamafile&Date)
diff --git a/build/config.mk b/build/config.mk
index 4726d2719b..a7f255202c 100644
--- a/build/config.mk
+++ b/build/config.mk
@@ -2,7 +2,7 @@
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
PREFIX = /usr/local
-COSMOCC = .cosmocc/3.3.2
+COSMOCC = .cosmocc/3.3.10
TOOLCHAIN = $(COSMOCC)/bin/cosmo
AR = $(TOOLCHAIN)ar
@@ -13,12 +13,13 @@ MKDEPS = $(COSMOCC)/bin/mkdeps
INSTALL = install
ARFLAGS = rcsD
-CCFLAGS = -g -O3 -fexceptions
-CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes
-TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=alderlake
+CCFLAGS = -g -O3 -fexceptions -fsignaling-nans
+CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
+TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=znver4
TMPDIR = o//tmp
IGNORE := $(shell mkdir -p $(TMPDIR))
+ARCH := $(shell uname -m)
# apple still distributes a 17 year old version of gnu make
ifeq ($(MAKE_VERSION), 3.81)
@@ -50,5 +51,5 @@ clean:; rm -rf o
.PHONY: distclean
distclean:; rm -rf o .cosmocc
-.cosmocc/3.3.2:
- build/download-cosmocc.sh $@ 3.3.2 a695012ffbeac5e26e3c4a740debc15273f47e9a8bdc55e8b76a623154d5914b
+.cosmocc/3.3.10:
+ build/download-cosmocc.sh $@ 3.3.10 00d61c1215667314f66e288c8285bae38cc6137fca083e5bba6c74e3a52439de
diff --git a/build/cudacc b/build/cudacc
new file mode 100755
index 0000000000..35d489b3e2
--- /dev/null
+++ b/build/cudacc
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+find_nvcc() {
+ CC=$(command -v nvcc 2>/dev/null) && return
+ CC="$CUDA_PATH/bin/nvcc"
+ [ -x "$CC" ] && return
+ CC="/opt/cuda/bin/nvcc"
+ [ -x "$CC" ] && return
+ CC="/usr/local/cuda/bin/nvcc"
+ [ -x "$CC" ] && return
+ return 1
+}
+
+find_hipcc() {
+ CC=$(command -v hipcc 2>/dev/null) && return
+ CC="$HIP_PATH/bin/hipcc"
+ [ -x "$CC" ] && return
+ CC="/opt/rocm/bin/hipcc"
+ [ -x "$CC" ] && return
+ CC="/usr/local/rocm/bin/hipcc"
+ [ -x "$CC" ] && return
+ return 1
+}
+
+if find_hipcc; then
+ VENDOR=AMD
+ FLAGS=
+elif find_nvcc; then
+ VENDOR=NVIDIA
+ FLAGS="--forward-unknown-to-host-compiler"
+else
+ echo 'error: need either hipcc (AMD) or nvcc (NVIDIA) on $PATH' >&2
+ exit 1
+fi
+
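+# Rebuild "$@" in place: the first loop iteration clears the argument
+# list, then each argument is re-appended, applying AMD substitutions.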
+FIRST=1
+for x; do
+ if [ $FIRST -eq 1 ]; then
+ set --
+ FIRST=0
+ fi
+ if [ $VENDOR = AMD ]; then
+ if [ x"$x" = x"-lcublas" ]; then
+ set -- "$@" -lhipblas -lrocblas
+ continue
+ elif [ x"$x" = x"--use_fast_math" ]; then
+ continue
+ fi
+ fi
+ set -- "$@" "$x"
+done
+
+exec "$CC" $FLAGS "$@"
diff --git a/build/deps.mk b/build/deps.mk
index b5cc9673b5..1479154cd3 100644
--- a/build/deps.mk
+++ b/build/deps.mk
@@ -9,6 +9,10 @@ o/$(MODE)/depend: $(SRCS) $(HDRS) $(INCS)
@mkdir -p $(@D)
$(MKDEPS) -o $@ -r o/$(MODE)/ $(SRCS) $(HDRS) $(INCS)
+o/$(MODE)/depend.test: $(SRCS) $(HDRS) $(INCS)
+ @mkdir -p $(@D)
+ $(MKDEPS) -o $@ -r o/$(MODE)/ $(SRCS) $(HDRS) $(INCS)
+
$(SRCS):
$(HDRS):
$(INCS):
diff --git a/build/llamafile-upgrade-engine b/build/llamafile-upgrade-engine
new file mode 100755
index 0000000000..5efc2288b7
--- /dev/null
+++ b/build/llamafile-upgrade-engine
@@ -0,0 +1,127 @@
+#!/bin/sh
+BIN="${0%/*}"
+PROG="${0##*/}"
+
+print_full_help() {
+ cat << EOF
+Usage: $PROG [OPTION]... <old> (new)
+Upgrade llamafile archives.
+
+Options:
+ -h, --help display this help and exit
+ -f, --force skip version check
+ -v, --verbose verbose mode
+
+Arguments:
+ <old> the name of the old llamafile archive to be upgraded
+ (new) the name of the new llamafile archive to be created;
+ if not defined, output will be <old>.updated.llamafile
+
+Example:
+ $PROG old.llamafile new.llamafile
+ This command will upgrade old.llamafile to a new llamafile named new.llamafile.
+
+When you run this program, it's recommended that you've
+downloaded or installed an official llamafile-VERSION.zip
+from https://github.com/Mozilla-Ocho/llamafile/releases
+because they include prebuilt DLLs for CUDA and ROCm.
+You can verify your llamafile has them w/ unzip -vl
+EOF
+}
+
+abort() {
+ echo "Error: $1" >&2
+ cat << EOF >&2
+Usage: $PROG [OPTION]... <old> (new)
+Upgrade llamafile archives.
+Refer to --help for full instructions.
+EOF
+ exit 1
+}
+
+if [ x"$1" = x"-h" ] || [ x"$1" = x"--help" ]; then
+ print_full_help >&2
+ exit 0
+fi
+
+# find paths of golden llamafile binaries
+#
+# 1. if user downloaded `llamafile-VERSION.zip`, extracted it, and ran
+# `./llamafile-VERSION/bin/llamafile-upgrade-engine` directly, then we can
+# support that by looking for a `llamafile` in the same bin folder.
+#
+# 2. otherwise, perform a $PATH lookup for llamafile
+#
+LLAMAFILE="$BIN/llamafile"
+if [ ! -x "$LLAMAFILE" ]; then
+ LLAMAFILE="$(command -v llamafile)" || abort "llamafile not found in PATH"
+fi
+ZIPALIGN="$BIN/zipalign"
+if [ ! -x "$ZIPALIGN" ]; then
+ ZIPALIGN="$(command -v zipalign)" || abort "zipalign not found in PATH"
+fi
+
+# Parse command-line options
+force_upgrade=false
+verbose=false
+while getopts "fv" opt; do
+ case $opt in
+ f)
+ force_upgrade=true
+ echo "Skipping version check."
+ ;;
+ v)
+ verbose=true
+ echo "Verbose Output Mode."
+ ;;
+ esac
+done
+
+# Shift the option parameters
+shift $((OPTIND - 1))
+
+# Remove .llamafile extension from arguments if present
+if [ -z "${1}" ]; then
+ abort "Missing path to old llamafile archive to be upgraded"
+else
+ old_llamafile="${1%.llamafile}"
+fi
+
+if [ -z "$2" ]; then
+ new_llamafile="${old_llamafile}.updated"
+else
+ new_llamafile="${2%.llamafile}"
+fi
+
+# Obtain versions of old and new llamafiles
+old_llamafile_engine_version="$("./$old_llamafile".llamafile --version)" || abort "Failed to get version of old llamafile"
+new_llamafile_engine_version="$("$LLAMAFILE" --version)" || abort "Failed to get version of new llamafile"
+
+# Check if llamafile has been upgraded
+echo "== Engine Version Check ==" >&2
+echo "Engine version from $old_llamafile: $old_llamafile_engine_version" >&2
+echo "Engine version from $LLAMAFILE: $new_llamafile_engine_version" >&2
+if [ "$old_llamafile_engine_version" = "$new_llamafile_engine_version" ] && [ "$force_upgrade" != "true" ]; then
+ echo "Upgrade not required. Exiting..." >&2
+ exit 0
+fi
+
+if [ "$verbose" = "true" ]; then
+ echo "== Current Content ==" >&2
+ zipinfo "${old_llamafile}.llamafile" || abort "Failed to get current content of old llamafile"
+fi
+
+tempdir="$(mktemp -d)" || abort "Failed to create temporary directory"
+trap 'rm -rf "$tempdir"' EXIT
+
+echo "== Repackaging / Upgrading ==" >&2
+echo "extracting..." >&2
+unzip "${old_llamafile}.llamafile" -d "$tempdir" || abort "Failed to extract old llamafile"
+echo "repackaging..." >&2
+cp "$LLAMAFILE" "${new_llamafile}.llamafile" || abort "Failed to copy new llamafile"
+"$ZIPALIGN" -j0 "${new_llamafile}.llamafile" "$tempdir"/*.gguf "$tempdir"/.args || abort "Failed to repackaging"
+
+echo "== Completed ==" >&2
+echo "Original File: ${old_llamafile}.llamafile" >&2
+echo "Upgraded File: ${new_llamafile}.llamafile" >&2
diff --git a/build/objdump b/build/objdump
index 27916ef56a..7db7eda2a5 100755
--- a/build/objdump
+++ b/build/objdump
@@ -1,6 +1,6 @@
#!/bin/sh
if printf '%s\n' "$*" | grep aarch64 >/dev/null 2>&1; then
- exec aarch64-unknown-cosmo-objdump "$@"
+ exec aarch64-unknown-cosmo-objdump $1 ${2%/*}/.aarch64/${2##*/}
else
exec x86_64-unknown-cosmo-objdump "$@"
fi
diff --git a/build/rules.mk b/build/rules.mk
index 05d7942e72..b5d4964f75 100644
--- a/build/rules.mk
+++ b/build/rules.mk
@@ -26,6 +26,10 @@ o/$(MODE)/%: o/$(MODE)/%.o
o/$(MODE)/%.com: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
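+
+# Stamp rule: `make foo.runs` executes the freshly built `foo`, then
+# touches a witness file so the program re-runs only after a rebuild.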
+%.runs: %
+ $<
+ @touch $@
+
.PRECIOUS: %.1.asc
%.1.asc: %.1
-MANWIDTH=80 MAN_KEEP_FORMATTING=1 man $< >$@.tmp && mv -f $@.tmp $@
@@ -35,3 +39,10 @@ o/$(MODE)/%.zip.o: % $(COSMOCC)
@mkdir -p $(dir $@)/.aarch64
$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a aarch64 -o $(dir $@)/.aarch64/$(notdir $@) $<
+
+$(PREFIX)/bin/ape: $(COSMOCC) # cosmocc toolchain setup in restricted ci context
+ # Install ape loader
+ $(INSTALL) $(COSMOCC)/bin/ape-$(ARCH).elf $(PREFIX)/bin/ape
+
+ # Config binfmt_misc to use ape loader for ape.elf files
+ echo ':APE:M::MZqFpD::/usr/bin/ape:' > /proc/sys/fs/binfmt_misc/register
\ No newline at end of file
diff --git a/llama.cpp/BUILD.mk b/llama.cpp/BUILD.mk
index 5e86dae344..735488b95f 100644
--- a/llama.cpp/BUILD.mk
+++ b/llama.cpp/BUILD.mk
@@ -24,8 +24,41 @@ include llama.cpp/main/BUILD.mk
include llama.cpp/imatrix/BUILD.mk
include llama.cpp/quantize/BUILD.mk
include llama.cpp/perplexity/BUILD.mk
+include llama.cpp/llama-bench/BUILD.mk
-$(LLAMA_CPP_OBJS): private CCFLAGS += -DGGML_MULTIPLATFORM
+$(LLAMA_CPP_OBJS): private \
+ CCFLAGS += \
+ -DNDEBUG \
+ -DGGML_MULTIPLATFORM \
+ -DGGML_USE_LLAMAFILE
+
+o/$(MODE)/llama.cpp/ggml-alloc.o \
+o/$(MODE)/llama.cpp/ggml-backend.o \
+o/$(MODE)/llama.cpp/grammar-parser.o \
+o/$(MODE)/llama.cpp/json-schema-to-grammar.o \
+o/$(MODE)/llama.cpp/llama.o \
+o/$(MODE)/llama.cpp/stb_image.o \
+o/$(MODE)/llama.cpp/unicode.o \
+o/$(MODE)/llama.cpp/sampling.o \
+o/$(MODE)/llama.cpp/common.o: private \
+ CCFLAGS += -Os
+
+o/$(MODE)/llama.cpp/ggml-quants.o: private CXXFLAGS += -Os
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
+
+o/$(MODE)/llama.cpp/ggml-vector.o: private CXXFLAGS += -Os
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
+o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o: private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mfma
+o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o: private TARGET_ARCH += -Xx86_64-mtune=ivybridge -Xx86_64-mf16c
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o: private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512vl -Xx86_64-mavx512bf16
+o/$(MODE)/llama.cpp/ggml-vector-arm82.o: private TARGET_ARCH += -Xaarch64-march=armv8.2-a+fp16
+
+$(LLAMA_CPP_OBJS): llama.cpp/BUILD.mk
.PHONY: o/$(MODE)/llama.cpp
o/$(MODE)/llama.cpp: \
@@ -34,4 +67,5 @@ o/$(MODE)/llama.cpp: \
o/$(MODE)/llama.cpp/server \
o/$(MODE)/llama.cpp/imatrix \
o/$(MODE)/llama.cpp/quantize \
- o/$(MODE)/llama.cpp/perplexity
+ o/$(MODE)/llama.cpp/perplexity \
+ o/$(MODE)/llama.cpp/llama-bench
diff --git a/llama.cpp/LICENSE b/llama.cpp/LICENSE
index 83b046d827..bd835ddf71 100644
--- a/llama.cpp/LICENSE
+++ b/llama.cpp/LICENSE
@@ -1,6 +1,7 @@
MIT License
Copyright (c) 2023 Georgi Gerganov
+Copyright (c) 2023 Iwan Kawrakow
Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng.
Copyright (c) 2023 Yuji Hirose
Copyright (c) 2022 Niels Lohmann
diff --git a/llama.cpp/README.llamafile b/llama.cpp/README.llamafile
index 02f1e61b1d..68d7e73222 100644
--- a/llama.cpp/README.llamafile
+++ b/llama.cpp/README.llamafile
@@ -9,21 +9,23 @@ LICENSE
ORIGIN
https://github.com/ggerganov/llama.cpp/pull/4406/
- cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
- 2024-02-27
+ 152da28ae54139e3754189b9e6e1c28e11277502
+ 2024-05-23
LOCAL MODIFICATIONS
+ - Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
- Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
- Unify main, server, and llava-cli into single llamafile program
- Make cuBLAS / hipBLAS optional by introducing tinyBLAS library
- Add support to main() programs for Cosmo /zip/.args files
- Introduce pledge() SECCOMP sandboxing to improve security
- Call exit() rather than abort() when GGML_ASSERT() fails
+ - Clamp bf16/f32 values before passing to K quantizers
- Make GPU logger callback API safer and less generic
- Write log to /dev/null when main.log fails to open
- - Use _rand64() rather than time() as default seed
- Make main and llava-cli print timings on ctrl-c
+ - Make embeddings CLI program shell scriptable
- Avoid bind() conflicts on port 8080 w/ server
- Use runtime dispatching for matmul quants
- Remove operating system #ifdef statements
diff --git a/llama.cpp/common.cpp b/llama.cpp/common.cpp
index 02d4282172..e1313a8bda 100644
--- a/llama.cpp/common.cpp
+++ b/llama.cpp/common.cpp
@@ -1,16 +1,16 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-
-// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+
#include "common.h"
+#include "json.h"
+#include "json-schema-to-grammar.h"
+#include "llamafile/debug.h"
#include "llama.h"
-#include "ggml-cuda.h"
-#include "ggml-metal.h"
#include
#include
#include
-#include
#include
-#include
#include
#include
#include
@@ -22,6 +22,8 @@
#include
#include
#include
+#include
+#include
#if defined(__APPLE__) && defined(__MACH__)
#include
@@ -33,7 +35,6 @@
#ifndef NOMINMAX
# define NOMINMAX
#endif
-#include
#include
#include
#include
@@ -43,17 +44,44 @@
#include
#include
#endif
+#if defined(LLAMA_USE_CURL)
+#include
+#include
+#include
+#include
+#endif
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
-int32_t get_num_physical_cores() {
+#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL))
+#define GGML_USE_CUDA_SYCL
+#endif
+
+#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN)
+#define GGML_USE_CUDA_SYCL_VULKAN
+#endif
+
+#if defined(LLAMA_USE_CURL)
#ifdef __linux__
+#include <linux/limits.h>
+#elif defined(_WIN32)
+#define PATH_MAX MAX_PATH
+#else
+#include <sys/syslimits.h>
+#endif
+#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
+#endif // LLAMA_USE_CURL
+
+using json = nlohmann::ordered_json;
+
+int32_t get_num_physical_cores() {
+ if (IsLinux()) {
// enumerate the set of thread siblings, num entries is num cores
std::unordered_set<std::string> siblings;
for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) {
- std::ifstream thread_siblings("/sys/devices/system/cpu"
+ std::ifstream thread_siblings("/sys/devices/system/cpu/cpu"
+ std::to_string(cpu) + "/topology/thread_siblings");
if (!thread_siblings.is_open()) {
break; // no more cpus
@@ -66,7 +94,7 @@ int32_t get_num_physical_cores() {
if (!siblings.empty()) {
return static_cast<int32_t>(siblings.size());
}
-#elif defined(__APPLE__) && defined(__MACH__)
+ }
int32_t num_physical_cores;
size_t len = sizeof(num_physical_cores);
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
@@ -77,14 +105,84 @@ int32_t get_num_physical_cores() {
if (result == 0) {
return num_physical_cores;
}
-#elif defined(_WIN32)
- //TODO: Implement
-#endif
unsigned int n_threads = std::thread::hardware_concurrency();
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
-void process_escapes(std::string& input) {
+#if defined(__x86_64__) && (defined(__linux__) || defined(__COSMOPOLITAN__)) && !defined(__ANDROID__)
+#include <pthread.h>
+
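+// Raw CPUID wrapper. %rbx is parked in %rsi around the instruction because
+// %rbx is a callee-saved (and historically PIC base) register that CPUID
+// clobbers, so inline assembly must preserve it by hand.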
+static void cpuid(unsigned leaf, unsigned subleaf,
+ unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
+ __asm__("movq\t%%rbx,%%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx,%%rsi"
+ : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
+ : "0"(leaf), "2"(subleaf));
+}
+
+static int pin_cpu(int cpu) {
+ cpu_set_t mask;
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
+}
+
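+// CPUID leaf 7 EDX bit 15 is the x86 "hybrid part" flag: set when the
+// package mixes performance and efficiency cores (e.g. Intel Alder Lake).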
+static bool is_hybrid_cpu(void) {
+ unsigned eax, ebx, ecx, edx;
+ cpuid(7, 0, &eax, &ebx, &ecx, &edx);
+ return !!(edx & (1u << 15));
+}
+
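+// CPUID leaf 0x1a EAX[31:24] reports the core type of the CPU this thread
+// is currently running on; 0x20 identifies an Intel Atom (efficiency) core.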
+static bool is_running_on_efficiency_core(void) {
+ unsigned eax, ebx, ecx, edx;
+ cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx);
+ int intel_atom = 0x20;
+ int core_type = (eax & 0xff000000u) >> 24;
+ return core_type == intel_atom;
+}
+
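+// Pins the calling thread to each CPU in turn and counts only the
+// performance cores, stepping by two on the assumption that SMT siblings
+// are numbered adjacently.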
+static int count_math_cpus(int cpu_count) {
+ int result = 0;
+ for (int cpu = 0; cpu < cpu_count; ++cpu) {
+ if (pin_cpu(cpu)) {
+ return -1;
+ }
+ if (is_running_on_efficiency_core()) {
+ continue; // efficiency cores harm lockstep threading
+ }
+ ++cpu; // hyperthreading isn't useful for linear algebra
+ ++result;
+ }
+ return result;
+}
+
+#endif // __x86_64__ && (__linux__ || __COSMOPOLITAN__) && !__ANDROID__
+
+/**
+ * Returns number of CPUs on system that are useful for math.
+ */
+int cpu_get_num_math() {
+#if defined(__x86_64__) && (defined(__linux__) || defined(__COSMOPOLITAN__)) && !defined(__ANDROID__)
+ int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpu_count < 1) {
+ return get_num_physical_cores();
+ }
+ if (is_hybrid_cpu()) {
+ cpu_set_t affinity;
+ if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
+ int result = count_math_cpus(cpu_count);
+ pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
+ if (result > 0) {
+ return result;
+ }
+ }
+ }
+#endif
+ return get_num_physical_cores();
+}
+
+void process_escapes(std::string & input) {
std::size_t input_len = input.length();
std::size_t output_idx = 0;
@@ -125,786 +223,1204 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
bool result = true;
try {
if (!gpt_params_parse_ex(argc, argv, params)) {
- gpt_print_usage(argc, argv, gpt_params());
+ // [jart] don't show help if user didn't ask for help
+ // gpt_print_usage(argc, argv, gpt_params());
exit(0);
}
}
catch (const std::invalid_argument & ex) {
fprintf(stderr, "%s\n", ex.what());
- gpt_print_usage(argc, argv, gpt_params());
+ // [jart] don't show help if user didn't ask for help
+ // gpt_print_usage(argc, argv, gpt_params());
exit(1);
}
- if (FLAG_gpu == LLAMAFILE_GPU_DISABLE) {
- params.n_gpu_layers = 0;
- }
return result;
}
-bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
- bool invalid_param = false;
- std::string arg;
- bool passed_gpu_flags = false;
- const std::string arg_prefix = "--";
- llama_sampling_params & sparams = params.sparams;
-
- for (int i = 1; i < argc; i++) {
- arg = argv[i];
- if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
- std::replace(arg.begin(), arg.end(), '_', '-');
+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
+ const char * sep = strchr(data, '=');
+ if (sep == nullptr || sep - data >= 128) {
+ fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
+ return false;
+ }
+ llama_model_kv_override kvo;
+ std::strncpy(kvo.key, data, sep - data);
+ kvo.key[sep - data] = 0;
+ sep++;
+ if (strncmp(sep, "int:", 4) == 0) {
+ sep += 4;
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+ kvo.val_i64 = std::atol(sep);
+ } else if (strncmp(sep, "float:", 6) == 0) {
+ sep += 6;
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
+ kvo.val_f64 = std::atof(sep);
+ } else if (strncmp(sep, "bool:", 5) == 0) {
+ sep += 5;
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
+ if (std::strcmp(sep, "true") == 0) {
+ kvo.val_bool = true;
+ } else if (std::strcmp(sep, "false") == 0) {
+ kvo.val_bool = false;
+ } else {
+ fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
+ return false;
}
+ } else if (strncmp(sep, "str:", 4) == 0) {
+ sep += 4;
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+ if (strlen(sep) > 127) {
+ fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
+ return false;
+ }
+ strncpy(kvo.val_str, sep, 127);
+ kvo.val_str[127] = '\0';
+ } else {
+ fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
+ return false;
+ }
+ overrides.emplace_back(std::move(kvo));
+ return true;
+}
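+// Example strings accepted by parse_kv_override (keys are illustrative):
+//   tokenizer.ggml.add_bos_token=bool:false
+//   llama.expert_used_count=int:2
+//   general.name=str:my-model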
- if (arg == "--cli") {
- // do nothing
- } else if (arg == "-s" || arg == "--seed") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.seed = std::stoul(argv[i]);
- } else if (arg == "-t" || arg == "--threads") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_threads = std::stoi(argv[i]);
- if (params.n_threads <= 0) {
- params.n_threads = std::thread::hardware_concurrency();
- }
- } else if (arg == "-tb" || arg == "--threads-batch") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_threads_batch = std::stoi(argv[i]);
- if (params.n_threads_batch <= 0) {
- params.n_threads_batch = std::thread::hardware_concurrency();
- }
- } else if (arg == "-td" || arg == "--threads-draft") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_threads_draft = std::stoi(argv[i]);
- if (params.n_threads_draft <= 0) {
- params.n_threads_draft = std::thread::hardware_concurrency();
- }
- } else if (arg == "-tbd" || arg == "--threads-batch-draft") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_threads_batch_draft = std::stoi(argv[i]);
- if (params.n_threads_batch_draft <= 0) {
- params.n_threads_batch_draft = std::thread::hardware_concurrency();
- }
- } else if (arg == "-p" || arg == "--prompt") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.prompt = argv[i];
- } else if (arg == "-e" || arg == "--escape") {
- params.escape = true;
- } else if (arg == "--prompt-cache") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.path_prompt_cache = argv[i];
- } else if (arg == "--prompt-cache-all") {
- params.prompt_cache_all = true;
- } else if (arg == "--prompt-cache-ro") {
- params.prompt_cache_ro = true;
- } else if (arg == "-bf" || arg == "--binary-file") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::ifstream file(argv[i], std::ios::binary);
- if (!file) {
- fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
- invalid_param = true;
- break;
- }
- // store the external file name in params
- params.prompt_file = argv[i];
- std::ostringstream ss;
- ss << file.rdbuf();
- params.prompt = ss.str();
- fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]);
- } else if (arg == "-f" || arg == "--file") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::ifstream file(argv[i]);
- if (!file) {
- fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
- invalid_param = true;
- break;
- }
- // store the external file name in params
- params.prompt_file = argv[i];
- std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
- if (!params.prompt.empty() && params.prompt.back() == '\n') {
- params.prompt.pop_back();
- }
- } else if (arg == "-n" || arg == "--n-predict") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_predict = std::stoi(argv[i]);
- } else if (arg == "--top-k") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.top_k = std::stoi(argv[i]);
- } else if (arg == "-c" || arg == "--ctx-size") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_ctx = std::stoi(argv[i]);
- } else if (arg == "--grp-attn-n" || arg == "-gan") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
-
- params.grp_attn_n = std::stoi(argv[i]);
- } else if (arg == "--grp-attn-w" || arg == "-gaw") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
-
- params.grp_attn_w = std::stoi(argv[i]);
- } else if (arg == "--rope-freq-base") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.rope_freq_base = std::stof(argv[i]);
- } else if (arg == "--rope-freq-scale") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.rope_freq_scale = std::stof(argv[i]);
- } else if (arg == "--rope-scaling") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::string value(argv[i]);
- /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
- else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
- else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
- else { invalid_param = true; break; }
- } else if (arg == "--rope-scale") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.rope_freq_scale = 1.0f/std::stof(argv[i]);
- } else if (arg == "--yarn-orig-ctx") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.yarn_orig_ctx = std::stoi(argv[i]);
- } else if (arg == "--yarn-ext-factor") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.yarn_ext_factor = std::stof(argv[i]);
- } else if (arg == "--yarn-attn-factor") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.yarn_attn_factor = std::stof(argv[i]);
- } else if (arg == "--yarn-beta-fast") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.yarn_beta_fast = std::stof(argv[i]);
- } else if (arg == "--yarn-beta-slow") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.yarn_beta_slow = std::stof(argv[i]);
- } else if (arg == "--defrag-thold" || arg == "-dt") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.defrag_thold = std::stof(argv[i]);
- } else if (arg == "--samplers") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- const auto sampler_names = string_split(argv[i], ';');
- sparams.samplers_sequence = sampler_types_from_names(sampler_names, true);
- } else if (arg == "--sampling-seq") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.samplers_sequence = sampler_types_from_chars(argv[i]);
- } else if (arg == "--top-p") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.top_p = std::stof(argv[i]);
- } else if (arg == "--min-p") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.min_p = std::stof(argv[i]);
- } else if (arg == "--temp") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.temp = std::stof(argv[i]);
- sparams.temp = std::max(sparams.temp, 0.0f);
- } else if (arg == "--tfs") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.tfs_z = std::stof(argv[i]);
- } else if (arg == "--typical") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.typical_p = std::stof(argv[i]);
- } else if (arg == "--repeat-last-n") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.penalty_last_n = std::stoi(argv[i]);
- sparams.n_prev = std::max(sparams.n_prev, sparams.penalty_last_n);
- } else if (arg == "--repeat-penalty") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.penalty_repeat = std::stof(argv[i]);
- } else if (arg == "--frequency-penalty") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.penalty_freq = std::stof(argv[i]);
- } else if (arg == "--presence-penalty") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.penalty_present = std::stof(argv[i]);
- } else if (arg == "--dynatemp-range") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.dynatemp_range = std::stof(argv[i]);
- } else if (arg == "--dynatemp-exp") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.dynatemp_exponent = std::stof(argv[i]);
- } else if (arg == "--mirostat") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.mirostat = std::stoi(argv[i]);
- } else if (arg == "--mirostat-lr") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.mirostat_eta = std::stof(argv[i]);
- } else if (arg == "--mirostat-ent") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.mirostat_tau = std::stof(argv[i]);
- } else if (arg == "--cfg-negative-prompt") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.cfg_negative_prompt = argv[i];
- } else if (arg == "--cfg-negative-prompt-file") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::ifstream file(argv[i]);
- if (!file) {
- fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
- invalid_param = true;
- break;
- }
- std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(sparams.cfg_negative_prompt));
- if (!sparams.cfg_negative_prompt.empty() && sparams.cfg_negative_prompt.back() == '\n') {
- sparams.cfg_negative_prompt.pop_back();
- }
- } else if (arg == "--cfg-scale") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.cfg_scale = std::stof(argv[i]);
- } else if (arg == "-b" || arg == "--batch-size") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_batch = std::stoi(argv[i]);
- } else if (arg == "--keep") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_keep = std::stoi(argv[i]);
- } else if (arg == "--draft") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_draft = std::stoi(argv[i]);
- } else if (arg == "--chunks") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_chunks = std::stoi(argv[i]);
- } else if (arg == "-np" || arg == "--parallel") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_parallel = std::stoi(argv[i]);
- } else if (arg == "-ns" || arg == "--sequences") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_sequences = std::stoi(argv[i]);
- } else if (arg == "--p-accept" || arg == "-pa") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.p_accept = std::stof(argv[i]);
- } else if (arg == "--p-split" || arg == "-ps") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.p_split = std::stof(argv[i]);
- } else if (arg == "-m" || arg == "--model") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.model = argv[i];
- } else if (arg == "-md" || arg == "--model-draft") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.model_draft = argv[i];
- } else if (arg == "-a" || arg == "--alias") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.model_alias = argv[i];
- } else if (arg == "--lora") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.lora_adapter.emplace_back(argv[i], 1.0f);
- params.use_mmap = false;
- } else if (arg == "--lora-scaled") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- const char * lora_adapter = argv[i];
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
- params.use_mmap = false;
- } else if (arg == "--lora-base") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.lora_base = argv[i];
- } else if (arg == "--mmproj") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.mmproj = argv[i];
- } else if (arg == "--image") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.image = argv[i];
- } else if (arg == "-i" || arg == "--interactive") {
- params.interactive = true;
- } else if (arg == "--embedding") {
- params.embedding = true;
- } else if (arg == "--interactive-first") {
- params.interactive_first = true;
- } else if (arg == "-ins" || arg == "--instruct") {
- params.instruct = true;
- } else if (arg == "-cml" || arg == "--chatml") {
- params.chatml = true;
- } else if (arg == "--infill") {
- params.infill = true;
- } else if (arg == "--unsecure") {
- FLAG_unsecure = true;
- } else if (arg == "--nocompile") {
- FLAG_nocompile = true;
- } else if (arg == "--recompile") {
- FLAG_recompile = true;
- } else if (arg == "--tinyblas") {
- FLAG_tinyblas = true; // undocumented
- } else if (arg == "--gpu") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- FLAG_gpu = llamafile_gpu_parse(argv[i]);
- if (FLAG_gpu == LLAMAFILE_GPU_ERROR) {
- fprintf(stderr, "error: invalid --gpu flag value: %s\n", argv[i]);
- exit(1);
- }
- } else if (arg == "-dkvc" || arg == "--dump-kv-cache") {
- params.dump_kv_cache = true;
- } else if (arg == "-nkvo" || arg == "--no-kv-offload") {
- params.no_kv_offload = true;
- } else if (arg == "-ctk" || arg == "--cache-type-k") {
- params.cache_type_k = argv[++i];
- } else if (arg == "-ctv" || arg == "--cache-type-v") {
- params.cache_type_v = argv[++i];
- } else if (arg == "--multiline-input") {
- params.multiline_input = true;
- } else if (arg == "--simple-io") {
- params.simple_io = true;
- } else if (arg == "-cb" || arg == "--cont-batching") {
- params.cont_batching = true;
- } else if (arg == "--color") {
- params.use_color = true;
- } else if (arg == "--mlock") {
- params.use_mlock = true;
- } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
- passed_gpu_flags = true;
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_gpu_layers = std::stoi(argv[i]);
- if (params.n_gpu_layers <= 0) {
- FLAG_gpu = LLAMAFILE_GPU_DISABLE;
- }
- } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
- passed_gpu_flags = true;
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_gpu_layers_draft = std::stoi(argv[i]);
- } else if (arg == "--main-gpu" || arg == "-mg") {
- passed_gpu_flags = true;
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.main_gpu = std::stoi(argv[i]);
- } else if (arg == "--split-mode" || arg == "-sm") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::string arg_next = argv[i];
- if (arg_next == "none") {
- params.split_mode = LLAMA_SPLIT_MODE_NONE;
- } else if (arg_next == "layer") {
- params.split_mode = LLAMA_SPLIT_MODE_LAYER;
- } else if (arg_next == "row") {
- params.split_mode = LLAMA_SPLIT_MODE_ROW;
- } else {
- invalid_param = true;
- break;
- }
-
- } else if (arg == "--tensor-split" || arg == "-ts") {
- passed_gpu_flags = true;
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::string arg_next = argv[i];
-
- // split string by , and /
- const std::regex regex{R"([,/]+)"};
- std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1};
- std::vector<std::string> split_arg{it, {}};
- if (split_arg.size() >= llama_max_devices()) {
- invalid_param = true;
- break;
- }
- for (size_t i = 0; i < llama_max_devices(); ++i) {
- if (i < split_arg.size()) {
- params.tensor_split[i] = std::stof(split_arg[i]);
- } else {
- params.tensor_split[i] = 0.0f;
- }
- }
- } else if (arg == "--no-mmap") {
- params.use_mmap = false;
- } else if (arg == "--numa") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::string value(argv[i]);
- /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
- else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
- else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
- else { invalid_param = true; break; }
- } else if (arg == "--verbose-prompt") {
- params.verbose_prompt = true;
- } else if (arg == "--no-display-prompt" || arg == "--silent-prompt") {
- params.display_prompt = false;
- } else if (arg == "-r" || arg == "--reverse-prompt") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.antiprompt.emplace_back(argv[i]);
- } else if (arg == "-ld" || arg == "--logdir") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.logdir = argv[i];
+bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
+ llama_sampling_params & sparams = params.sparams;
- if (params.logdir.back() != DIRECTORY_SEPARATOR) {
- params.logdir += DIRECTORY_SEPARATOR;
- }
- } else if (arg == "--save-all-logits" || arg == "--kl-divergence-base") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.logits_file = argv[i];
- } else if (arg == "--perplexity" || arg == "--all-logits") {
- params.logits_all = true;
- } else if (arg == "--ppl-stride") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.ppl_stride = std::stoi(argv[i]);
- } else if (arg == "-ptc" || arg == "--print-token-count") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.n_print = std::stoi(argv[i]);
- } else if (arg == "--ppl-output-type") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.ppl_output_type = std::stoi(argv[i]);
- } else if (arg == "--hellaswag") {
- params.hellaswag = true;
- } else if (arg == "--hellaswag-tasks") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.hellaswag_tasks = std::stoi(argv[i]);
- } else if (arg == "--winogrande") {
- params.winogrande = true;
- } else if (arg == "--winogrande-tasks") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.winogrande_tasks = std::stoi(argv[i]);
- } else if (arg == "--multiple-choice") {
- params.multiple_choice = true;
- } else if (arg == "--multiple-choice-tasks") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.multiple_choice_tasks = std::stoi(argv[i]);
- } else if (arg == "--kl-divergence") {
- params.kl_divergence = true;
- } else if (arg == "--ignore-eos") {
- params.ignore_eos = true;
- } else if (arg == "--no-penalize-nl") {
- sparams.penalize_nl = false;
- } else if (arg == "-l" || arg == "--logit-bias") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::stringstream ss(argv[i]);
- llama_token key;
- char sign;
- std::string value_str;
- try {
- if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) {
- sparams.logit_bias[key] = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f);
- } else {
- throw std::exception();
- }
- } catch (const std::exception&) {
- invalid_param = true;
- break;
- }
- } else if (arg == "-h" || arg == "--help") {
- return false;
+ if (arg == "--cli") {
+ return true;
+ }
+ if (arg == "--fast") {
+ FLAG_precise = false;
+ return true;
+ }
+ if (arg == "--precise") {
+ FLAG_precise = true;
+ return true;
+ }
+ if (arg == "--trap") {
+ FLAG_trap = true;
+ FLAG_unsecure = true; // for better backtraces
+ llamafile_trapping_enabled(+1);
+ return true;
+ }
+ if (arg == "--unsecure") {
+ FLAG_unsecure = true;
+ return true;
+ }
+ if (arg == "--nocompile") {
+ FLAG_nocompile = true;
+ return true;
+ }
+ if (arg == "--recompile") {
+ FLAG_recompile = true;
+ return true;
+ }
+ if (arg == "--tinyblas") {
+ FLAG_tinyblas = true; // undocumented
+ return true;
+ }
+ if (arg == "--gpu") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ FLAG_gpu = llamafile_gpu_parse(argv[i]);
+ if (FLAG_gpu == LLAMAFILE_GPU_ERROR) {
+ fprintf(stderr, "error: invalid --gpu flag value: %s\n", argv[i]);
+ exit(1);
+ }
+ return true;
+ }
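+ // (--gpu values are parsed by llamafile_gpu_parse; the llamafile manual
+ // documents AUTO, APPLE, AMD, NVIDIA and DISABLE.)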
- } else if (arg == "--version") {
- fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
- fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
- exit(0);
- } else if (arg == "--random-prompt") {
- params.random_prompt = true;
- } else if (arg == "--in-prefix-bos") {
- params.input_prefix_bos = true;
- } else if (arg == "--in-prefix") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.input_prefix = argv[i];
- } else if (arg == "--in-suffix") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- params.input_suffix = argv[i];
- } else if (arg == "--grammar") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- sparams.grammar = argv[i];
- } else if (arg == "--grammar-file") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- std::ifstream file(argv[i]);
- if (!file) {
- fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
- invalid_param = true;
- break;
- }
- std::copy(
- std::istreambuf_iterator<char>(file),
- std::istreambuf_iterator<char>(),
- std::back_inserter(sparams.grammar)
- );
- } else if (arg == "--override-kv") {
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- char * sep = strchr(argv[i], '=');
- if (sep == nullptr || sep - argv[i] >= 128) {
- fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
- invalid_param = true;
- break;
- }
- struct llama_model_kv_override kvo;
- std::strncpy(kvo.key, argv[i], sep - argv[i]);
- kvo.key[sep - argv[i]] = 0;
- sep++;
- if (strncmp(sep, "int:", 4) == 0) {
- sep += 4;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
- kvo.int_value = std::atol(sep);
- } else if (strncmp(sep, "float:", 6) == 0) {
- sep += 6;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
- kvo.float_value = std::atof(sep);
- } else if (strncmp(sep, "bool:", 5) == 0) {
- sep += 5;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
- if (std::strcmp(sep, "true") == 0) {
- kvo.bool_value = true;
- } else if (std::strcmp(sep, "false") == 0) {
- kvo.bool_value = false;
- } else {
- fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
- invalid_param = true;
- break;
- }
- } else {
- fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
- invalid_param = true;
- break;
- }
- params.kv_overrides.push_back(kvo);
+ if (arg == "-s" || arg == "--seed") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ // This is temporary, in the future the sampling state will be moved fully to llama_sampling_context.
+ params.seed = std::stoul(argv[i]);
+ sparams.seed = std::stoul(argv[i]);
+ return true;
+ }
+ if (arg == "-t" || arg == "--threads") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_threads = std::stoi(argv[i]);
+ if (params.n_threads <= 0) {
+ params.n_threads = std::thread::hardware_concurrency();
+ }
+ return true;
+ }
+ if (arg == "-tb" || arg == "--threads-batch") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_threads_batch = std::stoi(argv[i]);
+ if (params.n_threads_batch <= 0) {
+ params.n_threads_batch = std::thread::hardware_concurrency();
+ }
+ return true;
+ }
+ if (arg == "-td" || arg == "--threads-draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_threads_draft = std::stoi(argv[i]);
+ if (params.n_threads_draft <= 0) {
+ params.n_threads_draft = std::thread::hardware_concurrency();
+ }
+ return true;
+ }
+ if (arg == "-tbd" || arg == "--threads-batch-draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_threads_batch_draft = std::stoi(argv[i]);
+ if (params.n_threads_batch_draft <= 0) {
+ params.n_threads_batch_draft = std::thread::hardware_concurrency();
+ }
+ return true;
+ }
+ if (arg == "-p" || arg == "--prompt") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.prompt = argv[i];
+ return true;
+ }
+ if (arg == "-e" || arg == "--escape") {
+ params.escape = true;
+ return true;
+ }
+ if (arg == "--prompt-cache") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.path_prompt_cache = argv[i];
+ return true;
+ }
+ if (arg == "--prompt-cache-all") {
+ params.prompt_cache_all = true;
+ return true;
+ }
+ if (arg == "--prompt-cache-ro") {
+ params.prompt_cache_ro = true;
+ return true;
+ }
+ if (arg == "-bf" || arg == "--binary-file") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::ifstream file(argv[i], std::ios::binary);
+ if (!file) {
+ fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+ invalid_param = true;
+ return true;
+ }
+ // store the external file name in params
+ params.prompt_file = argv[i];
+ std::ostringstream ss;
+ ss << file.rdbuf();
+ params.prompt = ss.str();
+ fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]);
+ return true;
+ }
+ if (arg == "-f" || arg == "--file") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::ifstream file(argv[i]);
+ if (!file) {
+ fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+ invalid_param = true;
+ return true;
+ }
+ // store the external file name in params
+ params.prompt_file = argv[i];
+ std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
+ if (!params.prompt.empty() && params.prompt.back() == '\n') {
+ params.prompt.pop_back();
+ }
+ return true;
+ }
+ if (arg == "-n" || arg == "--n-predict") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_predict = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--top-k") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.top_k = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "-c" || arg == "--ctx-size") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_ctx = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--grp-attn-n" || arg == "-gan") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.grp_attn_n = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--grp-attn-w" || arg == "-gaw") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.grp_attn_w = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--rope-freq-base") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.rope_freq_base = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--rope-freq-scale") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.rope_freq_scale = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--rope-scaling") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::string value(argv[i]);
+ /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
+ else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
+ else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
+ else { invalid_param = true; }
+ return true;
+ }
+ if (arg == "--rope-scale") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.rope_freq_scale = 1.0f / std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--yarn-orig-ctx") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.yarn_orig_ctx = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--yarn-ext-factor") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.yarn_ext_factor = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--yarn-attn-factor") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.yarn_attn_factor = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--yarn-beta-fast") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.yarn_beta_fast = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--yarn-beta-slow") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.yarn_beta_slow = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--pooling") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::string value(argv[i]);
+ /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
+ else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
+ else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
+ else { invalid_param = true; }
+ return true;
+ }
+ if (arg == "--defrag-thold" || arg == "-dt") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.defrag_thold = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--samplers") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ const auto sampler_names = string_split(argv[i], ';');
+ sparams.samplers_sequence = sampler_types_from_names(sampler_names, true);
+ return true;
+ }
+ if (arg == "--sampling-seq") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.samplers_sequence = sampler_types_from_chars(argv[i]);
+ return true;
+ }
+ if (arg == "--top-p") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.top_p = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--min-p") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.min_p = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--temp") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.temp = std::stof(argv[i]);
+ sparams.temp = std::max(sparams.temp, 0.0f);
+ return true;
+ }
+ if (arg == "--tfs") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.tfs_z = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--typical") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.typical_p = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--repeat-last-n") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.penalty_last_n = std::stoi(argv[i]);
+ sparams.n_prev = std::max(sparams.n_prev, sparams.penalty_last_n);
+ return true;
+ }
+ if (arg == "--repeat-penalty") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.penalty_repeat = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--frequency-penalty") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.penalty_freq = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--presence-penalty") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.penalty_present = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--dynatemp-range") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.dynatemp_range = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--dynatemp-exp") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.dynatemp_exponent = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--mirostat") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.mirostat = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--mirostat-lr") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.mirostat_eta = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--mirostat-ent") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.mirostat_tau = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "--cfg-negative-prompt") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.cfg_negative_prompt = argv[i];
+ return true;
+ }
+ if (arg == "--cfg-negative-prompt-file") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::ifstream file(argv[i]);
+ if (!file) {
+ fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+ invalid_param = true;
+ return true;
+ }
+ std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(sparams.cfg_negative_prompt));
+ if (!sparams.cfg_negative_prompt.empty() && sparams.cfg_negative_prompt.back() == '\n') {
+ sparams.cfg_negative_prompt.pop_back();
+ }
+ return true;
+ }
+ if (arg == "--cfg-scale") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.cfg_scale = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "-b" || arg == "--batch-size") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_batch = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "-ub" || arg == "--ubatch-size") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_ubatch = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--keep") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_keep = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_draft = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--chunks") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_chunks = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "-np" || arg == "--parallel") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_parallel = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "-ns" || arg == "--sequences") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_sequences = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--p-split" || arg == "-ps") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.p_split = std::stof(argv[i]);
+ return true;
+ }
+ if (arg == "-m" || arg == "--model") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.model = argv[i];
+ return true;
+ }
+ if (arg == "-md" || arg == "--model-draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.model_draft = argv[i];
+ return true;
+ }
+ if (arg == "-a" || arg == "--alias") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.model_alias = argv[i];
+ return true;
+ }
+ if (arg == "-mu" || arg == "--model-url") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.model_url = argv[i];
+ return true;
+ }
+ if (arg == "-hfr" || arg == "--hf-repo") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.hf_repo = argv[i];
+ return true;
+ }
+ if (arg == "-hff" || arg == "--hf-file") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.hf_file = argv[i];
+ return true;
+ }
+ if (arg == "--lora") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.lora_adapter.emplace_back(argv[i], 1.0f);
+ params.use_mmap = false;
+ return true;
+ }
+ if (arg == "--lora-scaled") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ const char* lora_adapter = argv[i];
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
+ params.use_mmap = false;
+ return true;
+ }
+ if (arg == "--lora-base") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.lora_base = argv[i];
+ return true;
+ }
+ if (arg == "--control-vector") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.control_vectors.push_back({ 1.0f, argv[i], });
+ return true;
+ }
+ if (arg == "--control-vector-scaled") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ const char* fname = argv[i];
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.control_vectors.push_back({ std::stof(argv[i]), fname, });
+ return true;
+ }
+ if (arg == "--control-vector-layer-range") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.control_vector_layer_start = std::stoi(argv[i]);
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.control_vector_layer_end = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--mmproj") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.mmproj = argv[i];
+ return true;
+ }
+ if (arg == "--image") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.image.emplace_back(argv[i]);
+ return true;
+ }
+ if (arg == "-i" || arg == "--interactive") {
+ params.interactive = true;
+ return true;
+ }
+ if (arg == "--interactive-specials") {
+ params.interactive_specials = true;
+ return true;
+ }
+ if (arg == "--embedding") {
+ params.embedding = true;
+ return true;
+ }
+ if (arg == "--interactive-first") {
+ params.interactive_first = true;
+ return true;
+ }
+ if (arg == "-ins" || arg == "--instruct") {
+ params.instruct = true;
+ return true;
+ }
+ if (arg == "-cnv" || arg == "--conversation") {
+ params.conversation = true;
+ return true;
+ }
+ if (arg == "-cml" || arg == "--chatml") {
+ params.chatml = true;
+ return true;
+ }
+ if (arg == "--infill") {
+ params.infill = true;
+ return true;
+ }
+ if (arg == "-dkvc" || arg == "--dump-kv-cache") {
+ params.dump_kv_cache = true;
+ return true;
+ }
+ if (arg == "-nkvo" || arg == "--no-kv-offload") {
+ params.no_kv_offload = true;
+ return true;
+ }
+ if (arg == "-ctk" || arg == "--cache-type-k") {
+ params.cache_type_k = argv[++i];
+ return true;
+ }
+ if (arg == "-ctv" || arg == "--cache-type-v") {
+ params.cache_type_v = argv[++i];
+ return true;
+ }
+ if (arg == "--multiline-input") {
+ params.multiline_input = true;
+ return true;
+ }
+ if (arg == "--simple-io") {
+ params.simple_io = true;
+ return true;
+ }
+ if (arg == "-cb" || arg == "--cont-batching") {
+ params.cont_batching = true;
+ return true;
+ }
+ if (arg == "-fa" || arg == "--flash-attn") {
+ params.flash_attn = true;
+ return true;
+ }
+ if (arg == "--color") {
+ params.use_color = true;
+ return true;
+ }
+ if (arg == "--mlock") {
+ params.use_mlock = true;
+ return true;
+ }
+ if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_gpu_layers = std::stoi(argv[i]);
+ if (params.n_gpu_layers <= 0)
+ FLAG_gpu = LLAMAFILE_GPU_DISABLE;
+ return true;
+ }
+ if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_gpu_layers_draft = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--main-gpu" || arg == "-mg") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.main_gpu = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--split-mode" || arg == "-sm") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::string arg_next = argv[i];
+ if (arg_next == "none") {
+ params.split_mode = LLAMA_SPLIT_MODE_NONE;
+ }
+ else if (arg_next == "layer") {
+ params.split_mode = LLAMA_SPLIT_MODE_LAYER;
+ }
+ else if (arg_next == "row") {
+ params.split_mode = LLAMA_SPLIT_MODE_ROW;
+ }
+ else {
+ invalid_param = true;
+ return true;
+ }
+ return true;
+ }
+ if (arg == "--tensor-split" || arg == "-ts") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::string arg_next = argv[i];
+
+ // split string by , and /
+ const std::regex regex{ R"([,/]+)" };
+ std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 };
+ std::vector<std::string> split_arg{ it, {} };
+ if (split_arg.size() >= llama_max_devices()) {
+ invalid_param = true;
+ return true;
+ }
+ for (size_t i = 0; i < llama_max_devices(); ++i) {
+ if (i < split_arg.size()) {
+ params.tensor_split[i] = std::stof(split_arg[i]);
+ }
+ else {
+ params.tensor_split[i] = 0.0f;
+ }
+ }
+ return true;
+ }
+ if (arg == "--rpc") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.rpc_servers = argv[i];
+ return true;
+ }
+ if (arg == "--no-mmap") {
+ params.use_mmap = false;
+ return true;
+ }
+ if (arg == "--numa") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::string value(argv[i]);
+ /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
+ else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
+ else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
+ else { invalid_param = true; }
+ return true;
+ }
+ if (arg == "--verbose-prompt") {
+ params.verbose_prompt = true;
+ return true;
+ }
+ if (arg == "--no-display-prompt" || arg == "--silent-prompt") {
+ params.display_prompt = false;
+ return true;
+ }
+ if (arg == "-r" || arg == "--reverse-prompt") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.antiprompt.emplace_back(argv[i]);
+ return true;
+ }
+ if (arg == "-ld" || arg == "--logdir") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.logdir = argv[i];
+
+ if (params.logdir.back() != DIRECTORY_SEPARATOR) {
+ params.logdir += DIRECTORY_SEPARATOR;
+ }
+ return true;
+ }
+ if (arg == "-lcs" || arg == "--lookup-cache-static") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.lookup_cache_static = argv[i];
+ return true;
+ }
+ if (arg == "-lcd" || arg == "--lookup-cache-dynamic") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.lookup_cache_dynamic = argv[i];
+ return true;
+ }
+ if (arg == "--save-all-logits" || arg == "--kl-divergence-base") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.logits_file = argv[i];
+ return true;
+ }
+ if (arg == "--perplexity" || arg == "--all-logits") {
+ params.logits_all = true;
+ return true;
+ }
+ if (arg == "--ppl-stride") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.ppl_stride = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "-ptc" || arg == "--print-token-count") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.n_print = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--check-tensors") {
+ params.check_tensors = true;
+ return true;
+ }
+ if (arg == "--ppl-output-type") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.ppl_output_type = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--hellaswag") {
+ params.hellaswag = true;
+ return true;
+ }
+ if (arg == "--hellaswag-tasks") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.hellaswag_tasks = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--winogrande") {
+ params.winogrande = true;
+ return true;
+ }
+ if (arg == "--winogrande-tasks") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.winogrande_tasks = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--multiple-choice") {
+ params.multiple_choice = true;
+ return true;
+ }
+ if (arg == "--multiple-choice-tasks") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.multiple_choice_tasks = std::stoi(argv[i]);
+ return true;
+ }
+ if (arg == "--kl-divergence") {
+ params.kl_divergence = true;
+ return true;
+ }
+ if (arg == "--ignore-eos") {
+ params.ignore_eos = true;
+ return true;
+ }
+ if (arg == "--penalize-nl") {
+ sparams.penalize_nl = true;
+ return true;
+ }
+ if (arg == "-l" || arg == "--logit-bias") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::stringstream ss(argv[i]);
+ llama_token key;
+ char sign;
+ std::string value_str;
+ try {
+ if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) {
+ sparams.logit_bias[key] = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f);
+ }
+ else {
+ throw std::exception();
+ }
+ }
+ catch (const std::exception&) {
+ invalid_param = true;
+ return true;
+ }
+ return true;
+ }
+ if (arg == "-h" || arg == "--help") {
+ gpt_print_usage(argc, argv, gpt_params());
+ exit(0);
+ }
+ if (arg == "--version") {
+ fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
+ fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+ exit(0);
+ }
+ if (arg == "--random-prompt") {
+ params.random_prompt = true;
+ return true;
+ }
+ if (arg == "--in-prefix-bos") {
+ params.input_prefix_bos = true;
+ return true;
+ }
+ if (arg == "--in-prefix") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.input_prefix = argv[i];
+ return true;
+ }
+ if (arg == "--in-suffix") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.input_suffix = argv[i];
+ return true;
+ }
+ if (arg == "--grammar") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.grammar = argv[i];
+ return true;
+ }
+ if (arg == "--grammar-file") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ std::ifstream file(argv[i]);
+ if (!file) {
+ fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+ invalid_param = true;
+ return true;
+ }
+ std::copy(
+ std::istreambuf_iterator<char>(file),
+ std::istreambuf_iterator<char>(),
+ std::back_inserter(sparams.grammar)
+ );
+ return true;
+ }
+ if (arg == "-j" || arg == "--json-schema") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ sparams.grammar = json_schema_to_grammar(json::parse(argv[i]));
+ return true;
+ }
+ if (arg == "--override-kv") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ if (!parse_kv_override(argv[i], params.kv_overrides)) {
+ fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
+ invalid_param = true;
+ return true;
+ }
+ return true;
+ }
#ifndef LOG_DISABLE_LOGS
- // Parse args for logging parameters
- } else if ( log_param_single_parse( argv[i] ) ) {
- // Do nothing, log_param_single_parse automatically does it's thing
- // and returns if a match was found and parsed.
- } else if ( log_param_pair_parse( /*check_but_dont_parse*/ true, argv[i] ) ) {
- // We have a matching known parameter requiring an argument,
- // now we need to check if there is anything after this argv
- // and flag invalid_param or parse it.
- if (++i >= argc) {
- invalid_param = true;
- break;
- }
- if( !log_param_pair_parse( /*check_but_dont_parse*/ false, argv[i-1], argv[i]) ) {
- invalid_param = true;
- break;
- }
- // End of Parse args for logging parameters
+ // Parse args for logging parameters
+ if (log_param_single_parse(argv[i])) {
+ // Do nothing, log_param_single_parse automatically does its thing
+ // and returns if a match was found and parsed.
+ return true;
+ }
+ if (log_param_pair_parse( /*check_but_dont_parse*/ true, argv[i])) {
+ // We have a matching known parameter requiring an argument,
+ // now we need to check if there is anything after this argv
+ // and flag invalid_param or parse it.
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ if (!log_param_pair_parse( /*check_but_dont_parse*/ false, argv[i - 1], argv[i])) {
+ invalid_param = true;
+ return true;
+ }
+ return true;
+ }
+ // End of Parse args for logging parameters
#endif // LOG_DISABLE_LOGS
- } else {
- throw std::invalid_argument("error: unknown argument: " + arg);
+
+ return false;
+}
+
+void gpt_params_handle_model_default(gpt_params & params) {
+ if (!params.hf_repo.empty()) {
+ // short-hand to avoid specifying --hf-file -> default it to --model
+ if (params.hf_file.empty()) {
+ if (params.model.empty()) {
+ throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
+ }
+ params.hf_file = params.model;
+ } else if (params.model.empty()) {
+ std::string cache_directory = get_cache_directory();
+ const bool success = create_directory_with_parents(cache_directory);
+ if (!success) {
+ throw std::runtime_error("failed to create cache directory: " + cache_directory);
+ }
+ params.model = cache_directory + string_split(params.hf_file, '/').back();
+ }
+ } else if (!params.model_url.empty()) {
+ if (params.model.empty()) {
+ auto f = string_split(params.model_url, '#').front();
+ f = string_split(f, '?').front();
+ f = string_split(f, '/').back();
+ params.model = "models/" + f;
}
+ } else if (params.model.empty()) {
+ params.model = DEFAULT_MODEL_PATH;
}
- if (invalid_param) {
- throw std::invalid_argument("error: invalid parameter for argument: " + arg);
+}
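+// e.g. --model-url https://example.com/repo/foo.gguf?dl=1#sha256 resolves
+// to a local path of models/foo.gguf (fragment and query string stripped).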
+
+bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
+ bool invalid_param = false;
+ std::string arg;
+ const std::string arg_prefix = "--";
+ llama_sampling_params & sparams = params.sparams;
+
+ for (int i = 1; i < argc; i++) {
+ arg = argv[i];
+ if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+ std::replace(arg.begin(), arg.end(), '_', '-');
+ }
+ if (!gpt_params_find_arg(argc, argv, arg, params, i, invalid_param)) {
+ throw std::invalid_argument("error: unknown argument: " + arg);
+ }
+ if (invalid_param) {
+ throw std::invalid_argument("error: invalid parameter for argument: " + arg);
+ }
}
+
if (params.prompt_cache_all &&
(params.interactive || params.interactive_first ||
params.instruct)) {
@@ -912,6 +1428,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
}
+ gpt_params_handle_model_default(params);
+
if (params.escape) {
process_escapes(params.prompt);
process_escapes(params.input_prefix);
@@ -950,7 +1468,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" -h, --help show this help message and exit\n");
printf(" --version show version and build info\n");
printf(" -i, --interactive run in interactive mode\n");
+ printf(" --interactive-specials allow special tokens in user text, in interactive mode\n");
printf(" --interactive-first run in interactive mode and wait for input right away\n");
+ printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n");
printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n");
printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n");
printf(" --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
@@ -983,7 +1503,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" binary file containing multiple choice tasks.\n");
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
- printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
+ printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
+ printf(" -ub N, --ubatch-size N\n");
+ printf(" physical maximum batch size (default: %d)\n", params.n_ubatch);
printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
printf(" (default: %s)\n", sampler_type_names.c_str());
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
@@ -1009,6 +1531,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
printf(" --grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)\n");
printf(" --grammar-file FNAME file to read grammar from\n");
+ printf(" -j SCHEMA, --json-schema SCHEMA\n");
+ printf(" JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object.\n");
+ printf(" For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead\n");
printf(" --cfg-negative-prompt PROMPT\n");
printf(" negative prompt to use for guidance. (default: empty)\n");
printf(" --cfg-negative-prompt-file FNAME\n");
@@ -1024,12 +1549,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n");
printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow);
printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast);
+ printf(" --pooling {none,mean,cls}\n");
+ printf(" pooling type for embeddings, use model default if unspecified\n");
printf(" -dt N, --defrag-thold N\n");
printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
- printf(" --no-penalize-nl do not penalize newline token\n");
+ printf(" --penalize-nl penalize newline tokens\n");
printf(" --temp N temperature (default: %.1f)\n", (double)sparams.temp);
- printf(" --logits-all return logits for all tokens in the batch (default: disabled)\n");
+ printf(" --all-logits return logits for all tokens in the batch (default: disabled)\n");
printf(" --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
printf(" --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
printf(" --winogrande compute Winogrande score over random tasks from datafile supplied with -f\n");
@@ -1042,11 +1569,11 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
printf(" -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
printf(" -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
- printf(" -pa N, --p-accept N speculative decoding accept probability (default: %.1f)\n", (double)params.p_accept);
printf(" -ps N, --p-split N speculative decoding split probability (default: %.1f)\n", (double)params.p_split);
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
+ printf(" -fa, --flash-attn enable Flash Attention (default: %s)\n", params.flash_attn ? "enabled" : "disabled");
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA. see examples/llava/README.md\n");
- printf(" --image IMAGE_FILE path to an image file. use with multimodal models\n");
+ printf(" --image IMAGE_FILE path to an image file. use with multimodal models. Specify multiple times for batching\n");
if (llama_supports_mlock()) {
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
}
@@ -1059,7 +1586,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" - numactl: use the CPU map provided by numactl\n");
printf(" if run without this previously, it is recommended to drop the system page cache before using this\n");
printf(" see https://github.com/ggerganov/llama.cpp/issues/1437\n");
- if (llama_supports_gpu_offload()) {
+ // if (llama_supports_gpu_offload()) {
printf(" -ngl N, --n-gpu-layers N\n");
printf(" number of layers to store in VRAM\n");
printf(" -ngld N, --n-gpu-layers-draft N\n");
@@ -1073,7 +1600,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n");
printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu);
- }
+ // }
printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false");
printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false");
printf(" -gan N, --grp-attn-n N\n");
@@ -1092,17 +1619,34 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
printf(" --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
printf(" --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
+ printf(" --control-vector FNAME\n");
+ printf(" add a control vector\n");
+ printf(" --control-vector-scaled FNAME S\n");
+ printf(" add a control vector with user defined scaling S\n");
+ printf(" --control-vector-layer-range START END\n");
+ printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
printf(" -m FNAME, --model FNAME\n");
- printf(" model path (default: %s)\n", params.model.c_str());
+ printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH);
printf(" -md FNAME, --model-draft FNAME\n");
- printf(" draft model for speculative decoding\n");
+ printf(" draft model for speculative decoding (default: unused)\n");
+ printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
+ printf(" model download url (default: unused)\n");
+ printf(" -hfr REPO, --hf-repo REPO\n");
+ printf(" Hugging Face model repository (default: unused)\n");
+ printf(" -hff FILE, --hf-file FILE\n");
+ printf(" Hugging Face model file (default: unused)\n");
printf(" -ld LOGDIR, --logdir LOGDIR\n");
printf(" path under which to save YAML logs (no logging if unset)\n");
+ printf(" -lcs FNAME, --lookup-cache-static FNAME\n");
+ printf(" path to static lookup cache to use for lookup decoding (not updated by generation)\n");
+ printf(" -lcd FNAME, --lookup-cache-dynamic FNAME\n");
+ printf(" path to dynamic lookup cache to use for lookup decoding (updated by generation)\n");
printf(" --override-kv KEY=TYPE:VALUE\n");
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
- printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
+ printf(" types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
printf(" -ptc N, --print-token-count N\n");
printf(" print token count every N tokens (default: %d)\n", params.n_print);
+ printf(" --check-tensors check model tensor data for invalid values\n");
printf("\n");
#ifndef LOG_DISABLE_LOGS
log_print_usage();
@@ -1139,6 +1683,77 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
GGML_UNREACHABLE();
}
+// Validate if a filename is safe to use
+// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
+bool validate_file_name(const std::string & filename) {
+ if (!filename.length()) {
+ // Empty filename invalid
+ return false;
+ }
+ if (filename.length() > 255) {
+ // Limit at common largest possible filename on Linux filesystems
+ // to avoid unnecessary further validation
+ // (On systems with smaller limits it will be caught by the OS)
+ return false;
+ }
+
+ std::u32string filename_utf32;
+ try {
+ std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+ filename_utf32 = converter.from_bytes(filename);
+
+ // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,
+ // or invalid encodings were encountered. Reject such attempts
+ std::string filename_reencoded = converter.to_bytes(filename_utf32);
+ if (filename_reencoded != filename) {
+ return false;
+ }
+ } catch (const std::exception &) {
+ return false;
+ }
+
+ // Check for forbidden codepoints:
+ // - Control characters
+ // - Unicode equivalents of illegal characters
+ // - UTF-16 surrogate pairs
+ // - UTF-8 replacement character
+ // - Byte order mark (BOM)
+ // - Illegal characters: / \ : * ? " < > |
+ for (char32_t c : filename_utf32) {
+ if (c <= 0x1F // Control characters (C0)
+ || c == 0x7F // Control characters (DEL)
+ || (c >= 0x80 && c <= 0x9F) // Control characters (C1)
+ || c == 0xFF0E // Fullwidth Full Stop (period equivalent)
+ || c == 0x2215 // Division Slash (forward slash equivalent)
+ || c == 0x2216 // Set Minus (backslash equivalent)
+ || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
+ || c == 0xFFFD // Replacement Character (UTF-8)
+ || c == 0xFEFF // Byte Order Mark (BOM)
+ || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+ || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
+ return false;
+ }
+ }
+
+ // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
+ // Unicode and other whitespace is not affected, only 0x20 space
+ if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') {
+ return false;
+ }
+
+ // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead)
+ if (filename.find("..") != std::string::npos) {
+ return false;
+ }
+
+ // Reject "."
+ if (filename == ".") {
+ return false;
+ }
+
+ return true;
+}
+
//
// String utils
//
@@ -1156,6 +1771,18 @@ std::vector<std::string> string_split(std::string input, char separator) {
return parts;
}
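+
+// Trim leading and trailing whitespace (as classified by std::isspace).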
+std::string string_strip(const std::string & str) {
+ size_t start = 0;
+ size_t end = str.size();
+ while (start < end && std::isspace(str[start])) {
+ start++;
+ }
+ while (end > start && std::isspace(str[end - 1])) {
+ end--;
+ }
+ return str.substr(start, end - start);
+}
+
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
{"top_k", llama_sampler_type::TOP_K},
@@ -1247,11 +1874,13 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
if (params.n_gpu_layers != -1) {
mparams.n_gpu_layers = params.n_gpu_layers;
}
+ mparams.rpc_servers = params.rpc_servers.c_str();
mparams.main_gpu = params.main_gpu;
mparams.split_mode = params.split_mode;
mparams.tensor_split = params.tensor_split;
mparams.use_mmap = params.use_mmap;
mparams.use_mlock = params.use_mlock;
+ mparams.check_tensors = params.check_tensors;
if (params.kv_overrides.empty()) {
mparams.kv_overrides = NULL;
} else {
@@ -1278,6 +1907,9 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
if (s == "q4_1") {
return GGML_TYPE_Q4_1;
}
+ if (s == "iq4_nl") {
+ return GGML_TYPE_IQ4_NL;
+ }
if (s == "q5_0") {
return GGML_TYPE_Q5_0;
}
@@ -1292,13 +1924,14 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
auto cparams = llama_context_default_params();
cparams.n_ctx = params.n_ctx;
+ cparams.n_seq_max = params.n_parallel;
cparams.n_batch = params.n_batch;
+ cparams.n_ubatch = params.n_ubatch;
cparams.n_threads = params.n_threads;
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
- cparams.mul_mat_q = params.mul_mat_q;
cparams.seed = params.seed;
cparams.logits_all = params.logits_all;
- cparams.embedding = params.embedding;
+ cparams.embeddings = params.embedding;
cparams.rope_scaling_type = params.rope_scaling_type;
cparams.rope_freq_base = params.rope_freq_base;
cparams.rope_freq_scale = params.rope_freq_scale;
@@ -1307,8 +1940,12 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
cparams.yarn_beta_fast = params.yarn_beta_fast;
cparams.yarn_beta_slow = params.yarn_beta_slow;
cparams.yarn_orig_ctx = params.yarn_orig_ctx;
+ cparams.pooling_type = params.pooling_type;
cparams.defrag_thold = params.defrag_thold;
+ cparams.cb_eval = params.cb_eval;
+ cparams.cb_eval_user_data = params.cb_eval_user_data;
cparams.offload_kqv = !params.no_kv_offload;
+ cparams.flash_attn = params.flash_attn;
cparams.type_k = kv_cache_type_from_str(params.cache_type_k);
cparams.type_v = kv_cache_type_from_str(params.cache_type_v);
@@ -1337,10 +1974,349 @@ void llama_batch_add(
batch.n_tokens++;
}
+#ifdef LLAMA_USE_CURL
+
+static bool starts_with(const std::string & str, const std::string & prefix) {
+ // While we wait for C++20's std::string::starts_with...
+ return str.rfind(prefix, 0) == 0;
+}
+
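+// Download `url` to `path`. A companion metadata file `<path>.json` records the
+// url, ETag, and Last-Modified of the last download; when a HEAD request shows
+// the server copy is unchanged, the existing file is reused instead of re-fetched.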
+static bool llama_download_file(const std::string & url, const std::string & path) {
+
+ // Initialize libcurl
+ std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
+ if (!curl) {
+ fprintf(stderr, "%s: error initializing libcurl\n", __func__);
+ return false;
+ }
+
+ bool force_download = false;
+
+ // Set the URL, allow to follow http redirection
+ curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+ curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+
+#if defined(_WIN32)
+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
+ // operating system. Currently implemented under MS-Windows.
+ curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+#endif
+
+ // Check if the file already exists locally
+ struct stat model_file_info;
+ auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
+
+ // If the file exists, check its JSON metadata companion file.
+ std::string metadata_path = path + ".json";
+ nlohmann::json metadata;
+ std::string etag;
+ std::string last_modified;
+
+ if (file_exists) {
+ // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
+ std::ifstream metadata_in(metadata_path);
+ if (metadata_in.good()) {
+ try {
+ metadata_in >> metadata;
+ fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
+ if (metadata.contains("url") && metadata.at("url").is_string()) {
+ auto previous_url = metadata.at("url").get<std::string>();
+ if (previous_url != url) {
+ fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
+ return false;
+ }
+ }
+ if (metadata.contains("etag") && metadata.at("etag").is_string()) {
+ etag = metadata.at("etag");
+ }
+ if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
+ last_modified = metadata.at("lastModified");
+ }
+ } catch (const nlohmann::json::exception & e) {
+ fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+ return false;
+ }
+ }
+ } else {
+ fprintf(stderr, "%s: no previous model file found %s\n", __func__, path.c_str());
+ }
+
+ // Send a HEAD request to retrieve the etag and last-modified headers
+ struct llama_load_model_from_url_headers {
+ std::string etag;
+ std::string last_modified;
+ };
+ llama_load_model_from_url_headers headers;
+ {
+ typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
+ auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
+ llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
+
+ static std::regex header_regex("([^:]+): (.*)\r\n");
+ static std::regex etag_regex("ETag", std::regex_constants::icase);
+ static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
+
+ std::string header(buffer, n_items);
+ std::smatch match;
+ if (std::regex_match(header, match, header_regex)) {
+ const std::string & key = match[1];
+ const std::string & value = match[2];
+ if (std::regex_match(key, match, etag_regex)) {
+ headers->etag = value;
+ } else if (std::regex_match(key, match, last_modified_regex)) {
+ headers->last_modified = value;
+ }
+ }
+ return n_items;
+ };
+
+ curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+
+ CURLcode res = curl_easy_perform(curl.get());
+ if (res != CURLE_OK) {
+ fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
+ return false;
+ }
+
+ long http_code = 0;
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+ if (http_code != 200) {
+ // HEAD not supported, we don't know if the file has changed
+ // force trigger downloading
+ force_download = true;
+ fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+ }
+ }
+
+ bool should_download = !file_exists || force_download;
+ if (!should_download) {
+ if (!etag.empty() && etag != headers.etag) {
+ fprintf(stderr, "%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+ should_download = true;
+ } else if (!last_modified.empty() && last_modified != headers.last_modified) {
+ fprintf(stderr, "%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+ should_download = true;
+ }
+ }
+ if (should_download) {
+ std::string path_temporary = path + ".downloadInProgress";
+ if (file_exists) {
+ fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
+ if (remove(path.c_str()) != 0) {
+ fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
+ return false;
+ }
+ }
+
+ // Set the output file
+ std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);
+ if (!outfile) {
+ fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
+ return false;
+ }
+
+ typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
+ auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
+ return fwrite(data, size, nmemb, (FILE *)fd);
+ };
+ curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
+
+ // display download progress
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
+
+ // helper function to hide password in URL
+ auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
+ std::size_t protocol_pos = url.find("://");
+ if (protocol_pos == std::string::npos) {
+ return url; // Malformed URL
+ }
+
+ std::size_t at_pos = url.find('@', protocol_pos + 3);
+ if (at_pos == std::string::npos) {
+ return url; // No password in URL
+ }
+
+ return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
+ };
+
+ // start the download
+ fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
+ llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
+ auto res = curl_easy_perform(curl.get());
+ if (res != CURLE_OK) {
+ fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
+ return false;
+ }
+
+ long http_code = 0;
+ curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+ if (http_code < 200 || http_code >= 400) {
+ fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
+ return false;
+ }
+
+ // Causes file to be closed explicitly here before we rename it.
+ outfile.reset();
+
+ // Write the updated JSON metadata file.
+ metadata.update({
+ {"url", url},
+ {"etag", headers.etag},
+ {"lastModified", headers.last_modified}
+ });
+ std::ofstream(metadata_path) << metadata.dump(4);
+ fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
+
+ if (rename(path_temporary.c_str(), path.c_str()) != 0) {
+ fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
+ return false;
+ }
+ }
+
+ return true;
+}
+
+struct llama_model * llama_load_model_from_url(
+ const char * model_url,
+ const char * path_model,
+ const struct llama_model_params & params) {
+ // Basic validation of the model_url
+ if (!model_url || strlen(model_url) == 0) {
+ fprintf(stderr, "%s: invalid model_url\n", __func__);
+ return NULL;
+ }
+
+ if (!llama_download_file(model_url, path_model)) {
+ return NULL;
+ }
+
+ // check for additional GGUFs split to download
+ int n_split = 0;
+ {
+ struct gguf_init_params gguf_params = {
+ /*.no_alloc = */ true,
+ /*.ctx = */ NULL,
+ };
+ auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+ if (!ctx_gguf) {
+ fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+ return NULL;
+ }
+
+ auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
+ if (key_n_split >= 0) {
+ n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
+ }
+
+ gguf_free(ctx_gguf);
+ }
+
+ if (n_split > 1) {
+ char split_prefix[PATH_MAX] = {0};
+ char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+
+ // Verify the first split file format
+ // and extract split URL and PATH prefixes
+ {
+ if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
+ fprintf(stderr, "\n%s: unexpected model file name: %s"
+ " n_split=%d\n", __func__, path_model, n_split);
+ return NULL;
+ }
+
+ if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
+ fprintf(stderr, "\n%s: unexpected model url: %s"
+ " n_split=%d\n", __func__, model_url, n_split);
+ return NULL;
+ }
+ }
+
+ // Prepare download in parallel
+ std::vector<std::future<bool>> futures_download;
+ for (int idx = 1; idx < n_split; idx++) {
+ futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool {
+ char split_path[PATH_MAX] = {0};
+ llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
+
+ char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+ llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
+
+ return llama_download_file(split_url, split_path);
+ }, idx));
+ }
+
+ // Wait for all downloads to complete
+ for (auto & f : futures_download) {
+ if (!f.get()) {
+ return NULL;
+ }
+ }
+ }
+
+ return llama_load_model_from_file(path_model, params);
+}
+
+struct llama_model * llama_load_model_from_hf(
+ const char * repo,
+ const char * model,
+ const char * path_model,
+ const struct llama_model_params & params) {
+ // construct hugging face model url:
+ //
+ // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
+ // https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
+ //
+ // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
+ // https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
+ //
+
+ std::string model_url = "https://huggingface.co/";
+ model_url += repo;
+ model_url += "/resolve/main/";
+ model_url += model;
+
+ return llama_load_model_from_url(model_url.c_str(), path_model, params);
+}
+
+#else
+
+struct llama_model * llama_load_model_from_url(
+ const char * /*model_url*/,
+ const char * /*path_model*/,
+ const struct llama_model_params & /*params*/) {
+ fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
+ return nullptr;
+}
+
+struct llama_model * llama_load_model_from_hf(
+ const char * /*repo*/,
+ const char * /*model*/,
+ const char * /*path_model*/,
+ const struct llama_model_params & /*params*/) {
+ fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
+ return nullptr;
+}
+
+#endif // LLAMA_USE_CURL
+
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
auto mparams = llama_model_params_from_gpt_params(params);
- llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
+ llama_model * model = nullptr;
+
+ if (!params.hf_repo.empty() && !params.hf_file.empty()) {
+ model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), mparams);
+ } else if (!params.model_url.empty()) {
+ model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams);
+ } else {
+ model = llama_load_model_from_file(params.model.c_str(), mparams);
+ }
+
if (model == NULL) {
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
return std::make_tuple(nullptr, nullptr);
@@ -1355,8 +2331,32 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
return std::make_tuple(nullptr, nullptr);
}
+ if (!params.control_vectors.empty()) {
+ if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
+ if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_n_layer(model);
+
+ const auto cvec = llama_control_vector_load(params.control_vectors);
+ if (cvec.n_embd == -1) {
+ llama_free(lctx);
+ llama_free_model(model);
+ return std::make_tuple(nullptr, nullptr);
+ }
+
+ int err = llama_control_vector_apply(lctx,
+ cvec.data.data(),
+ cvec.data.size(),
+ cvec.n_embd,
+ params.control_vector_layer_start,
+ params.control_vector_layer_end);
+ if (err) {
+ llama_free(lctx);
+ llama_free_model(model);
+ return std::make_tuple(nullptr, nullptr);
+ }
+ }
+
for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
- const std::string& lora_adapter = std::get<0>(params.lora_adapter[i]);
+ const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
float lora_scale = std::get<1>(params.lora_adapter[i]);
int err = llama_model_apply_lora_from_file(model,
lora_adapter.c_str(),
@@ -1377,12 +2377,13 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
}
- {
+ if (params.warmup) {
LOG("warming up the model with an empty run\n");
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
llama_kv_cache_clear(lctx);
+ llama_synchronize(lctx);
llama_reset_timings(lctx);
}
@@ -1396,23 +2397,23 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
std::vector<llama_token> llama_tokenize(
const struct llama_context * ctx,
const std::string & text,
- bool add_bos,
- bool special) {
- return llama_tokenize(llama_get_model(ctx), text, add_bos, special);
+ bool add_special,
+ bool parse_special) {
+ return llama_tokenize(llama_get_model(ctx), text, add_special, parse_special);
}
std::vector<llama_token> llama_tokenize(
const struct llama_model * model,
const std::string & text,
- bool add_bos,
- bool special) {
+ bool add_special,
+ bool parse_special) {
// upper limit for the number of tokens
- int n_tokens = text.length() + add_bos;
+ int n_tokens = text.length() + 2 * add_special;
std::vector<llama_token> result(n_tokens);
- n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_bos, special);
+ n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
if (n_tokens < 0) {
result.resize(-n_tokens);
- int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_bos, special);
+ int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
GGML_ASSERT(check == -n_tokens);
} else {
result.resize(n_tokens);
@@ -1420,12 +2421,12 @@ std::vector<llama_token> llama_tokenize(
return result;
}
-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
std::vector<char> result(8, 0);
- const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+ const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
if (n_tokens < 0) {
result.resize(-n_tokens);
- int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+ int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
GGML_ASSERT(check == -n_tokens);
} else {
result.resize(n_tokens);
@@ -1550,6 +2551,31 @@ bool create_directory_with_parents(const std::string & path) {
#endif // _WIN32
}
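+
+// Return the cache directory path, with a trailing separator. Honors
+// $LLAMA_CACHE when set; otherwise uses the platform convention
+// ($XDG_CACHE_HOME or ~/.cache on Linux, ~/Library/Caches on macOS,
+// %APPDATA% on Windows) plus a "llama.cpp" subdirectory.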
+std::string get_cache_directory() {
+ std::string cache_directory = "";
+ if (getenv("LLAMA_CACHE")) {
+ cache_directory = std::getenv("LLAMA_CACHE");
+ if (cache_directory.back() != DIRECTORY_SEPARATOR) {
+ cache_directory += DIRECTORY_SEPARATOR;
+ }
+ } else {
+ if (IsLinux()) { // [jart]
+ if (std::getenv("XDG_CACHE_HOME")) {
+ cache_directory = std::getenv("XDG_CACHE_HOME");
+ } else {
+ cache_directory = std::getenv("HOME") + std::string("/.cache/");
+ }
+ } else if (IsXnu()) {
+ cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+ } else if (IsWindows()) {
+ cache_directory = std::getenv("APPDATA");
+ }
+ cache_directory += "llama.cpp";
+ cache_directory += DIRECTORY_SEPARATOR;
+ }
+ return cache_directory;
+}
+
void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data) {
if (data.empty()) {
fprintf(stream, "%s:\n", prop_name);
@@ -1587,7 +2613,7 @@ void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const cha
size_t pos_start = 0;
size_t pos_found = 0;
- if (!data_str.empty() && (std::isspace(data_str[0]) || std::isspace(data_str.back()))) {
+ if (std::isspace(data_str[0]) || std::isspace(data_str.back())) {
data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
@@ -1637,7 +2663,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
- fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
+ fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
@@ -1699,6 +2725,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
dump_string_yaml_multiline(stream, "in_suffix", params.input_prefix.c_str());
fprintf(stream, "instruct: %s # default: false\n", params.instruct ? "true" : "false");
fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
+ fprintf(stream, "interactive_specials: %s # default: false\n", params.interactive_specials ? "true" : "false");
fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
fprintf(stream, "keep: %d # default: 0\n", params.n_keep);
fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str());
@@ -1732,15 +2759,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau);
fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta);
fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false");
- fprintf(stream, "model: %s # default: models/7B/ggml-model.bin\n", params.model.c_str());
+ fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH);
fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false");
fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers);
fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict);
fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs);
fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false");
- fprintf(stream, "no_mul_mat_q: %s # default: false\n", !params.mul_mat_q ? "true" : "false");
- fprintf(stream, "no_penalize_nl: %s # default: false\n", !sparams.penalize_nl ? "true" : "false");
+ fprintf(stream, "penalize_nl: %s # default: false\n", sparams.penalize_nl ? "true" : "false");
fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type);
fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride);
fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present);
@@ -1768,6 +2794,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "seed: %u # default: -1 (random seed)\n", params.seed);
fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
+ fprintf(stream, "flash_attn: %s # default: false\n", params.flash_attn ? "true" : "false");
fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp);
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
@@ -1791,17 +2818,17 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) {
static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
- view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
+ view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
llama_kv_cache_view_cell * c_curr = view.cells;
llama_seq_id * cs_curr = view.cells_sequences;
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
+ for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
if (i % row_size == 0) {
printf("\n%5d: ", i);
}
int seq_count = 0;
- for (int j = 0; j < view.n_max_seq; j++) {
+ for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] >= 0) { seq_count++; }
}
putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
@@ -1814,14 +2841,14 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
- view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
+ view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
std::unordered_map<llama_seq_id, size_t> seqs;
llama_kv_cache_view_cell * c_curr = view.cells;
llama_seq_id * cs_curr = view.cells_sequences;
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
- for (int j = 0; j < view.n_max_seq; j++) {
+ for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
+ for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] < 0) { continue; }
if (seqs.find(cs_curr[j]) == seqs.end()) {
if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
@@ -1840,11 +2867,11 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
c_curr = view.cells;
cs_curr = view.cells_sequences;
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
+ for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
if (i % row_size == 0) {
printf("\n%5d: ", i);
}
- for (int j = 0; j < view.n_max_seq; j++) {
+ for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] >= 0) {
const auto & it = seqs.find(cs_curr[j]);
putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
@@ -1857,3 +2884,207 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
printf("\n=== Done dumping\n");
}
+
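+// Scale the n-element embedding `inp` to unit L2 norm, writing to `out`;
+// an all-zero input is left as all zeros rather than dividing by zero.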
+void llama_embd_normalize(const float * inp, float * out, int n) {
+ double sum = 0.0;
+ for (int i = 0; i < n; i++) {
+ sum += inp[i] * inp[i];
+ }
+ sum = sqrt(sum);
+
+ const float norm = sum > 0.0 ? 1.0f / sum : 0.0f;
+
+ for (int i = 0; i < n; i++) {
+ out[i] = inp[i] * norm;
+ }
+}
+
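+// Cosine similarity of two n-element embeddings, in [-1, 1].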
+float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
+ double sum = 0.0;
+ double sum1 = 0.0;
+ double sum2 = 0.0;
+
+ for (int i = 0; i < n; i++) {
+ sum += embd1[i] * embd2[i];
+ sum1 += embd1[i] * embd1[i];
+ sum2 += embd2[i] * embd2[i];
+ }
+
+ return sum / (sqrt(sum1) * sqrt(sum2));
+}
+
+//
+// Control vector utils
+//
+
+static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+ int32_t n_tensors;
+
+ size_t n_bytes = 0;
+
+ uint32_t max_direction_layer = 0;
+
+ llama_control_vector_data result = { -1, {} };
+
+ // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
+ {
+ struct ggml_init_params meta_params = {
+ /* .mem_size = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
+ /* .mem_buffer = */ nullptr,
+ /* .no_alloc = */ true,
+ };
+ ggml_context * meta_ctx = ggml_init(meta_params);
+ struct gguf_init_params meta_gguf_params = {
+ /* .no_alloc = */ true,
+ /* .ctx = */ &meta_ctx,
+ };
+ struct llamafile * file = llamafile_open_gguf(load_info.fname.c_str(), "rb");
+ if (!file) {
+ perror(load_info.fname.c_str());
+ ggml_free(meta_ctx);
+ return result;
+ }
+ struct gguf_context * meta_ctx_gguf = gguf_init_from_file(file, meta_gguf_params);
+ if (!meta_ctx_gguf) {
+ fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
+ llamafile_close(file);
+ ggml_free(meta_ctx);
+ return result;
+ }
+
+ n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
+ for (int i = 0; i < n_tensors; i++) {
+ std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
+
+ // split on '.'
+ size_t dotpos = name.find('.');
+ if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+ try {
+ uint32_t layer = std::stoi(name.substr(dotpos + 1));
+ if (layer == 0) {
+ fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+ ggml_free(meta_ctx);
+ gguf_free(meta_ctx_gguf);
+ llamafile_close(file);
+ return result;
+ }
+ if (layer > max_direction_layer) {
+ max_direction_layer = layer;
+ }
+ } catch (...) {
+ fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+ ggml_free(meta_ctx);
+ gguf_free(meta_ctx_gguf);
+ llamafile_close(file);
+ return result;
+ }
+ }
+
+ struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
+ if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
+ fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+ ggml_free(meta_ctx);
+ gguf_free(meta_ctx_gguf);
+ llamafile_close(file);
+ return result;
+ }
+ if (result.n_embd == -1) {
+ result.n_embd = ggml_nelements(tensor_meta);
+ } else if (ggml_nelements(tensor_meta) != result.n_embd) {
+ fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
+ ggml_free(meta_ctx);
+ gguf_free(meta_ctx_gguf);
+ llamafile_close(file);
+ return result;
+ }
+ n_bytes += ggml_nbytes(tensor_meta);
+ }
+ ggml_free(meta_ctx);
+ gguf_free(meta_ctx_gguf);
+ llamafile_close(file);
+ }
+
+ if (n_tensors == 0) {
+ fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+ return result;
+ }
+
+ // load and scale tensors into final control vector context
+ struct ggml_init_params ggml_params = {
+ /* .mem_size = */ ggml_tensor_overhead() * n_tensors + n_bytes,
+ /* .mem_buffer = */ nullptr,
+ /* .no_alloc = */ false,
+ };
+ struct ggml_context * ctx = ggml_init(ggml_params);
+
+ struct gguf_init_params params = {
+ /*.no_alloc = */ false,
+ /*.ctx = */ &ctx,
+ };
+ struct llamafile * file = llamafile_open_gguf(load_info.fname.c_str(), "rb");
+ if (!file) {
+ perror(load_info.fname.c_str());
+ ggml_free(ctx);
+ return result;
+ }
+ struct gguf_context * ctx_gguf = gguf_init_from_file(file, params);
+ if (!ctx_gguf) {
+ fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
+ ggml_free(ctx);
+ llamafile_close(file);
+ return result;
+ }
+
+ // do not store data for layer 0 (it's not used)
+ result.data.resize(result.n_embd * max_direction_layer);
+
+ for (uint32_t il = 1; il <= max_direction_layer; il++) {
+ const std::string name = "direction." + std::to_string(il);
+ const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+
+ float * dst = result.data.data() + result.n_embd * (il - 1);
+
+ if (tensor) {
+ const float * src = (const float *) tensor->data;
+ for (int j = 0; j < result.n_embd; j++) {
+ dst[j] = src[j] * load_info.strength;
+ }
+ } else {
+ for (int j = 0; j < result.n_embd; j++) {
+ dst[j] = 0.0f;
+ }
+ }
+ }
+
+ return result;
+}
+
+llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+ llama_control_vector_data result = { -1, {} };
+
+ for (const auto & info : load_infos) {
+ auto cur = llama_control_vector_load_one(info);
+
+ if (cur.n_embd == -1) {
+ return result;
+ }
+ if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
+ fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+ return result;
+ }
+
+ if (result.n_embd == -1) {
+ result = std::move(cur);
+ } else {
+ for (size_t i = 0; i < cur.data.size(); i++) {
+ result.data[i] += cur.data[i];
+ }
+ }
+ }
+
+ if (result.n_embd == -1) {
+ fprintf(stderr, "%s: no vectors passed\n", __func__);
+ }
+
+ return result;
+}
diff --git a/llama.cpp/common.h b/llama.cpp/common.h
index c3eb91a5d3..1fdbc7d393 100644
--- a/llama.cpp/common.h
+++ b/llama.cpp/common.h
@@ -1,14 +1,16 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-
-// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
// Various helper functions and utilities
#pragma once
+#include "llamafile/log.h"
#include "llama.h"
#include "sampling.h"
#include "llamafile/version.h"
+#include "llamafile/llamafile.h"
#define LOG_NO_FILE_LINE_FUNCTION
#include "log.h"
@@ -34,33 +36,39 @@
tinylog(__func__, ": llamafile version " LLAMAFILE_VERSION_STRING "\n", NULL); \
} while(0)
+#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
+
// build info
extern int LLAMA_BUILD_NUMBER;
extern char const *LLAMA_COMMIT;
extern char const *LLAMA_COMPILER;
extern char const *LLAMA_BUILD_TARGET;
+struct llama_control_vector_load_info;
+
+int cpu_get_num_math();
+int32_t get_num_physical_cores();
+
//
// CLI argument parsing
//
-int32_t get_num_physical_cores();
struct gpt_params {
- uint32_t seed = -1; // RNG seed
+ uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
- int32_t n_threads = get_num_physical_cores();
+ int32_t n_threads = cpu_get_num_math();
int32_t n_threads_draft = -1;
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
int32_t n_threads_batch_draft = -1;
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 512; // context size
- int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
+ int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
+ int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
- int32_t n_draft = 8; // number of tokens to draft during speculative decoding
+ int32_t n_draft = 5; // number of tokens to draft during speculative decoding
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
int32_t n_parallel = 1; // number of parallel sequences to decode
int32_t n_sequences = 1; // number of sequences to decode
- float p_accept = 0.5f; // speculative decoding accept probability
float p_split = 0.1f; // speculative decoding split probability
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
@@ -79,23 +87,35 @@ struct gpt_params {
float yarn_beta_slow = 1.0f; // YaRN high correction dim
int32_t yarn_orig_ctx = 0; // YaRN original context length
float defrag_thold = -1.0f; // KV cache defragmentation threshold
- int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
- ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
+ std::string rpc_servers = ""; // comma separated list of RPC servers
+
+ ggml_backend_sched_eval_callback cb_eval = nullptr;
+ void * cb_eval_user_data = nullptr;
+
+ ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
+
+ enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+ enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
// sampling parameters
struct llama_sampling_params sparams;
- std::string model = "models/7B/ggml-model-f16.gguf"; // model path
- std::string model_draft = ""; // draft model for speculative decoding
- std::string model_alias = "unknown"; // model alias
- std::string prompt = "";
- std::string prompt_file = ""; // store the external prompt file name
- std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
- std::string input_prefix = ""; // string to prefix user inputs with
- std::string input_suffix = ""; // string to suffix user inputs with
+ std::string model = ""; // model path
+ std::string model_draft = ""; // draft model for speculative decoding
+ std::string model_alias = "unknown"; // model alias
+ std::string model_url = ""; // model url to download
+ std::string hf_repo = ""; // HF repo
+ std::string hf_file = ""; // HF file
+ std::string prompt = "";
+ std::string prompt_file = ""; // store the external prompt file name
+ std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
+ std::string input_prefix = ""; // string to prefix user inputs with
+ std::string input_suffix = ""; // string to suffix user inputs with
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
- std::string logdir = ""; // directory in which to save YAML log files
- std::string logits_file = ""; // file for saving *all* logits
+ std::string logdir = ""; // directory in which to save YAML log files
+ std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
+ std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
+ std::string logits_file = ""; // file for saving *all* logits
std::vector<llama_model_kv_override> kv_overrides;
@@ -103,6 +123,11 @@ struct gpt_params {
std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
std::string lora_base = ""; // base model path for the lora adapter
+ std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
+
+ int32_t control_vector_layer_start = -1; // layer range for control vector
+ int32_t control_vector_layer_end = -1; // layer range for control vector
+
int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
// (which is more convenient to use for plotting)
@@ -116,12 +141,13 @@ struct gpt_params {
bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed
- bool kl_divergence = false; // compute KL-divergence
+ bool kl_divergence = false; // compute KL divergence
- bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
bool random_prompt = false; // do not randomize prompt if none provided
bool use_color = false; // use color to distinguish generations and inputs
bool interactive = false; // interactive mode
+ bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
+ bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
bool chatml = false; // chatml mode (used for models trained on chatml syntax)
bool prompt_cache_all = false; // save user input and generations to prompt cache
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
@@ -131,7 +157,8 @@ struct gpt_params {
bool interactive_first = false; // wait for user input immediately
bool multiline_input = false; // reverse the usage of `\`
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
- bool cont_batching = false; // insert new sequences for decoding on-the-fly
+ bool cont_batching = true; // insert new sequences for decoding on-the-fly
+ bool flash_attn = false; // flash attention
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool ignore_eos = false; // ignore generated EOS tokens
@@ -144,27 +171,37 @@ struct gpt_params {
bool infill = false; // use infill mode
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
bool no_kv_offload = false; // disable KV offloading
+ bool warmup = true; // warmup run
+ bool check_tensors = false; // validate tensor data
std::string cache_type_k = "f16"; // KV cache data type for the K
std::string cache_type_v = "f16"; // KV cache data type for the V
// multimodal models (see examples/llava)
- std::string mmproj = ""; // path to multimodal projector
- std::string image = ""; // path to an image file
+ std::string mmproj = ""; // path to multimodal projector
+ std::vector<std::string> image; // path to image file(s)
};
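+
+// Fill in params.model when it was left empty: derive it from --hf-file or
+// --model-url when one of those is set, otherwise fall back to DEFAULT_MODEL_PATH.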
+void gpt_params_handle_model_default(gpt_params & params);
+
+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+
bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
+bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
+
std::string get_system_info(const gpt_params & params);
std::string gpt_random_prompt(std::mt19937 & rng);
void process_escapes(std::string& input);
+bool validate_file_name(const std::string & filename);
+
//
// String utils
//
@@ -172,6 +209,7 @@ void process_escapes(std::string& input);
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
std::vector<std::string> string_split(std::string input, char separator);
+std::string string_strip(const std::string & str);
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
//
@@ -184,6 +222,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
+struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
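+//
+// minimal usage sketch (repo/file values are illustrative):
+//   auto mparams = llama_model_params_from_gpt_params(params);
+//   llama_model * model = llama_load_model_from_hf(
+//       "ggml-org/models", "tinyllama-1.1b/ggml-model-f16.gguf",
+//       "models/ggml-model-f16.gguf", mparams);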
+
// Batch utils
void llama_batch_clear(struct llama_batch & batch);
@@ -204,20 +245,21 @@ void llama_batch_add(
std::vector<llama_token> llama_tokenize(
const struct llama_context * ctx,
const std::string & text,
- bool add_bos,
- bool special = false);
+ bool add_special,
+ bool parse_special = false);
std::vector<llama_token> llama_tokenize(
const struct llama_model * model,
const std::string & text,
- bool add_bos,
- bool special = false);
+ bool add_special,
+ bool parse_special = false);
-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
// should work similar to Python's `tokenizer.id_to_piece`
std::string llama_token_to_piece(
const struct llama_context * ctx,
- llama_token token);
+ llama_token token,
+ bool special = true);
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
// that takes into account the tokenizer type and decides how to handle the leading space
@@ -244,6 +286,7 @@ bool llama_should_add_bos_token(const llama_model * model);
//
bool create_directory_with_parents(const std::string & path);
+std::string get_cache_directory();
void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
@@ -262,3 +305,39 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
// Dump the KV cache view showing individual sequences in each cell (long output).
void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+
+//
+// Embedding utils
+//
+
+void llama_embd_normalize(const float * inp, float * out, int n);
+
+float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
+
+//
+// Control vector utils
+//
+
+struct llama_control_vector_data {
+ int n_embd;
+
+ // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
+ std::vector<float> data;
+};
+
+struct llama_control_vector_load_info {
+ float strength;
+
+ std::string fname;
+};
+
+// Load control vectors, scale each by strength, and add them together.
+// On error, returns {-1, empty}
+llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
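+// e.g. (hypothetical filenames):
+//   llama_control_vector_load({{ 0.8f, "happy.gguf" }, { -0.4f, "sad.gguf" }});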
+
+//
+// Split utils
+//
+static const char * const LLM_KV_SPLIT_NO = "split.no";
+static const char * const LLM_KV_SPLIT_COUNT = "split.count";
+static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
diff --git a/llama.cpp/console.cpp b/llama.cpp/console.cpp
index 999f8045ed..448bf877fd 100644
--- a/llama.cpp/console.cpp
+++ b/llama.cpp/console.cpp
@@ -1,13 +1,15 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-
-// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+
#include "console.h"
+
#include <vector>
#include <iostream>
-
#include <climits>
#include <sys/ioctl.h>
#include <unistd.h>
#include <wchar.h>
+#include <cosmo.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -30,6 +32,7 @@ namespace console {
static bool advanced_display = false;
static bool simple_io = true;
+static bool should_close_tty = false;
static display_t current_display = reset;
static FILE* out = stdout;
static FILE* tty = nullptr;
@@ -40,19 +43,32 @@ static termios initial_state;
//
void init(bool use_simple_io, bool use_advanced_display) {
- advanced_display = use_advanced_display;
+ should_close_tty = false;
simple_io = use_simple_io;
+ advanced_display = use_advanced_display;
if (!simple_io) {
- struct termios new_termios;
- tcgetattr(STDIN_FILENO, &initial_state);
- new_termios = initial_state;
- new_termios.c_lflag &= ~(ICANON | ECHO);
- new_termios.c_cc[VMIN] = 1;
- new_termios.c_cc[VTIME] = 0;
- tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);
tty = fopen("/dev/tty", "w+e");
+ if (tty) {
+ should_close_tty = true;
+ } else if (IsLinux() || IsOpenbsd()) {
+ // this could happen because pledge() blocked us
+ tty = fdopen(0, "w+e");
+ }
if (tty != nullptr) {
- out = tty;
+ if (!tcgetattr(fileno(tty), &initial_state)) {
+ out = tty;
+ struct termios new_termios = initial_state;
+ new_termios.c_lflag &= ~(ICANON | ECHO);
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+ tcsetattr(fileno(tty), TCSANOW, &new_termios);
+ } else {
+ simple_io = true;
+ fclose(tty);
+ tty = 0;
+ }
+ } else {
+ simple_io = true;
}
}
setlocale(LC_ALL, "");
@@ -64,11 +80,14 @@ void cleanup() {
// Restore settings
if (!simple_io) {
if (tty != nullptr) {
- out = stdout;
- fclose(tty);
+ fflush(tty);
+ tcsetattr(fileno(tty), TCSANOW, &initial_state);
+ if (should_close_tty) {
+ fclose(tty);
+ }
tty = nullptr;
+ out = stdout;
}
- tcsetattr(STDIN_FILENO, TCSANOW, &initial_state);
}
}
diff --git a/llama.cpp/ggml-alloc.c b/llama.cpp/ggml-alloc.c
index cbffba8af6..64270d320d 100644
--- a/llama.cpp/ggml-alloc.c
+++ b/llama.cpp/ggml-alloc.c
@@ -11,10 +11,6 @@
#include <stdlib.h>
#include <string.h>
-#ifndef NDEBUG
-#define NDEBUG // [jart] delete printf debugging
-#endif
-
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MAX_FREE_BLOCKS 256
@@ -67,7 +63,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
}
}
-// TODO: GGML_PAD ?
static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
assert(alignment && !(alignment & (alignment - 1))); // power of 2
size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment;
@@ -75,25 +70,14 @@ static size_t aligned_offset(const void * buffer, size_t offset, size_t alignmen
}
// tallocr
-struct ggml_tallocr {
- ggml_backend_buffer_t buffer;
- void * base;
- size_t alignment;
- size_t offset;
-};
-
-ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
- ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr));
- if (talloc == NULL) {
- return NULL;
- }
+struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
void * base = ggml_backend_buffer_get_base(buffer);
size_t align = ggml_backend_buffer_get_alignment(buffer);
assert(align && !(align & (align - 1))); // power of 2
- *talloc = (struct ggml_tallocr) {
+ struct ggml_tallocr talloc = (struct ggml_tallocr) {
/*.buffer = */ buffer,
/*.base = */ base,
/*.alignment = */ align,
@@ -102,11 +86,7 @@ ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
return talloc;
}
-void ggml_tallocr_free(ggml_tallocr_t talloc) {
- free(talloc);
-}
-
-void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) {
+void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
size = GGML_PAD(size, talloc->alignment);
@@ -360,12 +340,16 @@ struct hash_node {
bool allocated;
};
-//
struct tensor_alloc {
size_t offset;
size_t size_max; // 0 = pre-allocated, unused, or view
};
+struct leaf_alloc {
+ int buffer_id;
+ struct tensor_alloc leaf;
+};
+
struct node_alloc {
int buffer_id;
struct tensor_alloc dst;
@@ -384,21 +368,21 @@ struct ggml_gallocr {
struct node_alloc * node_allocs; // [n_nodes]
int n_nodes;
- struct tensor_alloc * leaf_allocs; // [n_leafs]
+ struct leaf_alloc * leaf_allocs; // [n_leafs]
int n_leafs;
};
ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
- ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(sizeof(struct ggml_gallocr), 1);
+ ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr));
GGML_ASSERT(galloc != NULL);
- galloc->bufts = calloc(sizeof(ggml_backend_buffer_type_t) * n_bufs, 1);
+ galloc->bufts = calloc(n_bufs, sizeof(ggml_backend_buffer_type_t));
GGML_ASSERT(galloc->bufts != NULL);
- galloc->buffers = calloc(sizeof(ggml_backend_buffer_t) * n_bufs, 1);
+ galloc->buffers = calloc(n_bufs, sizeof(ggml_backend_buffer_t));
GGML_ASSERT(galloc->buffers != NULL);
- galloc->buf_tallocs = calloc(sizeof(struct ggml_dyn_tallocr *) * n_bufs, 1);
+ galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
GGML_ASSERT(galloc->buf_tallocs != NULL);
for (int i = 0; i < n_bufs; i++) {
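Beyond consistency, the calloc(nmemb, size) argument order adopted in this hunk buys overflow checking: calloc is required to fail when nmemb * size overflows, a check that a hand-multiplied single argument silently skips. A minimal demonstration of the difference:

    #include <stdint.h>
    #include <stdlib.h>

    int main(void) {
        size_t n = SIZE_MAX / sizeof(int) + 2;  // nmemb * size overflows size_t
        int * a = calloc(n, sizeof(int));       // calloc detects the overflow: NULL
        int * b = malloc(n * sizeof(int));      // product wraps; may "succeed" tiny
        free(a);
        free(b);
        return 0;
    }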
@@ -549,17 +533,28 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
return node_buffer_ids ? node_buffer_ids[i] : 0;
}
-static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
+static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
// clear hash tables
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
+ // allocate leafs
+ // these may be tensors that the application is not using in the graph, but that it may still want to allocate for other purposes
+ for (int i = 0; i < graph->n_leafs; i++) {
+ struct ggml_tensor * leaf = graph->leafs[i];
+ ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i));
+ }
+
// count number of children and views
- // allocate all graph inputs and leafs first to avoid overwriting them
+ // allocate other graph inputs and leafs first to avoid overwriting them
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
- if (ggml_is_view(node)) {
+ // TODO: better way to add external dependencies
+ // GGML_OP_NONE does not appear normally in the graph nodes, but is used by ggml-backend to add dependencies to
+ // control when some tensors are allocated and freed. in this case, the dependencies are in `src`, but the node
+ // itself is never used and should not be considered a dependency
+ if (ggml_is_view(node) && node->op != GGML_OP_NONE) {
struct ggml_tensor * view_src = node->view_src;
ggml_gallocr_hash_get(galloc, view_src)->n_views += 1;
}
@@ -576,26 +571,13 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
ggml_gallocr_hash_get(galloc, src)->n_children += 1;
- // allocate explicit inputs and leafs
- if (src->flags & GGML_TENSOR_FLAG_INPUT || src->op == GGML_OP_NONE) {
+ // allocate explicit inputs
+ if (src->flags & GGML_TENSOR_FLAG_INPUT) {
ggml_gallocr_allocate_node(galloc, src, get_node_buffer_id(node_buffer_ids, i));
}
}
}
- // allocate the remaining leafs that are unused on the graph
- // these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes
- for (int i = 0; i < graph->n_leafs; i++) {
- struct ggml_tensor * leaf = graph->leafs[i];
- struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-
- if (hn->n_children == 0) {
- assert(!hn->allocated);
- // since buffer ids are only given for nodes, these leafs are always allocated in the first buffer
- ggml_gallocr_allocate_node(galloc, leaf, 0);
- }
- }
-
// allocate tensors
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
@@ -658,7 +640,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
}
}
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
size_t hash_size = graph->visited_hash_table.size;
// initialize hash table
@@ -666,8 +648,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
free(galloc->hash_set.keys);
free(galloc->hash_values);
galloc->hash_set.size = hash_size;
- galloc->hash_set.keys = calloc(sizeof(struct ggml_tensor *), hash_size);
- galloc->hash_values = calloc(sizeof(struct hash_node), hash_size);
+ galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
+ galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
GGML_ASSERT(galloc->hash_set.keys != NULL);
GGML_ASSERT(galloc->hash_values != NULL);
} else {
@@ -682,12 +664,12 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
}
// allocate in hash table
- ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids);
+ ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids);
// set the node_allocs from the hash table
if (galloc->n_nodes < graph->n_nodes) {
free(galloc->node_allocs);
- galloc->node_allocs = calloc(sizeof(struct node_alloc), graph->n_nodes);
+ galloc->node_allocs = calloc(graph->n_nodes, sizeof(struct node_alloc));
GGML_ASSERT(galloc->node_allocs != NULL);
}
galloc->n_nodes = graph->n_nodes;
@@ -717,15 +699,21 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
}
if (galloc->n_leafs < graph->n_leafs) {
free(galloc->leaf_allocs);
- galloc->leaf_allocs = calloc(sizeof(struct tensor_alloc), graph->n_leafs);
+ galloc->leaf_allocs = calloc(graph->n_leafs, sizeof(galloc->leaf_allocs[0]));
GGML_ASSERT(galloc->leaf_allocs != NULL);
}
galloc->n_leafs = graph->n_leafs;
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
- galloc->leaf_allocs[i].offset = hn->offset;
- galloc->leaf_allocs[i].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
+ galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
+ if (leaf->view_src || leaf->data) {
+ galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
+ galloc->leaf_allocs[i].leaf.size_max = 0;
+ } else {
+ galloc->leaf_allocs[i].leaf.offset = hn->offset;
+ galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
+ }
}
// reallocate buffers if needed
@@ -733,7 +721,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0;
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
- if (new_size > cur_size) {
+ // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
+ if (new_size > cur_size || galloc->buffers[i] == NULL) {
#ifndef NDEBUG
fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
#endif
@@ -750,30 +739,30 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
}
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
- return ggml_gallocr_reserve_n(galloc, graph, NULL);
+ return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
}
-static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id, struct tensor_alloc * tensor_alloc) {
- assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
+static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) {
+ assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
- if (node->view_src != NULL) {
- if (node->buffer == NULL) {
+ if (tensor->view_src != NULL) {
+ if (tensor->buffer == NULL) {
assert(tensor_alloc->offset == SIZE_MAX);
- if (node->view_src->buffer == NULL) {
+ if (tensor->view_src->buffer == NULL) {
// this tensor was allocated without ggml-backend
return;
}
- ggml_backend_view_init(galloc->buffers[buffer_id], node);
+ ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
}
} else {
- if (node->data == NULL) {
+ if (tensor->data == NULL) {
assert(tensor_alloc->offset != SIZE_MAX);
- assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
+ assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
void * addr = (char *)base + tensor_alloc->offset;
- ggml_backend_tensor_alloc(galloc->buffers[buffer_id], node, addr);
+ ggml_backend_tensor_alloc(galloc->buffers[buffer_id], tensor, addr);
} else {
- if (node->buffer == NULL) {
+ if (tensor->buffer == NULL) {
// this tensor was allocated without ggml-backend
return;
}
@@ -849,13 +838,18 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
// reset buffers
for (int i = 0; i < galloc->n_buffers; i++) {
- // zero size buffers are not allocated
if (galloc->buffers[i] != NULL) {
ggml_backend_buffer_reset(galloc->buffers[i]);
}
}
// allocate the graph tensors from the previous assignments
+ // leafs
+ for (int i = 0; i < graph->n_leafs; i++) {
+ struct ggml_tensor * leaf = graph->leafs[i];
+ struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i];
+ ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf);
+ }
// nodes
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
@@ -869,12 +863,6 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
}
ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
}
- // leafs
- for (int i = 0; i < graph->n_leafs; i++) {
- struct ggml_tensor * leaf = graph->leafs[i];
- struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i];
- ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc);
- }
return true;
}
@@ -906,12 +894,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
return false;
}
- struct ggml_tallocr * tallocr = ggml_tallocr_new(buffer);
+ struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
if (t->data == NULL) {
if (t->view_src == NULL) {
- ggml_tallocr_alloc(tallocr, t);
+ ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t);
}
@@ -923,8 +911,6 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
}
}
- ggml_tallocr_free(tallocr);
-
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;
diff --git a/llama.cpp/ggml-alloc.h b/llama.cpp/ggml-alloc.h
index 1d9085d15f..434c13b34a 100644
--- a/llama.cpp/ggml-alloc.h
+++ b/llama.cpp/ggml-alloc.h
@@ -11,11 +11,15 @@ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;
// Tensor allocator
-typedef struct ggml_tallocr * ggml_tallocr_t;
+struct ggml_tallocr {
+ ggml_backend_buffer_t buffer;
+ void * base;
+ size_t alignment;
+ size_t offset;
+};
-GGML_API ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API void ggml_tallocr_free(ggml_tallocr_t talloc);
-GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor);
+GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
+GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
// Graph allocator
/*
@@ -50,7 +54,11 @@ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
// returns false if the buffer allocation failed
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
-GGML_API bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids);
+GGML_API bool ggml_gallocr_reserve_n(
+ ggml_gallocr_t galloc,
+ struct ggml_cgraph * graph,
+ const int * node_buffer_ids,
+ const int * leaf_buffer_ids);
// automatic reallocation if the topology changes when using a single buffer
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
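With ggml_tallocr now a plain struct returned by value, the allocator can live on the stack and there is no free function to pair with ggml_tallocr_new. A usage sketch under those assumptions (`ctx`, `buffer`, and `first` created beforehand), mirroring the alloc_tensor_range change above:

    struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        if (t->data == NULL && t->view_src == NULL) {
            ggml_tallocr_alloc(&tallocr, t);  // suballocates t from buffer
        }
    }
    // no ggml_tallocr_free: the struct simply goes out of scope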
diff --git a/llama.cpp/ggml-backend-impl.h b/llama.cpp/ggml-backend-impl.h
index 0e5b68503a..f02379c728 100644
--- a/llama.cpp/ggml-backend-impl.h
+++ b/llama.cpp/ggml-backend-impl.h
@@ -88,29 +88,48 @@ extern "C" {
// (optional) asynchronous tensor data access
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
- bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * src, struct ggml_tensor * dst);
+ bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
// (optional) complete all pending operations
void (*GGML_CALL synchronize)(ggml_backend_t backend);
- // compute graph with a plan
+ // compute graph with a plan (not used currently)
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
- void (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+ // compute graph with a plan
+ enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute graph without a plan (async)
- bool (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
// check if the backend supports an operation
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+ // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
+ // these should be expensive operations with large batch sizes that may benefit from running on this backend
+ // even if the weight has to be copied from the CPU temporarily
+ bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+ // (optional) event synchronization
+ ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
+ void (*GGML_CALL event_free) (ggml_backend_event_t event);
+ void (*GGML_CALL event_record) (ggml_backend_event_t event);
+ void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
+ void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
};
struct ggml_backend {
- struct ggml_backend_i iface;
+ ggml_guid_t guid;
+ struct ggml_backend_i iface;
ggml_backend_context_t context;
};
+ struct ggml_backend_event {
+ ggml_backend_t backend;
+ void * context;
+ };
+
//
// Backend registry
//
@@ -131,6 +150,8 @@ extern "C" {
void (*GGML_CALL exit)(int);
void (*GGML_CALL free)(void *);
void *(*GGML_CALL malloc)(size_t);
+ char *(*GGML_CALL getenv)(const char *);
+ long (*GGML_CALL write)(int, const void *, long);
void (*GGML_CALL ggml_backend_register)(const char *, ggml_backend_init_fn, ggml_backend_buffer_type_t, void *);
ggml_backend_buffer_t (*GGML_CALL ggml_backend_buffer_init)(ggml_backend_buffer_type_t, struct ggml_backend_buffer_i, ggml_backend_buffer_context_t, size_t);
ggml_backend_buffer_t (*GGML_CALL ggml_backend_cpu_buffer_from_ptr)(void *, size_t);
@@ -157,6 +178,9 @@ extern "C" {
void (*GGML_CALL ggml_rope_yarn_corr_dims)(int, int, float, float, float, float[2]);
const char *(*GGML_CALL ggml_op_desc)(const struct ggml_tensor *);
bool (*GGML_CALL ggml_backend_buffer_is_host)(ggml_backend_buffer_t);
+ bool (*GGML_CALL ggml_guid_matches)(ggml_guid_t, ggml_guid_t);
+ bool (*GGML_CALL ggml_is_empty)(const struct ggml_tensor *);
+ bool (*GGML_CALL ggml_are_same_shape)(const struct ggml_tensor *, const struct ggml_tensor *);
};
#ifdef __cplusplus
diff --git a/llama.cpp/ggml-backend.c b/llama.cpp/ggml-backend.c
index e21683ad9c..fbf7bbb052 100644
--- a/llama.cpp/ggml-backend.c
+++ b/llama.cpp/ggml-backend.c
@@ -2,6 +2,8 @@
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#include "ggml-backend-impl.h"
+#include "ggml-alloc.h"
+#include "ggml-impl.h"
#include
#include
@@ -9,22 +11,10 @@
#include
#include
#include
-#include
-
-#include "ggml-alloc.h"
-#include "ggml-cuda.h"
-#include "ggml-impl.h"
-#include "ggml-metal.h"
-#include "llamafile/log.h"
-
-#ifndef NDEBUG
-#define NDEBUG // [jart] delete printf debugging
-#endif
#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
// backend buffer type
const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
@@ -171,6 +161,13 @@ bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml
// backend
+ggml_guid_t ggml_backend_guid(ggml_backend_t backend) {
+ if (backend == NULL) {
+ return NULL;
+ }
+ return backend->guid;
+}
+
const char * ggml_backend_name(ggml_backend_t backend) {
if (backend == NULL) {
return "NULL";
@@ -227,29 +224,29 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
- GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(buf != NULL && "tensor buffer not set");
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
if (!size) {
return;
}
- tensor->buffer->iface.set_tensor(buf, tensor, data, offset, size);
+ buf->iface.set_tensor(buf, tensor, data, offset, size);
}
GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
- GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
if (!size) {
return;
}
- tensor->buffer->iface.get_tensor(buf, tensor, data, offset, size);
+ buf->iface.get_tensor(buf, tensor, data, offset, size);
}
void ggml_backend_synchronize(ggml_backend_t backend) {
@@ -261,18 +258,30 @@ void ggml_backend_synchronize(ggml_backend_t backend) {
}
ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+ GGML_ASSERT(backend->iface.graph_plan_create != NULL);
+
return backend->iface.graph_plan_create(backend, cgraph);
}
void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+ GGML_ASSERT(backend->iface.graph_plan_free != NULL);
+
backend->iface.graph_plan_free(backend, plan);
}
-void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
- backend->iface.graph_plan_compute(backend, plan);
+enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+ GGML_ASSERT(backend->iface.graph_plan_compute != NULL);
+
+ return backend->iface.graph_plan_compute(backend, plan);
+}
+
+enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+ enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph);
+ ggml_backend_synchronize(backend);
+ return err;
}
-bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
return backend->iface.graph_compute(backend, cgraph);
}
@@ -280,6 +289,13 @@ bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor *
return backend->iface.supports_op(backend, op);
}
+bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+ if (backend->iface.offload_op != NULL) {
+ return backend->iface.offload_op(backend, op);
+ }
+ return false;
+}
+
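A backend opts in by implementing offload_op to accept only ops where copying the weight from the CPU pays for itself, typically large-batch matrix multiplications. A hypothetical backend-side sketch (the threshold is illustrative, not taken from this patch):

    // hypothetical offload_op: accept only large-batch matrix multiplications
    static bool my_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
        (void) backend;
        const int min_batch_size = 32;  // illustrative cutoff
        return op->op == GGML_OP_MUL_MAT && op->ne[1] >= min_batch_size;
    }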
// backend copy
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
@@ -320,34 +336,68 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
}
}
-void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
+void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst) {
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
if (src == dst) {
return;
}
- if (ggml_backend_buft_supports_backend(src->buffer->buft, backend) && ggml_backend_buft_supports_backend(dst->buffer->buft, backend)) {
- if (backend->iface.cpy_tensor_async != NULL) {
- if (backend->iface.cpy_tensor_async(backend, src, dst)) {
- return;
- }
+ if (backend_dst->iface.cpy_tensor_async != NULL) {
+ if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) {
+ return;
}
}
- size_t nbytes = ggml_nbytes(src);
+ // an async copy would normally happen after all the queued operations on both backends are completed
+ // sync src, set_async dst
if (ggml_backend_buffer_is_host(src->buffer)) {
- ggml_backend_tensor_set_async(backend, dst, src->data, 0, nbytes);
- }
- else {
+ ggml_backend_synchronize(backend_src);
+ ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
+ } else {
+ ggml_backend_synchronize(backend_src);
ggml_backend_tensor_copy(src, dst);
+ ggml_backend_synchronize(backend_dst);
+ }
+}
+
+// events
+
+ggml_backend_event_t ggml_backend_event_new(ggml_backend_t backend) {
+ if (backend->iface.event_new == NULL) {
+ return NULL;
+ }
+ return backend->iface.event_new(backend);
+}
+
+void ggml_backend_event_free(ggml_backend_event_t event) {
+ if (event == NULL) {
+ return;
}
+ event->backend->iface.event_free(event);
+}
+
+void ggml_backend_event_record(ggml_backend_event_t event) {
+ GGML_ASSERT(event->backend->iface.event_record != NULL);
+
+ event->backend->iface.event_record(event);
+}
+
+void ggml_backend_event_synchronize(ggml_backend_event_t event) {
+ GGML_ASSERT(event->backend->iface.event_synchronize != NULL);
+
+ event->backend->iface.event_synchronize(event);
}
+void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
+ GGML_ASSERT(backend->iface.event_wait != NULL);
+
+ backend->iface.event_wait(backend, event);
+}
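Together these calls give a record/wait primitive: the producing backend records an event at a point in its queue, the consuming backend queues a wait on it, and the host thread stays unblocked. A hedged sketch of the intended handoff (the scheduler changes below use this same shape):

    ggml_backend_event_t ev = ggml_backend_event_new(backend_src);
    if (ev != NULL) {
        // ... enqueue async work on backend_src ...
        ggml_backend_event_record(ev);             // mark the completion point
        ggml_backend_event_wait(backend_dst, ev);  // dst queues a wait; host continues
        ggml_backend_event_free(ev);               // sketch simplification: free when done
    } else {
        ggml_backend_synchronize(backend_src);     // no event support: block the host
    }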
// backend registry
-#define GGML_MAX_BACKENDS_REG 16
+#define GGML_REG_MAX_BACKENDS 16
struct ggml_backend_reg {
char name[128];
@@ -356,7 +406,7 @@ struct ggml_backend_reg {
void * user_data;
};
-static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG];
+static struct ggml_backend_reg ggml_backend_registry[GGML_REG_MAX_BACKENDS];
static size_t ggml_backend_registry_count = 0;
GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
@@ -401,7 +451,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
}
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
- GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
+ GGML_ASSERT(ggml_backend_registry_count < GGML_REG_MAX_BACKENDS);
size_t id = ggml_backend_registry_count;
@@ -721,6 +771,10 @@ GGML_CALL static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(gg
if (cpu_plan->cplan.work_size > 0) {
cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
+ if (cpu_plan->cplan.work_data == NULL) {
+ free(cpu_plan);
+ return NULL;
+ }
}
cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
@@ -738,22 +792,26 @@ GGML_CALL static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, g
GGML_UNUSED(backend);
}
-GGML_CALL static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
- ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+ return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
GGML_UNUSED(backend);
}
-GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
if (cpu_ctx->work_size < cplan.work_size) {
- // TODO: may be faster to free and use malloc to avoid the copy
- cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
+ free(cpu_ctx->work_data);
+ cpu_ctx->work_data = malloc(cplan.work_size);
+ if (cpu_ctx->work_data == NULL) {
+ cpu_ctx->work_size = 0;
+ return GGML_STATUS_ALLOC_FAILED;
+ }
cpu_ctx->work_size = cplan.work_size;
}
cplan.work_data = cpu_ctx->work_data;
@@ -761,14 +819,17 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str
cplan.abort_callback = cpu_ctx->abort_callback;
cplan.abort_callback_data = cpu_ctx->abort_callback_data;
- ggml_graph_compute(cgraph, &cplan);
- return true;
+ return ggml_graph_compute(cgraph, &cplan);
}
GGML_CALL static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
switch (op->op) {
case GGML_OP_CPY:
- return op->type != GGML_TYPE_IQ2_XXS && op->type != GGML_TYPE_IQ2_XS && op->type != GGML_TYPE_IQ1_S; // missing type_traits.from_float
+ return
+ op->type != GGML_TYPE_IQ2_XXS &&
+ op->type != GGML_TYPE_IQ2_XS &&
+ op->type != GGML_TYPE_IQ1_S &&
+ op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
case GGML_OP_MUL_MAT:
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
default:
@@ -791,8 +852,19 @@ static struct ggml_backend_i cpu_backend_i = {
/* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
/* .supports_op = */ ggml_backend_cpu_supports_op,
+ /* .offload_op = */ NULL,
+ /* .event_new = */ NULL,
+ /* .event_free = */ NULL,
+ /* .event_record = */ NULL,
+ /* .event_wait = */ NULL,
+ /* .event_synchronize = */ NULL,
};
+static ggml_guid_t ggml_backend_cpu_guid(void) {
+ static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
+ return &guid;
+}
+
ggml_backend_t ggml_backend_cpu_init(void) {
struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
if (ctx == NULL) {
@@ -812,6 +884,7 @@ ggml_backend_t ggml_backend_cpu_init(void) {
}
*cpu_backend = (struct ggml_backend) {
+ /* .guid = */ ggml_backend_cpu_guid(),
/* .interface = */ cpu_backend_i,
/* .context = */ ctx
};
@@ -819,7 +892,7 @@ ggml_backend_t ggml_backend_cpu_init(void) {
}
GGML_CALL bool ggml_backend_is_cpu(ggml_backend_t backend) {
- return backend && backend->iface.get_name == ggml_backend_cpu_name;
+ return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
}
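ggml_guid_matches reduces to a 16-byte comparison; keying backend identity on a GUID instead of comparing function pointers keeps the check meaningful when backends are loaded as separate modules. Roughly what the helper does (its real definition lives in ggml.c, not in this diff):

    #include <stdbool.h>
    #include <string.h>

    static bool guid_matches(ggml_guid_t a, ggml_guid_t b) {
        return memcmp(a, b, sizeof(ggml_guid)) == 0;  // 16 opaque bytes
    }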
void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
@@ -940,15 +1013,27 @@ static bool ggml_is_view_op(enum ggml_op op) {
// scheduler
-#define GGML_MAX_BACKENDS 16
-#define GGML_MAX_SPLITS 256
-#define GGML_MAX_SPLIT_INPUTS 16
+#ifndef GGML_SCHED_MAX_BACKENDS
+#define GGML_SCHED_MAX_BACKENDS 16
+#endif
+
+#ifndef GGML_SCHED_MAX_SPLITS
+#define GGML_SCHED_MAX_SPLITS 2048
+#endif
+
+#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
+#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
+#endif
+
+#ifndef GGML_SCHED_MAX_COPIES
+#define GGML_SCHED_MAX_COPIES 4
+#endif
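GGML_SCHED_MAX_COPIES bounds the pipeline-parallelism depth: with n_copies > 1 the scheduler keeps several staged copies of each split input and rotates through them, so the host can upload inputs for iteration k+1 while a backend is still computing iteration k. The rotation itself is just a round-robin counter; a sketch of the advance (assumed to happen once per graph allocation):

    sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;  // next graph uses the next slot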
struct ggml_backend_sched_split {
int backend_id;
int i_start;
int i_end;
- struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS];
+ struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
int n_inputs;
// graph view of this split
struct ggml_cgraph graph;
@@ -956,27 +1041,37 @@ struct ggml_backend_sched_split {
struct ggml_backend_sched {
bool is_reset; // true if the scheduler has been reset since the last graph split
+ bool is_alloc;
int n_backends;
- ggml_backend_t backends[GGML_MAX_BACKENDS];
- ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS];
+ ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
+ ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
ggml_gallocr_t galloc;
// hash keys of the nodes in the graph
struct ggml_hash_set hash_set;
// hash values
int * tensor_backend_id;
- struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS];
+ struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
- int * node_backend_ids; // [n_nodes]
- int n_nodes;
+ int * node_backend_ids; // [graph_size]
+ int * leaf_backend_ids; // [graph_size]
// copy of the graph with modified inputs
struct ggml_cgraph * graph;
- struct ggml_backend_sched_split splits[GGML_MAX_SPLITS];
+ // graph splits
+ struct ggml_backend_sched_split * splits;
int n_splits;
+ int splits_capacity;
+
+ // pipeline parallelism support
+ int n_copies;
+ int cur_copy;
+ ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
+ struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
+ int n_graph_inputs;
struct ggml_context * ctx;
@@ -984,17 +1079,16 @@ struct ggml_backend_sched {
void * callback_eval_user_data;
// align context_buffer to GGML_MEM_ALIGN
- #ifdef _MSC_VER
+#ifdef _MSC_VER
__declspec(align(GGML_MEM_ALIGN))
- #else
+#else
__attribute__((aligned(GGML_MEM_ALIGN)))
- #endif
- char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
+#endif
+ char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
};
-#define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node)
-#define tensor_backend_id(node) sched->tensor_backend_id[hash_id(node)]
-#define tensor_backend(node) (tensor_backend_id(node) == -1 ? NULL : sched->backends[tensor_backend_id(node)])
+#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
+#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
// returns the priority of the backend, lower id is higher priority
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
@@ -1006,7 +1100,8 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen
return -1;
}
-static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) {
+static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor) {
+ ggml_backend_buffer_t buffer = tensor->buffer;
if (buffer == NULL) {
return -1;
}
@@ -1017,12 +1112,16 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, gg
return i;
}
}
- GGML_ASSERT(false && "tensor buffer type not supported by any backend");
- return -1; // silence warning
+
+ fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
+ __func__, ggml_backend_buffer_name(buffer), tensor->name);
+ GGML_ASSERT(false);
+
+ return -1;
}
#if 0
-static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only
+static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
#define GET_CAUSE(node) causes[hash_id(node)]
#else
@@ -1035,31 +1134,48 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
// TODO: use supports_op to check if the backend supports the op
// assign pre-allocated nodes to their backend
- // dst
- int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->buffer);
- if (cur_backend != -1) {
- SET_CAUSE(node, "1.dst");
- return cur_backend;
+ int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor);
+ if (cur_backend_id != -1) {
+ SET_CAUSE(tensor, "1.dst");
+ return cur_backend_id;
}
+
// view_src
if (tensor->view_src != NULL) {
- cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src->buffer);
- if (cur_backend != -1) {
- SET_CAUSE(node, "1.vsrc");
- return cur_backend;
+ cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src);
+ if (cur_backend_id != -1) {
+ SET_CAUSE(tensor, "1.vsrc");
+ return cur_backend_id;
}
}
+
+ // graph input
+ if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
+ cur_backend_id = sched->n_backends - 1; // last backend (assumed CPU)
+ SET_CAUSE(tensor, "1.inp");
+ return cur_backend_id;
+ }
+
// assign nodes that use weights to the backend of the weights
+ // operations with weights are preferably run on the same backend as the weights
for (int i = 0; i < GGML_MAX_SRC; i++) {
const struct ggml_tensor * src = tensor->src[i];
if (src == NULL) {
continue;
}
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
- int src_backend = ggml_backend_sched_backend_from_buffer(sched, src->buffer);
- // operations with weights are always run on the same backend as the weights
- SET_CAUSE(node, "1.wgt%d", i);
- return src_backend;
+ int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src);
+ // check if a backend with higher prio wants to offload the op
+ if (src_backend_id == sched->n_backends - 1) {
+ for (int b = 0; b < src_backend_id; b++) {
+ if (ggml_backend_offload_op(sched->backends[b], tensor)) {
+ SET_CAUSE(tensor, "1.off");
+ return b;
+ }
+ }
+ }
+ SET_CAUSE(tensor, "1.wgt%d", i);
+ return src_backend_id;
}
}
@@ -1069,9 +1185,9 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
static char * fmt_size(size_t size) {
static char buffer[128];
if (size >= 1024*1024) {
- sprintf(buffer, "%zuM", size/1024/1024);
+ snprintf(buffer, sizeof(buffer), "%zuM", size/1024/1024);
} else {
- sprintf(buffer, "%zuK", size/1024);
+ snprintf(buffer, sizeof(buffer), "%zuK", size/1024);
}
return buffer;
}
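The switch to snprintf bounds the write to the static buffer; note the helper remains non-reentrant, since every caller shares that one buffer. What the bound buys, in miniature:

    char buf[8];
    snprintf(buf, sizeof(buf), "%zuM", (size_t) 123456789);  // truncated, always NUL-terminated
    // sprintf(buf, ...) with the same input would write past the 8-byte buffer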
@@ -1094,7 +1210,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
if (ggml_is_view_op(node->op)) {
continue;
}
- ggml_backend_t tensor_backend = tensor_backend(node);
+ ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
for (int j = 0; j < GGML_MAX_SRC; j++) {
@@ -1102,7 +1218,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
if (src == NULL) {
continue;
}
- ggml_backend_t src_backend = tensor_backend(src);
+ ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
}
@@ -1119,6 +1235,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
// reset splits
sched->n_splits = 0;
+ sched->n_graph_inputs = 0;
sched->is_reset = false;
struct ggml_init_params params = {
@@ -1138,33 +1255,36 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
// pass 1: assign backends to ops with pre-allocated inputs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
- if (tensor_backend_id(leaf) != -1) {
+ int * leaf_backend_id = &tensor_backend_id(leaf);
+ if (*leaf_backend_id != -1) {
// do not overwrite user assignments
continue;
}
- tensor_backend_id(leaf) = ggml_backend_sched_backend_id_from_cur(sched, leaf);
+ *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
}
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
- if (tensor_backend_id(node) != -1) {
+ int * node_backend_id = &tensor_backend_id(node);
+ if (*node_backend_id != -1) {
// do not overwrite user assignments
continue;
}
- tensor_backend_id(node) = ggml_backend_sched_backend_id_from_cur(sched, node);
+ *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
// src
for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j];
if (src == NULL) {
continue;
}
- if (tensor_backend_id(src) == -1) {
- tensor_backend_id(src) = ggml_backend_sched_backend_id_from_cur(sched, src);
+ int * src_backend_id = &tensor_backend_id(src);
+ if (*src_backend_id == -1) {
+ *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
}
}
}
#ifdef DEBUG_PASS1
- fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
+ fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif
// pass 2: expand current backend assignments
@@ -1172,97 +1292,96 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
// expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend)
// thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
- // pass 2.1 expand gpu up
+
+ // pass 2.2 expand gpu down
{
int cur_backend_id = -1;
- for (int i = graph->n_nodes - 1; i >= 0; i--) {
+ for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
- int tensor_backend_id = tensor_backend_id(node);
- if (tensor_backend_id != -1) {
- if (tensor_backend_id == sched->n_backends - 1) {
+ int * node_backend_id = &tensor_backend_id(node);
+ if (*node_backend_id != -1) {
+ if (*node_backend_id == sched->n_backends - 1) {
// skip cpu (lowest prio backend)
cur_backend_id = -1;
} else {
- cur_backend_id = tensor_backend_id;
+ cur_backend_id = *node_backend_id;
}
} else {
- tensor_backend_id(node) = cur_backend_id;
- SET_CAUSE(node, "2.1");
+ *node_backend_id = cur_backend_id;
+ SET_CAUSE(node, "2.2");
}
}
}
-
- // pass 2.2 expand gpu down
+ // pass 2.1 expand gpu up
{
int cur_backend_id = -1;
- for (int i = 0; i < graph->n_nodes; i++) {
+ for (int i = graph->n_nodes - 1; i >= 0; i--) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
- int tensor_backend_id = tensor_backend_id(node);
- if (tensor_backend_id != -1) {
- if (tensor_backend_id == sched->n_backends - 1) {
+ int * node_backend_id = &tensor_backend_id(node);
+ if (*node_backend_id != -1) {
+ if (*node_backend_id == sched->n_backends - 1) {
// skip cpu (lowest prio backend)
cur_backend_id = -1;
} else {
- cur_backend_id = tensor_backend_id;
+ cur_backend_id = *node_backend_id;
}
} else {
- tensor_backend_id(node) = cur_backend_id;
- SET_CAUSE(node, "2.2");
+ *node_backend_id = cur_backend_id;
+ SET_CAUSE(node, "2.1");
}
}
}
-
- // pass 2.3 expand rest up
+ // pass 2.4 expand rest down
{
int cur_backend_id = -1;
- for (int i = graph->n_nodes - 1; i >= 0; i--) {
+ for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
- int tensor_backend_id = tensor_backend_id(node);
- if (tensor_backend_id != -1) {
- cur_backend_id = tensor_backend_id;
+ int * node_backend_id = &tensor_backend_id(node);
+ if (*node_backend_id != -1) {
+ cur_backend_id = *node_backend_id;
} else {
- tensor_backend_id(node) = cur_backend_id;
- SET_CAUSE(node, "2.3");
+ *node_backend_id = cur_backend_id;
+ SET_CAUSE(node, "2.4");
}
}
}
-
- // pass 2.4 expand rest down
+ // pass 2.3 expand rest up
{
int cur_backend_id = -1;
- for (int i = 0; i < graph->n_nodes; i++) {
+ for (int i = graph->n_nodes - 1; i >= 0; i--) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
- int tensor_backend_id = tensor_backend_id(node);
- if (tensor_backend_id != -1) {
- cur_backend_id = tensor_backend_id;
+ int * node_backend_id = &tensor_backend_id(node);
+ if (*node_backend_id != -1) {
+ cur_backend_id = *node_backend_id;
} else {
- tensor_backend_id(node) = cur_backend_id;
- SET_CAUSE(node, "2.4");
+ *node_backend_id = cur_backend_id;
+ SET_CAUSE(node, "2.3");
}
}
}
+
#ifdef DEBUG_PASS2
- fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
+ fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif
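After the reorder, each category is filled by a forward (down) sweep first and a backward (up) sweep second, so an unassigned node inherits the backend of the nearest assigned neighbor the sweep reaches. Stripped of the view-op and CPU-skip details, each sweep is a carry-forward fill (variable names illustrative):

    // miniature of one expansion sweep: carry the last assignment into gaps
    int cur = -1;
    for (int i = 0; i < n_nodes; i++) {
        if (ids[i] != -1) cur = ids[i];
        else              ids[i] = cur;
    }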
// pass 3: assign backends to remaining src from dst and view_src
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
- int cur_backend_id = tensor_backend_id(node);
- if (node->view_src != NULL && cur_backend_id == -1) {
- cur_backend_id = tensor_backend_id(node) = tensor_backend_id(node->view_src);
+ int * cur_backend_id = &tensor_backend_id(node);
+ if (node->view_src != NULL && *cur_backend_id == -1) {
+ *cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "3.vsrc");
}
for (int j = 0; j < GGML_MAX_SRC; j++) {
@@ -1270,38 +1389,39 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
if (src == NULL) {
continue;
}
- int src_backend_id = tensor_backend_id(src);
- if (src_backend_id == -1) {
+ int * src_backend_id = &tensor_backend_id(src);
+ if (*src_backend_id == -1) {
if (src->view_src != NULL) {
// views are always on the same backend as the source
- tensor_backend_id(src) = tensor_backend_id(src->view_src);
+ *src_backend_id = tensor_backend_id(src->view_src);
SET_CAUSE(src, "3.vsrc");
} else {
- tensor_backend_id(src) = cur_backend_id;
+ *src_backend_id = *cur_backend_id;
SET_CAUSE(src, "3.cur");
}
}
}
}
#ifdef DEBUG_PASS3
- fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
+ fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif
// pass 4: split graph, find tensors that need to be copied
{
- int cur_split = 0;
+ int i_split = 0;
+ struct ggml_backend_sched_split * split = &sched->splits[0];
// find the backend of the first split, skipping view ops
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
if (!ggml_is_view_op(node->op)) {
- sched->splits[0].backend_id = tensor_backend_id(node);
+ split->backend_id = tensor_backend_id(node);
break;
}
}
- sched->splits[0].i_start = 0;
- sched->splits[0].n_inputs = 0;
- memset(sched->splits[0].inputs, 0, sizeof(sched->splits[0].inputs)); //HACK
- int cur_backend_id = sched->splits[0].backend_id;
+ split->i_start = 0;
+ split->n_inputs = 0;
+ memset(split->inputs, 0, sizeof(split->inputs)); //HACK
+ int cur_backend_id = split->backend_id;
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
@@ -1309,18 +1429,54 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
continue;
}
- int tensor_backend_id = tensor_backend_id(node);
+ const int node_backend_id = tensor_backend_id(node);
- GGML_ASSERT(tensor_backend_id != -1); // all nodes should be assigned by now
+ GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now
- if (tensor_backend_id != cur_backend_id) {
- sched->splits[cur_split].i_end = i;
- cur_split++;
- GGML_ASSERT(cur_split < GGML_MAX_SPLITS);
- sched->splits[cur_split].backend_id = tensor_backend_id;
- sched->splits[cur_split].i_start = i;
- sched->splits[cur_split].n_inputs = 0;
- cur_backend_id = tensor_backend_id;
+ // check if we should start a new split based on the sources of the current node
+ bool need_new_split = false;
+ if (node_backend_id == cur_backend_id && split->n_inputs > 0) {
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
+ struct ggml_tensor * src = node->src[j];
+ if (src == NULL) {
+ continue;
+ }
+ // check if a weight is on a different backend
+ // by starting a new split, the memory of the previously offloaded weights can be reused
+ if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
+ int src_backend_id = tensor_backend_id(src);
+ if (src_backend_id != -1 && src_backend_id != cur_backend_id) {
+ need_new_split = true;
+ break;
+ }
+ }
+ // check if the split has too many inputs
+ if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
+ const size_t id = hash_id(src);
+ int src_backend_id = sched->tensor_backend_id[id];
+ if (src_backend_id != cur_backend_id && sched->tensor_copies[id][cur_backend_id][0] == NULL) {
+ //printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name);
+ need_new_split = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (node_backend_id != cur_backend_id || need_new_split) {
+ split->i_end = i;
+ i_split++;
+ if (i_split >= sched->splits_capacity) {
+ sched->splits_capacity *= 2;
+ sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
+ GGML_ASSERT(sched->splits != NULL);
+ }
+ GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
+ split = &sched->splits[i_split];
+ split->backend_id = node_backend_id;
+ split->i_start = i;
+ split->n_inputs = 0;
+ cur_backend_id = node_backend_id;
}
// find inputs that are not on the same backend
@@ -1329,83 +1485,84 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
if (src == NULL) {
continue;
}
- int src_backend_id = tensor_backend_id(src);
+
+ const int src_backend_id = tensor_backend_id(src);
assert(src_backend_id != -1); // all inputs should be assigned by now
- if (src_backend_id != tensor_backend_id) {
- // create a copy of the input in the split's backend
- size_t id = hash_id(src);
- if (sched->tensor_copies[id][cur_backend_id] == NULL) {
- ggml_backend_t backend = sched->backends[cur_backend_id];
- struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
- ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name);
- sched->tensor_copies[id][cur_backend_id] = tensor_copy;
- tensor_backend_id(tensor_copy) = cur_backend_id;
- SET_CAUSE(tensor_copy, "4.cpy");
+ if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
+ size_t id = hash_id(src);
+ if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
+ ggml_backend_t backend = sched->backends[src_backend_id];
+ for (int c = 0; c < sched->n_copies; c++) {
+ struct ggml_tensor * tensor_copy;
+ if (c == sched->cur_copy) {
+ tensor_copy = src; // use the original tensor as the current copy
+ } else {
+ tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
+ ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
+ }
+ if (sched->n_copies > 1) {
+ ggml_set_input(tensor_copy);
+ ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
+ }
+ sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
+ SET_CAUSE(tensor_copy, "4.cpy");
+ }
+ int n_graph_inputs = sched->n_graph_inputs++;
+ GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
+ sched->graph_inputs[n_graph_inputs] = src;
+ }
+ }
- int n_inputs = sched->splits[cur_split].n_inputs++;
- GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
- sched->splits[cur_split].inputs[n_inputs] = src;
+ if (src_backend_id != node_backend_id) {
+ // create a copy of the input in the split's backend
+ const size_t id = hash_id(src);
+ if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
+ ggml_backend_t backend = sched->backends[cur_backend_id];
+ for (int c = 0; c < sched->n_copies; c++) {
+ struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
+ ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
+ if (sched->n_copies > 1) {
+ ggml_set_input(tensor_copy);
+ ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
+ }
+ sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
+ SET_CAUSE(tensor_copy, "4.cpy");
+ }
+ int n_inputs = split->n_inputs++;
+ GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
+ split->inputs[n_inputs] = src;
}
- node->src[j] = sched->tensor_copies[id][cur_backend_id];
+ node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
}
}
}
- sched->splits[cur_split].i_end = graph->n_nodes;
- sched->n_splits = cur_split + 1;
+ split->i_end = graph->n_nodes;
+ sched->n_splits = i_split + 1;
}
#ifdef DEBUG_PASS4
- fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
-#endif
-
-#ifndef NDEBUG
- // sanity check: all sources should have the same backend as the node
- for (int i = 0; i < graph->n_nodes; i++) {
- struct ggml_tensor * node = graph->nodes[i];
- ggml_backend_t tensor_backend = tensor_backend(node);
- if (tensor_backend == NULL) {
- fprintf(stderr, "!!!!!!! %s has no backend\n", node->name);
- }
- if (node->view_src != NULL && tensor_backend != tensor_backend(node->view_src)) {
- fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n",
- node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
- node->view_src->name, tensor_backend(node->view_src) ? ggml_backend_name(tensor_backend(node->view_src)) : "NULL");
- }
- for (int j = 0; j < GGML_MAX_SRC; j++) {
- struct ggml_tensor * src = node->src[j];
- if (src == NULL) {
- continue;
- }
- ggml_backend_t src_backend = tensor_backend(src);
- if (src_backend != tensor_backend /* && src_backend != NULL */) {
- fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n",
- node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
- j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL");
- }
- if (src->view_src != NULL && src_backend != tensor_backend(src->view_src)) {
- fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n",
- src->name, src_backend ? ggml_backend_name(src_backend) : "NULL",
- src->view_src->name, tensor_backend(src->view_src) ? ggml_backend_name(tensor_backend(src->view_src)) : "NULL");
- }
- }
- }
- fflush(stderr);
+ fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif
// create copies of the graph for each split
- // FIXME: avoid this copy, pass split inputs to ggml_gallocr_alloc_graph_n in some other way
- struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false);
+ // TODO: avoid this copy
+ struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2, false);
for (int i = 0; i < sched->n_splits; i++) {
struct ggml_backend_sched_split * split = &sched->splits[i];
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
+ // add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
for (int j = 0; j < split->n_inputs; j++) {
+ assert(graph_copy->size > (graph_copy->n_nodes + 1));
+
struct ggml_tensor * input = split->inputs[j];
- struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id];
+ const size_t input_id = hash_id(input);
+ struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy];
// add a dependency to the input source so that it is not freed before the copy is done
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
- sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input);
+ input_dep->src[0] = input;
+ sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id];
graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
// add a dependency to the input copy so that it is allocated at the start of the split
@@ -1414,22 +1571,61 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
}
for (int j = split->i_start; j < split->i_end; j++) {
+ assert(graph_copy->size > graph_copy->n_nodes);
sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(graph->nodes[j]);
graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
}
}
+
+ if (sched->n_copies > 1) {
+ // add input copies as leafs so that they are allocated first
+ for (int i = 0; i < sched->n_graph_inputs; i++) {
+ struct ggml_tensor * input = sched->graph_inputs[i];
+ size_t id = hash_id(input);
+ int backend_id = tensor_backend_id(input);
+ for (int c = 0; c < sched->n_copies; c++) {
+ struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+ sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
+ graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
+ }
+ }
+
+ for (int i = 0; i < sched->n_splits; i++) {
+ struct ggml_backend_sched_split * split = &sched->splits[i];
+ int backend_id = split->backend_id;
+ for (int j = 0; j < split->n_inputs; j++) {
+ struct ggml_tensor * input = split->inputs[j];
+ size_t id = hash_id(input);
+ for (int c = 0; c < sched->n_copies; c++) {
+ struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+ sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
+ graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
+ }
+ }
+ }
+ }
+
+ // add leafs from the original graph
+ for (int i = 0; i < graph->n_leafs; i++) {
+ struct ggml_tensor * leaf = graph->leafs[i];
+ sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
+ graph_copy->leafs[graph_copy->n_leafs++] = leaf;
+ }
+
sched->graph = graph_copy;
}
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
- // ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids);
+ // allocate graph
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
+ // the re-allocation may cause the split inputs to be moved to a different address
+ ggml_backend_sched_synchronize(sched);
#ifndef NDEBUG
- fprintf(stderr, "ggml_backend_sched: failed to allocate graph, reserving\n");
+ fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
#endif
- ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids);
+ ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
- fprintf(stderr, "ggml_backend_sched: failed to allocate graph\n");
+ fprintf(stderr, "%s: failed to allocate graph\n", __func__);
return false;
}
}
@@ -1437,10 +1633,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
return true;
}
-static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
- uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
- uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
-
+static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
struct ggml_backend_sched_split * splits = sched->splits;
for (int i = 0; i < sched->n_splits; i++) {
@@ -1449,33 +1642,35 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
ggml_backend_t split_backend = sched->backends[split_backend_id];
// copy the input tensors to the split backend
- uint64_t copy_start_us = ggml_time_us();
for (int j = 0; j < split->n_inputs; j++) {
+ ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
struct ggml_tensor * input = split->inputs[j];
- struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id];
+ struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
- GGML_ASSERT(input->buffer != NULL);
- GGML_ASSERT(input_cpy->buffer != NULL);
-
- ggml_backend_tensor_copy_async(split_backend, input, input_cpy);
+ if (input->flags & GGML_TENSOR_FLAG_INPUT) {
+ // inputs from the user must be copied immediately, to prevent the user from overwriting the data before the copy is done
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
+ ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
+ } else {
+ ggml_backend_synchronize(split_backend);
+ }
+ ggml_backend_tensor_copy(input, input_cpy);
+ } else {
+ // wait for the split backend to finish using the input before overwriting it
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
+ ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]);
+ } else {
+ ggml_backend_synchronize(split_backend);
+ }
+ ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
+ }
}
- //ggml_backend_synchronize(split_backend); // necessary to measure copy time
- int64_t copy_end_us = ggml_time_us();
- copy_us[split_backend_id] += copy_end_us - copy_start_us;
-#if 0
- char split_filename[GGML_MAX_NAME];
- snprintf(split_filename, GGML_MAX_NAME, "split_%i_%s.dot", i, ggml_backend_name(split_backend));
- ggml_graph_dump_dot(split->graph, NULL, split_filename);
-#endif
-
-
- uint64_t compute_start_us = ggml_time_us();
if (!sched->callback_eval) {
- if (!ggml_backend_graph_compute(split_backend, &split->graph)) {
- return false;
+ enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
+ if (ec != GGML_STATUS_SUCCESS) {
+ return ec;
}
- //ggml_backend_synchronize(split_backend); // necessary to measure compute time
} else {
// similar to ggml_backend_compare_graph_backend
for (int j0 = 0; j0 < split->graph.n_nodes; j0++) {
@@ -1494,10 +1689,14 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
- if (!ggml_backend_graph_compute(split_backend, &gv)) {
- return false;
+ enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv);
+ if (ec != GGML_STATUS_SUCCESS) {
+ return ec;
}
+ // TODO: pass backend to the callback, then the user can decide if they want to synchronize
+ ggml_backend_synchronize(split_backend);
+
if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
break;
}
@@ -1505,39 +1704,58 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
j0 = j1;
}
}
- uint64_t compute_end_us = ggml_time_us();
- compute_us[split_backend_id] += compute_end_us - compute_start_us;
- }
-#if 0
- // per-backend timings
- fprintf(stderr, "sched_compute_splits times (%d splits):\n", sched->n_splits);
- for (int i = 0; i < sched->n_backends; i++) {
- if (copy_us[i] > 0 || compute_us[i] > 0) {
- fprintf(stderr, "\t%5.5s: %lu us copy, %lu us compute\n", ggml_backend_name(sched->backends[i]), copy_us[i], compute_us[i]);
+ // record the event of this copy
+ if (split->n_inputs > 0) {
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
+ ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]);
+ }
}
}
-#endif
- return true;
+ sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
+
+ return GGML_STATUS_SUCCESS;
}
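
The loop above pipelines host-to-backend uploads by rotating `sched->cur_copy` through `n_copies` staging copies of each split input and gating reuse on per-copy events. A minimal sketch of the same round-robin double-buffering idea, with hypothetical stand-ins (`event_wait`, `event_record`, `upload`, `compute_async`) in place of the real backend API:

    #define N_COPIES 2
    typedef int event_t;                              // stand-in for ggml_backend_event_t
    static void    event_wait(event_t e)     { (void)e; }
    static event_t event_record(void)        { return 1; }
    static void    upload(int i, int slot)   { (void)i; (void)slot; }
    static void    compute_async(int slot)   { (void)slot; }

    static void pipeline(int n_batches) {
        event_t events[N_COPIES] = {0};
        int cur = 0;
        for (int i = 0; i < n_batches; i++) {
            if (events[cur]) event_wait(events[cur]); // slot still in use? wait first
            upload(i, cur);                           // stage batch i into slot cur
            compute_async(cur);                       // backend consumes slot cur
            events[cur] = event_record();             // mark when the backend is done
            cur = (cur + 1) % N_COPIES;               // same rotation as sched->cur_copy
        }
    }
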
-ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) {
+ggml_backend_sched_t ggml_backend_sched_new(
+ ggml_backend_t * backends,
+ ggml_backend_buffer_type_t * bufts,
+ int n_backends,
+ size_t graph_size,
+ bool parallel) {
GGML_ASSERT(n_backends > 0);
- GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS);
+ GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
+ GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
- struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
+ struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched));
// initialize hash table
- sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
- sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
- sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
- sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size);
+ sched->hash_set = ggml_hash_set_new(graph_size);
+ sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0]));
+ sched->tensor_copies = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0]));
+
+ const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
+ sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
+ sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
sched->n_backends = n_backends;
- for (int i = 0; i < n_backends; i++) {
- sched->backends[i] = backends[i];
- sched->bufts[i] = bufts ? bufts[i] : ggml_backend_get_default_buffer_type(backends[i]);
+
+ sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
+
+ const int initial_splits_capacity = 16;
+ sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0]));
+ sched->splits_capacity = initial_splits_capacity;
+
+ for (int b = 0; b < n_backends; b++) {
+ sched->backends[b] = backends[b];
+ sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
+ GGML_ASSERT(ggml_backend_buft_supports_backend(sched->bufts[b], backends[b]));
+ if (sched->n_copies > 1) {
+ for (int c = 0; c < sched->n_copies; c++) {
+ sched->events[b][c] = ggml_backend_event_new(backends[b]);
+ }
+ }
}
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
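
The new `parallel` flag controls all of this machinery: when true the scheduler allocates `GGML_SCHED_MAX_COPIES` staging copies and one event per backend/copy pair; when false it degenerates to the old sequential behavior. A hedged construction sketch, assuming only the standard CPU backend and eliding error handling:

    ggml_backend_t cpu = ggml_backend_cpu_init();
    ggml_backend_t backends[1] = { cpu };

    // bufts == NULL -> default buffer type per backend; parallel == false -> n_copies == 1
    ggml_backend_sched_t sched = ggml_backend_sched_new(
        backends, /*bufts=*/NULL, /*n_backends=*/1,
        /*graph_size=*/GGML_DEFAULT_GRAPH_SIZE, /*parallel=*/false);
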
@@ -1551,55 +1769,91 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
if (sched == NULL) {
return;
}
+ for (int b = 0; b < sched->n_backends; b++) {
+ for (int c = 0; c < sched->n_copies; c++) {
+ ggml_backend_event_free(sched->events[b][c]);
+ }
+ }
ggml_gallocr_free(sched->galloc);
ggml_free(sched->ctx);
+ free(sched->splits);
free(sched->hash_set.keys);
free(sched->tensor_backend_id);
free(sched->tensor_copies);
free(sched->node_backend_ids);
+ free(sched->leaf_backend_ids);
free(sched);
}
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
// reset state for the next run
- size_t hash_size = sched->hash_set.size;
- memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
- memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
- memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
+ if (!sched->is_reset) {
+ size_t hash_size = sched->hash_set.size;
+ memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
+ memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
+ memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
- sched->is_reset = true;
+ sched->is_reset = true;
+ }
+ sched->is_alloc = false;
}
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
+ GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes);
+
ggml_backend_sched_split_graph(sched, measure_graph);
- if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids)) {
+ // TODO: extract this to a separate function
+ if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
return false;
}
ggml_backend_sched_reset(sched);
+ ggml_backend_sched_synchronize(sched);
+
return true;
}
-bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
- GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
-
- if (!sched->is_reset) {
- ggml_backend_sched_reset(sched);
- }
+bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
+ GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes);
ggml_backend_sched_split_graph(sched, graph);
+
if (!ggml_backend_sched_alloc_splits(sched)) {
return false;
}
- if (!ggml_backend_sched_compute_splits(sched)) {
- return false;
- }
+ sched->is_alloc = true;
return true;
}
+enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
+ enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
+ ggml_backend_sched_synchronize(sched);
+ return err;
+}
+
+enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
+ if (!sched->is_reset && !sched->is_alloc) {
+ ggml_backend_sched_reset(sched);
+ }
+
+ if (!sched->is_alloc) {
+ if (!ggml_backend_sched_alloc_graph(sched, graph)) {
+ return GGML_STATUS_ALLOC_FAILED;
+ }
+ }
+
+ return ggml_backend_sched_compute_splits(sched);
+}
+
+void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
+ for (int i = 0; i < sched->n_backends; i++) {
+ ggml_backend_synchronize(sched->backends[i]);
+ }
+}
+
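ggml_backend_sched_graph_compute is now just the async variant followed by a full synchronize. Calling the two halves separately lets the host overlap its own work with backend compute; a hedged sketch where `do_host_side_work` is a hypothetical placeholder:

    enum ggml_status st = ggml_backend_sched_graph_compute_async(sched, graph);
    if (st == GGML_STATUS_SUCCESS) {
        do_host_side_work();                   // hypothetical: runs while backends compute
        ggml_backend_sched_synchronize(sched); // block until every backend has finished
        // only now is it safe to read the graph's output tensors
    }
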
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
sched->callback_eval = callback;
sched->callback_eval_user_data = user_data;
@@ -1609,19 +1863,24 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
return sched->n_splits;
}
+int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
+ return sched->n_copies;
+}
+
size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
+
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
}
-void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
+void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
tensor_backend_id(node) = backend_index;
}
-ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
+ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
int backend_index = tensor_backend_id(node);
if (backend_index == -1) {
return NULL;
@@ -1639,7 +1898,6 @@ void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * t
tensor->buffer = buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
- tensor->backend = tensor->view_src->backend;
ggml_backend_buffer_init_tensor(buffer, tensor);
}
@@ -1718,10 +1976,10 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
struct ggml_hash_set hash_set = {
/* .size = */ graph->visited_hash_table.size,
- /* .keys = */ calloc(sizeof(hash_set.keys[0]), graph->visited_hash_table.size) // NOLINT
+ /* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT
};
- struct ggml_tensor ** node_copies = calloc(sizeof(node_copies[0]), hash_set.size); // NOLINT
- bool * node_init = calloc(sizeof(node_init[0]), hash_set.size);
+ struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
+ bool * node_init = calloc(hash_set.size, sizeof(node_init[0]));
struct ggml_init_params params = {
/* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
@@ -1844,6 +2102,8 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
return true;
}
+#include "llamafile/log.h"
+
GGML_CALL static void system_exit(int rc) {
exit(rc);
}
@@ -1856,11 +2116,21 @@ GGML_CALL static void *system_malloc(size_t n) {
return malloc(n);
}
+GGML_CALL static char *system_getenv(const char *s) {
+ return getenv(s);
+}
+
+GGML_CALL static long system_write(int fd, const void *p, long n) {
+ return write(fd, p, n);
+}
+
static const struct ggml_backend_api kGgmlBackendApi = {
&FLAG_log_disable,
system_exit,
system_free,
system_malloc,
+ system_getenv,
+ system_write,
ggml_backend_register,
ggml_backend_buffer_init,
ggml_backend_cpu_buffer_from_ptr,
@@ -1887,6 +2157,9 @@ static const struct ggml_backend_api kGgmlBackendApi = {
ggml_rope_yarn_corr_dims,
ggml_op_desc,
ggml_backend_buffer_is_host,
+ ggml_guid_matches,
+ ggml_is_empty,
+ ggml_are_same_shape,
};
const struct ggml_backend_api *ggml_backend_api(void) {
diff --git a/llama.cpp/ggml-backend.h b/llama.cpp/ggml-backend.h
index 9ea70ead8e..e8f78b2f0b 100644
--- a/llama.cpp/ggml-backend.h
+++ b/llama.cpp/ggml-backend.h
@@ -9,6 +9,7 @@ extern "C" {
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+ typedef struct ggml_backend_event * ggml_backend_event_t;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
@@ -49,7 +50,7 @@ extern "C" {
// Backend
//
-
+ GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
GGML_API void ggml_backend_free(ggml_backend_t backend);
@@ -66,16 +67,30 @@ extern "C" {
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
- GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
- GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
- GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
- GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
- GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
+ GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+ GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
+ GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
// tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
- GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
+
+ // asynchronous copy
+ // the copy is performed after all the currently queued operations in backend_src
+ // backend_dst will wait for the copy to complete before performing other operations
+ // automatic fallback to sync copy if async is not supported
+ GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+ // events
+ GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
+ GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
+ GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
+ GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
+ GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
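
The event API above gives one backend a way to wait on another without stalling the host thread. A hedged sketch of the record/wait handshake (`producer_graph` and `consumer_graph` are hypothetical; an event is tied to the backend it was created on):

    ggml_backend_event_t ev = ggml_backend_event_new(backend_src);

    ggml_backend_graph_compute_async(backend_src, producer_graph); // queue producer work
    ggml_backend_event_record(ev);                                 // signal point on backend_src

    ggml_backend_event_wait(backend_dst, ev);                      // consumer waits asynchronously
    ggml_backend_graph_compute_async(backend_dst, consumer_graph);

    ggml_backend_synchronize(backend_dst);                         // before freeing the event
    ggml_backend_event_free(ev);
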
//
// CPU backend
@@ -122,27 +137,31 @@ extern "C" {
/*
Example usage:
- sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
- // sched is initialized with measure allocators and cannot be used until allocated with a measure graph
+ // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will preferably
+ // be assigned to run on the same backend as the buffer
+ ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
- // initialize buffers from a measure graph
- measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
+ sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
- // in build_graph:
- build_graph(...) {
- // manually assign nodes to a backend (optional, should not be needed in most cases)
- struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
- ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
- }
+ // initialize buffers from a max size graph (optional)
+ reserve_graph = build_graph(sched, max_batch_size);
- // allocate backend buffers from measure graph
- ggml_backend_sched_init_measure(sched, measure_graph);
+ // manually assign nodes to a backend (optional, should not be needed in most cases)
+ struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
+ ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
- // the scheduler is now ready to compute graphs
+ ggml_backend_sched_reserve(sched, reserve_graph);
// compute
graph = build_graph(sched);
ggml_backend_sched_graph_compute(sched, graph);
+
+ // if there are graph inputs:
+ ggml_backend_sched_reset(sched);
+ ggml_backend_sched_alloc_graph(sched, graph);
+ ggml_backend_tensor_set(input_tensor, ...);
+ ggml_backend_sched_graph_compute(sched, graph);
+ }
*/
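
As a compilable rendering of the pseudocode above (hedged: single CPU backend; `build_graph`, `max_batch_size`, `input_tensor`, `data`, and `nbytes` are placeholders):

    ggml_backend_t cpu = ggml_backend_cpu_init();
    ggml_backend_sched_t sched = ggml_backend_sched_new(&cpu, NULL, 1,
                                                        GGML_DEFAULT_GRAPH_SIZE, false);

    // size the backend buffers once, against a worst-case graph
    ggml_backend_sched_reserve(sched, build_graph(sched, max_batch_size));

    // per iteration: allocate first, then set inputs, then compute
    struct ggml_cgraph * graph = build_graph(sched, batch_size);
    ggml_backend_sched_reset(sched);
    ggml_backend_sched_alloc_graph(sched, graph);
    ggml_backend_tensor_set(input_tensor, data, 0, nbytes);
    ggml_backend_sched_graph_compute(sched, graph);
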
struct ggml_backend_sched;
@@ -157,26 +176,32 @@ extern "C" {
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
// Initialize a backend scheduler
- GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
- GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+ GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
+ GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+
// Initialize backend buffers from a measure graph
- GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+ GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+
// Get the number of splits of the last graph
- GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+ GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+ GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
- GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+ GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
- GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
- GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
+ GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+ GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
// Allocate and compute graph on the backend scheduler
- GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+ GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+ GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+ GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+ GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
// Reset all assignments and allocators - must be called before changing the node backends
- GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
+ GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
// Set a callback to be called for each resulting node during graph compute
- GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+ GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
//
// Utils
diff --git a/llama.cpp/ggml-common.h b/llama.cpp/ggml-common.h
new file mode 100644
index 0000000000..b7a17ccf97
--- /dev/null
+++ b/llama.cpp/ggml-common.h
@@ -0,0 +1,1866 @@
+#ifndef GGML_COMMON_DECL
+
+#if defined(GGML_COMMON_DECL_C)
+#include <stdint.h>
+
+typedef uint16_t ggml_half;
+typedef uint32_t ggml_half2;
+
+#define GGML_COMMON_AGGR
+
+#define GGML_COMMON_DECL
+#elif defined(GGML_COMMON_DECL_METAL)
+#include <metal_stdlib>
+
+typedef half ggml_half;
+typedef half2 ggml_half2;
+
+#define GGML_COMMON_AGGR
+
+#define GGML_COMMON_DECL
+#elif defined(GGML_COMMON_DECL_CUDA)
+#include <cuda_fp16.h>
+#include <cstdint>
+
+typedef half ggml_half;
+typedef half2 ggml_half2;
+
+#define GGML_COMMON_AGGR data
+
+#define GGML_COMMON_DECL
+#elif defined(GGML_COMMON_DECL_HIP)
+#include <hip/hip_fp16.h>
+#include <cstdint>
+
+typedef half ggml_half;
+typedef half2 ggml_half2;
+
+#define GGML_COMMON_AGGR data
+
+#define GGML_COMMON_DECL
+#elif defined(GGML_COMMON_DECL_SYCL)
+#include <sycl/half_type.hpp>
+#include <cstdint>
+
+typedef sycl::half ggml_half;
+typedef sycl::half2 ggml_half2;
+
+#define GGML_COMMON_AGGR data
+
+#define GGML_COMMON_DECL
+#endif
+
+#if defined(GGML_COMMON_DECL)
+
+#ifndef __cplusplus
+#ifndef static_assert
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
+#define static_assert(cond, msg) _Static_assert(cond, msg)
+#else
+#define static_assert(cond, msg) struct global_scope_noop_trick
+#endif
+#endif
+#endif // __cplusplus
+
+// QK = number of values after dequantization
+// QK_K = super-block size
+
+#ifdef GGML_QKK_64
+#define QK_K 64
+#define K_SCALE_SIZE 4
+#else
+#define QK_K 256
+#define K_SCALE_SIZE 12
+#endif // GGML_QKK_64
+
+#if defined(GGML_COMMON_DECL_CUDA) || defined(GGML_COMMON_DECL_HIP) || defined(GGML_COMMON_DECL_SYCL)
+// QR = QK / number of values before dequantization
+// QI = number of 32 bit integers before dequantization
+
+#define QI4_0 (QK4_0 / (4 * QR4_0))
+#define QR4_0 2
+
+#define QI4_1 (QK4_1 / (4 * QR4_1))
+#define QR4_1 2
+
+#define QI5_0 (QK5_0 / (4 * QR5_0))
+#define QR5_0 2
+
+#define QI5_1 (QK5_1 / (4 * QR5_1))
+#define QR5_1 2
+
+#define QI8_0 (QK8_0 / (4 * QR8_0))
+#define QR8_0 1
+
+#define QI8_1 (QK8_1 / (4 * QR8_1))
+#define QR8_1 1
+
+#define QI2_K (QK_K / (4*QR2_K))
+#define QR2_K 4
+
+#define QI3_K (QK_K / (4*QR3_K))
+#define QR3_K 4
+
+#define QI4_K (QK_K / (4*QR4_K))
+#define QR4_K 2
+
+#define QI5_K (QK_K / (4*QR5_K))
+#define QR5_K 2
+
+#define QI6_K (QK_K / (4*QR6_K))
+#define QR6_K 2
+
+#define QI2_XXS (QK_K / (4*QR2_XXS))
+#define QR2_XXS 8
+
+#define QI2_XS (QK_K / (4*QR2_XS))
+#define QR2_XS 8
+
+#define QI2_S (QK_K / (4*QR2_S))
+#define QR2_S 8
+
+#define QI3_XXS (QK_K / (4*QR3_XXS))
+#define QR3_XXS 8
+
+#define QI3_XS (QK_K / (4*QR3_XS))
+#define QR3_XS 8
+
+#define QI1_S (QK_K / (4*QR1_S))
+#define QR1_S 8
+
+#define QI4_NL (QK4_NL / (4*QR4_NL))
+#define QR4_NL 2
+
+#if QK_K == 64
+#define QI4_XS QI4_NL
+#define QR4_XS QR4_NL
+#else
+#define QI4_XS (QK_K / (4*QR4_XS))
+#define QR4_XS 8
+#endif
+
+#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP || GGML_COMMON_DECL_SYCL
+
+#define QK4_0 32
+typedef struct {
+ ggml_half d; // delta
+ uint8_t qs[QK4_0 / 2]; // nibbles / quants
+} block_q4_0;
+static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 block size/padding");
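
q4_0 packs 32 weights as one fp16 scale plus 16 nibble bytes; dequantization is w = d * (q - 8), with the low nibbles holding the first 16 values. A hedged reference sketch (`ggml_half_to_float` stands in for an fp16-to-fp32 helper, which this header does not provide):

    float ggml_half_to_float(ggml_half h); // assumed fp16 -> fp32 helper

    static void dequant_q4_0(const block_q4_0 * b, float out[QK4_0]) {
        const float d = ggml_half_to_float(b->d);
        for (int j = 0; j < QK4_0 / 2; j++) {
            out[j]             = d * ((b->qs[j] & 0x0F) - 8); // low nibble: values 0..15
            out[j + QK4_0 / 2] = d * ((b->qs[j] >>  4) - 8); // high nibble: values 16..31
        }
    }
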
+
+#define QK4_1 32
+typedef struct {
+ union {
+ struct {
+ ggml_half d; // delta
+ ggml_half m; // min
+ } GGML_COMMON_AGGR;
+ ggml_half2 dm;
+ };
+ uint8_t qs[QK4_1 / 2]; // nibbles / quants
+} block_q4_1;
+static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
+
+#define QK5_0 32
+typedef struct {
+ ggml_half d; // delta
+ uint8_t qh[4]; // 5-th bit of quants
+ uint8_t qs[QK5_0 / 2]; // nibbles / quants
+} block_q5_0;
+static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
+
+#define QK5_1 32
+typedef struct {
+ union {
+ struct {
+ ggml_half d; // delta
+ ggml_half m; // min
+ } GGML_COMMON_AGGR;
+ ggml_half2 dm;
+ };
+ uint8_t qh[4]; // 5-th bit of quants
+ uint8_t qs[QK5_1 / 2]; // nibbles / quants
+} block_q5_1;
+static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_half) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
+
+#define QK8_0 32
+typedef struct {
+ ggml_half d; // delta
+ int8_t qs[QK8_0]; // quants
+} block_q8_0;
+static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block size/padding");
+
+#define QK8_1 32
+typedef struct {
+ union {
+ struct {
+ ggml_half d; // delta
+ ggml_half s; // d * sum(qs[i])
+ } GGML_COMMON_AGGR;
+ ggml_half2 ds;
+ };
+ int8_t qs[QK8_1]; // quants
+} block_q8_1;
+static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
+
+// [kawrakow] These two are needed for performance on Arm
+typedef struct {
+ ggml_half d[8];
+ int8_t qs[4*QK8_1];
+} block_q8_1_x4;
+static_assert(sizeof(block_q8_1_x4) == 4*sizeof(block_q8_1), "wrong q8_1_x4 block size/padding");
+typedef struct {
+ ggml_half d[4];
+ int8_t qs[4*QK8_0];
+} block_q8_0_x4;
+static_assert(sizeof(block_q8_0_x4) == 4*sizeof(block_q8_0), "wrong q8_0_x4 block size/padding");
+
+//
+// Super-block quantization structures
+//
+
+// 2-bit quantization
+// weight is represented as x = a * q + b
+// 16 blocks of 16 elements each
+// Effectively 2.625 bits per weight
+typedef struct {
+ uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
+ uint8_t qs[QK_K/4]; // quants
+ union {
+ struct {
+ ggml_half d; // super-block scale for quantized scales
+ ggml_half dmin; // super-block scale for quantized mins
+ } GGML_COMMON_AGGR;
+ ggml_half2 dm;
+ };
+} block_q2_K;
+static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
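
The 2.625 bpw figure follows directly from the layout: 16 scale bytes + 64 quant bytes + two fp16 scales = 84 bytes for 256 weights. A quick check (assuming this header is included with GGML_COMMON_DECL_C defined and QK_K == 256):

    #include <stdio.h>
    int main(void) {
        // (16 + 64 + 4) bytes * 8 bits / 256 weights = 2.6250
        printf("q2_K: %.4f bits per weight\n", (double)sizeof(block_q2_K) * 8 / QK_K);
        return 0;
    }
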
+
+// 3-bit quantization
+// weight is represented as x = a * q
+// 16 blocks of 16 elements each
+// Effectively 3.4375 bits per weight
+#ifdef GGML_QKK_64
+typedef struct {
+ uint8_t hmask[QK_K/8]; // quants - high bit
+ uint8_t qs[QK_K/4]; // quants - low 2 bits
+ uint8_t scales[2];
+ ggml_half d; // super-block scale
+} block_q3_K;
+static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
+#else
+typedef struct {
+ uint8_t hmask[QK_K/8]; // quants - high bit
+ uint8_t qs[QK_K/4]; // quants - low 2 bits
+ uint8_t scales[12]; // scales, quantized with 6 bits
+ ggml_half d; // super-block scale
+} block_q3_K;
+static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
+#endif
+
+// 4-bit quantization
+// 8 blocks of 32 elements each
+// weight is represented as x = a * q + b
+// Effectively 4.5 bits per weight
+#ifdef GGML_QKK_64
+typedef struct {
+ ggml_half d[2]; // super-block scales/mins
+ uint8_t scales[2]; // 4-bit block scales/mins
+ uint8_t qs[QK_K/2]; // 4-bit quants
+} block_q4_K;
+static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + QK_K/2 + 2, "wrong q4_K block size/padding");
+#else
+typedef struct {
+ union {
+ struct {
+ ggml_half d; // super-block scale for quantized scales
+ ggml_half dmin; // super-block scale for quantized mins
+ } GGML_COMMON_AGGR;
+ ggml_half2 dm;
+ };
+ uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
+ uint8_t qs[QK_K/2]; // 4-bit quants
+} block_q4_K;
+static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
+#endif
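
In the 256-value branch, `scales[K_SCALE_SIZE]` packs eight 6-bit scales and eight 6-bit mins into 12 bytes (2 * 8 * 6 bits = 96 bits). A hedged decoder sketch for sub-block j; the bit layout assumed here (first four pairs in the low 6 bits of bytes 0..7, the remaining four split across bytes 8..11 plus the spare top 2 bits) is illustrative:

    static void unpack_scale_min(int j, const uint8_t q[12], uint8_t * sc, uint8_t * mn) {
        if (j < 4) {
            *sc = q[j]     & 63;                              // low 6 bits of bytes 0..3
            *mn = q[j + 4] & 63;                              // low 6 bits of bytes 4..7
        } else {
            *sc = (q[j + 4] & 0x0F) | ((q[j - 4] >> 6) << 4); // nibble + spare top 2 bits
            *mn = (q[j + 4] >>  4)  | ((q[j]     >> 6) << 4);
        }
    }
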
+
+// 5-bit quantization
+// 8 blocks of 32 elements each
+// weight is represented as x = a * q + b
+// Effectively 5.5 bits per weight
+#ifdef GGML_QKK_64
+typedef struct {
+ ggml_half d; // super-block scale
+ int8_t scales[QK_K/16]; // 8-bit block scales
+ uint8_t qh[QK_K/8]; // quants, high bit
+ uint8_t qs[QK_K/2]; // quants, low 4 bits
+} block_q5_K;
+static_assert(sizeof(block_q5_K) == sizeof(ggml_half) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
+#else
+typedef struct {
+ union {
+ struct {
+ ggml_half d; // super-block scale for quantized scales
+ ggml_half dmin; // super-block scale for quantized mins
+ } GGML_COMMON_AGGR;
+ ggml_half2 dm;
+ };
+ uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
+ uint8_t qh[QK_K/8]; // quants, high bit
+ uint8_t qs[QK_K/2]; // quants, low 4 bits
+} block_q5_K;
+static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
+#endif
+
+// 6-bit quantization
+// weight is represented as x = a * q
+// 16 blocks of 16 elements each
+// Effectively 6.5625 bits per weight
+typedef struct {
+ uint8_t ql[QK_K/2]; // quants, lower 4 bits
+ uint8_t qh[QK_K/4]; // quants, upper 2 bits
+ int8_t scales[QK_K/16]; // scales, quantized with 8 bits
+ ggml_half d; // super-block scale
+} block_q6_K;
+static_assert(sizeof(block_q6_K) == sizeof(ggml_half) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
+
+// This is only used for intermediate quantization and dot products
+// [kawrakow] Note: I have switched the order of bsums and qs. This results in some performance gain on Arm
+typedef struct {
+ float d; // delta
+ int16_t bsums[QK_K/16]; // sum of quants in groups of 16
+ int8_t qs[QK_K]; // quants
+} block_q8_K;
+static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
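
Caching `bsums` means the k-quant dot products can apply their per-block min (`dmin`) terms from 16-element group sums instead of re-reading all 256 quants. A hedged sketch of how the sums would be filled during quantization:

    static void fill_bsums(block_q8_K * b) {
        for (int g = 0; g < QK_K / 16; g++) {
            int sum = 0;
            for (int k = 0; k < 16; k++) {
                sum += b->qs[16 * g + k];   // one int16 sum per group of 16 quants
            }
            b->bsums[g] = (int16_t)sum;
        }
    }
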
+
+// (Almost) "true" 2-bit quantization.
+// Due to the need to use blocks as per ggml design, it ends up using
+// 2.0625 bpw because of the 16-bit scale for each block of 256.
+typedef struct {
+ ggml_half d;
+ uint16_t qs[QK_K/8];
+} block_iq2_xxs;
+static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
+
+// 2.3125 bpw quants
+typedef struct {
+ ggml_half d;
+ uint16_t qs[QK_K/8];
+ uint8_t scales[QK_K/32];
+} block_iq2_xs;
+static_assert(sizeof(block_iq2_xs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
+
+// 2.5625 bpw quants
+typedef struct {
+ ggml_half d;
+ uint8_t qs[QK_K/4];
+ uint8_t qh[QK_K/32];
+ uint8_t scales[QK_K/32];
+} block_iq2_s;
+static_assert(sizeof(block_iq2_s) == sizeof(ggml_half) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
+
+// (Almost) "true" 3-bit quantization.
+// Due to the need to use blocks as per ggml design, it ends up using
+// 3.0625 bpw because of the 16-bit scale for each block of 256.
+typedef struct {
+ ggml_half d;
+ uint8_t qs[3*QK_K/8];
+} block_iq3_xxs;
+static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_half) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
+
+// 3.4375 bpw
+#if QK_K == 64
+#define IQ3S_N_SCALE 2
+#else
+#define IQ3S_N_SCALE QK_K/64
+#endif
+typedef struct {
+ ggml_half d;
+ uint8_t qs[QK_K/4];
+ uint8_t qh[QK_K/32];
+ uint8_t signs[QK_K/8];
+ uint8_t scales[IQ3S_N_SCALE];
+} block_iq3_s;
+static_assert(sizeof(block_iq3_s) == sizeof(ggml_half) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
+
+typedef struct {
+ ggml_half d;
+ uint8_t qs[QK_K/8];
+ uint16_t qh[QK_K/32];
+} block_iq1_s;
+static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
+
+// 1.75 bpw
+typedef struct {
+ uint8_t qs[QK_K/8]; // grid index, low 8 bits
+ uint8_t qh[QK_K/16]; // grid index, high 3 bits + grid shift bit (for two groups of 8)
+#if QK_K == 64
+ ggml_half d;
+#endif
+ uint8_t scales[QK_K/32]; // 3-bit block scales (4-bit if QK_K == 64)
+} block_iq1_m;
+#if QK_K == 64
+static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32 + sizeof(ggml_half), "wrong iq1_m block size/padding");
+#else
+static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32, "wrong iq1_m block size/padding");
+#endif
+
+// Used by IQ1_M quants
+typedef union {
+ ggml_half f16;
+ uint16_t u16;
+} iq1m_scale_t;
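
For QK_K == 256 the iq1_m block carries no `d` field: the fp16 super-block scale is scattered across the top 4 bits of the four 16-bit words of `scales` and reassembled through this union. A hedged sketch of the reassembly (the exact bit placement is stated as an assumption; `x` points to a block_iq1_m and `ggml_half_to_float` is an assumed fp16 helper):

    const uint16_t * sc = (const uint16_t *)x->scales;   // 8 bytes = 4 words
    iq1m_scale_t scale;
    scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0)
              | ((sc[2] >>  4) & 0x0f00) | (sc[3] & 0xf000);
    const float d = ggml_half_to_float(scale.f16);
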
+
+// Non-linear quants
+#define QK4_NL 32
+typedef struct {
+ ggml_half d;
+ uint8_t qs[QK4_NL/2];
+} block_iq4_nl;
+static_assert(sizeof(block_iq4_nl) == sizeof(ggml_half) + QK4_NL/2, "wrong iq4_nl block size/padding");
+
+#if QK_K == 64
+#define block_iq4_xs block_iq4_nl
+#else
+typedef struct {
+ ggml_half d;
+ uint16_t scales_h;
+ uint8_t scales_l[QK_K/64];
+ uint8_t qs[QK_K/2];
+} block_iq4_xs;
+static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
+#endif
+
+#endif // GGML_COMMON_DECL
+#endif // GGML_COMMON_DECL
+
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef GGML_COMMON_IMPL
+
+#if defined(GGML_COMMON_IMPL_C)
+#include <stdint.h>
+
+#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
+#define GGML_TABLE_END() };
+
+#define GGML_COMMON_IMPL
+#elif defined(GGML_COMMON_IMPL_METAL)
+#include <metal_stdlib>
+
+#define GGML_TABLE_BEGIN(type, name, size) static const constant type name[size] = {
+#define GGML_TABLE_END() };
+
+#define GGML_COMMON_IMPL
+#elif defined(GGML_COMMON_IMPL_CUDA) || defined(GGML_COMMON_IMPL_HIP)
+#include <cstdint>
+
+#define GGML_TABLE_BEGIN(type, name, size) static const __device__ type name[size] = {
+#define GGML_TABLE_END() };
+
+#define GGML_COMMON_IMPL
+#elif defined(GGML_COMMON_IMPL_SYCL)
+
+#include <cstdint>
+
+#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
+#define GGML_TABLE_END() };
+
+#define GGML_COMMON_IMPL
+#endif
+
+#if defined(GGML_COMMON_IMPL)
+
+GGML_TABLE_BEGIN(uint8_t, kmask_iq2xs, 8)
+ 1, 2, 4, 8, 16, 32, 64, 128
+GGML_TABLE_END()
+
+GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
+ 0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
+ 144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
+ 160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
+ 48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
+ 192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
+ 80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
+ 96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
+ 240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
+GGML_TABLE_END()
+
+//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
+GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
+ 0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
+ 0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
+ 0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
+ 0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
+ 0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
+ 0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
+ 0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
+ 0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
+ 0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
+ 0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
+ 0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
+ 0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
+ 0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
+ 0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
+ 0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
+ 0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
+ 0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
+ 0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
+ 0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
+ 0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
+ 0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
+ 0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
+ 0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
+ 0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
+ 0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
+ 0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
+ 0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
+ 0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
+ 0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
+ 0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
+ 0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
+ 0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
+GGML_TABLE_END()
+//#endif
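
Both sign tables are derivable rather than magic: `ksigns_iq2xs[i]` is the 7-bit index `i` plus a top parity bit that keeps the total popcount even, and `ksigns64` is the same table with each sign bit widened to a 0x00/0xff byte mask. A hedged generator sketch (uses the GCC/Clang `__builtin_popcount`):

    #include <stdint.h>

    static uint8_t  signs8[128];   // would reproduce ksigns_iq2xs
    static uint64_t signs64[128];  // would reproduce ksigns64

    static void gen_sign_tables(void) {
        for (int i = 0; i < 128; i++) {
            uint8_t s = (uint8_t)(i | ((__builtin_popcount(i) & 1) << 7)); // parity bit
            signs8[i] = s;
            uint64_t w = 0;
            for (int b = 0; b < 8; b++) {
                if (s & (1u << b)) w |= 0xffULL << (8 * b); // widen bit -> byte mask
            }
            signs64[i] = w;
        }
    }
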
+
+
+GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
+ 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
+ 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
+ 0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
+ 0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
+ 0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
+ 0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
+ 0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
+ 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
+ 0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
+ 0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
+ 0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
+ 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
+ 0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
+ 0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
+ 0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
+ 0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
+ 0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
+ 0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
+ 0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
+ 0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
+ 0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
+ 0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
+ 0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
+ 0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
+ 0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
+ 0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
+ 0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
+ 0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
+ 0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
+ 0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
+ 0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
+ 0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
+ 0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
+ 0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
+ 0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
+ 0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
+ 0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
+ 0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
+ 0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
+ 0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
+ 0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
+ 0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
+ 0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
+ 0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
+ 0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
+ 0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
+ 0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
+ 0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
+ 0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
+ 0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
+ 0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
+ 0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
+ 0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
+ 0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
+ 0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
+ 0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
+ 0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
+ 0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
+ 0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
+ 0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
+ 0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
+ 0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
+ 0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
+ 0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
+GGML_TABLE_END()
+
+GGML_TABLE_BEGIN(uint64_t, iq2xs_grid, 512)
+ 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
+ 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
+ 0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
+ 0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
+ 0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
+ 0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
+ 0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
+ 0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
+ 0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
+ 0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
+ 0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
+ 0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
+ 0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
+ 0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
+ 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
+ 0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
+ 0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
+ 0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
+ 0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
+ 0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
+ 0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
+ 0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
+ 0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
+ 0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
+ 0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
+ 0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
+ 0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
+ 0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
+ 0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
+ 0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
+ 0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
+ 0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
+ 0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
+ 0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
+ 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
+ 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
+ 0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
+ 0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
+ 0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
+ 0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
+ 0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
+ 0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
+ 0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
+ 0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
+ 0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
+ 0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
+ 0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
+ 0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
+ 0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
+ 0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
+ 0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
+ 0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
+ 0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
+ 0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
+ 0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
+ 0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
+ 0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
+ 0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
+ 0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
+ 0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
+ 0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
+ 0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
+ 0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
+ 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
+ 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
+ 0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
+ 0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
+ 0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
+ 0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
+ 0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
+ 0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
+ 0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
+ 0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
+ 0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
+ 0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
+ 0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
+ 0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
+ 0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
+ 0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
+ 0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
+ 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
+ 0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
+ 0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
+ 0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
+ 0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
+ 0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
+ 0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
+ 0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
+ 0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
+ 0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
+ 0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
+ 0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
+ 0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
+ 0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
+ 0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
+ 0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
+ 0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
+ 0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
+ 0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
+ 0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
+ 0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
+ 0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
+ 0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
+ 0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
+ 0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
+ 0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
+ 0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
+ 0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
+ 0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
+ 0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
+ 0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
+ 0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
+ 0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
+ 0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
+ 0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
+ 0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
+ 0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
+ 0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
+ 0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
+ 0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
+ 0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
+ 0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
+ 0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
+ 0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
+ 0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
+ 0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
+ 0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
+ 0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
+GGML_TABLE_END()
+
+GGML_TABLE_BEGIN(uint64_t, iq2s_grid, 1024)
+ 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
+ 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
+ 0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
+ 0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
+ 0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
+ 0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
+ 0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
+ 0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
+ 0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
+ 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
+ 0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
+ 0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
+ 0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
+ 0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
+ 0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
+ 0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
+ 0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
+ 0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
+ 0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
+ 0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
+ 0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
+ 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
+ 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
+ 0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
+ 0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
+ 0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
+ 0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
+ 0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
+ 0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
+ 0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
+ 0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
+ 0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
+ 0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
+ 0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
+ 0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
+ 0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
+ 0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
+ 0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
+ 0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
+ 0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
+ 0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
+ 0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
+ 0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
+ 0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
+ 0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
+ 0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
+ 0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
+ 0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
+ 0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
+ 0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
+ 0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
+ 0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
+ 0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
+ 0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
+ 0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
+ 0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
+ 0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
+ 0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
+ 0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
+ 0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
+ 0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
+ 0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
+ 0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
+ 0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
+ 0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
+ 0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
+ 0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
+ 0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
+ 0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
+ 0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
+ 0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
+ 0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
+ 0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
+ 0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
+ 0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
+ 0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
+ 0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
+ 0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
+ 0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
+ 0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
+ 0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
+ 0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
+ 0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
+ 0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
+ 0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
+ 0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
+ 0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
+ 0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
+ 0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
+ 0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
+ 0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
+ 0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
+ 0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
+ 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
+ 0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
+ 0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
+ 0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
+ 0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
+ 0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
+ 0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
+ 0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
+ 0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
+ 0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
+ 0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
+ 0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
+ 0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
+ 0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
+ 0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
+ 0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
+ 0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
+ 0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
+ 0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
+ 0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
+ 0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
+ 0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
+ 0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
+ 0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
+ 0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
+ 0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
+ 0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
+ 0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
+ 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
+ 0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
+ 0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
+ 0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
+ 0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
+ 0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
+ 0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
+ 0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
+ 0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
+ 0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
+ 0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
+ 0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
+ 0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
+ 0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
+ 0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
+ 0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
+ 0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
+ 0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
+ 0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
+ 0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
+ 0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
+ 0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
+ 0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
+ 0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
+ 0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
+ 0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
+ 0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
+ 0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
+ 0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
+ 0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
+ 0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
+ 0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
+ 0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
+ 0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
+ 0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
+ 0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
+ 0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
+ 0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
+ 0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
+ 0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
+ 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
+ 0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
+ 0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
+ 0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
+ 0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
+ 0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
+ 0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
+ 0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
+ 0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
+ 0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
+ 0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
+ 0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
+ 0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
+ 0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
+ 0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
+ 0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
+ 0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
+ 0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
+ 0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
+ 0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
+ 0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
+ 0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
+ 0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
+ 0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
+ 0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
+ 0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
+ 0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
+ 0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
+ 0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
+ 0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
+ 0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
+ 0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
+ 0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
+ 0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
+ 0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
+ 0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
+ 0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
+ 0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
+ 0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
+ 0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
+ 0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
+ 0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
+ 0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
+ 0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
+ 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
+ 0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
+ 0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
+ 0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
+ 0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
+ 0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
+ 0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
+ 0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
+ 0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
+ 0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
+ 0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
+ 0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
+ 0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
+ 0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
+ 0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
+ 0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
+ 0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
+ 0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
+ 0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
+ 0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
+ 0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
+ 0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
+ 0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
+ 0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
+ 0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
+ 0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
+ 0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
+ 0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
+ 0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
+ 0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
+ 0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
+ 0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
+ 0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
+ 0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
+ 0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
+ 0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
+ 0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
+ 0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
+ 0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
+ 0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
+ 0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
+ 0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
+ 0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
+ 0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
+ 0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
+ 0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
+ 0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
+ 0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
+ 0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
+ 0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
+ 0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
+GGML_TABLE_END()
+
+GGML_TABLE_BEGIN(uint32_t, iq3xxs_grid, 256)
+ 0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
+ 0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
+ 0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
+ 0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
+ 0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
+ 0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
+ 0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
+ 0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
+ 0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
+ 0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
+ 0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
+ 0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
+ 0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
+ 0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
+ 0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
+ 0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
+ 0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
+ 0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
+ 0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
+ 0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
+ 0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
+ 0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
+ 0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
+ 0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
+ 0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
+ 0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
+ 0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
+ 0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
+ 0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
+ 0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
+ 0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
+ 0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
+GGML_TABLE_END()
+
+GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
+ 0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
+ 0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
+ 0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
+ 0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
+ 0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
+ 0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
+ 0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
+ 0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
+ 0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
+ 0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
+ 0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
+ 0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
+ 0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
+ 0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
+ 0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
+ 0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
+ 0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
+ 0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
+ 0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
+ 0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
+ 0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
+ 0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
+ 0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
+ 0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
+ 0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
+ 0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
+ 0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
+ 0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
+ 0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
+ 0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
+ 0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
+ 0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
+ 0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
+ 0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
+ 0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
+ 0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
+ 0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
+ 0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
+ 0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
+ 0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
+ 0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
+ 0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
+ 0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
+ 0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
+ 0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
+ 0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
+ 0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
+ 0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
+ 0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
+ 0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
+ 0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
+ 0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
+ 0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
+ 0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
+ 0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
+ 0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
+ 0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
+ 0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
+ 0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
+ 0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
+ 0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
+ 0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
+ 0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
+ 0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
+GGML_TABLE_END()
+
+#define NGRID_IQ1S 2048
+#define IQ1S_DELTA 0.125f
+#define IQ1M_DELTA 0.125f
+#if defined(GGML_COMMON_IMPL_C)
+GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S)
+ 0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff,
+ 0xffffffffffff0101, 0xffffffffff00ff00, 0xffffffffff000000, 0xffffffffff01ffff,
+ 0xffffffffff01ff01, 0xffffffffff0101ff, 0xffffffffff010101, 0xffffffff00ff0000,
+ 0xffffffff0000ff00, 0xffffffff000000ff, 0xffffffff00000001, 0xffffffff00010000,
+ 0xffffffff01ffffff, 0xffffffff01ffff01, 0xffffffff01ff01ff, 0xffffffff01ff0101,
+ 0xffffffff01000000, 0xffffffff0101ffff, 0xffffffff0101ff01, 0xffffffff010101ff,
+ 0xffffffff01010101, 0xffffff00ffff00ff, 0xffffff00ffff0000, 0xffffff00ff00ff00,
+ 0xffffff00ff0000ff, 0xffffff00ff000001, 0xffffff00ff000100, 0xffffff00ff000101,
+ 0xffffff00ff010000, 0xffffff0000ffff00, 0xffffff0000ff0001, 0xffffff0000ff0100,
+ 0xffffff000000ff01, 0xffffff0000000000, 0xffffff0000000101, 0xffffff000001ff00,
+ 0xffffff00000100ff, 0xffffff0000010001, 0xffffff00000101ff, 0xffffff0001ff0000,
+ 0xffffff000100ff00, 0xffffff00010000ff, 0xffffff0001000001, 0xffffff0001010000,
+ 0xffffff01ffffffff, 0xffffff01ffffff01, 0xffffff01ffff01ff, 0xffffff01ffff0101,
+ 0xffffff01ff000000, 0xffffff01ff01ffff, 0xffffff01ff01ff01, 0xffffff01ff0101ff,
+ 0xffffff01ff010101, 0xffffff0100ff0000, 0xffffff010000ff00, 0xffffff0100000100,
+ 0xffffff01000100ff, 0xffffff0100010100, 0xffffff0101ffffff, 0xffffff0101ffff01,
+ 0xffffff0101ff01ff, 0xffffff0101ff0101, 0xffffff010100ff00, 0xffffff0101000000,
+ 0xffffff0101000100, 0xffffff010101ffff, 0xffffff010101ff01, 0xffffff01010101ff,
+ 0xffffff0101010101, 0xffff00ffff00ff00, 0xffff00ffff0000ff, 0xffff00ffff000001,
+ 0xffff00ffff010000, 0xffff00ff00ffff00, 0xffff00ff00ff0100, 0xffff00ff00000000,
+ 0xffff00ff00000101, 0xffff00ff000100ff, 0xffff00ff00010000, 0xffff00ff0100ff00,
+ 0xffff00ff01000100, 0xffff00ff01010000, 0xffff0000ffffff00, 0xffff0000ffff00ff,
+ 0xffff0000ffff0000, 0xffff0000ffff0001, 0xffff0000ff000000, 0xffff0000ff0001ff,
+ 0xffff0000ff000101, 0xffff0000ff010100, 0xffff000000ffffff, 0xffff000000ff0000,
+ 0xffff000000ff0101, 0xffff00000000ffff, 0xffff00000000ff00, 0xffff0000000000ff,
+ 0xffff000000000000, 0xffff000000000001, 0xffff000000000100, 0xffff00000001ffff,
+ 0xffff00000001ff01, 0xffff000000010000, 0xffff0000000101ff, 0xffff000000010101,
+ 0xffff000001ffff00, 0xffff00000100ff00, 0xffff000001000000, 0xffff0000010001ff,
+ 0xffff000001000101, 0xffff00000101ff00, 0xffff0000010100ff, 0xffff000001010000,
+ 0xffff000001010001, 0xffff000001010100, 0xffff0001ff0000ff, 0xffff0001ff000100,
+ 0xffff000100ffff00, 0xffff000100ff00ff, 0xffff00010000ffff, 0xffff00010000ff01,
+ 0xffff000100000000, 0xffff0001000001ff, 0xffff00010001ffff, 0xffff00010001ff00,
+ 0xffff000100010001, 0xffff000100010100, 0xffff000101ff0000, 0xffff00010100ff00,
+ 0xffff0001010000ff, 0xffff000101000100, 0xffff01ffffffffff, 0xffff01ffffffff01,
+ 0xffff01ffffff01ff, 0xffff01ffffff0101, 0xffff01ffff000000, 0xffff01ffff01ffff,
+ 0xffff01ffff01ff01, 0xffff01ffff0101ff, 0xffff01ffff010101, 0xffff01ff00ff0000,
+ 0xffff01ff0000ff00, 0xffff01ff00000001, 0xffff01ff00010000, 0xffff01ff01ffffff,
+ 0xffff01ff01ffff01, 0xffff01ff01ff01ff, 0xffff01ff01ff0101, 0xffff01ff01000000,
+ 0xffff01ff0101ffff, 0xffff01ff0101ff01, 0xffff01ff010101ff, 0xffff01ff01010101,
+ 0xffff0100ffff0000, 0xffff0100ff00ff00, 0xffff0100ff0000ff, 0xffff0100ff000100,
+ 0xffff0100ff0100ff, 0xffff0100ff010000, 0xffff010000ffff00, 0xffff01000000ffff,
+ 0xffff01000000ff00, 0xffff010000000000, 0xffff01000001ff00, 0xffff0100000100ff,
+ 0xffff010000010100, 0xffff01000100ff00, 0xffff0100010000ff, 0xffff010001000001,
+ 0xffff010001000100, 0xffff010001010000, 0xffff0101ffffffff, 0xffff0101ffffff01,
+ 0xffff0101ffff01ff, 0xffff0101ffff0101, 0xffff0101ff000000, 0xffff0101ff01ffff,
+ 0xffff0101ff01ff01, 0xffff0101ff0101ff, 0xffff0101ff010101, 0xffff010100ff0000,
+ 0xffff01010000ff00, 0xffff010100000100, 0xffff01010001ff00, 0xffff010100010000,
+ 0xffff010101ffffff, 0xffff010101ffff01, 0xffff010101ff0000, 0xffff010101ff01ff,
+ 0xffff010101ff0101, 0xffff010101000000, 0xffff01010101ffff, 0xffff01010101ff01,
+ 0xffff0101010101ff, 0xffff010101010101, 0xff00ffffff00ffff, 0xff00ffffff00ff00,
+ 0xff00ffffff0000ff, 0xff00ffffff000100, 0xff00ffffff0100ff, 0xff00ffffff010000,
+ 0xff00ffff00ffff00, 0xff00ffff00ff00ff, 0xff00ffff0000ffff, 0xff00ffff00000000,
+ 0xff00ffff000001ff, 0xff00ffff0001ff00, 0xff00ffff000100ff, 0xff00ffff00010000,
+ 0xff00ffff00010100, 0xff00ffff0100ff00, 0xff00ffff010000ff, 0xff00ffff01000001,
+ 0xff00ffff0101ff00, 0xff00ffff01010000, 0xff00ff00ffffff00, 0xff00ff00ffff00ff,
+ 0xff00ff00ffff0001, 0xff00ff00ffff0100, 0xff00ff00ff00ffff, 0xff00ff00ff00ff01,
+ 0xff00ff00ff000000, 0xff00ff00ff0001ff, 0xff00ff00ff01ff00, 0xff00ff00ff0100ff,
+ 0xff00ff00ff010100, 0xff00ff0000ff0000, 0xff00ff0000ff0101, 0xff00ff000000ffff,
+ 0xff00ff000000ff00, 0xff00ff000000ff01, 0xff00ff00000000ff, 0xff00ff0000000000,
+ 0xff00ff0000000001, 0xff00ff0000000100, 0xff00ff000001ffff, 0xff00ff0000010000,
+ 0xff00ff0001ff00ff, 0xff00ff000100ff01, 0xff00ff0001000000, 0xff00ff000101ff00,
+ 0xff00ff00010100ff, 0xff00ff01ff00ff00, 0xff00ff01ff0000ff, 0xff00ff01ff000001,
+ 0xff00ff01ff010000, 0xff00ff0100ffffff, 0xff00ff0100ff0001, 0xff00ff0100ff0100,
+ 0xff00ff010000ff01, 0xff00ff0100000000, 0xff00ff01000001ff, 0xff00ff0100000101,
+ 0xff00ff01000100ff, 0xff00ff0100010001, 0xff00ff0101ff0000, 0xff00ff010100ff00,
+ 0xff00ff01010000ff, 0xff00ff0101000001, 0xff00ff0101010000, 0xff0000ffffffff00,
+ 0xff0000ffffff0001, 0xff0000ffffff0100, 0xff0000ffff0000ff, 0xff0000ffff000000,
+ 0xff0000ffff0001ff, 0xff0000ffff000100, 0xff0000ffff01ff00, 0xff0000ffff010001,
+ 0xff0000ff00ffff00, 0xff0000ff00ff0000, 0xff0000ff00ff0001, 0xff0000ff00ff01ff,
+ 0xff0000ff00ff0101, 0xff0000ff0000ff00, 0xff0000ff000000ff, 0xff0000ff00000000,
+ 0xff0000ff00000001, 0xff0000ff00000100, 0xff0000ff0001ff01, 0xff0000ff00010000,
+ 0xff0000ff000101ff, 0xff0000ff01ff00ff, 0xff0000ff01ff0100, 0xff0000ff0100ffff,
+ 0xff0000ff010000ff, 0xff0000ff01000000, 0xff0000ff010001ff, 0xff0000ff01000100,
+ 0xff0000ff01000101, 0xff0000ff0101ff00, 0xff0000ff010100ff, 0xff0000ff01010000,
+ 0xff0000ff01010100, 0xff000000ffffff01, 0xff000000ffff0000, 0xff000000ffff0101,
+ 0xff000000ff00ff00, 0xff000000ff0000ff, 0xff000000ff000000, 0xff000000ff000001,
+ 0xff000000ff000100, 0xff000000ff01ffff, 0xff000000ff01ff01, 0xff000000ff010000,
+ 0xff000000ff0101ff, 0xff000000ff010101, 0xff00000000ffff00, 0xff00000000ff00ff,
+ 0xff00000000ff0000, 0xff00000000ff0001, 0xff0000000000ff00, 0xff0000000000ff01,
+ 0xff000000000000ff, 0xff00000000000000, 0xff00000000000001, 0xff00000000000100,
+ 0xff00000000000101, 0xff0000000001ff00, 0xff000000000100ff, 0xff00000000010000,
+ 0xff00000000010001, 0xff00000000010100, 0xff00000001ffffff, 0xff00000001ffff01,
+ 0xff00000001ff00ff, 0xff00000001ff0000, 0xff00000001ff01ff, 0xff00000001ff0101,
+ 0xff0000000100ffff, 0xff0000000100ff00, 0xff000000010000ff, 0xff00000001000000,
+ 0xff00000001000001, 0xff00000001000100, 0xff00000001000101, 0xff0000000101ffff,
+ 0xff0000000101ff01, 0xff00000001010000, 0xff000001ffffff00, 0xff000001ffff00ff,
+ 0xff000001ffff0000, 0xff000001ffff0001, 0xff000001ff000000, 0xff000001ff000001,
+ 0xff000001ff0001ff, 0xff000001ff000101, 0xff000001ff01ff00, 0xff000001ff010001,
+ 0xff00000100ffffff, 0xff00000100ffff01, 0xff00000100ff00ff, 0xff00000100ff0000,
+ 0xff00000100ff01ff, 0xff00000100ff0101, 0xff0000010000ff00, 0xff00000100000000,
+ 0xff00000100000001, 0xff000001000001ff, 0xff00000100000100, 0xff0000010001ff00,
+ 0xff000001000100ff, 0xff00000100010000, 0xff000001000101ff, 0xff00000100010100,
+ 0xff00000100010101, 0xff00000101ff0001, 0xff00000101ff0101, 0xff0000010100ff01,
+ 0xff00000101000000, 0xff000001010100ff, 0xff00000101010100, 0xff0001ffff00ff00,
+ 0xff0001ffff000001, 0xff0001ffff010000, 0xff0001ff00ffff00, 0xff0001ff00ff00ff,
+ 0xff0001ff00ff0001, 0xff0001ff00ff0100, 0xff0001ff0000ffff, 0xff0001ff00000000,
+ 0xff0001ff000001ff, 0xff0001ff00000101, 0xff0001ff0001ffff, 0xff0001ff0001ff00,
+ 0xff0001ff000100ff, 0xff0001ff00010001, 0xff0001ff00010100, 0xff0001ff01ff0000,
+ 0xff0001ff0100ff00, 0xff0001ff010000ff, 0xff0001ff01010000, 0xff000100ff00ffff,
+ 0xff000100ff00ff01, 0xff000100ff000000, 0xff000100ff000101, 0xff000100ff01ff00,
+ 0xff000100ff010000, 0xff00010000ffff01, 0xff00010000ff00ff, 0xff00010000ff0000,
+ 0xff00010000ff01ff, 0xff0001000000ff00, 0xff000100000000ff, 0xff00010000000000,
+ 0xff00010000000001, 0xff00010000000100, 0xff00010000000101, 0xff0001000001ffff,
+ 0xff00010000010000, 0xff00010000010101, 0xff00010001ff0100, 0xff0001000100ff00,
+ 0xff0001000100ff01, 0xff00010001000000, 0xff000100010001ff, 0xff0001000101ff00,
+ 0xff00010001010001, 0xff00010001010100, 0xff000101ffff0100, 0xff000101ff000001,
+ 0xff000101ff0100ff, 0xff000101ff010001, 0xff00010100ff00ff, 0xff00010100ff0001,
+ 0xff00010100ff0100, 0xff0001010000ffff, 0xff0001010000ff01, 0xff00010100000000,
+ 0xff000101000001ff, 0xff0001010001ff00, 0xff00010100010001, 0xff00010100010100,
+ 0xff00010101ff0000, 0xff0001010100ff00, 0xff00010101000001, 0xff00010101000101,
+ 0xff01ffffffffffff, 0xff01ffffffffff01, 0xff01ffffffff01ff, 0xff01ffffffff0101,
+ 0xff01ffffff000000, 0xff01ffffff01ffff, 0xff01ffffff01ff01, 0xff01ffffff010000,
+ 0xff01ffffff0101ff, 0xff01ffffff010101, 0xff01ffff00ff0000, 0xff01ffff0000ff00,
+ 0xff01ffff00000100, 0xff01ffff0001ff00, 0xff01ffff00010000, 0xff01ffff01ffffff,
+ 0xff01ffff01ffff01, 0xff01ffff01ff01ff, 0xff01ffff01ff0101, 0xff01ffff01000000,
+ 0xff01ffff0101ffff, 0xff01ffff0101ff01, 0xff01ffff01010000, 0xff01ffff010101ff,
+ 0xff01ffff01010101, 0xff01ff00ffff0000, 0xff01ff00ff00ff00, 0xff01ff00ff0000ff,
+ 0xff01ff00ff000100, 0xff01ff00ff010000, 0xff01ff0000ffff01, 0xff01ff0000ff00ff,
+ 0xff01ff0000ff0100, 0xff01ff0000000000, 0xff01ff00000001ff, 0xff01ff0000000101,
+ 0xff01ff000001ff00, 0xff01ff00000100ff, 0xff01ff0000010000, 0xff01ff0000010001,
+ 0xff01ff0001ff0000, 0xff01ff000100ffff, 0xff01ff0001000001, 0xff01ff0001000100,
+ 0xff01ff0001010000, 0xff01ff01ffffff00, 0xff01ff01ffff01ff, 0xff01ff01ffff0101,
+ 0xff01ff01ff00ff00, 0xff01ff01ff000000, 0xff01ff01ff01ffff, 0xff01ff01ff01ff01,
+ 0xff01ff01ff0101ff, 0xff01ff01ff010101, 0xff01ff0100ff0000, 0xff01ff010000ff00,
+ 0xff01ff0100000001, 0xff01ff0100000100, 0xff01ff0100010000, 0xff01ff0101ffff00,
+ 0xff01ff0101ff01ff, 0xff01ff0101ff0101, 0xff01ff010100ff00, 0xff01ff0101000000,
+ 0xff01ff010101ffff, 0xff01ff010101ff01, 0xff01ff01010101ff, 0xff01ff0101010101,
+ 0xff0100ffffff0000, 0xff0100ffff0000ff, 0xff0100ffff000001, 0xff0100ffff000100,
+ 0xff0100ffff010000, 0xff0100ff00ff00ff, 0xff0100ff00ff0000, 0xff0100ff00ff0001,
+ 0xff0100ff00ff0100, 0xff0100ff0000ff01, 0xff0100ff00000000, 0xff0100ff000001ff,
+ 0xff0100ff00000101, 0xff0100ff00010001, 0xff0100ff01ff0000, 0xff0100ff0100ff00,
+ 0xff0100ff010000ff, 0xff0100ff01000100, 0xff0100ff0101ff00, 0xff0100ff01010000,
+ 0xff010000ffff0100, 0xff010000ff000000, 0xff010000ff01ff00, 0xff010000ff010100,
+ 0xff01000000ffffff, 0xff01000000ff0000, 0xff01000000ff01ff, 0xff0100000000ff00,
+ 0xff010000000000ff, 0xff01000000000000, 0xff01000000000100, 0xff0100000001ff01,
+ 0xff01000000010000, 0xff010000000101ff, 0xff01000001ff0100, 0xff0100000100ffff,
+ 0xff010000010000ff, 0xff01000001000000, 0xff010000010001ff, 0xff01000001000101,
+ 0xff0100000101ff00, 0xff010000010100ff, 0xff01000001010001, 0xff01000001010100,
+ 0xff010001ffff0000, 0xff010001ff00ffff, 0xff010001ff00ff01, 0xff010001ff000100,
+ 0xff010001ff010000, 0xff01000100ffff00, 0xff01000100ff0100, 0xff01000100000000,
+ 0xff0100010001ffff, 0xff0100010001ff00, 0xff01000100010100, 0xff01000101ff00ff,
+ 0xff01000101ff0001, 0xff0100010100ffff, 0xff01000101000101, 0xff0101ffffffffff,
+ 0xff0101ffffffff01, 0xff0101ffffff01ff, 0xff0101ffffff0101, 0xff0101ffff000000,
+ 0xff0101ffff01ffff, 0xff0101ffff01ff01, 0xff0101ffff0101ff, 0xff0101ffff010101,
+ 0xff0101ff00ff0000, 0xff0101ff0000ff00, 0xff0101ff000000ff, 0xff0101ff00010000,
+ 0xff0101ff01ffffff, 0xff0101ff01ffff01, 0xff0101ff01ff01ff, 0xff0101ff01ff0101,
+ 0xff0101ff0101ffff, 0xff0101ff0101ff01, 0xff0101ff010101ff, 0xff0101ff01010101,
+ 0xff010100ffff0100, 0xff010100ff00ff00, 0xff010100ff0000ff, 0xff010100ff000100,
+ 0xff010100ff010000, 0xff01010000ff0001, 0xff01010000ff0100, 0xff0101000000ff01,
+ 0xff01010000000000, 0xff0101000001ff00, 0xff010100000100ff, 0xff01010000010001,
+ 0xff01010000010100, 0xff01010001ff0000, 0xff0101000100ffff, 0xff01010001000001,
+ 0xff01010001000100, 0xff010100010100ff, 0xff01010001010000, 0xff010101ffffffff,
+ 0xff010101ffffff01, 0xff010101ffff01ff, 0xff010101ffff0101, 0xff010101ff01ffff,
+ 0xff010101ff01ff01, 0xff010101ff0101ff, 0xff010101ff010101, 0xff01010100ff0000,
+ 0xff0101010000ff00, 0xff01010100000001, 0xff01010100000100, 0xff01010100010000,
+ 0xff01010101ffffff, 0xff01010101ffff01, 0xff01010101ff01ff, 0xff01010101ff0101,
+ 0xff01010101000000, 0xff0101010101ffff, 0xff0101010101ff01, 0xff010101010101ff,
+ 0xff01010101010101, 0x00ffffffffff0000, 0x00ffffffff00ff00, 0x00ffffffff000001,
+ 0x00ffffffff010000, 0x00ffffff00ff0100, 0x00ffffff0000ff01, 0x00ffffff00000000,
+ 0x00ffffff000001ff, 0x00ffffff00000101, 0x00ffffff0001ff00, 0x00ffffff000100ff,
+ 0x00ffffff00010001, 0x00ffffff010000ff, 0x00ffffff01000100, 0x00ffffff0101ff00,
+ 0x00ffffff01010001, 0x00ffff00ffffffff, 0x00ffff00ffffff00, 0x00ffff00ffff00ff,
+ 0x00ffff00ffff0001, 0x00ffff00ffff0100, 0x00ffff00ff00ff01, 0x00ffff00ff000000,
+ 0x00ffff00ff000001, 0x00ffff00ff0001ff, 0x00ffff00ff000101, 0x00ffff00ff01ff00,
+ 0x00ffff00ff010001, 0x00ffff00ff010100, 0x00ffff0000ff0000, 0x00ffff0000ff01ff,
+ 0x00ffff0000ff0101, 0x00ffff000000ff00, 0x00ffff00000000ff, 0x00ffff0000000000,
+ 0x00ffff0000000001, 0x00ffff0000000100, 0x00ffff0000000101, 0x00ffff0000010000,
+ 0x00ffff00000101ff, 0x00ffff0000010101, 0x00ffff0001ffff00, 0x00ffff0001ff00ff,
+ 0x00ffff0001ff0001, 0x00ffff000100ffff, 0x00ffff000100ff01, 0x00ffff0001000000,
+ 0x00ffff000101ffff, 0x00ffff000101ff00, 0x00ffff000101ff01, 0x00ffff01ffff0000,
+ 0x00ffff01ff00ff00, 0x00ffff01ff0000ff, 0x00ffff01ff000001, 0x00ffff01ff010000,
+ 0x00ffff0100ffff00, 0x00ffff010000ff01, 0x00ffff0100000000, 0x00ffff0100000101,
+ 0x00ffff01000100ff, 0x00ffff0100010100, 0x00ffff0101ff0100, 0x00ffff01010000ff,
+ 0x00ffff0101010000, 0x00ff00ffffffff00, 0x00ff00ffff000000, 0x00ff00ffff000100,
+ 0x00ff00ffff010100, 0x00ff00ff00ff0000, 0x00ff00ff00ff01ff, 0x00ff00ff00ff0101,
+ 0x00ff00ff0000ff00, 0x00ff00ff000000ff, 0x00ff00ff00000000, 0x00ff00ff00000001,
+ 0x00ff00ff0001ff00, 0x00ff00ff0001ff01, 0x00ff00ff00010000, 0x00ff00ff000101ff,
+ 0x00ff00ff00010101, 0x00ff00ff01ffff00, 0x00ff00ff01ff0001, 0x00ff00ff01ff0100,
+ 0x00ff00ff0100ffff, 0x00ff00ff0100ff01, 0x00ff00ff01000000, 0x00ff00ff0101ffff,
+ 0x00ff00ff0101ff00, 0x00ff00ff01010100, 0x00ff0000ffffff00, 0x00ff0000ffffff01,
+ 0x00ff0000ffff0000, 0x00ff0000ffff0101, 0x00ff0000ff00ff00, 0x00ff0000ff0000ff,
+ 0x00ff0000ff000000, 0x00ff0000ff000001, 0x00ff0000ff000100, 0x00ff0000ff01ffff,
+ 0x00ff0000ff010000, 0x00ff0000ff010101, 0x00ff000000ffff00, 0x00ff000000ff00ff,
+ 0x00ff000000ff0000, 0x00ff000000ff0001, 0x00ff000000ff0100, 0x00ff00000000ffff,
+ 0x00ff00000000ff00, 0x00ff0000000000ff, 0x00ff000000000000, 0x00ff000000000001,
+ 0x00ff0000000001ff, 0x00ff000000000100, 0x00ff00000001ff00, 0x00ff0000000100ff,
+ 0x00ff000000010000, 0x00ff000000010001, 0x00ff000000010100, 0x00ff000001ffff01,
+ 0x00ff000001ff00ff, 0x00ff000001ff0000, 0x00ff000001ff01ff, 0x00ff00000100ff00,
+ 0x00ff0000010000ff, 0x00ff000001000000, 0x00ff000001000001, 0x00ff000001000100,
+ 0x00ff000001000101, 0x00ff000001010000, 0x00ff0000010101ff, 0x00ff000001010101,
+ 0x00ff0001ffffff00, 0x00ff0001ffff0000, 0x00ff0001ffff0100, 0x00ff0001ff0000ff,
+ 0x00ff0001ff000000, 0x00ff0001ff0001ff, 0x00ff0001ff000101, 0x00ff0001ff01ff00,
+ 0x00ff0001ff0100ff, 0x00ff0001ff010100, 0x00ff000100ffffff, 0x00ff000100ffff01,
+ 0x00ff000100ff0000, 0x00ff000100ff01ff, 0x00ff00010000ffff, 0x00ff00010000ff00,
+ 0x00ff00010000ff01, 0x00ff000100000000, 0x00ff000100000001, 0x00ff000100000100,
+ 0x00ff00010001ff01, 0x00ff000100010000, 0x00ff0001000101ff, 0x00ff000101ffff00,
+ 0x00ff000101ff0000, 0x00ff000101ff0101, 0x00ff0001010000ff, 0x00ff000101000000,
+ 0x00ff00010101ff00, 0x00ff0001010100ff, 0x00ff000101010001, 0x00ff01ffffff0000,
+ 0x00ff01ffff00ff00, 0x00ff01ffff000000, 0x00ff01ffff000101, 0x00ff01ffff010000,
+ 0x00ff01ff00ffff01, 0x00ff01ff00ff0100, 0x00ff01ff0000ffff, 0x00ff01ff00000000,
+ 0x00ff01ff000001ff, 0x00ff01ff0001ff00, 0x00ff01ff000100ff, 0x00ff01ff00010001,
+ 0x00ff01ff00010100, 0x00ff01ff01ff0000, 0x00ff01ff0100ff00, 0x00ff01ff010000ff,
+ 0x00ff01ff01000001, 0x00ff01ff01000100, 0x00ff01ff01010000, 0x00ff0100ffffff00,
+ 0x00ff0100ffff0000, 0x00ff0100ffff0001, 0x00ff0100ffff0101, 0x00ff0100ff00ffff,
+ 0x00ff0100ff0000ff, 0x00ff0100ff000000, 0x00ff0100ff0001ff, 0x00ff0100ff01ff00,
+ 0x00ff0100ff0100ff, 0x00ff0100ff010001, 0x00ff010000ffffff, 0x00ff010000ff0000,
+ 0x00ff010000ff0101, 0x00ff01000000ff00, 0x00ff01000000ff01, 0x00ff0100000000ff,
+ 0x00ff010000000000, 0x00ff010000000001, 0x00ff010000000100, 0x00ff01000001ffff,
+ 0x00ff01000001ff01, 0x00ff010000010000, 0x00ff010000010001, 0x00ff010000010101,
+ 0x00ff010001ff0001, 0x00ff010001ff0100, 0x00ff01000100ff01, 0x00ff010001000000,
+ 0x00ff010001000001, 0x00ff0100010001ff, 0x00ff01000101ff00, 0x00ff0100010100ff,
+ 0x00ff010001010001, 0x00ff010001010100, 0x00ff0101ff000001, 0x00ff010100ff00ff,
+ 0x00ff010100ff0001, 0x00ff010100ff0100, 0x00ff010100000000, 0x00ff0101000001ff,
+ 0x00ff010100000101, 0x00ff0101000100ff, 0x00ff010100010100, 0x00ff0101010000ff,
+ 0x00ff010101010000, 0x0000ffffffffff00, 0x0000ffffffff00ff, 0x0000ffffffff0000,
+ 0x0000ffffffff0001, 0x0000ffffffff0100, 0x0000ffffff00ff01, 0x0000ffffff000000,
+ 0x0000ffffff000101, 0x0000ffffff01ff00, 0x0000ffffff0100ff, 0x0000ffffff010100,
+ 0x0000ffff00ffffff, 0x0000ffff00ff0000, 0x0000ffff00ff01ff, 0x0000ffff0000ff00,
+ 0x0000ffff000000ff, 0x0000ffff00000000, 0x0000ffff00000001, 0x0000ffff00000100,
+ 0x0000ffff00010000, 0x0000ffff000101ff, 0x0000ffff01ff0001, 0x0000ffff01ff0100,
+ 0x0000ffff01000000, 0x0000ffff010001ff, 0x0000ffff0101ffff, 0x0000ffff0101ff00,
+ 0x0000ffff01010001, 0x0000ffff01010100, 0x0000ff00ffff0000, 0x0000ff00ffff01ff,
+ 0x0000ff00ffff0100, 0x0000ff00ffff0101, 0x0000ff00ff00ff00, 0x0000ff00ff0000ff,
+ 0x0000ff00ff000000, 0x0000ff00ff000001, 0x0000ff00ff0001ff, 0x0000ff00ff000100,
+ 0x0000ff00ff01ffff, 0x0000ff00ff010000, 0x0000ff00ff010001, 0x0000ff00ff0101ff,
+ 0x0000ff00ff010101, 0x0000ff0000ffff00, 0x0000ff0000ff00ff, 0x0000ff0000ff0000,
+ 0x0000ff0000ff0001, 0x0000ff0000ff0100, 0x0000ff000000ffff, 0x0000ff000000ff00,
+ 0x0000ff000000ff01, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
+ 0x0000ff00000001ff, 0x0000ff0000000100, 0x0000ff0000000101, 0x0000ff000001ff00,
+ 0x0000ff00000100ff, 0x0000ff0000010000, 0x0000ff0000010001, 0x0000ff0000010100,
+ 0x0000ff0001ffff01, 0x0000ff0001ff0000, 0x0000ff000100ff00, 0x0000ff00010000ff,
+ 0x0000ff0001000000, 0x0000ff0001000001, 0x0000ff0001000100, 0x0000ff000101ffff,
+ 0x0000ff0001010000, 0x0000ff0001010101, 0x0000ff01ffffff00, 0x0000ff01ffff0001,
+ 0x0000ff01ff00ff01, 0x0000ff01ff000000, 0x0000ff01ff000101, 0x0000ff01ff01ff00,
+ 0x0000ff01ff0100ff, 0x0000ff0100ffff01, 0x0000ff0100ff0000, 0x0000ff0100ff0101,
+ 0x0000ff010000ff00, 0x0000ff01000000ff, 0x0000ff0100000000, 0x0000ff0100000001,
+ 0x0000ff0100000100, 0x0000ff010001ff01, 0x0000ff0100010000, 0x0000ff0101ff0000,
+ 0x0000ff010100ffff, 0x0000ff010100ff01, 0x0000ff0101000000, 0x0000ff0101000100,
+ 0x0000ff0101000101, 0x0000ff01010100ff, 0x000000ffffff00ff, 0x000000ffffff0000,
+ 0x000000ffff00ff00, 0x000000ffff0000ff, 0x000000ffff000000, 0x000000ffff000001,
+ 0x000000ffff0001ff, 0x000000ffff000100, 0x000000ffff01ff00, 0x000000ffff010000,
+ 0x000000ffff0101ff, 0x000000ffff010101, 0x000000ff00ffff00, 0x000000ff00ff00ff,
+ 0x000000ff00ff0000, 0x000000ff00ff0001, 0x000000ff00ff0100, 0x000000ff00ff0101,
+ 0x000000ff0000ffff, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
+ 0x000000ff00000001, 0x000000ff000001ff, 0x000000ff00000100, 0x000000ff00000101,
+ 0x000000ff0001ff00, 0x000000ff0001ff01, 0x000000ff000100ff, 0x000000ff00010000,
+ 0x000000ff00010001, 0x000000ff00010100, 0x000000ff01ffffff, 0x000000ff01ff01ff,
+ 0x000000ff01ff0101, 0x000000ff0100ff00, 0x000000ff010000ff, 0x000000ff01000000,
+ 0x000000ff01000001, 0x000000ff01000100, 0x000000ff0101ff00, 0x000000ff010100ff,
+ 0x000000ff01010000, 0x000000ff01010101, 0x00000000ffffff00, 0x00000000ffffff01,
+ 0x00000000ffff00ff, 0x00000000ffff0000, 0x00000000ffff0001, 0x00000000ffff0100,
+ 0x00000000ff00ffff, 0x00000000ff00ff00, 0x00000000ff00ff01, 0x00000000ff0000ff,
+ 0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff000101,
+ 0x00000000ff01ff00, 0x00000000ff0100ff, 0x00000000ff010000, 0x00000000ff010001,
+ 0x00000000ff010100, 0x0000000000ffffff, 0x0000000000ffff00, 0x0000000000ffff01,
+ 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001, 0x0000000000ff01ff,
+ 0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
+ 0x00000000000000ff, 0x0000000000000000, 0x0000000000000001, 0x00000000000001ff,
+ 0x0000000000000100, 0x0000000000000101, 0x000000000001ffff, 0x000000000001ff00,
+ 0x00000000000100ff, 0x0000000000010000, 0x0000000000010001, 0x00000000000101ff,
+ 0x0000000000010100, 0x0000000000010101, 0x0000000001ffff00, 0x0000000001ff00ff,
+ 0x0000000001ff0000, 0x0000000001ff0100, 0x0000000001ff0101, 0x000000000100ffff,
+ 0x000000000100ff00, 0x00000000010000ff, 0x0000000001000000, 0x0000000001000001,
+ 0x00000000010001ff, 0x0000000001000100, 0x000000000101ff00, 0x00000000010100ff,
+ 0x0000000001010000, 0x0000000001010001, 0x0000000001010100, 0x00000001ffffffff,
+ 0x00000001ffffff00, 0x00000001ffffff01, 0x00000001ffff00ff, 0x00000001ffff0001,
+ 0x00000001ffff01ff, 0x00000001ffff0100, 0x00000001ff00ff00, 0x00000001ff0000ff,
+ 0x00000001ff000000, 0x00000001ff0001ff, 0x00000001ff000100, 0x00000001ff01ffff,
+ 0x00000001ff01ff00, 0x00000001ff01ff01, 0x00000001ff0100ff, 0x00000001ff010000,
+ 0x00000001ff010001, 0x00000001ff0101ff, 0x00000001ff010100, 0x0000000100ffff00,
+ 0x0000000100ff0000, 0x0000000100ff0001, 0x0000000100ff01ff, 0x0000000100ff0100,
+ 0x0000000100ff0101, 0x000000010000ffff, 0x000000010000ff00, 0x000000010000ff01,
+ 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001, 0x00000001000001ff,
+ 0x0000000100000100, 0x0000000100000101, 0x000000010001ff00, 0x00000001000100ff,
+ 0x0000000100010000, 0x0000000100010100, 0x0000000101ffff01, 0x0000000101ff0000,
+ 0x0000000101ff0001, 0x0000000101ff01ff, 0x0000000101ff0100, 0x0000000101ff0101,
+ 0x000000010100ff00, 0x0000000101000000, 0x0000000101000101, 0x000000010101ff01,
+ 0x0000000101010000, 0x0000000101010001, 0x00000001010101ff, 0x0000000101010100,
+ 0x000001ffffff00ff, 0x000001ffffff0000, 0x000001ffffff0001, 0x000001ffffff0100,
+ 0x000001ffff00ffff, 0x000001ffff000000, 0x000001ffff0001ff, 0x000001ffff01ff00,
+ 0x000001ffff010101, 0x000001ff00ff0000, 0x000001ff00ff01ff, 0x000001ff00ff0101,
+ 0x000001ff0000ff00, 0x000001ff000000ff, 0x000001ff00000000, 0x000001ff00000001,
+ 0x000001ff000001ff, 0x000001ff00000100, 0x000001ff0001ffff, 0x000001ff0001ff01,
+ 0x000001ff000100ff, 0x000001ff00010000, 0x000001ff01ffff01, 0x000001ff01ff0100,
+ 0x000001ff0100ffff, 0x000001ff0100ff01, 0x000001ff01000000, 0x000001ff010001ff,
+ 0x000001ff0101ff00, 0x000001ff01010100, 0x00000100ffffff00, 0x00000100ffffff01,
+ 0x00000100ffff0000, 0x00000100ffff0101, 0x00000100ff00ff00, 0x00000100ff0000ff,
+ 0x00000100ff000000, 0x00000100ff000001, 0x00000100ff000100, 0x00000100ff010000,
+ 0x0000010000ffff00, 0x0000010000ff00ff, 0x0000010000ff0000, 0x0000010000ff0001,
+ 0x0000010000ff0100, 0x000001000000ffff, 0x000001000000ff00, 0x000001000000ff01,
+ 0x00000100000000ff, 0x0000010000000000, 0x0000010000000001, 0x00000100000001ff,
+ 0x0000010000000100, 0x0000010000000101, 0x000001000001ff00, 0x00000100000100ff,
+ 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010001ffff00,
+ 0x0000010001ff0000, 0x0000010001ff0100, 0x000001000100ff00, 0x00000100010000ff,
+ 0x0000010001000000, 0x0000010001000001, 0x00000100010001ff, 0x0000010001000100,
+ 0x0000010001010000, 0x00000101ffff00ff, 0x00000101ffff01ff, 0x00000101ff000000,
+ 0x00000101ff000101, 0x00000101ff01ffff, 0x00000101ff010000, 0x00000101ff010001,
+ 0x00000101ff010100, 0x0000010100ff0000, 0x0000010100ff01ff, 0x0000010100ff0100,
+ 0x000001010000ff00, 0x0000010100000000, 0x0000010100000001, 0x00000101000001ff,
+ 0x0000010100000100, 0x000001010001ff01, 0x0000010100010000, 0x00000101000101ff,
+ 0x0000010100010101, 0x0000010101ffff00, 0x0000010101ff0101, 0x000001010100ff01,
+ 0x0000010101000000, 0x0000010101000001, 0x00000101010001ff, 0x0000010101000101,
+ 0x000001010101ff00, 0x0001ffffffff0000, 0x0001ffffff0000ff, 0x0001ffffff000001,
+ 0x0001ffffff000100, 0x0001ffffff010000, 0x0001ffff00ff00ff, 0x0001ffff0000ffff,
+ 0x0001ffff00000000, 0x0001ffff00000001, 0x0001ffff000001ff, 0x0001ffff00000101,
+ 0x0001ffff0001ff00, 0x0001ffff000100ff, 0x0001ffff00010001, 0x0001ffff00010100,
+ 0x0001ffff01ffff00, 0x0001ffff01000001, 0x0001ffff01010000, 0x0001ff00ffffff00,
+ 0x0001ff00ffff00ff, 0x0001ff00ffff0001, 0x0001ff00ffff0100, 0x0001ff00ff00ff01,
+ 0x0001ff00ff000000, 0x0001ff00ff01ff00, 0x0001ff00ff01ff01, 0x0001ff00ff010001,
+ 0x0001ff00ff010100, 0x0001ff0000ff0000, 0x0001ff0000ff0100, 0x0001ff000000ff00,
+ 0x0001ff0000000000, 0x0001ff0000000001, 0x0001ff0000000100, 0x0001ff0000010000,
+ 0x0001ff0000010001, 0x0001ff0000010101, 0x0001ff0001ff00ff, 0x0001ff0001ff0101,
+ 0x0001ff000100ff01, 0x0001ff0001000000, 0x0001ff000101ff00, 0x0001ff0001010001,
+ 0x0001ff0001010100, 0x0001ff01ff00ff00, 0x0001ff01ff000001, 0x0001ff01ff000100,
+ 0x0001ff0100ffffff, 0x0001ff0100ffff00, 0x0001ff0100ff0001, 0x0001ff0100000000,
+ 0x0001ff0100000001, 0x0001ff01000001ff, 0x0001ff010001ffff, 0x0001ff0101ff0000,
+ 0x0001ff010100ff00, 0x0001ff0101000001, 0x0001ff0101010000, 0x000100ffff00ff00,
+ 0x000100ffff00ff01, 0x000100ffff000000, 0x000100ffff000001, 0x000100ffff000101,
+ 0x000100ffff01ff00, 0x000100ffff010001, 0x000100ffff010100, 0x000100ff00ffffff,
+ 0x000100ff00ffff01, 0x000100ff00ff0000, 0x000100ff00ff01ff, 0x000100ff00ff0101,
+ 0x000100ff0000ff00, 0x000100ff000000ff, 0x000100ff00000000, 0x000100ff00000001,
+ 0x000100ff00000100, 0x000100ff00000101, 0x000100ff0001ffff, 0x000100ff0001ff01,
+ 0x000100ff00010000, 0x000100ff01ff00ff, 0x000100ff01ff0000, 0x000100ff01ff0100,
+ 0x000100ff0100ffff, 0x000100ff0100ff01, 0x000100ff010000ff, 0x000100ff01000000,
+ 0x000100ff01000001, 0x000100ff010001ff, 0x000100ff01000101, 0x000100ff0101ff00,
+ 0x000100ff010100ff, 0x000100ff01010100, 0x00010000ffff0000, 0x00010000ffff01ff,
+ 0x00010000ffff0101, 0x00010000ff00ff00, 0x00010000ff000000, 0x00010000ff000001,
+ 0x00010000ff000100, 0x0001000000ff00ff, 0x0001000000ff0000, 0x0001000000ff0001,
+ 0x0001000000ff0100, 0x000100000000ffff, 0x000100000000ff00, 0x00010000000000ff,
+ 0x0001000000000000, 0x0001000000000001, 0x0001000000000100, 0x000100000001ff00,
+ 0x00010000000100ff, 0x0001000000010000, 0x0001000000010001, 0x0001000000010100,
+ 0x0001000001ff0001, 0x0001000001ff0100, 0x0001000001ff0101, 0x000100000100ff00,
+ 0x0001000001000000, 0x0001000001000001, 0x0001000001000100, 0x0001000001000101,
+ 0x000100000101ff01, 0x0001000001010000, 0x0001000001010001, 0x00010000010101ff,
+ 0x00010001ffffff01, 0x00010001ffff0100, 0x00010001ff000000, 0x00010001ff01ffff,
+ 0x00010001ff010001, 0x00010001ff0101ff, 0x00010001ff010100, 0x0001000100ffffff,
+ 0x0001000100ff0000, 0x0001000100ff01ff, 0x0001000100ff0101, 0x000100010000ff00,
+ 0x00010001000000ff, 0x0001000100000000, 0x0001000100000001, 0x00010001000001ff,
+ 0x0001000100000101, 0x000100010001ffff, 0x0001000100010000, 0x00010001000101ff,
+ 0x0001000101ffffff, 0x0001000101ffff01, 0x0001000101ff0000, 0x0001000101ff0101,
+ 0x00010001010000ff, 0x0001000101000001, 0x00010001010001ff, 0x0001000101000100,
+ 0x000100010101ffff, 0x00010001010100ff, 0x0001000101010001, 0x0001000101010101,
+ 0x000101ffff000001, 0x000101ffff000100, 0x000101ffff010000, 0x000101ff00ffff00,
+ 0x000101ff0000ff01, 0x000101ff00000000, 0x000101ff00000101, 0x000101ff0001ff00,
+ 0x000101ff00010100, 0x000101ff01ff0000, 0x000101ff0100ff00, 0x000101ff010001ff,
+ 0x000101ff01010001, 0x00010100ffffff00, 0x00010100ffff00ff, 0x00010100ff00ffff,
+ 0x00010100ff000000, 0x00010100ff01ff00, 0x00010100ff0100ff, 0x00010100ff010001,
+ 0x00010100ff010100, 0x0001010000ffffff, 0x0001010000ffff00, 0x0001010000ff0000,
+ 0x0001010000ff0001, 0x0001010000ff01ff, 0x000101000000ff00, 0x00010100000000ff,
+ 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x000101000001ffff,
+ 0x0001010000010000, 0x0001010000010101, 0x0001010001ffff01, 0x0001010001ff00ff,
+ 0x0001010001ff0101, 0x0001010001000000, 0x000101000101ff00, 0x00010100010100ff,
+ 0x0001010001010000, 0x0001010001010100, 0x00010101ff00ff00, 0x00010101ff000001,
+ 0x00010101ff0001ff, 0x0001010100ffff00, 0x0001010100ff00ff, 0x0001010100ff0100,
+ 0x000101010000ffff, 0x0001010100000000, 0x00010101000001ff, 0x0001010100000101,
+ 0x00010101000100ff, 0x0001010100010000, 0x0001010100010100, 0x0001010101ff0001,
+ 0x00010101010000ff, 0x00010101010001ff, 0x0001010101000101, 0x0001010101010001,
+ 0x01ffffffffffffff, 0x01ffffffffffff01, 0x01ffffffffff01ff, 0x01ffffffffff0101,
+ 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff, 0x01ffffffff010101,
+ 0x01ffffff00ff0000, 0x01ffffff0000ffff, 0x01ffffff0000ff00, 0x01ffffff000000ff,
+ 0x01ffffff00000001, 0x01ffffff00000100, 0x01ffffff00010000, 0x01ffffff01ffffff,
+ 0x01ffffff01ffff01, 0x01ffffff01ff01ff, 0x01ffffff01ff0101, 0x01ffffff01000000,
+ 0x01ffffff0101ffff, 0x01ffffff0101ff01, 0x01ffffff010101ff, 0x01ffffff01010101,
+ 0x01ffff00ffff0000, 0x01ffff00ff00ff00, 0x01ffff00ff0000ff, 0x01ffff00ff000001,
+ 0x01ffff00ff000100, 0x01ffff00ff010000, 0x01ffff0000ffff00, 0x01ffff0000ff00ff,
+ 0x01ffff0000ff0100, 0x01ffff000000ffff, 0x01ffff000000ff01, 0x01ffff0000000000,
+ 0x01ffff0000000001, 0x01ffff00000001ff, 0x01ffff0000000100, 0x01ffff00000100ff,
+ 0x01ffff0000010001, 0x01ffff0000010100, 0x01ffff0001ff0000, 0x01ffff0001ff0100,
+ 0x01ffff00010000ff, 0x01ffff0001000001, 0x01ffff0001000100, 0x01ffff0001010000,
+ 0x01ffff01ffffffff, 0x01ffff01ffffff01, 0x01ffff01ffff01ff, 0x01ffff01ffff0101,
+ 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff01ff01, 0x01ffff01ff0101ff,
+ 0x01ffff01ff010101, 0x01ffff010000ff00, 0x01ffff01000000ff, 0x01ffff0100000100,
+ 0x01ffff0100010000, 0x01ffff0101ffffff, 0x01ffff0101ffff01, 0x01ffff0101ff01ff,
+ 0x01ffff0101ff0101, 0x01ffff0101000000, 0x01ffff010101ffff, 0x01ffff010101ff01,
+ 0x01ffff01010101ff, 0x01ffff0101010101, 0x01ff00ffff0000ff, 0x01ff00ffff000100,
+ 0x01ff00ff00ffff00, 0x01ff00ff00ff00ff, 0x01ff00ff0000ff00, 0x01ff00ff00000000,
+ 0x01ff00ff00000101, 0x01ff00ff0001ff00, 0x01ff00ff000100ff, 0x01ff00ff00010100,
+ 0x01ff00ff010000ff, 0x01ff00ff01000100, 0x01ff0000ffffff00, 0x01ff0000ffff0100,
+ 0x01ff0000ff00ff01, 0x01ff0000ff000000, 0x01ff0000ff000101, 0x01ff0000ff010001,
+ 0x01ff0000ff010100, 0x01ff000000ffffff, 0x01ff000000ffff00, 0x01ff000000ff0000,
+ 0x01ff000000ff01ff, 0x01ff00000000ff00, 0x01ff0000000000ff, 0x01ff000000000000,
+ 0x01ff000000000001, 0x01ff000000000100, 0x01ff000000000101, 0x01ff000000010000,
+ 0x01ff000000010001, 0x01ff0000000101ff, 0x01ff000000010101, 0x01ff000001ffff00,
+ 0x01ff000001ff00ff, 0x01ff000001ff0001, 0x01ff000001ff0100, 0x01ff00000100ffff,
+ 0x01ff00000100ff01, 0x01ff000001000000, 0x01ff0000010001ff, 0x01ff000001010001,
+ 0x01ff0001ff00ff00, 0x01ff0001ff000001, 0x01ff0001ff000100, 0x01ff0001ff010000,
+ 0x01ff000100ffff00, 0x01ff000100ff00ff, 0x01ff000100ff0100, 0x01ff000100ff0101,
+ 0x01ff00010000ffff, 0x01ff000100000000, 0x01ff000100000100, 0x01ff000100000101,
+ 0x01ff00010001ff00, 0x01ff000100010001, 0x01ff000100010101, 0x01ff000101ff0000,
+ 0x01ff00010100ff00, 0x01ff000101000101, 0x01ff0001010100ff, 0x01ff01ffffffffff,
+ 0x01ff01ffffffff01, 0x01ff01ffffff01ff, 0x01ff01ffffff0101, 0x01ff01ffff000000,
+ 0x01ff01ffff01ffff, 0x01ff01ffff01ff01, 0x01ff01ffff0101ff, 0x01ff01ffff010101,
+ 0x01ff01ff00ffff00, 0x01ff01ff00ff0000, 0x01ff01ff0000ff00, 0x01ff01ff000000ff,
+ 0x01ff01ff00000100, 0x01ff01ff00010000, 0x01ff01ff00010100, 0x01ff01ff01ffffff,
+ 0x01ff01ff01ffff01, 0x01ff01ff01ff01ff, 0x01ff01ff01ff0101, 0x01ff01ff01000000,
+ 0x01ff01ff0101ffff, 0x01ff01ff0101ff01, 0x01ff01ff010101ff, 0x01ff01ff01010101,
+ 0x01ff0100ffff0000, 0x01ff0100ffff0001, 0x01ff0100ff00ff00, 0x01ff0100ff0000ff,
+ 0x01ff0100ff000001, 0x01ff0100ff010000, 0x01ff010000ffff00, 0x01ff010000ff00ff,
+ 0x01ff010000ff0001, 0x01ff010000ff0100, 0x01ff01000000ffff, 0x01ff01000000ff01,
+ 0x01ff010000000000, 0x01ff010000000101, 0x01ff01000001ff00, 0x01ff0100000100ff,
+ 0x01ff010001ff0000, 0x01ff010001000001, 0x01ff010001000100, 0x01ff010001010000,
+ 0x01ff0101ffffffff, 0x01ff0101ffffff01, 0x01ff0101ffff01ff, 0x01ff0101ffff0101,
+ 0x01ff0101ff000000, 0x01ff0101ff01ffff, 0x01ff0101ff01ff01, 0x01ff0101ff0101ff,
+ 0x01ff0101ff010101, 0x01ff010100ff0000, 0x01ff01010000ff00, 0x01ff0101000000ff,
+ 0x01ff010100000001, 0x01ff010101ffffff, 0x01ff010101ffff01, 0x01ff010101ff01ff,
+ 0x01ff010101ff0101, 0x01ff010101000000, 0x01ff01010101ffff, 0x01ff01010101ff01,
+ 0x01ff0101010101ff, 0x01ff010101010101, 0x0100ffffffff0000, 0x0100ffffff00ff00,
+ 0x0100ffffff000001, 0x0100ffffff0001ff, 0x0100ffffff000100, 0x0100ffffff010000,
+ 0x0100ffff00ffff00, 0x0100ffff00ff0001, 0x0100ffff00ff0100, 0x0100ffff00000000,
+ 0x0100ffff000001ff, 0x0100ffff00000101, 0x0100ffff00010100, 0x0100ffff00010101,
+ 0x0100ffff01ff0000, 0x0100ffff0100ff00, 0x0100ffff010000ff, 0x0100ffff01000001,
+ 0x0100ffff01000100, 0x0100ffff01010000, 0x0100ff00ffffff00, 0x0100ff00ffff00ff,
+ 0x0100ff00ffff0001, 0x0100ff00ffff0100, 0x0100ff00ff00ffff, 0x0100ff00ff000000,
+ 0x0100ff00ff0001ff, 0x0100ff00ff000101, 0x0100ff00ff01ff00, 0x0100ff00ff0100ff,
+ 0x0100ff00ff010001, 0x0100ff00ff010100, 0x0100ff0000ffffff, 0x0100ff0000ff0000,
+ 0x0100ff000000ffff, 0x0100ff000000ff00, 0x0100ff00000000ff, 0x0100ff0000000000,
+ 0x0100ff0000000001, 0x0100ff0000000100, 0x0100ff000001ff01, 0x0100ff0000010000,
+ 0x0100ff0001ff00ff, 0x0100ff0001ff0001, 0x0100ff000100ff01, 0x0100ff0001000000,
+ 0x0100ff00010001ff, 0x0100ff000101ff00, 0x0100ff00010100ff, 0x0100ff0001010001,
+ 0x0100ff0001010100, 0x0100ff01ffff0000, 0x0100ff01ff00ff00, 0x0100ff01ff0000ff,
+ 0x0100ff01ff000100, 0x0100ff01ff010000, 0x0100ff0100ff00ff, 0x0100ff0100ff0001,
+ 0x0100ff0100ff0100, 0x0100ff010000ffff, 0x0100ff010000ff01, 0x0100ff0100000000,
+ 0x0100ff01000001ff, 0x0100ff0100010001, 0x0100ff0100010100, 0x0100ff0101ff0000,
+ 0x0100ff01010000ff, 0x0100ff0101000001, 0x0100ff0101010100, 0x010000ffffffff00,
+ 0x010000ffffff00ff, 0x010000ffffff0001, 0x010000ffff00ffff, 0x010000ffff000000,
+ 0x010000ffff0001ff, 0x010000ffff010001, 0x010000ff00ffffff, 0x010000ff00ff0101,
+ 0x010000ff0000ff00, 0x010000ff000000ff, 0x010000ff00000000, 0x010000ff00000001,
+ 0x010000ff000001ff, 0x010000ff00000100, 0x010000ff0001ffff, 0x010000ff0001ff00,
+ 0x010000ff0001ff01, 0x010000ff00010000, 0x010000ff01ff00ff, 0x010000ff01ff0001,
+ 0x010000ff0100ff01, 0x010000ff010000ff, 0x010000ff01000000, 0x010000ff010001ff,
+ 0x010000ff0101ff00, 0x010000ff01010100, 0x01000000ffffffff, 0x01000000ffff0000,
+ 0x01000000ffff01ff, 0x01000000ffff0101, 0x01000000ff00ffff, 0x01000000ff00ff00,
+ 0x01000000ff0000ff, 0x01000000ff000000, 0x01000000ff000001, 0x01000000ff000100,
+ 0x01000000ff01ff00, 0x01000000ff010000, 0x01000000ff010100, 0x01000000ff010101,
+ 0x0100000000ffff00, 0x0100000000ff00ff, 0x0100000000ff0000, 0x0100000000ff0001,
+ 0x0100000000ff0100, 0x010000000000ffff, 0x010000000000ff00, 0x010000000000ff01,
+ 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001, 0x01000000000001ff,
+ 0x0100000000000100, 0x0100000000000101, 0x010000000001ff00, 0x01000000000100ff,
+ 0x0100000000010000, 0x0100000000010001, 0x0100000000010100, 0x0100000001ffff00,
+ 0x0100000001ff0000, 0x0100000001ff01ff, 0x010000000100ff00, 0x010000000100ff01,
+ 0x01000000010000ff, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100,
+ 0x0100000001000101, 0x010000000101ffff, 0x010000000101ff01, 0x0100000001010000,
+ 0x01000000010101ff, 0x0100000001010101, 0x01000001ffffff00, 0x01000001ffff00ff,
+ 0x01000001ff00ffff, 0x01000001ff000000, 0x01000001ff000100, 0x01000001ff01ffff,
+ 0x01000001ff010001, 0x01000001ff010100, 0x0100000100ff0000, 0x0100000100ff01ff,
+ 0x0100000100ff0100, 0x010000010000ff00, 0x010000010000ff01, 0x0100000100000000,
+ 0x0100000100000001, 0x0100000100000100, 0x0100000100010000, 0x01000001000101ff,
+ 0x0100000101ffff01, 0x0100000101ff00ff, 0x0100000101ff0100, 0x0100000101ff0101,
+ 0x010000010100ff01, 0x01000001010000ff, 0x0100000101000000, 0x01000001010100ff,
+ 0x0100000101010001, 0x0100000101010100, 0x010001ffffff0000, 0x010001ffff000001,
+ 0x010001ffff000100, 0x010001ffff010000, 0x010001ff00ffff00, 0x010001ff00ff0001,
+ 0x010001ff0000ffff, 0x010001ff0000ff01, 0x010001ff00000000, 0x010001ff00000001,
+ 0x010001ff00000101, 0x010001ff000100ff, 0x010001ff00010000, 0x010001ff01ff0000,
+ 0x010001ff0100ff00, 0x010001ff01000001, 0x010001ff01000100, 0x010001ff01010000,
+ 0x01000100ffff00ff, 0x01000100ffff0001, 0x01000100ffff0100, 0x01000100ff00ffff,
+ 0x01000100ff00ff01, 0x01000100ff000000, 0x01000100ff0001ff, 0x01000100ff000101,
+ 0x01000100ff01ffff, 0x01000100ff01ff00, 0x01000100ff0100ff, 0x01000100ff010001,
+ 0x0100010000ffffff, 0x0100010000ffff01, 0x0100010000ff0000, 0x0100010000ff01ff,
+ 0x0100010000ff0101, 0x010001000000ff00, 0x01000100000000ff, 0x0100010000000000,
+ 0x0100010000000001, 0x0100010000000100, 0x010001000001ff01, 0x0100010000010000,
+ 0x0100010000010001, 0x0100010000010101, 0x0100010001ffff00, 0x0100010001ff00ff,
+ 0x010001000100ffff, 0x010001000100ff01, 0x0100010001000000, 0x0100010001000101,
+ 0x010001000101ff00, 0x0100010001010001, 0x01000101ffff0000, 0x01000101ff000000,
+ 0x01000101ff010000, 0x0100010100ff00ff, 0x0100010100ff0001, 0x0100010100ff0100,
+ 0x010001010000ffff, 0x0100010100000000, 0x01000101000001ff, 0x010001010001ff00,
+ 0x0100010101ff0000, 0x010001010100ff00, 0x01000101010000ff, 0x0100010101000000,
+ 0x0100010101000001, 0x0101ffffffffffff, 0x0101ffffffffff01, 0x0101ffffffff01ff,
+ 0x0101ffffffff0101, 0x0101ffffff000000, 0x0101ffffff01ffff, 0x0101ffffff01ff01,
+ 0x0101ffffff0101ff, 0x0101ffffff010101, 0x0101ffff00ff0000, 0x0101ffff0000ff00,
+ 0x0101ffff000000ff, 0x0101ffff00000001, 0x0101ffff00000100, 0x0101ffff01ffffff,
+ 0x0101ffff01ffff01, 0x0101ffff01ff01ff, 0x0101ffff01ff0101, 0x0101ffff01000000,
+ 0x0101ffff0101ffff, 0x0101ffff0101ff01, 0x0101ffff010101ff, 0x0101ffff01010101,
+ 0x0101ff00ffff0000, 0x0101ff00ffff0100, 0x0101ff00ff00ff00, 0x0101ff00ff0000ff,
+ 0x0101ff00ff000001, 0x0101ff00ff000100, 0x0101ff00ff000101, 0x0101ff0000ff0001,
+ 0x0101ff0000ff0100, 0x0101ff000000ff00, 0x0101ff0000000000, 0x0101ff00000001ff,
+ 0x0101ff0000000101, 0x0101ff000001ff00, 0x0101ff00000100ff, 0x0101ff0001ff0000,
+ 0x0101ff000100ffff, 0x0101ff000100ff01, 0x0101ff0001000001, 0x0101ff0001000100,
+ 0x0101ff01ffffff01, 0x0101ff01ffff01ff, 0x0101ff01ffff0101, 0x0101ff01ff00ffff,
+ 0x0101ff01ff000100, 0x0101ff01ff01ff01, 0x0101ff01ff0101ff, 0x0101ff01ff010101,
+ 0x0101ff0100ff0000, 0x0101ff010000ff00, 0x0101ff0100000001, 0x0101ff0100000100,
+ 0x0101ff0100010000, 0x0101ff0101ffffff, 0x0101ff0101ffff01, 0x0101ff0101ff01ff,
+ 0x0101ff0101ff0101, 0x0101ff0101000000, 0x0101ff010101ffff, 0x0101ff010101ff01,
+ 0x0101ff01010101ff, 0x0101ff0101010101, 0x010100ffff000100, 0x010100ffff010000,
+ 0x010100ff00ffff00, 0x010100ff00ff00ff, 0x010100ff0000ffff, 0x010100ff000000ff,
+ 0x010100ff00000000, 0x010100ff000001ff, 0x010100ff00000101, 0x010100ff0001ff00,
+ 0x010100ff00010000, 0x010100ff00010001, 0x010100ff000101ff, 0x010100ff00010100,
+ 0x010100ff01ff0000, 0x01010000ffff0001, 0x01010000ffff0100, 0x01010000ff00ffff,
+ 0x01010000ff00ff01, 0x01010000ff000000, 0x01010000ff0001ff, 0x01010000ff010001,
+ 0x01010000ff010100, 0x0101000000ffff01, 0x0101000000ff0000, 0x010100000000ff00,
+ 0x01010000000000ff, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100,
+ 0x0101000000010000, 0x0101000000010101, 0x0101000001ffff00, 0x0101000001ff00ff,
+ 0x0101000001ff0000, 0x0101000001ff0001, 0x0101000001ff0100, 0x010100000100ff01,
+ 0x0101000001000000, 0x01010000010001ff, 0x01010001ffff0000, 0x01010001ff00ff00,
+ 0x01010001ff000001, 0x01010001ff000101, 0x01010001ff01ff00, 0x01010001ff010000,
+ 0x0101000100ff00ff, 0x0101000100ff0001, 0x0101000100ff0101, 0x010100010000ff01,
+ 0x0101000100000000, 0x0101000100000001, 0x01010001000001ff, 0x010100010001ffff,
+ 0x010100010001ff01, 0x0101000101ff0001, 0x010100010100ffff, 0x0101000101000000,
+ 0x0101000101000001, 0x0101000101000100, 0x010100010101ff00, 0x01010001010100ff,
+ 0x0101000101010001, 0x010101ffffffffff, 0x010101ffffffff01, 0x010101ffffff01ff,
+ 0x010101ffffff0101, 0x010101ffff01ffff, 0x010101ffff01ff01, 0x010101ffff0101ff,
+ 0x010101ffff010101, 0x010101ff0000ff00, 0x010101ff000000ff, 0x010101ff00000001,
+ 0x010101ff00000100, 0x010101ff01ffffff, 0x010101ff01ffff01, 0x010101ff01ff01ff,
+ 0x010101ff01ff0101, 0x010101ff01000000, 0x010101ff0101ffff, 0x010101ff0101ff01,
+ 0x010101ff010101ff, 0x010101ff01010101, 0x01010100ffff0000, 0x01010100ff0000ff,
+ 0x01010100ff000100, 0x01010100ff01ff00, 0x01010100ff010000, 0x0101010000ffff00,
+ 0x010101000000ffff, 0x0101010000000000, 0x0101010000000101, 0x010101000001ff00,
+ 0x0101010000010001, 0x0101010000010100, 0x010101000100ffff, 0x0101010001000001,
+ 0x01010101ffffffff, 0x01010101ffffff01, 0x01010101ffff01ff, 0x01010101ffff0101,
+ 0x01010101ff01ffff, 0x01010101ff01ff01, 0x01010101ff0101ff, 0x01010101ff010101,
+ 0x010101010000ff00, 0x01010101000000ff, 0x0101010100000001, 0x0101010101ffffff,
+ 0x0101010101ffff01, 0x0101010101ff01ff, 0x0101010101ff0101, 0x0101010101000000,
+ 0x010101010101ffff, 0x010101010101ff01, 0x01010101010101ff, 0x0101010101010101,
+GGML_TABLE_END()
+#else
+GGML_TABLE_BEGIN(uint32_t, iq1s_grid_gpu, NGRID_IQ1S)
+ 0x00000000, 0x00000002, 0x00000101, 0x00000200, 0x00000202, 0x00010001, 0x00010101, 0x00020000,
+ 0x00020002, 0x00020200, 0x00020202, 0x01000101, 0x01010001, 0x01010100, 0x01010102, 0x01020101,
+ 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02010101, 0x02020000, 0x02020002, 0x02020200,
+ 0x02020202, 0x00000110, 0x00000111, 0x00010011, 0x00010110, 0x00010112, 0x00010211, 0x00010212,
+ 0x00020111, 0x01000011, 0x01000112, 0x01000211, 0x01010012, 0x01010111, 0x01010212, 0x01020011,
+ 0x01020110, 0x01020112, 0x01020210, 0x02000111, 0x02010011, 0x02010110, 0x02010112, 0x02020111,
+ 0x00000020, 0x00000022, 0x00000220, 0x00000222, 0x00010121, 0x00020020, 0x00020022, 0x00020220,
+ 0x00020222, 0x01000121, 0x01010021, 0x01010221, 0x01020120, 0x01020221, 0x02000020, 0x02000022,
+ 0x02000220, 0x02000222, 0x02010021, 0x02010121, 0x02010221, 0x02020020, 0x02020022, 0x02020220,
+ 0x02020222, 0x00011001, 0x00011100, 0x00011102, 0x00021101, 0x01001001, 0x01001201, 0x01011101,
+ 0x01011202, 0x01021100, 0x01021101, 0x02011001, 0x02011201, 0x02021101, 0x00001011, 0x00001110,
+ 0x00001111, 0x00001112, 0x00011111, 0x00011210, 0x00011212, 0x00021211, 0x01001010, 0x01001111,
+ 0x01001212, 0x01011010, 0x01011011, 0x01011110, 0x01011111, 0x01011112, 0x01011211, 0x01021010,
+ 0x01021012, 0x01021111, 0x01021210, 0x01021212, 0x02001011, 0x02011011, 0x02011111, 0x02011210,
+ 0x02011212, 0x02021011, 0x02021110, 0x02021111, 0x02021112, 0x02021211, 0x00011120, 0x00011221,
+ 0x01001021, 0x01001120, 0x01011020, 0x01011022, 0x01011121, 0x01011220, 0x01021020, 0x01021021,
+ 0x01021122, 0x01021221, 0x02001121, 0x02011021, 0x02011120, 0x02011221, 0x00002000, 0x00002002,
+ 0x00002200, 0x00002202, 0x00012101, 0x00022000, 0x00022002, 0x00022200, 0x00022202, 0x01002101,
+ 0x01012001, 0x01012102, 0x01022101, 0x02002000, 0x02002002, 0x02002200, 0x02002202, 0x02012101,
+ 0x02022000, 0x02022002, 0x02022200, 0x02022202, 0x00002111, 0x00012011, 0x00012110, 0x00012211,
+ 0x00022110, 0x00022111, 0x01002011, 0x01012010, 0x01012011, 0x01012111, 0x01022011, 0x01022110,
+ 0x01022211, 0x02012011, 0x02012110, 0x02012112, 0x02012211, 0x02022111, 0x00002020, 0x00002022,
+ 0x00002220, 0x00002222, 0x00012121, 0x00022020, 0x00022022, 0x00022220, 0x00022222, 0x01002121,
+ 0x01012021, 0x01012221, 0x01022021, 0x01022121, 0x02002020, 0x02002022, 0x02002121, 0x02002220,
+ 0x02002222, 0x02012121, 0x02022020, 0x02022022, 0x02022220, 0x02022222, 0x00110000, 0x00110001,
+ 0x00110100, 0x00110201, 0x00120100, 0x00120101, 0x01100001, 0x01100100, 0x01110000, 0x01110101,
+ 0x01110200, 0x01120001, 0x01120100, 0x01120101, 0x01120201, 0x02110001, 0x02110100, 0x02110102,
+ 0x02120001, 0x02120101, 0x00100011, 0x00100110, 0x00100112, 0x00100211, 0x00110010, 0x00110012,
+ 0x00110111, 0x00110210, 0x00120011, 0x00120110, 0x00120211, 0x01100111, 0x01100212, 0x01110010,
+ 0x01110011, 0x01110012, 0x01110110, 0x01110111, 0x01110112, 0x01110211, 0x01120010, 0x01120111,
+ 0x02100110, 0x02110012, 0x02110111, 0x02120011, 0x02120110, 0x00110021, 0x00110120, 0x00110122,
+ 0x00120121, 0x01100020, 0x01100122, 0x01100221, 0x01110022, 0x01110121, 0x01110220, 0x01110222,
+ 0x01120120, 0x01120122, 0x02100121, 0x02110021, 0x02110120, 0x02110122, 0x02120121, 0x00101001,
+ 0x00101102, 0x00101201, 0x00111100, 0x00111101, 0x00111200, 0x00111201, 0x00121001, 0x00121102,
+ 0x01101001, 0x01101101, 0x01101102, 0x01101200, 0x01101202, 0x01111001, 0x01111100, 0x01111101,
+ 0x01111102, 0x01111201, 0x01121002, 0x01121101, 0x01121200, 0x02101100, 0x02101201, 0x02111000,
+ 0x02111100, 0x02111101, 0x02111200, 0x02111201, 0x02111202, 0x02121001, 0x02121100, 0x02121101,
+ 0x02121201, 0x00101012, 0x00101111, 0x00101212, 0x00111011, 0x00111110, 0x00111111, 0x00111112,
+ 0x00111211, 0x00121010, 0x00121012, 0x00121111, 0x00121210, 0x00121212, 0x01101011, 0x01101110,
+ 0x01101111, 0x01101112, 0x01111011, 0x01111012, 0x01111110, 0x01111111, 0x01111112, 0x01111211,
+ 0x01111212, 0x01121011, 0x01121110, 0x01121111, 0x01121112, 0x01121211, 0x02101010, 0x02101012,
+ 0x02101110, 0x02101111, 0x02101210, 0x02101212, 0x02111010, 0x02111011, 0x02111110, 0x02111111,
+ 0x02111112, 0x02111211, 0x02111212, 0x02121010, 0x02121012, 0x02121111, 0x00101021, 0x00101120,
+ 0x00101121, 0x00101122, 0x00111121, 0x00111122, 0x00111220, 0x00111222, 0x00121021, 0x00121122,
+ 0x01101020, 0x01101022, 0x01101120, 0x01101121, 0x01101220, 0x01101222, 0x01111021, 0x01111121,
+ 0x01111122, 0x01111220, 0x01111221, 0x01121021, 0x01121120, 0x01121121, 0x01121220, 0x01121221,
+ 0x01121222, 0x02101122, 0x02101222, 0x02111022, 0x02111121, 0x02121120, 0x02121221, 0x00112001,
+ 0x00112102, 0x00122101, 0x01102001, 0x01102100, 0x01102102, 0x01102201, 0x01112000, 0x01112101,
+ 0x01112200, 0x01112202, 0x01122000, 0x01122001, 0x01122100, 0x01122102, 0x01122201, 0x02102101,
+ 0x02112001, 0x02112100, 0x02122101, 0x00112010, 0x00112012, 0x00112111, 0x00112212, 0x00122011,
+ 0x00122111, 0x01102012, 0x01102110, 0x01102111, 0x01102210, 0x01112011, 0x01112110, 0x01112111,
+ 0x01112112, 0x01112211, 0x01112212, 0x01122010, 0x01122111, 0x01122212, 0x02102211, 0x02112011,
+ 0x02112012, 0x02112111, 0x02112210, 0x02122011, 0x02122112, 0x02122211, 0x00102221, 0x00112122,
+ 0x00122120, 0x00122122, 0x01102120, 0x01102122, 0x01102221, 0x01112020, 0x01112022, 0x01112121,
+ 0x01112220, 0x01122021, 0x01122122, 0x01122221, 0x02102121, 0x02112021, 0x02112122, 0x02112222,
+ 0x00200000, 0x00200002, 0x00200200, 0x00200202, 0x00210101, 0x00220000, 0x00220002, 0x00220101,
+ 0x00220200, 0x00220202, 0x01200101, 0x01210001, 0x01210201, 0x01220001, 0x01220101, 0x02200000,
+ 0x02200002, 0x02200200, 0x02200202, 0x02210101, 0x02220000, 0x02220002, 0x02220101, 0x02220200,
+ 0x02220202, 0x00200111, 0x00210011, 0x00210110, 0x00210211, 0x00220111, 0x01200012, 0x01200110,
+ 0x01200211, 0x01210111, 0x01210210, 0x01210212, 0x01220011, 0x01220110, 0x01220111, 0x01220112,
+ 0x02200111, 0x02210010, 0x02210112, 0x02210211, 0x02220111, 0x00200021, 0x00200220, 0x00200222,
+ 0x00210021, 0x00210121, 0x00220020, 0x00220022, 0x00220220, 0x00220222, 0x01200121, 0x01210021,
+ 0x01210122, 0x01210221, 0x01220121, 0x02200021, 0x02200220, 0x02200222, 0x02210021, 0x02210121,
+ 0x02220020, 0x02220022, 0x02220220, 0x02220222, 0x00201101, 0x00211100, 0x00211102, 0x00211201,
+ 0x00221101, 0x01201100, 0x01201101, 0x01201102, 0x01201201, 0x01211002, 0x01211101, 0x01211200,
+ 0x01211202, 0x01221102, 0x02201101, 0x02211001, 0x02211100, 0x02211201, 0x02221001, 0x02221101,
+ 0x00201211, 0x00211111, 0x00221011, 0x00221211, 0x01201010, 0x01201111, 0x01201210, 0x01211011,
+ 0x01211110, 0x01211111, 0x01211211, 0x01221012, 0x01221111, 0x01221210, 0x02201211, 0x02211010,
+ 0x02211110, 0x02211111, 0x02211210, 0x02211212, 0x02221011, 0x02221110, 0x02221112, 0x02221211,
+ 0x00201121, 0x00211020, 0x00211022, 0x00211221, 0x00221121, 0x01201021, 0x01201221, 0x01211121,
+ 0x01221020, 0x01221021, 0x01221221, 0x02201120, 0x02201122, 0x02211020, 0x02211222, 0x00202000,
+ 0x00202002, 0x00202200, 0x00202202, 0x00212101, 0x00222000, 0x00222002, 0x00222200, 0x00222202,
+ 0x01202101, 0x01212001, 0x01212100, 0x01222101, 0x02202000, 0x02202002, 0x02202200, 0x02202202,
+ 0x02222000, 0x02222002, 0x02222200, 0x02222202, 0x00202211, 0x00212011, 0x00212110, 0x00212211,
+ 0x00222111, 0x01202112, 0x01202211, 0x01212012, 0x01212111, 0x01222011, 0x01222110, 0x01222112,
+ 0x01222211, 0x02202111, 0x02212010, 0x02212112, 0x02212211, 0x02222110, 0x02222111, 0x00202020,
+ 0x00202022, 0x00202220, 0x00202222, 0x00222020, 0x00222022, 0x00222220, 0x00222222, 0x01202121,
+ 0x01212021, 0x01212122, 0x01212221, 0x01222121, 0x02202020, 0x02202022, 0x02202220, 0x02202222,
+ 0x02212121, 0x02222020, 0x02222022, 0x02222220, 0x02222222, 0x10000101, 0x10010001, 0x10010102,
+ 0x10020101, 0x11000201, 0x11010002, 0x11010101, 0x11010200, 0x11010202, 0x11020001, 0x11020100,
+ 0x11020102, 0x12010100, 0x12010201, 0x12020001, 0x12020102, 0x10000010, 0x10000011, 0x10000110,
+ 0x10000112, 0x10000211, 0x10010012, 0x10010111, 0x10010112, 0x10010210, 0x10010212, 0x10020011,
+ 0x10020112, 0x10020211, 0x11000111, 0x11000210, 0x11000212, 0x11010011, 0x11010110, 0x11010111,
+ 0x11010112, 0x11010211, 0x11010212, 0x11020111, 0x11020210, 0x11020212, 0x12000011, 0x12000110,
+ 0x12000112, 0x12010010, 0x12010012, 0x12010111, 0x12020010, 0x12020011, 0x12020012, 0x10000121,
+ 0x10010021, 0x10010120, 0x10010122, 0x10020121, 0x11000021, 0x11010022, 0x11010121, 0x11010222,
+ 0x11020120, 0x11020221, 0x12000221, 0x12010120, 0x12020121, 0x10001001, 0x10011101, 0x10011201,
+ 0x10021201, 0x11001101, 0x11001200, 0x11001202, 0x11011001, 0x11011100, 0x11011101, 0x11011102,
+ 0x11021001, 0x11021002, 0x11021101, 0x11021200, 0x11021202, 0x12001001, 0x12001102, 0x12001201,
+ 0x12011000, 0x12011002, 0x12011101, 0x12021000, 0x12021001, 0x12021201, 0x10001011, 0x10001012,
+ 0x10001111, 0x10001212, 0x10011011, 0x10011110, 0x10011111, 0x10011112, 0x10011211, 0x10021010,
+ 0x10021111, 0x10021212, 0x11001011, 0x11001110, 0x11001111, 0x11001112, 0x11001211, 0x11011010,
+ 0x11011011, 0x11011110, 0x11011111, 0x11011112, 0x11011210, 0x11011211, 0x11021011, 0x11021110,
+ 0x11021111, 0x11021112, 0x11021211, 0x12001012, 0x12001110, 0x12001111, 0x12001210, 0x12011011,
+ 0x12011110, 0x12011111, 0x12011112, 0x12011211, 0x12011212, 0x12021111, 0x12021210, 0x12021212,
+ 0x10001021, 0x10001121, 0x10001221, 0x10011120, 0x10011121, 0x10011220, 0x10011222, 0x10021021,
+ 0x10021120, 0x10021221, 0x11001020, 0x11001022, 0x11001121, 0x11001220, 0x11011020, 0x11011021,
+ 0x11011022, 0x11011121, 0x11011122, 0x11011221, 0x11021022, 0x11021121, 0x11021220, 0x12001021,
+ 0x12001121, 0x12001222, 0x12011120, 0x12011121, 0x12021021, 0x12021120, 0x12021122, 0x10002101,
+ 0x10012001, 0x10012101, 0x10012202, 0x10022101, 0x11002002, 0x11002201, 0x11012000, 0x11012101,
+ 0x11012200, 0x11022001, 0x11022100, 0x11022102, 0x11022201, 0x12002101, 0x12012001, 0x12012100,
+ 0x12012102, 0x12012201, 0x12022101, 0x10002011, 0x10002111, 0x10002112, 0x10002212, 0x10012010,
+ 0x10012110, 0x10012111, 0x10012210, 0x10022011, 0x10022110, 0x10022112, 0x11002010, 0x11002111,
+ 0x11002212, 0x11012011, 0x11012012, 0x11012110, 0x11012111, 0x11012112, 0x11012211, 0x11022010,
+ 0x11022012, 0x11022111, 0x11022112, 0x11022212, 0x12002112, 0x12002211, 0x12012012, 0x12012111,
+ 0x12012112, 0x12012210, 0x12022011, 0x12022110, 0x12022112, 0x12022211, 0x10012122, 0x11002120,
+ 0x11002122, 0x11002221, 0x11012121, 0x11012220, 0x11012222, 0x11022120, 0x11022221, 0x12012120,
+ 0x12022121, 0x10100001, 0x10100100, 0x10100101, 0x10100102, 0x10100201, 0x10110002, 0x10110101,
+ 0x10110202, 0x10120001, 0x10120100, 0x10120201, 0x11100000, 0x11100101, 0x11100200, 0x11110001,
+ 0x11110100, 0x11110101, 0x11110102, 0x11110201, 0x11120101, 0x11120200, 0x12100102, 0x12100201,
+ 0x12110101, 0x12110200, 0x12120000, 0x12120001, 0x12120102, 0x12120201, 0x10100111, 0x10100210,
+ 0x10100211, 0x10100212, 0x10110011, 0x10110110, 0x10110111, 0x10110112, 0x10110210, 0x10110211,
+ 0x10120010, 0x10120111, 0x10120112, 0x10120210, 0x10120212, 0x11100011, 0x11100110, 0x11100111,
+ 0x11100112, 0x11100211, 0x11110010, 0x11110011, 0x11110012, 0x11110110, 0x11110111, 0x11110112,
+ 0x11110210, 0x11110211, 0x11110212, 0x11120011, 0x11120110, 0x11120111, 0x11120112, 0x11120211,
+ 0x12100012, 0x12100111, 0x12110011, 0x12110110, 0x12110111, 0x12110112, 0x12110211, 0x12120010,
+ 0x12120111, 0x12120212, 0x10100021, 0x10100122, 0x10110022, 0x10110121, 0x10110222, 0x10120021,
+ 0x10120120, 0x11100022, 0x11100121, 0x11100222, 0x11110021, 0x11110120, 0x11110121, 0x11110122,
+ 0x11110221, 0x11120022, 0x11120121, 0x12100121, 0x12110020, 0x12110022, 0x12110121, 0x12110221,
+ 0x12110222, 0x12120120, 0x10101100, 0x10101101, 0x10111001, 0x10111100, 0x10111101, 0x10111102,
+ 0x10111200, 0x10111201, 0x10121001, 0x10121101, 0x10121200, 0x10121202, 0x11101001, 0x11101100,
+ 0x11101101, 0x11101102, 0x11101201, 0x11101202, 0x11111000, 0x11111001, 0x11111100, 0x11111101,
+ 0x11111102, 0x11111200, 0x11111201, 0x11111202, 0x11121001, 0x11121002, 0x11121100, 0x11121101,
+ 0x11121102, 0x11121201, 0x12101000, 0x12101200, 0x12101202, 0x12111001, 0x12111100, 0x12111101,
+ 0x12111102, 0x12111201, 0x12121001, 0x12121100, 0x12121101, 0x12121202, 0x10101011, 0x10101012,
+ 0x10101110, 0x10101111, 0x10101112, 0x10101211, 0x10111010, 0x10111011, 0x10111012, 0x10111110,
+ 0x10111111, 0x10111112, 0x10111211, 0x10111212, 0x10121011, 0x10121110, 0x10121111, 0x10121112,
+ 0x10121211, 0x11101010, 0x11101011, 0x11101012, 0x11101110, 0x11101111, 0x11101112, 0x11101210,
+ 0x11101211, 0x11111010, 0x11111011, 0x11111012, 0x11111110, 0x11111111, 0x11111112, 0x11111210,
+ 0x11111211, 0x11111212, 0x11121010, 0x11121011, 0x11121110, 0x11121111, 0x11121112, 0x11121210,
+ 0x11121211, 0x11121212, 0x12101011, 0x12101110, 0x12101111, 0x12101211, 0x12101212, 0x12111010,
+ 0x12111011, 0x12111110, 0x12111111, 0x12111112, 0x12111210, 0x12111211, 0x12121011, 0x12121110,
+ 0x12121111, 0x12121112, 0x12121211, 0x10101020, 0x10101021, 0x10101022, 0x10101120, 0x10101122,
+ 0x10101220, 0x10101221, 0x10111021, 0x10111120, 0x10111121, 0x10111220, 0x10111221, 0x10121020,
+ 0x10121021, 0x10121022, 0x10121120, 0x10121121, 0x10121122, 0x10121220, 0x10121221, 0x11101021,
+ 0x11101121, 0x11101122, 0x11101220, 0x11101221, 0x11101222, 0x11111020, 0x11111021, 0x11111022,
+ 0x11111120, 0x11111121, 0x11111122, 0x11111220, 0x11111221, 0x11111222, 0x11121021, 0x11121120,
+ 0x11121121, 0x11121221, 0x12101022, 0x12101121, 0x12101122, 0x12101220, 0x12101221, 0x12101222,
+ 0x12111021, 0x12111121, 0x12111222, 0x12121022, 0x12121121, 0x12121122, 0x12121220, 0x12121221,
+ 0x10102100, 0x10102101, 0x10102102, 0x10102201, 0x10112000, 0x10112101, 0x10112200, 0x10122001,
+ 0x10122202, 0x11102101, 0x11102200, 0x11102202, 0x11112001, 0x11112100, 0x11112101, 0x11112102,
+ 0x11112200, 0x11112201, 0x11122000, 0x11122002, 0x11122100, 0x11122101, 0x12102002, 0x12102201,
+ 0x12112000, 0x12112002, 0x12112101, 0x12112200, 0x12122001, 0x12122201, 0x10102011, 0x10102012,
+ 0x10102111, 0x10102212, 0x10112011, 0x10112110, 0x10112111, 0x10112112, 0x10112211, 0x10122111,
+ 0x11102011, 0x11102110, 0x11102111, 0x11102112, 0x11102211, 0x11112010, 0x11112011, 0x11112012,
+ 0x11112110, 0x11112111, 0x11112112, 0x11112210, 0x11112211, 0x11112212, 0x11122011, 0x11122110,
+ 0x11122111, 0x11122112, 0x11122211, 0x12102011, 0x12102111, 0x12102211, 0x12112011, 0x12112110,
+ 0x12112111, 0x12112112, 0x12112210, 0x12112211, 0x12122111, 0x10102120, 0x10102220, 0x10112121,
+ 0x10112222, 0x10122020, 0x10122121, 0x10122122, 0x10122221, 0x11102121, 0x11102220, 0x11102221,
+ 0x11112021, 0x11112121, 0x11112122, 0x11112220, 0x11112221, 0x11122022, 0x11122121, 0x11122220,
+ 0x11122222, 0x12102021, 0x12102222, 0x12112022, 0x12112121, 0x12112122, 0x12112220, 0x12112222,
+ 0x12122021, 0x10200101, 0x10210100, 0x10210102, 0x10210201, 0x10220101, 0x11200100, 0x11210000,
+ 0x11210101, 0x11210102, 0x11210200, 0x11210202, 0x11220001, 0x11220100, 0x11220102, 0x11220201,
+ 0x12200001, 0x12210102, 0x12220101, 0x10200011, 0x10200110, 0x10200112, 0x10200211, 0x10210012,
+ 0x10210111, 0x10220011, 0x10220012, 0x10220112, 0x10220211, 0x11200111, 0x11200211, 0x11210011,
+ 0x11210111, 0x11210112, 0x11210211, 0x11220111, 0x11220112, 0x11220212, 0x12200110, 0x12200212,
+ 0x12210012, 0x12210111, 0x12220011, 0x12220112, 0x12220211, 0x10210021, 0x10210122, 0x10210221,
+ 0x11200020, 0x11200021, 0x11200122, 0x11210121, 0x11210122, 0x11210220, 0x11220020, 0x12200121,
+ 0x12210021, 0x12210122, 0x12220121, 0x10211001, 0x10211002, 0x10211101, 0x10211102, 0x10211202,
+ 0x10221001, 0x10221102, 0x10221201, 0x11201000, 0x11201002, 0x11201101, 0x11201200, 0x11201202,
+ 0x11211001, 0x11211100, 0x11211101, 0x11211102, 0x11211201, 0x11211202, 0x11221000, 0x11221002,
+ 0x11221101, 0x12201100, 0x12201101, 0x12201201, 0x12211000, 0x12211002, 0x12211100, 0x12211101,
+ 0x12211102, 0x12211200, 0x12211202, 0x12221001, 0x12221100, 0x12221201, 0x10201111, 0x10201210,
+ 0x10201212, 0x10211011, 0x10211111, 0x10211112, 0x10211211, 0x11201110, 0x11201111, 0x11201112,
+ 0x11201211, 0x11211010, 0x11211011, 0x11211110, 0x11211111, 0x11211112, 0x11211211, 0x11221011,
+ 0x11221110, 0x11221111, 0x11221112, 0x11221211, 0x12201112, 0x12201211, 0x12201212, 0x12211011,
+ 0x12211111, 0x12211112, 0x12211211, 0x12211212, 0x12221012, 0x12221111, 0x12221112, 0x12221210,
+ 0x10201022, 0x10201221, 0x10211121, 0x10221020, 0x10221122, 0x10221220, 0x10221221, 0x11201020,
+ 0x11201121, 0x11201220, 0x11201222, 0x11211021, 0x11211120, 0x11211121, 0x11211122, 0x11211220,
+ 0x11211222, 0x11221020, 0x11221121, 0x11221220, 0x12201020, 0x12201022, 0x12201121, 0x12201222,
+ 0x12211120, 0x12211122, 0x12211220, 0x12211221, 0x12221020, 0x12221120, 0x12221122, 0x12221222,
+ 0x10212102, 0x10212201, 0x10222101, 0x11202001, 0x11212002, 0x11212101, 0x11212202, 0x11222001,
+ 0x11222201, 0x12202101, 0x12212001, 0x12212200, 0x12222102, 0x10202011, 0x10202110, 0x10212010,
+ 0x10212111, 0x10222011, 0x10222110, 0x10222112, 0x10222211, 0x11202010, 0x11202011, 0x11202111,
+ 0x11202112, 0x11202210, 0x11212011, 0x11212110, 0x11212111, 0x11212112, 0x11212211, 0x11222010,
+ 0x11222111, 0x11222212, 0x12202012, 0x12202110, 0x12202212, 0x12212111, 0x12222011, 0x12222110,
+ 0x12222111, 0x12222211, 0x10212021, 0x10212122, 0x10212220, 0x11202021, 0x11202120, 0x11202221,
+ 0x11212020, 0x11212121, 0x11212220, 0x11212222, 0x11222120, 0x11222121, 0x11222221, 0x12202122,
+ 0x12212120, 0x12212220, 0x12212222, 0x12222122, 0x20000000, 0x20000002, 0x20000200, 0x20000202,
+ 0x20020000, 0x20020002, 0x20020200, 0x20020202, 0x21000101, 0x21010000, 0x21010001, 0x21010100,
+ 0x21010102, 0x21010201, 0x21020101, 0x22000000, 0x22000002, 0x22000200, 0x22000202, 0x22010101,
+ 0x22020000, 0x22020002, 0x22020200, 0x22020202, 0x20000111, 0x20010011, 0x20010110, 0x20010112,
+ 0x20010211, 0x20020111, 0x21000011, 0x21000110, 0x21000211, 0x21010010, 0x21010012, 0x21010111,
+ 0x21010112, 0x21010210, 0x21010211, 0x21020110, 0x21020112, 0x21020211, 0x22000111, 0x22000211,
+ 0x22010110, 0x22010112, 0x22010211, 0x22020111, 0x20000020, 0x20000022, 0x20000220, 0x20000222,
+ 0x20010121, 0x20020020, 0x20020022, 0x20020220, 0x20020222, 0x21010021, 0x21010120, 0x21010221,
+ 0x21020121, 0x22000020, 0x22000022, 0x22000220, 0x22000222, 0x22010121, 0x22020020, 0x22020022,
+ 0x22020220, 0x22020222, 0x20011100, 0x20011201, 0x21001001, 0x21001100, 0x21011001, 0x21011101,
+ 0x21011202, 0x21021001, 0x21021100, 0x21021201, 0x22011100, 0x22011201, 0x20001011, 0x20001211,
+ 0x20011012, 0x20011111, 0x20011212, 0x20021112, 0x20021211, 0x21001010, 0x21001011, 0x21001111,
+ 0x21001210, 0x21011011, 0x21011110, 0x21011111, 0x21011112, 0x21011211, 0x21011212, 0x21021111,
+ 0x21021112, 0x21021210, 0x21021212, 0x22001011, 0x22001110, 0x22001112, 0x22001211, 0x22011010,
+ 0x22011012, 0x22011111, 0x22011210, 0x22021112, 0x20011021, 0x20011122, 0x20011221, 0x20021121,
+ 0x21001021, 0x21001120, 0x21001221, 0x21001222, 0x21011020, 0x21011121, 0x21011221, 0x21011222,
+ 0x21021021, 0x21021122, 0x21021222, 0x22001121, 0x22011021, 0x22011222, 0x22021120, 0x20002000,
+ 0x20002002, 0x20002200, 0x20002202, 0x20012101, 0x20022000, 0x20022002, 0x20022200, 0x20022202,
+ 0x21002001, 0x21002101, 0x21012001, 0x21012100, 0x21012201, 0x21022101, 0x21022201, 0x22002000,
+ 0x22002002, 0x22002200, 0x22002202, 0x22012101, 0x22022000, 0x22022002, 0x22022200, 0x22022202,
+ 0x20002111, 0x20002112, 0x20012011, 0x20012110, 0x20012112, 0x20022111, 0x21002011, 0x21002110,
+ 0x21002112, 0x21002211, 0x21012010, 0x21012012, 0x21012111, 0x21012212, 0x21022011, 0x21022110,
+ 0x22002111, 0x22012112, 0x22012211, 0x22022111, 0x20002020, 0x20002022, 0x20002220, 0x20002222,
+ 0x20012121, 0x20022020, 0x20022022, 0x20022220, 0x20022222, 0x21002121, 0x21012021, 0x21012120,
+ 0x21012122, 0x22002020, 0x22002022, 0x22002220, 0x22002222, 0x22012121, 0x22022020, 0x22022022,
+ 0x22022220, 0x22022222, 0x20100101, 0x20110001, 0x20110102, 0x20110200, 0x20110201, 0x20120101,
+ 0x21100001, 0x21100102, 0x21100201, 0x21110101, 0x21110200, 0x21110202, 0x21120201, 0x21120202,
+ 0x22100101, 0x22110001, 0x22110100, 0x22110102, 0x22110201, 0x22120101, 0x20100011, 0x20100110,
+ 0x20100112, 0x20100211, 0x20110010, 0x20110111, 0x20110210, 0x20110212, 0x20120011, 0x20120110,
+ 0x20120112, 0x20120211, 0x21100010, 0x21100111, 0x21110010, 0x21110011, 0x21110110, 0x21110111,
+ 0x21110112, 0x21110211, 0x21120012, 0x21120111, 0x22100110, 0x22100112, 0x22110012, 0x22110111,
+ 0x22110210, 0x22120011, 0x22120110, 0x22120112, 0x22120211, 0x20100121, 0x20110021, 0x20110120,
+ 0x20110221, 0x20120121, 0x21100120, 0x21100122, 0x21100221, 0x21110020, 0x21110022, 0x21110121,
+ 0x21110220, 0x21120122, 0x21120221, 0x22100121, 0x22110120, 0x22110122, 0x22120221, 0x20101001,
+ 0x20101100, 0x20101102, 0x20111000, 0x20111101, 0x20111200, 0x20121102, 0x21101000, 0x21101202,
+ 0x21111001, 0x21111100, 0x21111101, 0x21111102, 0x21111200, 0x21111201, 0x21121000, 0x21121001,
+ 0x21121002, 0x21121101, 0x22101100, 0x22101102, 0x22111002, 0x22111100, 0x22111101, 0x22111200,
+ 0x22121001, 0x22121201, 0x20101010, 0x20101111, 0x20101210, 0x20101212, 0x20111010, 0x20111011,
+ 0x20111110, 0x20111111, 0x20111112, 0x20111211, 0x20121011, 0x20121111, 0x20121211, 0x20121212,
+ 0x21101011, 0x21101110, 0x21101111, 0x21101112, 0x21101211, 0x21111010, 0x21111011, 0x21111012,
+ 0x21111110, 0x21111111, 0x21111112, 0x21111210, 0x21111211, 0x21111212, 0x21121011, 0x21121110,
+ 0x21121111, 0x21121112, 0x21121211, 0x22101011, 0x22101111, 0x22101210, 0x22111011, 0x22111012,
+ 0x22111110, 0x22111111, 0x22111112, 0x22111211, 0x22111212, 0x22121010, 0x22121012, 0x22121111,
+ 0x22121210, 0x22121212, 0x20101021, 0x20101120, 0x20111020, 0x20111121, 0x20111221, 0x20121020,
+ 0x20121122, 0x20121221, 0x21101121, 0x21101220, 0x21101221, 0x21111021, 0x21111022, 0x21111121,
+ 0x21111122, 0x21111221, 0x21121121, 0x21121220, 0x22101022, 0x22101120, 0x22101221, 0x22101222,
+ 0x22111022, 0x22111120, 0x22111121, 0x22121120, 0x22121122, 0x22121221, 0x20102101, 0x20112102,
+ 0x20112201, 0x20122101, 0x21102001, 0x21102102, 0x21112000, 0x21112002, 0x21112101, 0x21112102,
+ 0x21112202, 0x21122100, 0x21122101, 0x22102101, 0x22112001, 0x22112102, 0x22112201, 0x22122101,
+ 0x20102110, 0x20102112, 0x20102211, 0x20112010, 0x20112012, 0x20112111, 0x20112210, 0x20112212,
+ 0x20122010, 0x20122011, 0x20122110, 0x20122112, 0x21102010, 0x21102012, 0x21102111, 0x21102210,
+ 0x21102212, 0x21112011, 0x21112110, 0x21112111, 0x21112112, 0x21112211, 0x21122012, 0x21122111,
+ 0x21122112, 0x21122212, 0x22102011, 0x22102110, 0x22112010, 0x22112012, 0x22112111, 0x22112212,
+ 0x22122011, 0x22122112, 0x20102121, 0x20112121, 0x20122121, 0x21102120, 0x21102122, 0x21102221,
+ 0x21112020, 0x21112121, 0x21112220, 0x21122021, 0x22102121, 0x22112021, 0x22112120, 0x22112121,
+ 0x22112122, 0x20200000, 0x20200002, 0x20200200, 0x20200202, 0x20210101, 0x20220000, 0x20220002,
+ 0x20220200, 0x20220202, 0x21200101, 0x21210001, 0x21210100, 0x21210102, 0x21210201, 0x22200000,
+ 0x22200002, 0x22200200, 0x22200202, 0x22210101, 0x22220000, 0x22220002, 0x22220200, 0x22220202,
+ 0x20200111, 0x20200211, 0x20210011, 0x20210110, 0x20210112, 0x20210211, 0x20210212, 0x21200112,
+ 0x21200211, 0x21210011, 0x21210111, 0x21210210, 0x21210212, 0x21220011, 0x21220110, 0x22200111,
+ 0x22210010, 0x22210012, 0x22210112, 0x22210211, 0x20200022, 0x20200220, 0x20200222, 0x20210020,
+ 0x20210221, 0x20220022, 0x20220220, 0x20220222, 0x21200121, 0x21210021, 0x21210122, 0x21210221,
+ 0x21220121, 0x22200020, 0x22200022, 0x22200220, 0x22200222, 0x22210121, 0x22220020, 0x22220022,
+ 0x22220220, 0x22220222, 0x20211201, 0x20221101, 0x21201001, 0x21201100, 0x21211000, 0x21211100,
+ 0x21211101, 0x21211200, 0x21211202, 0x21221001, 0x21221101, 0x21221102, 0x21221200, 0x21221201,
+ 0x22201101, 0x20201112, 0x20201211, 0x20211010, 0x20211012, 0x20211111, 0x20211210, 0x20221112,
+ 0x20221211, 0x21201012, 0x21201111, 0x21211011, 0x21211110, 0x21211111, 0x21211112, 0x21211211,
+ 0x21221111, 0x21221212, 0x22201011, 0x22201110, 0x22201111, 0x22201112, 0x22201211, 0x22211012,
+ 0x22211111, 0x22211210, 0x20201121, 0x20211021, 0x20211122, 0x20211222, 0x20221021, 0x20221121,
+ 0x21201120, 0x21201122, 0x21201222, 0x21211022, 0x21211121, 0x21211122, 0x21211220, 0x21221020,
+ 0x21221022, 0x22201122, 0x22211020, 0x22211121, 0x22211122, 0x22211221, 0x22221021, 0x22221120,
+ 0x22221122, 0x20202000, 0x20202002, 0x20202200, 0x20202202, 0x20222000, 0x20222002, 0x20222200,
+ 0x20222202, 0x21212001, 0x21212100, 0x21212102, 0x21212201, 0x22202000, 0x22202002, 0x22202200,
+ 0x22202202, 0x22212101, 0x22222000, 0x22222002, 0x22222200, 0x22222202, 0x20202111, 0x20212110,
+ 0x20212211, 0x20222011, 0x20222111, 0x21202011, 0x21212010, 0x21212111, 0x21212212, 0x21222011,
+ 0x21222112, 0x21222211, 0x22212010, 0x22212112, 0x20202020, 0x20202022, 0x20202220, 0x20202222,
+ 0x20222020, 0x20222022, 0x20222220, 0x20222222, 0x21212021, 0x21212120, 0x21212122, 0x22202020,
+ 0x22202022, 0x22202220, 0x22202222, 0x22212121, 0x22222020, 0x22222022, 0x22222220, 0x22222222,
+GGML_TABLE_END()
+#endif
+
+#endif // GGML_COMMON_IMPL
+#endif // GGML_COMMON_IMPL
diff --git a/llama.cpp/ggml-cuda.cu b/llama.cpp/ggml-cuda.cu
index eaefb83d26..3a86b3b5f0 100644
--- a/llama.cpp/ggml-cuda.cu
+++ b/llama.cpp/ggml-cuda.cu
@@ -1,151 +1,103 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
-// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
#include
-#include
+#include
+#include
#include
+#include
+#include
#include
#include
#include
+#include
#include
#include
+#include