diff --git a/README.md b/README.md
index 9eadb71b..6eef0eda 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ We recommend everyone to use the hosted version at [stract.com](https://stract.c
# 💼 License
-Stract is offered under the terms defined under the [LICENSE.md](LICENSE.md) file.
+Stract is offered under the terms defined in the [LICENSE.md](LICENSE.md) file, unless otherwise specified in the relevant subdirectory.
# 📬 Contact
diff --git a/assets/licenses.html b/assets/licenses.html
index ab81f847..91c35754 100644
--- a/assets/licenses.html
+++ b/assets/licenses.html
@@ -45,8 +45,8 @@
MIT License
+
+Copyright (c) 2024 Stract ApS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/crates/web-spell/Cargo.toml b/crates/web-spell/Cargo.toml
index c4b1f4ca..97913681 100644
--- a/crates/web-spell/Cargo.toml
+++ b/crates/web-spell/Cargo.toml
@@ -1,6 +1,6 @@
[package]
edition = "2021"
-license = "AGPL-3.0"
+license = "MIT"
name = "web-spell"
version = "0.1.0"
diff --git a/crates/web-spell/LICENSE b/crates/web-spell/LICENSE
new file mode 100644
index 00000000..409e7e2c
--- /dev/null
+++ b/crates/web-spell/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Stract ApS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/crates/web-spell/README.md b/crates/web-spell/README.md
index be6b40a2..1ac10707 100644
--- a/crates/web-spell/README.md
+++ b/crates/web-spell/README.md
@@ -1,13 +1,9 @@
# Web Spell
-Automatic spelling correction from web data. It is based on the paper
+Automatic spelling correction from web data. It is roughly based on the paper
[Using the Web for Language Independent Spellchecking and
Autocorrection](http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36180.pdf)
 from Google.
-## Usage
-```rust
-let checker = SpellChecker::open("", CorrectionConfig::default()).unwrap();
-let correction = checker.correct("hwllo", Lang::Eng);
-assert_eq!(correction.unwrap().terms, vec![CorrectionTerm::Corrected { orig: "hwllo".to_string(), correction: "hello".to_string() }]);
-```
+## License
+Web spell is licensed under the MIT license. See the [LICENSE](LICENSE) file for details.
\ No newline at end of file
diff --git a/crates/web-spell/src/config.rs b/crates/web-spell/src/config.rs
index 0a34217d..d9293a61 100644
--- a/crates/web-spell/src/config.rs
+++ b/crates/web-spell/src/config.rs
@@ -1,19 +1,3 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
fn misspelled_prob() -> f64 {
0.1
}
diff --git a/crates/web-spell/src/error_model.rs b/crates/web-spell/src/error_model.rs
index 5a7fae95..69c2ee5c 100644
--- a/crates/web-spell/src/error_model.rs
+++ b/crates/web-spell/src/error_model.rs
@@ -1,19 +1,3 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
use super::Result;
use std::{
collections::HashMap,
@@ -55,6 +39,7 @@ pub enum ErrorType {
)]
 pub struct ErrorSequence(Vec<ErrorType>);
+/// Return all the possible ways to transform one string into another with a single edit.
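+///
+/// A rough sketch of the expected behaviour (the exact `ErrorType` variants in the returned
+/// sequence depend on the implementation):
+///
+/// ```rust,ignore
+/// assert!(possible_errors("hello", "hello").is_none());
+/// assert!(possible_errors("hwllo", "hello").is_some());
+/// ```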
 pub fn possible_errors(a: &str, b: &str) -> Option<ErrorSequence> {
if a == b {
return None;
@@ -165,6 +150,7 @@ impl From for ErrorModel {
}
}
+/// A model for the probability of an error sequence.
#[derive(Debug)]
pub struct ErrorModel {
     errors: HashMap<ErrorSequence, u64>,
@@ -185,6 +171,7 @@ impl ErrorModel {
}
}
+ /// Save the error model to disk.
     pub fn save<P: AsRef<Path>>(self, path: P) -> Result<()> {
let file = OpenOptions::new()
.write(true)
@@ -199,6 +186,7 @@ impl ErrorModel {
Ok(())
}
+ /// Open the error model from disk.
     pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = OpenOptions::new().read(true).open(path)?;
@@ -209,6 +197,7 @@ impl ErrorModel {
Ok(stored.into())
}
+ /// Add an error sequence to the error model.
pub fn add(&mut self, a: &str, b: &str) {
if let Some(errors) = possible_errors(a, b) {
*self.errors.entry(errors).or_insert(0) += 1;
@@ -216,11 +205,13 @@ impl ErrorModel {
}
}
+ /// Get the probability of an error sequence.
pub fn prob(&self, error: &ErrorSequence) -> f64 {
let count = self.errors.get(error).unwrap_or(&0);
*count as f64 / self.total as f64
}
+ /// Get the log probability of an error sequence.
pub fn log_prob(&self, error: &ErrorSequence) -> f64 {
match self.errors.get(error) {
Some(count) => (*count as f64).log2() - ((self.total + 1) as f64).log2(),
diff --git a/crates/web-spell/src/lib.rs b/crates/web-spell/src/lib.rs
index 54d5bfcb..f61f2b69 100644
--- a/crates/web-spell/src/lib.rs
+++ b/crates/web-spell/src/lib.rs
@@ -1,22 +1,24 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
-//! This module contains the spell checker. It is based on the paper
+//! This module contains the spell checker. It is roughly based on the paper
//! http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36180.pdf
 //! from Google.
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use std::path::Path;
+//! # use web_spell::{CorrectionConfig, SpellChecker, Lang};
+//!
+//! # let path = Path::new("../data/web_spell/checker");
+//!
+//! # if !path.exists() {
+//! # return;
+//! # }
+//!
+//! let checker = SpellChecker::open("path/to/checker", CorrectionConfig::default());
+//! # let checker = SpellChecker::open(path, CorrectionConfig::default());
+//! let correction = checker.unwrap().correct("hwllo", &Lang::Eng);
+//! ```
+
mod config;
mod error_model;
pub mod spell_checker;
@@ -26,6 +28,7 @@ mod trainer;
pub use config::CorrectionConfig;
pub use error_model::ErrorModel;
+pub use spell_checker::Lang;
pub use spell_checker::SpellChecker;
pub use stupid_backoff::StupidBackoff;
pub use term_freqs::TermDict;
@@ -108,6 +111,7 @@ impl From for String {
}
impl Correction {
+ /// Create an empty correction.
pub fn empty(original: String) -> Self {
Self {
original,
@@ -115,10 +119,12 @@ impl Correction {
}
}
+ /// Push a term to the correction.
pub fn push(&mut self, term: CorrectionTerm) {
self.terms.push(term);
}
+    /// Check whether none of the terms were corrected.
pub fn is_all_orig(&self) -> bool {
self.terms
.iter()
@@ -126,6 +132,13 @@ impl Correction {
}
}
+/// Split text into sentence ranges by detecting common sentence boundaries like periods, exclamation marks,
+/// question marks and newlines. Returns a Vec of byte ranges for each detected sentence.
+///
+/// The splitting is optimized for performance and simplicity rather than perfect accuracy. It handles
+/// common cases such as abbreviations, URLs, and ellipses, and trims surrounding whitespace.
+///
+/// Note that this is a heuristic approach and may not handle all edge cases correctly.
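+///
+/// A hypothetical usage sketch (the exact ranges depend on the trimming heuristics):
+///
+/// ```rust,ignore
+/// let text = "Dr. Foo went home. It was late!";
+/// for range in sentence_ranges(text) {
+///     println!("{}", &text[range]);
+/// }
+/// ```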
 pub fn sentence_ranges(text: &str) -> Vec<Range<usize>> {
let skip = ["mr.", "ms.", "dr."];
@@ -178,6 +191,7 @@ pub fn sentence_ranges(text: &str) -> Vec<Range<usize>> {
res
}
+/// Tokenize text into words.
 pub fn tokenize(text: &str) -> Vec<String> {
text.to_lowercase()
.split_whitespace()
@@ -188,11 +202,20 @@ pub fn tokenize(text: &str) -> Vec {
.map(|s| s.to_string())
.collect()
}
-pub struct MergePointer<'a> {
- pub term: String,
- pub value: u64,
- pub stream: fst::map::Stream<'a>,
- pub is_finished: bool,
+
+/// A pointer for merging two term streams.
+struct MergePointer<'a> {
+ /// The current head of the stream.
+ pub(crate) term: String,
+
+ /// The current head value.
+ pub(crate) value: u64,
+
+ /// The stream to merge.
+ pub(crate) stream: fst::map::Stream<'a>,
+
+ /// Whether the stream is finished.
+ pub(crate) is_finished: bool,
}
impl MergePointer<'_> {
@@ -234,6 +257,7 @@ impl PartialEq for MergePointer<'_> {
impl Eq for MergePointer<'_> {}
+/// Get the next character boundary at or after the given index.
fn ceil_char_boundary(str: &str, index: usize) -> usize {
let mut res = index;
diff --git a/crates/web-spell/src/spell_checker.rs b/crates/web-spell/src/spell_checker.rs
index 47399d74..a737e554 100644
--- a/crates/web-spell/src/spell_checker.rs
+++ b/crates/web-spell/src/spell_checker.rs
@@ -1,38 +1,31 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
use super::Result;
use std::{path::Path, str::FromStr};
use fnv::FnvHashMap;
-use whatlang::Lang;
+pub use whatlang::Lang;
use crate::config::CorrectionConfig;
use crate::stupid_backoff::{IntoMiddle, LeftToRight, RightToLeft};
use super::{error_model, Correction, CorrectionTerm, Error, ErrorModel, StupidBackoff, TermDict};
+/// A spell checker for a specific language.
struct LangSpellChecker {
+ /// The term dictionary.
term_dict: TermDict,
+
+ /// The language model.
language_model: StupidBackoff,
+
+ /// Model of typical errors.
error_model: ErrorModel,
+
+ /// The correction configuration.
config: CorrectionConfig,
}
impl LangSpellChecker {
+ /// Open a spell checker for a specific language.
     fn open<P: AsRef<Path>>(path: P, config: CorrectionConfig) -> Result<Self> {
let term_dict = TermDict::open(path.as_ref().join("term_dict"))?;
let language_model = StupidBackoff::open(path.as_ref().join("stupid_backoff"))?;
@@ -46,6 +39,10 @@ impl LangSpellChecker {
})
}
+ /// Get the possible correction candidates for a given term.
+ ///
+    /// The candidates are found by searching the term dictionary within a maximum
+    /// edit distance that grows with the length of the term.
     fn candidates(&self, term: &str) -> Vec<String> {
// one edit for words of
// up to four characters, two edits for up to twelve
@@ -61,6 +58,7 @@ impl LangSpellChecker {
self.term_dict.search(term, max_edit_distance)
}
+ /// Return the log probability of the term given the surrounding context of terms.
fn lm_logprob(&self, term_idx: usize, context: &[String]) -> f64 {
if term_idx == 0 {
let strat = RightToLeft;
@@ -74,6 +72,11 @@ impl LangSpellChecker {
}
}
+ /// Score correction candidates for a given term using a combination of language model and error model probabilities.
+ ///
+ /// Returns the best candidate and its score, or None if no candidates are sufficiently better than the original term.
+ /// The score combines the language model probability of the candidate in context (weighted by config.lm_prob_weight)
+ /// and the error model probability of the transformations needed to get from the original term to the candidate.
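+    ///
+    /// As a rough sketch (the exact combination is in the implementation below):
+    ///
+    /// ```text
+    /// score(candidate) ≈ lm_prob_weight * log P_lm(candidate | context) + log P_err(term -> candidate)
+    /// ```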
fn score_candidates(
&self,
term: &str,
@@ -118,6 +121,7 @@ impl LangSpellChecker {
best_term
}
+    /// Run a single correction pass over the text, correcting one term at a time.
     fn correct_once(&self, text: &str) -> Option<Correction> {
let orig_terms = super::tokenize(text);
let mut terms = orig_terms.clone();
@@ -189,6 +193,7 @@ impl LangSpellChecker {
Some(res)
}
+ /// Correct all terms in a text.
     fn correct(&self, text: &str) -> Option<Correction> {
// TODO:
// sometimes the text should be corrected more than once.
@@ -198,11 +203,24 @@ impl LangSpellChecker {
}
}
+/// The main spell checker for detecting and correcting spelling mistakes.
+///
+/// This is the primary entry point for spell checking functionality. It analyzes text input
+/// and suggests corrections for misspelled words based on statistical models.
+///
+/// The spell checker uses statistical language and error models trained on a large text corpus, such as web data, to detect and correct
+/// spelling mistakes. The correction algorithm is roughly based on the approach described in Google's paper
+/// "Using the Web for Language Independent Spellchecking and Autocorrection".
+///
+/// Use [`SpellChecker::open`] to create a new instance from a model directory containing language-specific models.
+/// Then use [`SpellChecker::correct`] to correct text in a specific language.
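+///
+/// # Example
+///
+/// A minimal sketch mirroring the crate-level example; it is skipped when the bundled
+/// test models are not present.
+///
+/// ```rust
+/// # use std::path::Path;
+/// # use web_spell::{CorrectionConfig, SpellChecker, Lang};
+/// # let path = Path::new("../data/web_spell/checker");
+/// # if !path.exists() {
+/// #     return;
+/// # }
+/// let checker = SpellChecker::open("path/to/checker", CorrectionConfig::default());
+/// # let checker = SpellChecker::open(path, CorrectionConfig::default());
+/// let correction = checker.unwrap().correct("hwllo", &Lang::Eng);
+/// ```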
pub struct SpellChecker {
+ /// The language-specific spell checkers.
     lang_spell_checkers: FnvHashMap<Lang, LangSpellChecker>,
}
impl SpellChecker {
+ /// Open a spell checker from a model directory.
     pub fn open<P: AsRef<Path>>(path: P, config: CorrectionConfig) -> Result<Self> {
if !path.as_ref().exists() {
return Err(Error::CheckerNotFound);
@@ -243,6 +261,8 @@ impl SpellChecker {
lang_spell_checkers,
})
}
+
+ /// Correct a text in a specific language.
     pub fn correct(&self, text: &str, lang: &Lang) -> Option<Correction> {
self.lang_spell_checkers
.get(lang)
@@ -273,6 +293,7 @@ mod tests {
res
}
+
#[test]
fn simple() {
let path = Path::new("../data/web_spell/checker");
diff --git a/crates/web-spell/src/stupid_backoff.rs b/crates/web-spell/src/stupid_backoff.rs
index 2ebef6df..05d7c907 100644
--- a/crates/web-spell/src/stupid_backoff.rs
+++ b/crates/web-spell/src/stupid_backoff.rs
@@ -1,19 +1,3 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
use super::{tokenize, MergePointer, Result};
use std::{
cmp::Reverse,
@@ -62,6 +46,9 @@ impl AsRef<[u8]> for StoredNgram {
}
}
+/// A trainer for the stupid backoff language model.
+///
+/// This is used to train the language model from a corpus of text.
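+///
+/// A rough usage sketch (the output path is illustrative):
+///
+/// ```rust,ignore
+/// let mut trainer = StupidBackoffTrainer::new(3);
+/// trainer.train(&web_spell::tokenize("the cat sat on the mat"));
+/// trainer.build("path/to/stupid_backoff")?;
+/// ```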
pub struct StupidBackoffTrainer {
max_ngram_size: usize,
ngrams: BTreeMap,
@@ -70,6 +57,8 @@ pub struct StupidBackoffTrainer {
}
impl StupidBackoffTrainer {
+ /// Create a new trainer for a given maximum n-gram size.
+    /// An n-gram size of 3 is usually a good choice.
pub fn new(max_ngram_size: usize) -> Self {
Self {
max_ngram_size,
@@ -79,6 +68,7 @@ impl StupidBackoffTrainer {
}
}
+ /// Train the model on a sequence of tokens.
pub fn train(&mut self, tokens: &[String]) {
for window in tokens.windows(self.max_ngram_size) {
for i in 1..=window.len() {
@@ -105,6 +95,7 @@ impl StupidBackoffTrainer {
}
}
+ /// Build the language model from the trainer.
     pub fn build<P: AsRef<Path>>(self, path: P) -> Result<()> {
if !path.as_ref().exists() {
std::fs::create_dir_all(path.as_ref())?;
@@ -151,6 +142,7 @@ impl StupidBackoffTrainer {
}
}
+/// Merge multiple streams into a single FST.
fn merge_streams(
mut builder: fst::MapBuilder>,
streams: Vec>,
@@ -208,6 +200,16 @@ fn merge_streams(
Ok(())
}
+/// A stupid backoff language model for scoring n-grams.
+///
+/// The model scores n-grams by recursively backing off to lower order n-grams when the full
+/// n-gram is not found in the training data. The backoff is done by multiplying the score
+/// by a constant factor (0.4).
+///
+/// The model stores n-grams in two FSTs:
+/// `ngrams` contains regular n-grams with their frequencies, while `rotated_ngrams` contains
+/// n-grams with their words rotated to enable efficient prefix queries. Additionally, the model
+/// maintains counts of total n-grams seen for each order n in `n_counts`.
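+///
+/// As a rough sketch of the scoring scheme (following the original "stupid backoff"
+/// formulation; the exact normalisation used in this implementation may differ):
+///
+/// ```text
+/// S(w3 | w1 w2) = freq(w1 w2 w3) / freq(w1 w2)    if the trigram was seen
+///               = 0.4 * S(w3 | w2)                otherwise
+/// ```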
pub struct StupidBackoff {
     ngrams: fst::Map<memmap2::Mmap>,
     rotated_ngrams: fst::Map<memmap2::Mmap>,
@@ -216,6 +218,7 @@ pub struct StupidBackoff {
}
impl StupidBackoff {
+ /// Open a language model from a model directory.
     pub fn open<P: AsRef<Path>>(folder: P) -> Result<Self> {
let mmap = unsafe { memmap2::Mmap::map(&File::open(folder.as_ref().join("ngrams.bin"))?)? };
let ngrams = fst::Map::new(mmap)?;
@@ -237,6 +240,7 @@ impl StupidBackoff {
})
}
+ /// Merge multiple language models into a single model.
     pub fn merge<P: AsRef<Path>>(models: Vec<Self>, folder: P) -> Result<Self> {
if !folder.as_ref().exists() {
std::fs::create_dir_all(folder.as_ref())?;
@@ -307,6 +311,7 @@ impl StupidBackoff {
})
}
+ /// Return the frequency of the n-gram.
     pub fn freq(&self, words: &[String]) -> Option<u64> {
if words.len() >= self.ngrams.len() || words.is_empty() {
return None;
@@ -319,6 +324,7 @@ impl StupidBackoff {
self.ngrams.get(ngram)
}
+ /// Return the log probability of the n-gram.
     pub fn log_prob<S: NextWordsStrategy>(&self, words: &[String], strat: S) -> f64 {
if words.len() >= self.ngrams.len() || words.is_empty() {
return -(self.n_counts[0] as f64).log2();
@@ -336,10 +342,12 @@ impl StupidBackoff {
}
}
+ /// Return the probability of the n-gram.
     pub fn prob<S: NextWordsStrategy>(&self, words: &[String], strat: S) -> f64 {
self.log_prob(words, strat).exp2()
}
+ /// Given a word, return all n-grams where that word appears in the middle of the n-gram.
     pub fn contexts(&self, word: &str) -> Vec<(Vec<String>, u64)> {
let q = word.to_string() + " ";
let automaton = fst::automaton::Str::new(&q).starts_with();
@@ -360,13 +368,20 @@ impl StupidBackoff {
}
}
+/// A trait for strategies that determine the next words to consider when backing off.
pub trait NextWordsStrategy: Sized {
+ /// The inverse strategy.
type Inv: NextWordsStrategy;
+ /// Return the next words to consider.
fn next_words<'a>(&mut self, words: &'a [String]) -> &'a [String];
+
+ /// Return the inverse strategy.
fn inverse(self) -> Self::Inv;
}
+/// A strategy that backs off by removing words from left to right. For example, given the sequence
+/// "the cat sat", it would first consider "cat sat", then just "sat".
pub struct LeftToRight;
impl NextWordsStrategy for LeftToRight {
@@ -381,6 +396,8 @@ impl NextWordsStrategy for LeftToRight {
}
}
+/// A strategy that backs off by removing words from right to left. For example, given the sequence
+/// "the cat sat", it would first consider "the cat", then just "the".
pub struct RightToLeft;
impl NextWordsStrategy for RightToLeft {
@@ -395,6 +412,8 @@ impl NextWordsStrategy for RightToLeft {
}
}
+/// A strategy that alternates between removing words from the left and from the right, so the
+/// considered n-grams converge towards the middle of the sequence. For example, given the sequence
+/// "the cat sat", it would consider "cat sat" followed by "cat", or "the cat" followed by "cat",
+/// depending on which side was removed last.
#[derive(Default)]
pub struct IntoMiddle {
last_left: bool,
diff --git a/crates/web-spell/src/term_freqs.rs b/crates/web-spell/src/term_freqs.rs
index 7cf1c421..e6071932 100644
--- a/crates/web-spell/src/term_freqs.rs
+++ b/crates/web-spell/src/term_freqs.rs
@@ -1,19 +1,3 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
use super::{MergePointer, Result};
use fst::{IntoStreamer, Streamer};
@@ -161,6 +145,7 @@ struct Metadata {
dicts: Vec,
}
+/// A dictionary of terms and their frequencies.
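+///
+/// A rough usage sketch (the path is illustrative):
+///
+/// ```rust,ignore
+/// let mut dict = TermDict::open("path/to/term_dict")?;
+/// dict.insert("hello");
+/// dict.commit()?;
+/// let freq = dict.freq("hello");
+/// ```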
pub struct TermDict {
builder: DictBuilder,
stored: Vec,
@@ -169,6 +154,7 @@ pub struct TermDict {
}
impl TermDict {
+ /// Open a term dictionary from a model directory.
     pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
if path.as_ref().exists() {
let file = File::open(path.as_ref().join("meta.json"))?;
@@ -203,6 +189,7 @@ impl TermDict {
}
}
+ /// Insert a term into the dictionary.
pub fn insert(&mut self, term: &str) {
if term.len() <= 1 {
return;
@@ -235,6 +222,7 @@ impl TermDict {
self.builder.insert(term);
}
+ /// Save the current state of the dictionary to disk.
pub fn commit(&mut self) -> Result<()> {
let builder = std::mem::take(&mut self.builder);
@@ -251,6 +239,7 @@ impl TermDict {
Ok(())
}
+ /// Remove unused dictionaries from disk.
fn gc(&self) -> Result<()> {
let all_dicts = self
.path
@@ -277,6 +266,7 @@ impl TermDict {
Ok(())
}
+ /// Save the metadata to disk.
fn save_meta(&self) -> Result<()> {
let file = OpenOptions::new()
.create(true)
@@ -289,6 +279,7 @@ impl TermDict {
Ok(())
}
+ /// Merge all dictionary segments into a single dictionary.
pub fn merge_dicts(&mut self) -> Result<()> {
if self.stored.len() <= 1 {
return Ok(());
@@ -311,6 +302,7 @@ impl TermDict {
Ok(())
}
+ /// Get the frequency of a term across all dictionary segments.
     pub fn freq(&self, term: &str) -> Option<u64> {
let mut freqs = None;
@@ -326,6 +318,7 @@ impl TermDict {
freqs
}
+ /// Get all terms in the dictionary.
     pub fn terms(&self) -> Vec<String> {
let mut terms = Vec::new();
@@ -340,6 +333,7 @@ impl TermDict {
terms
}
+    /// Search for terms in the dictionary within a given maximum edit distance.
     pub fn search(&self, term: &str, max_edit_distance: u32) -> Vec<String> {
let mut res = Vec::new();
@@ -354,6 +348,7 @@ impl TermDict {
res
}
+ /// Merge another term dictionary into this one.
pub fn merge(&mut self, other: Self) -> Result<()> {
for stored in other.stored {
let uuid = uuid::Uuid::new_v4();
@@ -369,6 +364,7 @@ impl TermDict {
Ok(())
}
+ /// Get the path to the model directory.
pub(crate) fn path(&self) -> &Path {
&self.path
}
diff --git a/crates/web-spell/src/trainer.rs b/crates/web-spell/src/trainer.rs
index 13f4b19f..22101768 100644
--- a/crates/web-spell/src/trainer.rs
+++ b/crates/web-spell/src/trainer.rs
@@ -1,19 +1,3 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
use indicatif::ParallelProgressIterator;
use rayon::prelude::*;
diff --git a/crates/zimba/LICENSE b/crates/zimba/LICENSE
new file mode 100644
index 00000000..409e7e2c
--- /dev/null
+++ b/crates/zimba/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Stract ApS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/crates/zimba/README.md b/crates/zimba/README.md
index 29744347..b7d55629 100644
--- a/crates/zimba/README.md
+++ b/crates/zimba/README.md
@@ -14,4 +14,7 @@ fn main() -> Result<(), Error> {
Ok(())
}
-```
\ No newline at end of file
+```
+
+## License
+Zimba is licensed under the MIT license. See the [LICENSE](LICENSE) file for details.
\ No newline at end of file
diff --git a/crates/zimba/src/lib.rs b/crates/zimba/src/lib.rs
index b936abd1..892d2575 100644
--- a/crates/zimba/src/lib.rs
+++ b/crates/zimba/src/lib.rs
@@ -1,21 +1,28 @@
-// Stract is an open source web search engine.
-// Copyright (C) 2024 Stract ApS
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
//! Zim file reader.
//! https://wiki.openzim.org/wiki/ZIM_file_format
+//!
+//! The ZIM file format is used for storing web content in a highly compressed format.
+//! It is commonly used for offline storage of Wikipedia and other web content.
+//!
+//! A ZIM archive starts with a header that contains metadata about the file,
+//! including a magic number, version information, and pointers to various sections
+//! of the file. The header is followed by a list of MIME types, path pointers,
+//! title pointers, directory entries, and clusters.
+//!
+//! # Usage
+//! ```no_run
+//! use zimba::{ZimFile, Error};
+//!
+//! fn main() -> Result<(), Error> {
+//! let zim_file = ZimFile::open("path/to/file.zim")?;
+//!
+//! for article in zim_file.articles()? {
+//! println!("{}", article.title);
+//! }
+//!
+//! Ok(())
+//! }
+//! ```
pub mod wiki;
@@ -54,6 +61,7 @@ pub enum Error {
Lzma(#[from] lzma::Error),
}
+/// Read a zero-terminated string.
fn read_zero_terminated(bytes: &[u8]) -> IResult<&[u8], String> {
let (remaining, string) = map(take_while(|b| b != 0), |bytes: &[u8]| {
String::from_utf8_lossy(bytes).into_owned()
@@ -112,21 +120,58 @@ impl NomParseNumber for u8 {
}
}
+/// The ZIM file header.
#[derive(Debug)]
#[allow(unused)]
struct Header {
+ /// A 4-byte magic number. It must be `72_173_914` (0x44D495A) for a valid ZIM file.
magic: u32,
+
+ /// Major version of the ZIM archive format.
+    /// The major version is updated when an incompatible
+    /// change is integrated into the format (a library
+    /// built for version N will probably not be
+    /// able to read version N+1).
major_version: u16,
+
+ /// Minor version of the ZIM archive format.
+    /// The minor version is updated when a compatible
+    /// change is integrated (a library built for
+    /// minor version n will still be able to read
+    /// minor version n+1).
minor_version: u16,
+
+ /// Unique ID of this ZIM archive.
uuid: u128,
+
+ /// Number of entries in the ZIM archive.
entry_count: u32,
+
+ /// Number of clusters in the ZIM archive.
cluster_count: u32,
+
+ /// Position of the URL pointer list.
url_ptr_pos: u64,
+
+ /// Position of the title pointer list.
+    /// This is considered deprecated and its use should be avoided where possible.
title_ptr_pos: u64,
+
+ /// Position of the cluster pointer list.
cluster_ptr_pos: u64,
+
+ /// Position of the MIME type list.
mime_list_pos: u64,
+
+ /// Position of the main page or 0xFFFFFFFF if not set.
main_page: u32,
+
+ /// Position of the layout page or 0xFFFFFFFF if not set.
layout_page: u32,
+
+    /// Pointer to the MD5 checksum of this archive. The checksum is
+    /// calculated over the archive without the checksum itself.
+    /// It always points 16 bytes before the end of the archive.
checksum_pos: u64,
}
@@ -191,8 +236,10 @@ impl Header {
}
}
+/// A list of MIME types.
#[derive(Debug)]
 pub struct MimeTypes(Vec<String>);
+
impl MimeTypes {
fn from_bytes(bytes: &[u8]) -> Result {
let mut mime_types = Vec::new();
@@ -226,6 +273,7 @@ impl std::ops::Index for MimeTypes {
#[derive(Debug)]
pub struct UrlPointer(pub u64);
+/// A list of URL pointers.
#[derive(Debug)]
 pub struct UrlPointerList(Vec<UrlPointer>);
@@ -256,6 +304,7 @@ impl UrlPointerList {
}
}
+/// A title pointer.
#[derive(Debug)]
#[allow(unused)]
pub struct TitlePointer(u32);
@@ -291,6 +340,7 @@ impl TitlePointerList {
}
}
+/// A cluster pointer.
#[derive(Debug)]
struct ClusterPointer(u64);
@@ -325,6 +375,9 @@ impl ClusterPointerList {
}
}
+/// A directory entry in a ZIM file, representing either content or a redirect.
+/// Content entries contain actual data like articles or images, while redirect entries
+/// point to other entries in the archive.
#[derive(Debug)]
pub enum DirEntry {
Content {
@@ -459,11 +512,17 @@ impl std::io::Read for CompressedReader<'_> {
}
}
+/// An offset in a cluster.
#[derive(Debug)]
struct ClusterOffset {
offset: u64,
}
+/// A cluster.
+///
+/// Clusters contain the actual data of the directory entries.
+/// Clusters exist so that the data of more than one directory entry can be compressed together, which makes the compression much more efficient.
+/// Typically clusters have a size of about 1 MB.
#[derive(Debug)]
pub struct Cluster {
     blob_offsets: Vec<ClusterOffset>,
@@ -579,6 +638,7 @@ impl Cluster {
}
}
+/// A ZIM file.
pub struct ZimFile {
header: Header,
mime_types: MimeTypes,
@@ -589,6 +649,7 @@ pub struct ZimFile {
}
impl ZimFile {
+ /// Open a ZIM file. The file is memory-mapped and only the header and pointers are read into memory upfront.
     pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::open(path)?;
let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
@@ -627,6 +688,7 @@ impl ZimFile {
})
}
+ /// Get a directory entry by its index.
pub fn get_dir_entry(&self, index: usize) -> Result