From 7bb498fb1700ee7c0415b352d6e689edcb918da5 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 13 Apr 2024 16:07:59 -0400
Subject: [PATCH 1/8] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 34706620f9..ef39c4fd4f 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,7 @@ litgpt chat \
   --checkpoint_dir out/phi-2-lora/final
 ```

+### Pretrain an LLM
 Train an LLM from scratch on your own data via pretraining:

 ```bash
@@ -131,7 +132,8 @@ litgpt chat \
   --checkpoint_dir out/custom-model/final
 ```

-Specialize an already pretrained model by training on custom data:
+### Continue pretraining an LLM
+This is another form of finetuning that specializes an already pretrained model by training it on custom data:

 ```
 mkdir -p custom_texts

From c81800f455dd997f786cbe2e110eff1f5c0d2d3b Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 13 Apr 2024 16:10:57 -0400
Subject: [PATCH 2/8] Update CODEOWNERS

---
 .github/CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 8349e1320f..af64423bb7 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1 +1,2 @@
 * @awaelchli @carmocca @lantiga
+/README.md @williamfalcon @lantiga

From d84c5c1a6f871b9dd4f3fec8715ba4a72d035156 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Mon, 15 Apr 2024 12:36:35 -0400
Subject: [PATCH 3/8] Update README.md (#1291)

Co-authored-by: Sebastian Raschka

---
 README.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ef39c4fd4f..17cb709a08 100644
--- a/README.md
+++ b/README.md
@@ -25,14 +25,24 @@ Uses the latest state-of-the-art techniques:
 Finetune, pretrain LLMs •
 Models •
 Features •
-Training recipes (YAML) •
-Design principles
+Training recipes (YAML)
 &nbsp;
+## Finetune, pretrain and deploy AI models Lightning fast ⚡⚡
+LitGPT is a command-line tool to use, pretrain, finetune and deploy LLMs. It is based on configs with highly optimized recipes for training the world's largest, most powerful open-source LLMs.
+
+We've reimplemented all the model architectures and training recipes for 3 reasons:
+
+1. Remove all abstraction layers and have single-file implementations.
+2. Guarantee Apache 2.0 compliance to enable enterprise use without limits.
+3. Optimize every detail of every model to get the fastest performance possible, lowering cost and training time.
+
+&nbsp;
+
 ## Install LitGPT

 Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
@@ -60,8 +70,7 @@ pip install -e '.[all]'
 ---

 # Get started
-LitGPT is a command-line tool to use, pretrain, finetune and deploy LLMs.
-
+LitGPT is CLI- and config-based. Select the model and the action you want to take on that model (finetune, pretrain, evaluate, deploy, etc.):

 &nbsp;

From c4831983ee488b89bdcb502e0763c496d8dee593 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Mon, 15 Apr 2024 12:36:47 -0400
Subject: [PATCH 4/8] Fix badge size (#1293)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17cb709a08..0082d426a8 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Uses the latest state-of-the-art techniques:

 ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pytorch-lightning)
-![cpu-tests](https://github.com/lightning-AI/lit-stablelm/actions/workflows/cpu-tests.yml/badge.svg) [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [![Discord](https://img.shields.io/discord/1077906959069626439?style=plastic)](https://discord.gg/VptPCZkGNa)
+![cpu-tests](https://github.com/lightning-AI/lit-stablelm/actions/workflows/cpu-tests.yml/badge.svg) [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [![Discord](https://img.shields.io/discord/1077906959069626439)](https://discord.gg/VptPCZkGNa)
 Lightning.ai •

From af8347be3192c4deb5b55059442ca7eacf07f341 Mon Sep 17 00:00:00 2001
From: Andrei-Aksionov <58434077+Andrei-Aksionov@users.noreply.github.com>
Date: Mon, 15 Apr 2024 20:15:46 +0300
Subject: [PATCH 5/8] Replace UV with regular pip (#1292)

---
 .github/workflows/cpu-tests.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/cpu-tests.yml b/.github/workflows/cpu-tests.yml
index 59a3ce3075..cbb8d2805c 100644
--- a/.github/workflows/cpu-tests.yml
+++ b/.github/workflows/cpu-tests.yml
@@ -39,14 +39,14 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-
-      - name: Install uv
-        run: pip install uv
+          cache: 'pip'
+          cache-dependency-path: |
+            pyproject.toml

       - name: Install minimal dependencies
         run: |
-          uv pip install --system .
-          uv pip list
+          pip install .
+          pip list

           # make sure all modules are still importable with only the minimal dependencies available
           modules=$(
             find litgpt -type f -name "*.py" | \
@@ -58,8 +58,8 @@

       - name: Install all dependencies
         run: |
-          uv pip install --system '.[all,test]'
-          uv pip list
+          pip install '.[all,test]'
+          pip list

       - name: Run tests
         run: |

From de1e1c76b973a474ce3894d02456457b53c5fc7f Mon Sep 17 00:00:00 2001
From: Mikhail Gerassimov
Date: Mon, 15 Apr 2024 22:36:02 +0500
Subject: [PATCH 6/8] error: Unrecognized arguments: --val_split_fraction 0.1
 (#1295)

---
 README.md                    | 2 +-
 tutorials/prepare_dataset.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0082d426a8..6c55a1ee5c 100644
--- a/README.md
+++ b/README.md
@@ -106,7 +106,7 @@ litgpt finetune lora \
   --checkpoint_dir checkpoints/microsoft/phi-2 \
   --data JSON \
   --data.json_path my_custom_dataset.json \
-  --val_split_fraction 0.1 \
+  --data.val_split_fraction 0.1 \
   --out_dir out/phi-2-lora

 # 3) Chat with the model
diff --git a/tutorials/prepare_dataset.md b/tutorials/prepare_dataset.md
index 867b612f91..055b769bac 100644
--- a/tutorials/prepare_dataset.md
+++ b/tutorials/prepare_dataset.md
@@ -334,7 +334,7 @@ Then simply run any of the finetuning scripts with this input:
 litgpt finetune lora \
   --data JSON \
   --data.json_path path/to/your/data.json \
-  --val_split_fraction 0.1 \
+  --data.val_split_fraction 0.1 \
   --checkpoint_dir "checkpoints/tiiuae/falcon-7b"
 ```

From 67d0b0c6cbbcf10e63b81a9963807e0a3e1854bd Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Mon, 15 Apr 2024 13:36:36 -0400
Subject: [PATCH 7/8] Add Pre-training Small Base LMs with Fewer Tokens paper
 to community projects (#1290)

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index 6c55a1ee5c..eaa5fbf7f5 100644
--- a/README.md
+++ b/README.md
@@ -483,6 +483,11 @@ LitGPT powered the [TinyLlama project](https://github.com/jzhang38/TinyLlama) an
 [MicroLlama](https://github.com/keeeeenw/MicroLlama) is a 300M Llama model pretrained on 50B tokens powered by TinyLlama and LitGPT.

+&nbsp;
+
+**🔬 Pre-training Small Base LMs with Fewer Tokens**
+
+The research paper ["Pre-training Small Base LMs with Fewer Tokens"](https://arxiv.org/abs/2404.08634), which utilizes LitGPT, develops smaller base language models by inheriting a few transformer blocks from larger models and training on a tiny fraction of the data used by the larger models. It demonstrates that these smaller models can perform comparably to larger models despite using significantly less training data and resources.

 &nbsp;
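The `--data.val_split_fraction` fix in patch 6 works because `val_split_fraction` is a field of the `JSON` data class rather than a parameter of the finetune command itself, so the CLI only exposes it as a dotted flag under `--data.`. Below is a minimal sketch of that behavior, assuming jsonargparse (the library behind the `CLI(...)` calls in the test files later in this series); the `finetune` signature and the `JSON` defaults here are hypothetical, for illustration only:

```python
# A minimal sketch (not LitGPT's actual code) of the dotted-flag behavior that
# patch 6 relies on, using jsonargparse. The `finetune` signature and the
# `JSON` defaults are hypothetical.
from dataclasses import dataclass

from jsonargparse import CLI


@dataclass
class JSON:
    json_path: str = "data.json"
    # The split fraction is a field of the data class, not of finetune(),
    # which is why the top-level flag `--val_split_fraction` is rejected.
    val_split_fraction: float = 0.05


def finetune(data: JSON, checkpoint_dir: str = "checkpoints") -> None:
    print(f"data={data.json_path} split={data.val_split_fraction} ckpt={checkpoint_dir}")


if __name__ == "__main__":
    # Works:  python sketch.py --data.json_path my.json --data.val_split_fraction 0.1
    # Fails:  python sketch.py --val_split_fraction 0.1
    #         -> error: Unrecognized arguments: --val_split_fraction 0.1
    CLI(finetune)
```

Passing the flag at the top level reproduces the "Unrecognized arguments" error quoted in the patch subject.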
From 410a7126f82ea550d4a43dab89367547b073b5a3 Mon Sep 17 00:00:00 2001
From: Andrei-Aksionov <58434077+Andrei-Aksionov@users.noreply.github.com>
Date: Mon, 15 Apr 2024 21:22:32 +0300
Subject: [PATCH 8/8] Specify `UTF-8` encoding for every `open` function
 (#1283)

Co-authored-by: William Falcon

---
 litgpt/data/tinystories.py              | 2 +-
 litgpt/eval/evaluate.py                 | 2 +-
 litgpt/prompts.py                       | 4 ++--
 litgpt/scripts/convert_hf_checkpoint.py | 2 +-
 litgpt/scripts/merge_lora.py            | 2 +-
 litgpt/tokenizer.py                     | 6 +++---
 litgpt/utils.py                         | 2 +-
 tests/data/test_tinystories.py          | 2 +-
 tests/test_config.py                    | 4 ++--
 tests/test_convert_lit_checkpoint.py    | 2 +-
 tests/test_evaluate.py                  | 2 +-
 tests/test_merge_lora.py                | 6 +++---
 tests/test_prompts.py                   | 4 ++--
 tests/test_utils.py                     | 4 ++--
 14 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/litgpt/data/tinystories.py b/litgpt/data/tinystories.py
index ec8f5d6cec..632a015e44 100644
--- a/litgpt/data/tinystories.py
+++ b/litgpt/data/tinystories.py
@@ -106,7 +106,7 @@ def val_dataloader(self) -> DataLoader:


 def tokenize(filename: str, tokenizer: Tokenizer):
-    with open(filename, "r") as f:
+    with open(filename, "r", encoding="utf-8") as f:
         data = json.load(f)
     global_rank = int(os.environ["DATA_OPTIMIZER_GLOBAL_RANK"])
     num_workers = int(os.environ["DATA_OPTIMIZER_NUM_WORKERS"])
diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
index 097c02f4d2..12aa3179e9 100644
--- a/litgpt/eval/evaluate.py
+++ b/litgpt/eval/evaluate.py
@@ -92,7 +92,7 @@ def convert_and_evaluate(
     save_filepath = out_dir / Path("results.json") if save_filepath is None else Path(save_filepath)

     config_filepath = checkpoint_dir/"model_config.yaml"
-    with open(config_filepath) as f:
+    with open(config_filepath, encoding="utf-8") as f:
         config_dict = yaml.safe_load(f)
     repo_id = f"{config_dict['hf_config']['org']}/{config_dict['hf_config']['name']}"
diff --git a/litgpt/prompts.py b/litgpt/prompts.py
index df1a7150b6..d827413913 100644
--- a/litgpt/prompts.py
+++ b/litgpt/prompts.py
@@ -340,12 +340,12 @@ def save_prompt_style(style: Union[str, PromptStyle], checkpoint_dir: Path) -> N
     cls = type(style)
     # Allow saving the full module path for user-defined prompt classes
     config = {"class_path": f"{cls.__module__}.{cls.__name__}"}
-    with open(checkpoint_dir / "prompt_style.yaml", "w") as file:
+    with open(checkpoint_dir / "prompt_style.yaml", "w", encoding="utf-8") as file:
         yaml.dump(config, file)


 def load_prompt_style(checkpoint_dir: Path) -> PromptStyle:
-    with open(checkpoint_dir / "prompt_style.yaml", "r") as file:
+    with open(checkpoint_dir / "prompt_style.yaml", "r", encoding="utf-8") as file:
         config = yaml.safe_load(file)
     # Support loading the full module path for user-defined prompt classes
     full_module_path, cls_name = config["class_path"].rsplit(".", 1)
diff --git a/litgpt/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py
index 98f8d4e01f..9fdb337ee4 100644
--- a/litgpt/scripts/convert_hf_checkpoint.py
+++ b/litgpt/scripts/convert_hf_checkpoint.py
@@ -329,7 +329,7 @@ def convert_hf_checkpoint(
     # Load the json file containing weight mapping
     pytorch_bin_map_json_path = checkpoint_dir / "pytorch_model.bin.index.json"
     if pytorch_bin_map_json_path.is_file():  # not all checkpoints have this file
-        with open(pytorch_bin_map_json_path) as json_map:
+        with open(pytorch_bin_map_json_path, encoding="utf-8") as json_map:
             bin_index = json.load(json_map)
         bin_files = {checkpoint_dir / bin for bin in bin_index["weight_map"].values()}
     else:
diff --git a/litgpt/scripts/merge_lora.py b/litgpt/scripts/merge_lora.py
index 1e1120d214..aff59daef4 100644
--- a/litgpt/scripts/merge_lora.py
+++ b/litgpt/scripts/merge_lora.py
@@ -72,7 +72,7 @@ def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Path, Opti
             f" the `litgpt/finetune/lora.py` script."
         )

-    with open(hparams_file, "r") as file:
+    with open(hparams_file, "r", encoding="utf-8") as file:
         hparams = yaml.safe_load(file)

     lora_params = {k: v for k, v in hparams.items() if k.startswith("lora_")}
diff --git a/litgpt/tokenizer.py b/litgpt/tokenizer.py
index 3a6758eb62..55c972e69a 100644
--- a/litgpt/tokenizer.py
+++ b/litgpt/tokenizer.py
@@ -33,14 +33,14 @@ def __init__(self, checkpoint_dir: Union[Path, str]) -> None:
             self.backend = "huggingface"

             if (special_tokens_path := checkpoint_dir / "tokenizer_config.json").is_file():
-                with open(special_tokens_path) as fp:
+                with open(special_tokens_path, encoding="utf-8") as fp:
                     config = json.load(fp)
                 bos_token = config.get("bos_token")
                 self.bos_id = self.token_to_id(bos_token) if bos_token is not None else None
                 eos_token = config.get("eos_token")
                 self.eos_id = self.token_to_id(eos_token) if eos_token is not None else None
             if (special_tokens_path := checkpoint_dir / "generation_config.json").is_file():
-                with open(special_tokens_path) as fp:
+                with open(special_tokens_path, encoding="utf-8") as fp:
                     config = json.load(fp)
                 if self.bos_id is None:
                     self.bos_id = config.get("bos_token_id")
@@ -71,7 +71,7 @@ def token_to_id(self, token: str) -> int:
     def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
         if not (tokenizer_config_path := checkpoint_dir / "tokenizer_config.json").is_file():
             return False
-        with open(tokenizer_config_path) as fp:
+        with open(tokenizer_config_path, encoding="utf-8") as fp:
             config = json.load(fp)
         if any(config.get(check, False) for check in ("add_bos_token", "add_prefix_space")):
             return True
diff --git a/litgpt/utils.py b/litgpt/utils.py
index 37ebdfd6f9..0e40d336a1 100644
--- a/litgpt/utils.py
+++ b/litgpt/utils.py
@@ -446,7 +446,7 @@ def save_hyperparameters(function: callable, checkpoint_dir: Path) -> None:


 def save_config(config: "Config", checkpoint_dir: Path) -> None:
     config_dict = asdict(config)
-    with open(checkpoint_dir / "model_config.yaml", "w") as fp:
+    with open(checkpoint_dir / "model_config.yaml", "w", encoding="utf-8") as fp:
         yaml.dump(config_dict, fp)
diff --git a/tests/data/test_tinystories.py b/tests/data/test_tinystories.py
index bfb009c1a9..e1c15d67db 100644
--- a/tests/data/test_tinystories.py
+++ b/tests/data/test_tinystories.py
@@ -43,7 +43,7 @@ def test_tokenize(tmp_path, monkeypatch):
     story1, story2 = "foo bar", " fun "
     data = [{"story": story1}, {"story": story2}]
     shard_path = tmp_path / "data.json"
-    with open(shard_path, "w") as f:
+    with open(shard_path, "w", encoding="utf-8") as f:
         json.dump(data, f)

     class Tokenizer:
diff --git a/tests/test_config.py b/tests/test_config.py
index d1b0c57bd2..e1b6b28850 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -60,7 +60,7 @@ def test_from_checkpoint(tmp_path):

     # 3. If only `lit_config.py` exists.
     config_data = {"name": "pythia-14m", "block_size": 24, "n_layer": 2}
-    with open(tmp_path / "model_config.yaml", "w") as file:
+    with open(tmp_path / "model_config.yaml", "w", encoding="utf-8") as file:
         yaml.dump(config_data, file)
     config = Config.from_checkpoint(tmp_path)
     assert config.name == "pythia-14m"
@@ -69,7 +69,7 @@ def test_from_checkpoint(tmp_path):

     # 4. Both `lit_config.py` and a matching config exist, but `lit_config.py` supersedes matching config
     (tmp_path / "pythia-14m").mkdir()
-    with open(tmp_path / "pythia-14m/model_config.yaml", "w") as file:
+    with open(tmp_path / "pythia-14m/model_config.yaml", "w", encoding="utf-8") as file:
         yaml.dump(config_data, file)
     config = Config.from_checkpoint(tmp_path / "pythia-14m")
     assert config.name == "pythia-14m"
diff --git a/tests/test_convert_lit_checkpoint.py b/tests/test_convert_lit_checkpoint.py
index 8464a53d8d..f44609a4f1 100644
--- a/tests/test_convert_lit_checkpoint.py
+++ b/tests/test_convert_lit_checkpoint.py
@@ -35,7 +35,7 @@ def test_convert_lit_checkpoint(tmp_path):
     checkpoint_path = tmp_path / "lit_model.pth"
     config_path = tmp_path / "model_config.yaml"
     torch.save(ours_model.state_dict(), checkpoint_path)
-    with open(config_path, "w") as fp:
+    with open(config_path, "w", encoding="utf-8") as fp:
         yaml.dump(asdict(ours_config), fp)
     output_dir = tmp_path / "out_dir"
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 94b90b4551..3ea6994532 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -35,7 +35,7 @@ def test_evaluate_script(tmp_path, monkeypatch):
     checkpoint_path = tmp_path / "lit_model.pth"
     torch.save(ours_model.state_dict(), checkpoint_path)
     config_path = tmp_path / "model_config.yaml"
-    with open(config_path, "w") as fp:
+    with open(config_path, "w", encoding="utf-8") as fp:
         yaml.dump(asdict(ours_config), fp)

     fn_kwargs = dict(
diff --git a/tests/test_merge_lora.py b/tests/test_merge_lora.py
index 4e505c09b5..2c8458f02b 100644
--- a/tests/test_merge_lora.py
+++ b/tests/test_merge_lora.py
@@ -31,7 +31,7 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir, pretrained_dtype, lora_dtype)

     # Create a fake pretrained checkpoint
     config = dict(block_size=128, padded_vocab_size=256, n_layer=3, n_head=8, n_embd=16)
-    with open(pretrained_checkpoint_dir / "model_config.yaml", "w") as fp:
+    with open(pretrained_checkpoint_dir / "model_config.yaml", "w", encoding="utf-8") as fp:
         yaml.dump(config, fp)
     base_model = GPT.from_name("pythia-14m", **config).to(dtype=pretrained_dtype)
     state_dict = base_model.state_dict()
@@ -45,7 +45,7 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir, pretrained_dtype, lora_dtype)
     assert len(state_dict) == 6
     torch.save(state_dict, lora_checkpoint_dir / "lit_model.pth.lora")
     hparams = dict(checkpoint_dir=str(pretrained_checkpoint_dir), **lora_kwargs)
-    with open(lora_checkpoint_dir / "hyperparameters.yaml", "w") as file:
+    with open(lora_checkpoint_dir / "hyperparameters.yaml", "w", encoding="utf-8") as file:
         yaml.dump(hparams, file)
     shutil.copyfile(pretrained_checkpoint_dir / "model_config.yaml", lora_checkpoint_dir / "model_config.yaml")

@@ -80,7 +80,7 @@ def test_load_lora_metadata(fake_checkpoint_dir):
         load_lora_metadata(fake_checkpoint_dir)

     hparams = dict(precision="bf16-mixed", checkpoint_dir="checkpoints/meta-llama/Llama-2-7b", lora_r=8, lora_alpha=16)
-    with open(fake_checkpoint_dir / "hyperparameters.yaml", "w") as file:
+    with open(fake_checkpoint_dir / "hyperparameters.yaml", "w", encoding="utf-8") as file:
         yaml.dump(hparams, file)

     lora_args, pretrained_dir, precision = load_lora_metadata(fake_checkpoint_dir)
diff --git a/tests/test_prompts.py b/tests/test_prompts.py
index 3a4ef8dcc2..3250ce4801 100644
--- a/tests/test_prompts.py
+++ b/tests/test_prompts.py
@@ -98,7 +98,7 @@ def test_save_load_prompt_style(tmp_path):
     assert not has_prompt_style(checkpoint_dir)
     save_prompt_style("alpaca", checkpoint_dir)
     assert has_prompt_style(checkpoint_dir)
-    with open(checkpoint_dir / "prompt_style.yaml", "r") as file:
+    with open(checkpoint_dir / "prompt_style.yaml", "r", encoding="utf-8") as file:
         contents = yaml.safe_load(file)
     assert contents == {"class_path": "litgpt.prompts.Alpaca"}
     loaded = load_prompt_style(checkpoint_dir)
@@ -108,7 +108,7 @@ def test_save_load_prompt_style(tmp_path):
     checkpoint_dir = tmp_path / "custom"
     checkpoint_dir.mkdir()
     save_prompt_style(CustomPromptStyle(), checkpoint_dir)
-    with open(checkpoint_dir / "prompt_style.yaml", "r") as file:
+    with open(checkpoint_dir / "prompt_style.yaml", "r", encoding="utf-8") as file:
         contents = yaml.safe_load(file)
     assert contents == {"class_path": "test_prompts.CustomPromptStyle"}
     loaded = load_prompt_style(checkpoint_dir)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d76ae98056..99d883a3f2 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -250,7 +250,7 @@ def test_save_hyperparameters(tmp_path):
     with mock.patch("sys.argv", ["any.py", "--out_dir", str(tmp_path), "--foo", "True"]):
         CLI(_test_function)

-    with open(tmp_path / "hyperparameters.yaml", "r") as file:
+    with open(tmp_path / "hyperparameters.yaml", "r", encoding="utf-8") as file:
         hparams = yaml.full_load(file)

     assert hparams["out_dir"] == str(tmp_path)
@@ -277,7 +277,7 @@ def test_save_hyperparameters_known_commands(command, tmp_path):
     with mock.patch("sys.argv", [*command.split(" "), "--out_dir", str(tmp_path), "--foo", "True"]):
         save_hyperparameters(_test_function2, tmp_path)

-    with open(tmp_path / "hyperparameters.yaml", "r") as file:
+    with open(tmp_path / "hyperparameters.yaml", "r", encoding="utf-8") as file:
         hparams = yaml.full_load(file)

     assert hparams["out_dir"] == str(tmp_path)
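A note on why patch 8 pins the encoding: without an explicit `encoding` argument, Python's `open()` falls back to `locale.getpreferredencoding(False)`, which is UTF-8 on most Linux and macOS systems but commonly cp1252 on Windows, so checkpoint configs and JSON datasets containing non-ASCII text may fail to read back. A minimal, self-contained illustration (the file name is arbitrary):

```python
# Why pinning encoding="utf-8" matters: the default depends on the platform locale.
import locale

# e.g. 'UTF-8' on Linux/macOS, often 'cp1252' on Windows
print(locale.getpreferredencoding(False))

path = "story.json"  # arbitrary example file
with open(path, "w", encoding="utf-8") as f:
    f.write('{"story": "Ein Märchen über Blitze"}')  # non-ASCII content

# Reading this back without an explicit encoding can raise UnicodeDecodeError
# or silently produce mojibake wherever the locale default is not UTF-8:
with open(path, encoding="utf-8") as f:  # the safe, patched form
    print(f.read())
```

On Python 3.10+, running with `python -X warn_default_encoding` emits an `EncodingWarning` at each `open()` call that relies on the locale default, which is a quick way to locate the call sites this patch fixes.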