CodeGemma-7b-it (#1272)
Andrei-Aksionov authored Apr 11, 2024
1 parent 88f6574 commit ca07e5e
Showing 5 changed files with 30 additions and 2 deletions.
1 change: 0 additions & 1 deletion .github/workflows/cpu-tests.yml
@@ -16,7 +16,6 @@ defaults:

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
  UV_HTTP_TIMEOUT: 500

jobs:
  cpu-tests:
1 change: 1 addition & 0 deletions README.md
@@ -140,6 +140,7 @@ Use, Finetune, pretrain, deploy over 20+ LLMs ([full list](tutorials/download_mo

| Model | Model size | Author | Reference |
|----|----|----|----|
| CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
| Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
| Dolly | 3B, 7B, 12B | Databricks | [Conover et al. 2023](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) |
| Falcon | 7B, 40B, 180B | TII UAE | [TII 2023](https://falconllm.tii.ae) |
26 changes: 26 additions & 0 deletions litgpt/config.py
@@ -888,6 +888,32 @@ def norm_class(self) -> Type:
    copy["hf_config"]["name"] = f"{c['hf_config']['name']}-it"
    configs.append(copy)

##################
# Google CodeGemma
##################
codegemma = [
    # https://huggingface.co/google/codegemma-7b-it/blob/main/config.json
    dict(
        name="CodeGemma-7b-it",
        hf_config=dict(org="google", name="codegemma-7b-it"),
        scale_embeddings=True,
        vocab_size=256000,
        padding_multiple=64,
        n_embd=3072,
        n_layer=28,
        n_head=16,
        head_size=256,
        rotary_percentage=1.0,
        parallel_residual=False,
        bias=False,
        norm_class_name="RMSNorm",
        mlp_class_name="GemmaMLP",
        gelu_approximate="tanh",
        intermediate_size=24576,
    ),
]
configs.extend(codegemma)


##########################
# Stability AI FreeWilly2
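For context, a minimal sketch (not part of the commit) of how the new entry can be picked up once it is registered in `configs`, assuming `litgpt.config.Config.from_name` resolves entries by their `name` field as it does for the other configs:

```python
# Sketch: load the newly registered CodeGemma config by name.
# Assumes Config.from_name looks the entry up in the `configs` registry above.
from litgpt.config import Config

config = Config.from_name("CodeGemma-7b-it")
print(config.n_layer, config.n_embd, config.intermediate_size)  # 28 3072 24576

# Internal consistency checks on the values added in this commit:
assert config.intermediate_size == 8 * config.n_embd      # 24576 == 8 * 3072
assert config.vocab_size % config.padding_multiple == 0   # 256000 % 64 == 0
```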
2 changes: 1 addition & 1 deletion litgpt/prompts.py
@@ -330,7 +330,7 @@ def model_name_to_prompt_style(model_name: str) -> PromptStyle:
        return Phi2()
    if re.search(r"tiny-llama.*chat", model_name):
        return TinyLlama()
    if re.search(r"Gemma.*-it", model_name):
    if re.search(r"(Code)?Gemma.*-it", model_name):
        return Gemma()
    return Default()

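A standalone sketch of what the widened pattern accepts: the optional `(Code)?` group lets CodeGemma's instruction-tuned checkpoint reuse the Gemma prompt style (the last name below is just an illustrative string without the `-it` suffix):

```python
import re

pattern = r"(Code)?Gemma.*-it"
for name in ["Gemma-2b-it", "Gemma-7b-it", "CodeGemma-7b-it", "CodeGemma-7b"]:
    # Only the "-it" (instruction-tuned) names should map to the Gemma prompt style.
    print(name, bool(re.search(pattern, name)))
# Gemma-2b-it True, Gemma-7b-it True, CodeGemma-7b-it True, CodeGemma-7b False
```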
2 changes: 2 additions & 0 deletions tutorials/download_model_weights.md
@@ -5,6 +5,7 @@ LitGPT supports a variety of LLM architectures with publicly available weights.

| Model | Model size | Reference |
|----------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|
| CodeGemma by Google | 7B | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
| Code Llama by Meta AI | 7B, 13B, 34B, 70B | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
| Dolly by Databricks | 3B, 7B, 12B | [Conover et al. 2023](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) |
| Falcon by TII UAE | 7B, 40B, 180B | [TII 2023](https://falconllm.tii.ae) |
@@ -84,6 +85,7 @@ garage-bAInd/Platypus2-70B
garage-bAInd/Platypus2-70B-instruct
garage-bAInd/Platypus2-7B
garage-bAInd/Stable-Platypus2-13B
google/codegemma-7b-it
google/gemma-2b
google/gemma-2b-it
google/gemma-7b
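For reference, a hedged sketch (not part of the commit) of fetching the new checkpoint directly with `huggingface_hub`; it assumes the Gemma license has been accepted on Hugging Face and that a valid `HF_TOKEN` is available, since the repo is gated. The project's own download CLI is covered in the tutorial this table belongs to.

```python
# Sketch: pull the gated CodeGemma weights from the Hugging Face Hub.
# Assumes the Gemma license has been accepted and HF_TOKEN is set in the environment.
import os
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="google/codegemma-7b-it",
    token=os.environ.get("HF_TOKEN"),  # gated repo: a valid token is required
)
```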
