wte scaling was missed for AdapterV2 during merging PRs
Andrei-Aksionov committed Mar 26, 2024
1 parent 126a7e2 commit 8c72690
Showing 1 changed file with 2 additions and 0 deletions.
litgpt/adapter_v2.py (2 additions, 0 deletions)
@@ -99,6 +99,8 @@ def forward(
         mask = None

         x = self.transformer.wte(idx)  # token embeddings of shape (b, t, n_embd)
+        if self.config.scale_embeddings:
+            x = x * (self.config.n_embd**0.5)
         for block in self.transformer.h:
             x = block(x, cos, sin, mask, input_pos)
         x = self.transformer.ln_f(x)
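
For context, the two added lines apply the sqrt(n_embd) embedding scaling that is gated by config.scale_embeddings, matching the behavior of the other model variants; it had been dropped from AdapterV2 while merging PRs. A minimal standalone sketch of the same pattern, using made-up sizes rather than the actual LitGPT Config:

    import torch
    import torch.nn as nn

    # Illustrative sizes only; not taken from a real LitGPT config.
    vocab_size, n_embd, scale_embeddings = 100, 16, True

    wte = nn.Embedding(vocab_size, n_embd)   # token embedding table
    idx = torch.tensor([[1, 2, 3]])          # (b, t) token indices

    x = wte(idx)                             # token embeddings of shape (b, t, n_embd)
    if scale_embeddings:
        x = x * (n_embd**0.5)                # scale by sqrt(n_embd); here sqrt(16) = 4

Without this branch, AdapterV2 models configured with scale_embeddings=True would feed unscaled embeddings into the transformer blocks, diverging from the base GPT forward pass.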
