From be1e428d41b5936903172855f7f30861ca7eb49a Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Fri, 27 Sep 2024 13:10:30 -0700 Subject: [PATCH] [GenAI] pack GenAI core package (#7246) * update * enable llama3_2 * fix tests * pack GenAI core --- .../Llama/{LLaMA3_1.cs => LlamaSample.cs} | 7 +- .../Microsoft.ML.GenAI.Samples/Program.cs | 5 +- .../Microsoft.ML.GenAI.Core.csproj | 2 +- src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs | 14 + .../LlamaForCausalLM.cs | 46 +++- .../Microsoft.ML.GenAI.LLaMA.csproj | 9 +- .../Module/LlamaModel.cs | 4 +- .../Config/meta-llama-3.2-1B-Instruct.json | 35 +++ .../Config/meta-llama-3.2-3B-Instruct.json | 35 +++ .../Microsoft.ML.GenAI.Mistral.csproj | 4 +- .../Microsoft.ML.GenAI.Phi.csproj | 4 +- ...2Tests.Llama_3_2_1b_ShapeTest.approved.txt | 146 ++++++++++ ...2Tests.Llama_3_2_3b_ShapeTest.approved.txt | 254 ++++++++++++++++++ .../LLaMA3_2Tests.cs | 46 ++++ .../Microsoft.ML.GenAI.LLaMA.Tests.csproj | 1 + 15 files changed, 591 insertions(+), 21 deletions(-) rename docs/samples/Microsoft.ML.GenAI.Samples/Llama/{LLaMA3_1.cs => LlamaSample.cs} (85%) create mode 100644 src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json create mode 100644 src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs similarity index 85% rename from docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs rename to docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs index 49fcdf5892..97248ed272 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.GenAI.Samples.Llama; internal class LlamaSample { - public static async void Run() + public static async Task RunLlama(string weightFolder, string checkPointName = "model.safetensors.index.json") { var device = "cuda"; if (device == "cuda") @@ -24,10 +24,9 @@ public static async void Run() torch.InitializeDeviceType(DeviceType.CUDA); } - var defaultType = ScalarType.Float16; + var defaultType = ScalarType.BFloat16; torch.manual_seed(1); torch.set_default_dtype(defaultType); - var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct"; var configName = "config.json"; var originalWeightFolder = Path.Combine(weightFolder, "original"); @@ -35,7 +34,7 @@ public static async void Run() var stopWatch = System.Diagnostics.Stopwatch.StartNew(); stopWatch.Start(); var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder); - var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1); + var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, checkPointName: checkPointName, layersOnTargetDevice: 26, quantizeToInt8: true); var pipeline = new CausalLMPipeline(tokenizer, model, device); diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs index cf166c7552..769e9f0fbe 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs @@ -1,5 +1,4 @@ // 
See https://aka.ms/new-console-template for more information -using Microsoft.ML.GenAI.Samples.Mistral; -using Microsoft.ML.GenAI.Samples.Phi3Mini; +using Microsoft.ML.GenAI.Samples.Llama; -await Mistral_7B_Instruct.WeatherChatAsync(); +await LlamaSample.RunLlama(@"C:\Users\xiaoyuz\source\repos\Llama-3.2-3B-Instruct"); diff --git a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj index 64087de176..0486831b27 100644 --- a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj +++ b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj @@ -2,7 +2,7 @@ net6.0;net8.0 - false + true enable preview diff --git a/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs b/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs index a8a6985ee8..75bcd18571 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs @@ -45,11 +45,15 @@ static LlamaConfig() var llama3_1_8b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-8B-Instruct.json"); var llama3_1_70b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-70B-Instruct.json"); var llama3_1_405b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-405B-Instruct.json"); + var llama3_2_1b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-1B-Instruct.json"); + var llama3_2_3b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-3B-Instruct.json"); #pragma warning restore MSML_ParameterLocalVarName // Parameter or local variable name not standard Llama3_1_8B_Instruct = JsonSerializer.Deserialize(llama3_1_8b_content) ?? throw new ArgumentNullException(nameof(llama3_1_8b_content)); Llama3_1_70B_Instruct = JsonSerializer.Deserialize(llama3_1_70b_content) ?? throw new ArgumentNullException(nameof(llama3_1_70b_content)); Llama3_1_405B_Instruct = JsonSerializer.Deserialize(llama3_1_405b_content) ?? throw new ArgumentNullException(nameof(llama3_1_405b_content)); + Llama3_2_1B_Instruct = JsonSerializer.Deserialize(llama3_2_1b_content) ?? throw new ArgumentNullException(nameof(llama3_2_1b_content)); + Llama_3_2_3B_Instruct = JsonSerializer.Deserialize(llama3_2_3b_content) ?? throw new ArgumentNullException(nameof(llama3_2_3b_content)); } #pragma warning disable MSML_GeneralName // This name should be PascalCased @@ -67,6 +71,16 @@ static LlamaConfig() /// The llama-3.1-405B-Instruct configuration created from https://huggingface.co/meta-llama/Meta-Llama-3.1-405B. /// public static LlamaConfig Llama3_1_405B_Instruct { get; } + + /// + /// The llama-3.2-3B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct. + /// + public static LlamaConfig Llama_3_2_3B_Instruct { get; } + + /// + /// The llama-3.2-1B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct. 
+ /// + public static LlamaConfig Llama3_2_1B_Instruct { get; } #pragma warning restore MSML_GeneralName // This name should be PascalCased [JsonPropertyName("attention_bias")] diff --git a/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs b/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs index b7e038da1b..0384efda8a 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs @@ -8,6 +8,7 @@ using Microsoft.ML.GenAI.Core.Extension; using Microsoft.ML.GenAI.LLaMA.Module; using TorchSharp; +using TorchSharp.Modules; using TorchSharp.PyBridge; using static TorchSharp.torch; @@ -19,7 +20,7 @@ public class LlamaForCausalLM : nn.Module - - - + + + + + + diff --git a/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs b/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs index ec65128332..d8596a43ca 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs @@ -30,7 +30,7 @@ public LlamaModel(LlamaConfig config, string? device = null) this._paddingIdx = config.PadTokenId; this._vocabSize = config.VocabSize; var headDim = config.HiddenSize / config.NumAttentionHeads; - this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType, device: device); + this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType); this.layers = new ModuleList(); for (int i = 0; i < config.NumHiddenLayers; i++) @@ -47,6 +47,8 @@ public LlamaModel(LlamaConfig config, string? device = null) }; } + public Embedding Embedding => this.embed_tokens; + #pragma warning disable MSML_GeneralName // This name should be PascalCased public override CausalLMModelOutput forward(CausalLMModelInput input) #pragma warning restore MSML_GeneralName // This name should be PascalCased diff --git a/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json new file mode 100644 index 0000000000..b5d8a60c73 --- /dev/null +++ b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json @@ -0,0 +1,35 @@ +{ + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json new file mode 100644 index 0000000000..91d931a367 --- /dev/null +++ b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json @@ -0,0 +1,35 @@ +{ + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + 
"initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj index 896f47e5b7..6dbf9f1aa5 100644 --- a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj +++ b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj @@ -13,9 +13,7 @@ - - - + diff --git a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj index a813828d2f..b614d2f73a 100644 --- a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj +++ b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj @@ -13,9 +13,7 @@ - - - + diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt new file mode 100644 index 0000000000..42edde3277 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt @@ -0,0 +1,146 @@ +0: model.embed_tokens.weight shape: [128256, 2048] +1: model.layers.0.input_layernorm.weight shape: [2048] +2: model.layers.0.mlp.down_proj.weight shape: [2048, 8192] +3: model.layers.0.mlp.gate_proj.weight shape: [8192, 2048] +4: model.layers.0.mlp.up_proj.weight shape: [8192, 2048] +5: model.layers.0.post_attention_layernorm.weight shape: [2048] +6: model.layers.0.self_attn.k_proj.weight shape: [512, 2048] +7: model.layers.0.self_attn.o_proj.weight shape: [2048, 2048] +8: model.layers.0.self_attn.q_proj.weight shape: [2048, 2048] +9: model.layers.0.self_attn.v_proj.weight shape: [512, 2048] +10: model.layers.1.input_layernorm.weight shape: [2048] +11: model.layers.1.mlp.down_proj.weight shape: [2048, 8192] +12: model.layers.1.mlp.gate_proj.weight shape: [8192, 2048] +13: model.layers.1.mlp.up_proj.weight shape: [8192, 2048] +14: model.layers.1.post_attention_layernorm.weight shape: [2048] +15: model.layers.1.self_attn.k_proj.weight shape: [512, 2048] +16: model.layers.1.self_attn.o_proj.weight shape: [2048, 2048] +17: model.layers.1.self_attn.q_proj.weight shape: [2048, 2048] +18: model.layers.1.self_attn.v_proj.weight shape: [512, 2048] +19: model.layers.10.input_layernorm.weight shape: [2048] +20: model.layers.10.mlp.down_proj.weight shape: [2048, 8192] +21: model.layers.10.mlp.gate_proj.weight shape: [8192, 2048] +22: model.layers.10.mlp.up_proj.weight shape: [8192, 2048] +23: model.layers.10.post_attention_layernorm.weight shape: [2048] +24: model.layers.10.self_attn.k_proj.weight shape: [512, 2048] +25: model.layers.10.self_attn.o_proj.weight shape: [2048, 2048] +26: model.layers.10.self_attn.q_proj.weight shape: [2048, 2048] +27: model.layers.10.self_attn.v_proj.weight shape: [512, 2048] +28: model.layers.11.input_layernorm.weight shape: [2048] +29: model.layers.11.mlp.down_proj.weight shape: [2048, 8192] +30: model.layers.11.mlp.gate_proj.weight shape: [8192, 2048] +31: 
model.layers.11.mlp.up_proj.weight shape: [8192, 2048] +32: model.layers.11.post_attention_layernorm.weight shape: [2048] +33: model.layers.11.self_attn.k_proj.weight shape: [512, 2048] +34: model.layers.11.self_attn.o_proj.weight shape: [2048, 2048] +35: model.layers.11.self_attn.q_proj.weight shape: [2048, 2048] +36: model.layers.11.self_attn.v_proj.weight shape: [512, 2048] +37: model.layers.12.input_layernorm.weight shape: [2048] +38: model.layers.12.mlp.down_proj.weight shape: [2048, 8192] +39: model.layers.12.mlp.gate_proj.weight shape: [8192, 2048] +40: model.layers.12.mlp.up_proj.weight shape: [8192, 2048] +41: model.layers.12.post_attention_layernorm.weight shape: [2048] +42: model.layers.12.self_attn.k_proj.weight shape: [512, 2048] +43: model.layers.12.self_attn.o_proj.weight shape: [2048, 2048] +44: model.layers.12.self_attn.q_proj.weight shape: [2048, 2048] +45: model.layers.12.self_attn.v_proj.weight shape: [512, 2048] +46: model.layers.13.input_layernorm.weight shape: [2048] +47: model.layers.13.mlp.down_proj.weight shape: [2048, 8192] +48: model.layers.13.mlp.gate_proj.weight shape: [8192, 2048] +49: model.layers.13.mlp.up_proj.weight shape: [8192, 2048] +50: model.layers.13.post_attention_layernorm.weight shape: [2048] +51: model.layers.13.self_attn.k_proj.weight shape: [512, 2048] +52: model.layers.13.self_attn.o_proj.weight shape: [2048, 2048] +53: model.layers.13.self_attn.q_proj.weight shape: [2048, 2048] +54: model.layers.13.self_attn.v_proj.weight shape: [512, 2048] +55: model.layers.14.input_layernorm.weight shape: [2048] +56: model.layers.14.mlp.down_proj.weight shape: [2048, 8192] +57: model.layers.14.mlp.gate_proj.weight shape: [8192, 2048] +58: model.layers.14.mlp.up_proj.weight shape: [8192, 2048] +59: model.layers.14.post_attention_layernorm.weight shape: [2048] +60: model.layers.14.self_attn.k_proj.weight shape: [512, 2048] +61: model.layers.14.self_attn.o_proj.weight shape: [2048, 2048] +62: model.layers.14.self_attn.q_proj.weight shape: [2048, 2048] +63: model.layers.14.self_attn.v_proj.weight shape: [512, 2048] +64: model.layers.15.input_layernorm.weight shape: [2048] +65: model.layers.15.mlp.down_proj.weight shape: [2048, 8192] +66: model.layers.15.mlp.gate_proj.weight shape: [8192, 2048] +67: model.layers.15.mlp.up_proj.weight shape: [8192, 2048] +68: model.layers.15.post_attention_layernorm.weight shape: [2048] +69: model.layers.15.self_attn.k_proj.weight shape: [512, 2048] +70: model.layers.15.self_attn.o_proj.weight shape: [2048, 2048] +71: model.layers.15.self_attn.q_proj.weight shape: [2048, 2048] +72: model.layers.15.self_attn.v_proj.weight shape: [512, 2048] +73: model.layers.2.input_layernorm.weight shape: [2048] +74: model.layers.2.mlp.down_proj.weight shape: [2048, 8192] +75: model.layers.2.mlp.gate_proj.weight shape: [8192, 2048] +76: model.layers.2.mlp.up_proj.weight shape: [8192, 2048] +77: model.layers.2.post_attention_layernorm.weight shape: [2048] +78: model.layers.2.self_attn.k_proj.weight shape: [512, 2048] +79: model.layers.2.self_attn.o_proj.weight shape: [2048, 2048] +80: model.layers.2.self_attn.q_proj.weight shape: [2048, 2048] +81: model.layers.2.self_attn.v_proj.weight shape: [512, 2048] +82: model.layers.3.input_layernorm.weight shape: [2048] +83: model.layers.3.mlp.down_proj.weight shape: [2048, 8192] +84: model.layers.3.mlp.gate_proj.weight shape: [8192, 2048] +85: model.layers.3.mlp.up_proj.weight shape: [8192, 2048] +86: model.layers.3.post_attention_layernorm.weight shape: [2048] +87: model.layers.3.self_attn.k_proj.weight 
shape: [512, 2048] +88: model.layers.3.self_attn.o_proj.weight shape: [2048, 2048] +89: model.layers.3.self_attn.q_proj.weight shape: [2048, 2048] +90: model.layers.3.self_attn.v_proj.weight shape: [512, 2048] +91: model.layers.4.input_layernorm.weight shape: [2048] +92: model.layers.4.mlp.down_proj.weight shape: [2048, 8192] +93: model.layers.4.mlp.gate_proj.weight shape: [8192, 2048] +94: model.layers.4.mlp.up_proj.weight shape: [8192, 2048] +95: model.layers.4.post_attention_layernorm.weight shape: [2048] +96: model.layers.4.self_attn.k_proj.weight shape: [512, 2048] +97: model.layers.4.self_attn.o_proj.weight shape: [2048, 2048] +98: model.layers.4.self_attn.q_proj.weight shape: [2048, 2048] +99: model.layers.4.self_attn.v_proj.weight shape: [512, 2048] +100: model.layers.5.input_layernorm.weight shape: [2048] +101: model.layers.5.mlp.down_proj.weight shape: [2048, 8192] +102: model.layers.5.mlp.gate_proj.weight shape: [8192, 2048] +103: model.layers.5.mlp.up_proj.weight shape: [8192, 2048] +104: model.layers.5.post_attention_layernorm.weight shape: [2048] +105: model.layers.5.self_attn.k_proj.weight shape: [512, 2048] +106: model.layers.5.self_attn.o_proj.weight shape: [2048, 2048] +107: model.layers.5.self_attn.q_proj.weight shape: [2048, 2048] +108: model.layers.5.self_attn.v_proj.weight shape: [512, 2048] +109: model.layers.6.input_layernorm.weight shape: [2048] +110: model.layers.6.mlp.down_proj.weight shape: [2048, 8192] +111: model.layers.6.mlp.gate_proj.weight shape: [8192, 2048] +112: model.layers.6.mlp.up_proj.weight shape: [8192, 2048] +113: model.layers.6.post_attention_layernorm.weight shape: [2048] +114: model.layers.6.self_attn.k_proj.weight shape: [512, 2048] +115: model.layers.6.self_attn.o_proj.weight shape: [2048, 2048] +116: model.layers.6.self_attn.q_proj.weight shape: [2048, 2048] +117: model.layers.6.self_attn.v_proj.weight shape: [512, 2048] +118: model.layers.7.input_layernorm.weight shape: [2048] +119: model.layers.7.mlp.down_proj.weight shape: [2048, 8192] +120: model.layers.7.mlp.gate_proj.weight shape: [8192, 2048] +121: model.layers.7.mlp.up_proj.weight shape: [8192, 2048] +122: model.layers.7.post_attention_layernorm.weight shape: [2048] +123: model.layers.7.self_attn.k_proj.weight shape: [512, 2048] +124: model.layers.7.self_attn.o_proj.weight shape: [2048, 2048] +125: model.layers.7.self_attn.q_proj.weight shape: [2048, 2048] +126: model.layers.7.self_attn.v_proj.weight shape: [512, 2048] +127: model.layers.8.input_layernorm.weight shape: [2048] +128: model.layers.8.mlp.down_proj.weight shape: [2048, 8192] +129: model.layers.8.mlp.gate_proj.weight shape: [8192, 2048] +130: model.layers.8.mlp.up_proj.weight shape: [8192, 2048] +131: model.layers.8.post_attention_layernorm.weight shape: [2048] +132: model.layers.8.self_attn.k_proj.weight shape: [512, 2048] +133: model.layers.8.self_attn.o_proj.weight shape: [2048, 2048] +134: model.layers.8.self_attn.q_proj.weight shape: [2048, 2048] +135: model.layers.8.self_attn.v_proj.weight shape: [512, 2048] +136: model.layers.9.input_layernorm.weight shape: [2048] +137: model.layers.9.mlp.down_proj.weight shape: [2048, 8192] +138: model.layers.9.mlp.gate_proj.weight shape: [8192, 2048] +139: model.layers.9.mlp.up_proj.weight shape: [8192, 2048] +140: model.layers.9.post_attention_layernorm.weight shape: [2048] +141: model.layers.9.self_attn.k_proj.weight shape: [512, 2048] +142: model.layers.9.self_attn.o_proj.weight shape: [2048, 2048] +143: model.layers.9.self_attn.q_proj.weight shape: [2048, 2048] +144: 
model.layers.9.self_attn.v_proj.weight shape: [512, 2048] +145: model.norm.weight shape: [2048] diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt new file mode 100644 index 0000000000..f26687dcb0 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt @@ -0,0 +1,254 @@ +0: model.embed_tokens.weight shape: [128256, 3072] +1: model.layers.0.input_layernorm.weight shape: [3072] +2: model.layers.0.mlp.down_proj.weight shape: [3072, 8192] +3: model.layers.0.mlp.gate_proj.weight shape: [8192, 3072] +4: model.layers.0.mlp.up_proj.weight shape: [8192, 3072] +5: model.layers.0.post_attention_layernorm.weight shape: [3072] +6: model.layers.0.self_attn.k_proj.weight shape: [1024, 3072] +7: model.layers.0.self_attn.o_proj.weight shape: [3072, 3072] +8: model.layers.0.self_attn.q_proj.weight shape: [3072, 3072] +9: model.layers.0.self_attn.v_proj.weight shape: [1024, 3072] +10: model.layers.1.input_layernorm.weight shape: [3072] +11: model.layers.1.mlp.down_proj.weight shape: [3072, 8192] +12: model.layers.1.mlp.gate_proj.weight shape: [8192, 3072] +13: model.layers.1.mlp.up_proj.weight shape: [8192, 3072] +14: model.layers.1.post_attention_layernorm.weight shape: [3072] +15: model.layers.1.self_attn.k_proj.weight shape: [1024, 3072] +16: model.layers.1.self_attn.o_proj.weight shape: [3072, 3072] +17: model.layers.1.self_attn.q_proj.weight shape: [3072, 3072] +18: model.layers.1.self_attn.v_proj.weight shape: [1024, 3072] +19: model.layers.10.input_layernorm.weight shape: [3072] +20: model.layers.10.mlp.down_proj.weight shape: [3072, 8192] +21: model.layers.10.mlp.gate_proj.weight shape: [8192, 3072] +22: model.layers.10.mlp.up_proj.weight shape: [8192, 3072] +23: model.layers.10.post_attention_layernorm.weight shape: [3072] +24: model.layers.10.self_attn.k_proj.weight shape: [1024, 3072] +25: model.layers.10.self_attn.o_proj.weight shape: [3072, 3072] +26: model.layers.10.self_attn.q_proj.weight shape: [3072, 3072] +27: model.layers.10.self_attn.v_proj.weight shape: [1024, 3072] +28: model.layers.11.input_layernorm.weight shape: [3072] +29: model.layers.11.mlp.down_proj.weight shape: [3072, 8192] +30: model.layers.11.mlp.gate_proj.weight shape: [8192, 3072] +31: model.layers.11.mlp.up_proj.weight shape: [8192, 3072] +32: model.layers.11.post_attention_layernorm.weight shape: [3072] +33: model.layers.11.self_attn.k_proj.weight shape: [1024, 3072] +34: model.layers.11.self_attn.o_proj.weight shape: [3072, 3072] +35: model.layers.11.self_attn.q_proj.weight shape: [3072, 3072] +36: model.layers.11.self_attn.v_proj.weight shape: [1024, 3072] +37: model.layers.12.input_layernorm.weight shape: [3072] +38: model.layers.12.mlp.down_proj.weight shape: [3072, 8192] +39: model.layers.12.mlp.gate_proj.weight shape: [8192, 3072] +40: model.layers.12.mlp.up_proj.weight shape: [8192, 3072] +41: model.layers.12.post_attention_layernorm.weight shape: [3072] +42: model.layers.12.self_attn.k_proj.weight shape: [1024, 3072] +43: model.layers.12.self_attn.o_proj.weight shape: [3072, 3072] +44: model.layers.12.self_attn.q_proj.weight shape: [3072, 3072] +45: model.layers.12.self_attn.v_proj.weight shape: [1024, 3072] +46: model.layers.13.input_layernorm.weight shape: [3072] +47: model.layers.13.mlp.down_proj.weight shape: [3072, 8192] +48: model.layers.13.mlp.gate_proj.weight shape: [8192, 3072] +49: 
model.layers.13.mlp.up_proj.weight shape: [8192, 3072] +50: model.layers.13.post_attention_layernorm.weight shape: [3072] +51: model.layers.13.self_attn.k_proj.weight shape: [1024, 3072] +52: model.layers.13.self_attn.o_proj.weight shape: [3072, 3072] +53: model.layers.13.self_attn.q_proj.weight shape: [3072, 3072] +54: model.layers.13.self_attn.v_proj.weight shape: [1024, 3072] +55: model.layers.14.input_layernorm.weight shape: [3072] +56: model.layers.14.mlp.down_proj.weight shape: [3072, 8192] +57: model.layers.14.mlp.gate_proj.weight shape: [8192, 3072] +58: model.layers.14.mlp.up_proj.weight shape: [8192, 3072] +59: model.layers.14.post_attention_layernorm.weight shape: [3072] +60: model.layers.14.self_attn.k_proj.weight shape: [1024, 3072] +61: model.layers.14.self_attn.o_proj.weight shape: [3072, 3072] +62: model.layers.14.self_attn.q_proj.weight shape: [3072, 3072] +63: model.layers.14.self_attn.v_proj.weight shape: [1024, 3072] +64: model.layers.15.input_layernorm.weight shape: [3072] +65: model.layers.15.mlp.down_proj.weight shape: [3072, 8192] +66: model.layers.15.mlp.gate_proj.weight shape: [8192, 3072] +67: model.layers.15.mlp.up_proj.weight shape: [8192, 3072] +68: model.layers.15.post_attention_layernorm.weight shape: [3072] +69: model.layers.15.self_attn.k_proj.weight shape: [1024, 3072] +70: model.layers.15.self_attn.o_proj.weight shape: [3072, 3072] +71: model.layers.15.self_attn.q_proj.weight shape: [3072, 3072] +72: model.layers.15.self_attn.v_proj.weight shape: [1024, 3072] +73: model.layers.16.input_layernorm.weight shape: [3072] +74: model.layers.16.mlp.down_proj.weight shape: [3072, 8192] +75: model.layers.16.mlp.gate_proj.weight shape: [8192, 3072] +76: model.layers.16.mlp.up_proj.weight shape: [8192, 3072] +77: model.layers.16.post_attention_layernorm.weight shape: [3072] +78: model.layers.16.self_attn.k_proj.weight shape: [1024, 3072] +79: model.layers.16.self_attn.o_proj.weight shape: [3072, 3072] +80: model.layers.16.self_attn.q_proj.weight shape: [3072, 3072] +81: model.layers.16.self_attn.v_proj.weight shape: [1024, 3072] +82: model.layers.17.input_layernorm.weight shape: [3072] +83: model.layers.17.mlp.down_proj.weight shape: [3072, 8192] +84: model.layers.17.mlp.gate_proj.weight shape: [8192, 3072] +85: model.layers.17.mlp.up_proj.weight shape: [8192, 3072] +86: model.layers.17.post_attention_layernorm.weight shape: [3072] +87: model.layers.17.self_attn.k_proj.weight shape: [1024, 3072] +88: model.layers.17.self_attn.o_proj.weight shape: [3072, 3072] +89: model.layers.17.self_attn.q_proj.weight shape: [3072, 3072] +90: model.layers.17.self_attn.v_proj.weight shape: [1024, 3072] +91: model.layers.18.input_layernorm.weight shape: [3072] +92: model.layers.18.mlp.down_proj.weight shape: [3072, 8192] +93: model.layers.18.mlp.gate_proj.weight shape: [8192, 3072] +94: model.layers.18.mlp.up_proj.weight shape: [8192, 3072] +95: model.layers.18.post_attention_layernorm.weight shape: [3072] +96: model.layers.18.self_attn.k_proj.weight shape: [1024, 3072] +97: model.layers.18.self_attn.o_proj.weight shape: [3072, 3072] +98: model.layers.18.self_attn.q_proj.weight shape: [3072, 3072] +99: model.layers.18.self_attn.v_proj.weight shape: [1024, 3072] +100: model.layers.19.input_layernorm.weight shape: [3072] +101: model.layers.19.mlp.down_proj.weight shape: [3072, 8192] +102: model.layers.19.mlp.gate_proj.weight shape: [8192, 3072] +103: model.layers.19.mlp.up_proj.weight shape: [8192, 3072] +104: model.layers.19.post_attention_layernorm.weight shape: [3072] +105: 
model.layers.19.self_attn.k_proj.weight shape: [1024, 3072] +106: model.layers.19.self_attn.o_proj.weight shape: [3072, 3072] +107: model.layers.19.self_attn.q_proj.weight shape: [3072, 3072] +108: model.layers.19.self_attn.v_proj.weight shape: [1024, 3072] +109: model.layers.2.input_layernorm.weight shape: [3072] +110: model.layers.2.mlp.down_proj.weight shape: [3072, 8192] +111: model.layers.2.mlp.gate_proj.weight shape: [8192, 3072] +112: model.layers.2.mlp.up_proj.weight shape: [8192, 3072] +113: model.layers.2.post_attention_layernorm.weight shape: [3072] +114: model.layers.2.self_attn.k_proj.weight shape: [1024, 3072] +115: model.layers.2.self_attn.o_proj.weight shape: [3072, 3072] +116: model.layers.2.self_attn.q_proj.weight shape: [3072, 3072] +117: model.layers.2.self_attn.v_proj.weight shape: [1024, 3072] +118: model.layers.20.input_layernorm.weight shape: [3072] +119: model.layers.20.mlp.down_proj.weight shape: [3072, 8192] +120: model.layers.20.mlp.gate_proj.weight shape: [8192, 3072] +121: model.layers.20.mlp.up_proj.weight shape: [8192, 3072] +122: model.layers.20.post_attention_layernorm.weight shape: [3072] +123: model.layers.20.self_attn.k_proj.weight shape: [1024, 3072] +124: model.layers.20.self_attn.o_proj.weight shape: [3072, 3072] +125: model.layers.20.self_attn.q_proj.weight shape: [3072, 3072] +126: model.layers.20.self_attn.v_proj.weight shape: [1024, 3072] +127: model.layers.21.input_layernorm.weight shape: [3072] +128: model.layers.21.mlp.down_proj.weight shape: [3072, 8192] +129: model.layers.21.mlp.gate_proj.weight shape: [8192, 3072] +130: model.layers.21.mlp.up_proj.weight shape: [8192, 3072] +131: model.layers.21.post_attention_layernorm.weight shape: [3072] +132: model.layers.21.self_attn.k_proj.weight shape: [1024, 3072] +133: model.layers.21.self_attn.o_proj.weight shape: [3072, 3072] +134: model.layers.21.self_attn.q_proj.weight shape: [3072, 3072] +135: model.layers.21.self_attn.v_proj.weight shape: [1024, 3072] +136: model.layers.22.input_layernorm.weight shape: [3072] +137: model.layers.22.mlp.down_proj.weight shape: [3072, 8192] +138: model.layers.22.mlp.gate_proj.weight shape: [8192, 3072] +139: model.layers.22.mlp.up_proj.weight shape: [8192, 3072] +140: model.layers.22.post_attention_layernorm.weight shape: [3072] +141: model.layers.22.self_attn.k_proj.weight shape: [1024, 3072] +142: model.layers.22.self_attn.o_proj.weight shape: [3072, 3072] +143: model.layers.22.self_attn.q_proj.weight shape: [3072, 3072] +144: model.layers.22.self_attn.v_proj.weight shape: [1024, 3072] +145: model.layers.23.input_layernorm.weight shape: [3072] +146: model.layers.23.mlp.down_proj.weight shape: [3072, 8192] +147: model.layers.23.mlp.gate_proj.weight shape: [8192, 3072] +148: model.layers.23.mlp.up_proj.weight shape: [8192, 3072] +149: model.layers.23.post_attention_layernorm.weight shape: [3072] +150: model.layers.23.self_attn.k_proj.weight shape: [1024, 3072] +151: model.layers.23.self_attn.o_proj.weight shape: [3072, 3072] +152: model.layers.23.self_attn.q_proj.weight shape: [3072, 3072] +153: model.layers.23.self_attn.v_proj.weight shape: [1024, 3072] +154: model.layers.24.input_layernorm.weight shape: [3072] +155: model.layers.24.mlp.down_proj.weight shape: [3072, 8192] +156: model.layers.24.mlp.gate_proj.weight shape: [8192, 3072] +157: model.layers.24.mlp.up_proj.weight shape: [8192, 3072] +158: model.layers.24.post_attention_layernorm.weight shape: [3072] +159: model.layers.24.self_attn.k_proj.weight shape: [1024, 3072] +160: 
model.layers.24.self_attn.o_proj.weight shape: [3072, 3072] +161: model.layers.24.self_attn.q_proj.weight shape: [3072, 3072] +162: model.layers.24.self_attn.v_proj.weight shape: [1024, 3072] +163: model.layers.25.input_layernorm.weight shape: [3072] +164: model.layers.25.mlp.down_proj.weight shape: [3072, 8192] +165: model.layers.25.mlp.gate_proj.weight shape: [8192, 3072] +166: model.layers.25.mlp.up_proj.weight shape: [8192, 3072] +167: model.layers.25.post_attention_layernorm.weight shape: [3072] +168: model.layers.25.self_attn.k_proj.weight shape: [1024, 3072] +169: model.layers.25.self_attn.o_proj.weight shape: [3072, 3072] +170: model.layers.25.self_attn.q_proj.weight shape: [3072, 3072] +171: model.layers.25.self_attn.v_proj.weight shape: [1024, 3072] +172: model.layers.26.input_layernorm.weight shape: [3072] +173: model.layers.26.mlp.down_proj.weight shape: [3072, 8192] +174: model.layers.26.mlp.gate_proj.weight shape: [8192, 3072] +175: model.layers.26.mlp.up_proj.weight shape: [8192, 3072] +176: model.layers.26.post_attention_layernorm.weight shape: [3072] +177: model.layers.26.self_attn.k_proj.weight shape: [1024, 3072] +178: model.layers.26.self_attn.o_proj.weight shape: [3072, 3072] +179: model.layers.26.self_attn.q_proj.weight shape: [3072, 3072] +180: model.layers.26.self_attn.v_proj.weight shape: [1024, 3072] +181: model.layers.27.input_layernorm.weight shape: [3072] +182: model.layers.27.mlp.down_proj.weight shape: [3072, 8192] +183: model.layers.27.mlp.gate_proj.weight shape: [8192, 3072] +184: model.layers.27.mlp.up_proj.weight shape: [8192, 3072] +185: model.layers.27.post_attention_layernorm.weight shape: [3072] +186: model.layers.27.self_attn.k_proj.weight shape: [1024, 3072] +187: model.layers.27.self_attn.o_proj.weight shape: [3072, 3072] +188: model.layers.27.self_attn.q_proj.weight shape: [3072, 3072] +189: model.layers.27.self_attn.v_proj.weight shape: [1024, 3072] +190: model.layers.3.input_layernorm.weight shape: [3072] +191: model.layers.3.mlp.down_proj.weight shape: [3072, 8192] +192: model.layers.3.mlp.gate_proj.weight shape: [8192, 3072] +193: model.layers.3.mlp.up_proj.weight shape: [8192, 3072] +194: model.layers.3.post_attention_layernorm.weight shape: [3072] +195: model.layers.3.self_attn.k_proj.weight shape: [1024, 3072] +196: model.layers.3.self_attn.o_proj.weight shape: [3072, 3072] +197: model.layers.3.self_attn.q_proj.weight shape: [3072, 3072] +198: model.layers.3.self_attn.v_proj.weight shape: [1024, 3072] +199: model.layers.4.input_layernorm.weight shape: [3072] +200: model.layers.4.mlp.down_proj.weight shape: [3072, 8192] +201: model.layers.4.mlp.gate_proj.weight shape: [8192, 3072] +202: model.layers.4.mlp.up_proj.weight shape: [8192, 3072] +203: model.layers.4.post_attention_layernorm.weight shape: [3072] +204: model.layers.4.self_attn.k_proj.weight shape: [1024, 3072] +205: model.layers.4.self_attn.o_proj.weight shape: [3072, 3072] +206: model.layers.4.self_attn.q_proj.weight shape: [3072, 3072] +207: model.layers.4.self_attn.v_proj.weight shape: [1024, 3072] +208: model.layers.5.input_layernorm.weight shape: [3072] +209: model.layers.5.mlp.down_proj.weight shape: [3072, 8192] +210: model.layers.5.mlp.gate_proj.weight shape: [8192, 3072] +211: model.layers.5.mlp.up_proj.weight shape: [8192, 3072] +212: model.layers.5.post_attention_layernorm.weight shape: [3072] +213: model.layers.5.self_attn.k_proj.weight shape: [1024, 3072] +214: model.layers.5.self_attn.o_proj.weight shape: [3072, 3072] +215: model.layers.5.self_attn.q_proj.weight shape: 
[3072, 3072] +216: model.layers.5.self_attn.v_proj.weight shape: [1024, 3072] +217: model.layers.6.input_layernorm.weight shape: [3072] +218: model.layers.6.mlp.down_proj.weight shape: [3072, 8192] +219: model.layers.6.mlp.gate_proj.weight shape: [8192, 3072] +220: model.layers.6.mlp.up_proj.weight shape: [8192, 3072] +221: model.layers.6.post_attention_layernorm.weight shape: [3072] +222: model.layers.6.self_attn.k_proj.weight shape: [1024, 3072] +223: model.layers.6.self_attn.o_proj.weight shape: [3072, 3072] +224: model.layers.6.self_attn.q_proj.weight shape: [3072, 3072] +225: model.layers.6.self_attn.v_proj.weight shape: [1024, 3072] +226: model.layers.7.input_layernorm.weight shape: [3072] +227: model.layers.7.mlp.down_proj.weight shape: [3072, 8192] +228: model.layers.7.mlp.gate_proj.weight shape: [8192, 3072] +229: model.layers.7.mlp.up_proj.weight shape: [8192, 3072] +230: model.layers.7.post_attention_layernorm.weight shape: [3072] +231: model.layers.7.self_attn.k_proj.weight shape: [1024, 3072] +232: model.layers.7.self_attn.o_proj.weight shape: [3072, 3072] +233: model.layers.7.self_attn.q_proj.weight shape: [3072, 3072] +234: model.layers.7.self_attn.v_proj.weight shape: [1024, 3072] +235: model.layers.8.input_layernorm.weight shape: [3072] +236: model.layers.8.mlp.down_proj.weight shape: [3072, 8192] +237: model.layers.8.mlp.gate_proj.weight shape: [8192, 3072] +238: model.layers.8.mlp.up_proj.weight shape: [8192, 3072] +239: model.layers.8.post_attention_layernorm.weight shape: [3072] +240: model.layers.8.self_attn.k_proj.weight shape: [1024, 3072] +241: model.layers.8.self_attn.o_proj.weight shape: [3072, 3072] +242: model.layers.8.self_attn.q_proj.weight shape: [3072, 3072] +243: model.layers.8.self_attn.v_proj.weight shape: [1024, 3072] +244: model.layers.9.input_layernorm.weight shape: [3072] +245: model.layers.9.mlp.down_proj.weight shape: [3072, 8192] +246: model.layers.9.mlp.gate_proj.weight shape: [8192, 3072] +247: model.layers.9.mlp.up_proj.weight shape: [8192, 3072] +248: model.layers.9.post_attention_layernorm.weight shape: [3072] +249: model.layers.9.self_attn.k_proj.weight shape: [1024, 3072] +250: model.layers.9.self_attn.o_proj.weight shape: [3072, 3072] +251: model.layers.9.self_attn.q_proj.weight shape: [3072, 3072] +252: model.layers.9.self_attn.v_proj.weight shape: [1024, 3072] +253: model.norm.weight shape: [3072] diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs b/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs new file mode 100644 index 0000000000..6babd14910 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; +using Microsoft.ML.GenAI.Core.Extension; +using TorchSharp; +using Xunit; + +namespace Microsoft.ML.GenAI.LLaMA.Tests; + +[Collection("NoParallelization")] +public class LLaMA3_2Tests +{ + public LLaMA3_2Tests() + { + if (Environment.GetEnvironmentVariable("HELIX_CORRELATION_ID") != null) + { + Approvals.UseAssemblyLocationForApprovedFiles(); + } + + torch.set_default_device("meta"); + } + + [Fact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("Approvals")] + public void Llama_3_2_1b_ShapeTest() + { + var model = new LlamaForCausalLM(LlamaConfig.Llama3_2_1B_Instruct); + var stateDictStr = model.PeekShape(); + Approvals.Verify(stateDictStr); + } + + [WindowsOnlyFact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("Approvals")] + public void Llama_3_2_3b_ShapeTest() + { + var model = new LlamaForCausalLM(LlamaConfig.Llama_3_2_3B_Instruct); + var stateDictStr = model.PeekShape(); + Approvals.Verify(stateDictStr); + } +} diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj index 1f5948ca76..d135f09bbb 100644 --- a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj @@ -27,6 +27,7 @@ +
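
Usage sketch (not part of the patch, for illustration only): the snippet below mirrors the LlamaSample.RunLlama entry point and the Program.cs call introduced above. It assumes the Microsoft.ML.GenAI.Core and Microsoft.ML.GenAI.LLaMA packages are referenced, that the Llama-3.2-3B-Instruct weight folder named in Program.cs exists locally, and that the pipeline's generic arguments are TiktokenTokenizer and LlamaForCausalLM (the rendered diff omits them); the generation step after pipeline construction is elided because that part of the sample is unchanged by this patch.

using System.IO;
using System.Threading.Tasks;
using Microsoft.ML.GenAI.Core;
using Microsoft.ML.GenAI.LLaMA;
using Microsoft.ML.Tokenizers;
using TorchSharp;

// Mirrors LlamaSample.RunLlama from this patch: load a Llama 3.2 checkpoint, keep 26
// transformer layers on the CUDA device, and quantize the remaining layers to int8.
async Task RunLlamaAsync(string weightFolder, string checkPointName = "model.safetensors.index.json")
{
    var device = "cuda";
    torch.InitializeDeviceType(DeviceType.CUDA);
    torch.manual_seed(1);
    torch.set_default_dtype(ScalarType.BFloat16);

    var configName = "config.json";
    var originalWeightFolder = Path.Combine(weightFolder, "original"); // tokenizer assets

    var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);
    var model = LlamaForCausalLM.FromPretrained(
        weightFolder,
        configName,
        checkPointName: checkPointName,
        layersOnTargetDevice: 26,
        quantizeToInt8: true);

    // Generic arguments are assumed; the sample's rendered diff does not show them.
    var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

    // Generation proceeds as in the unchanged remainder of LlamaSample.RunLlama.
}

// Entry point as updated in Program.cs by this patch.
await RunLlamaAsync(@"C:\Users\xiaoyuz\source\repos\Llama-3.2-3B-Instruct");

For a check that needs no weight files, the new LLaMA3_2Tests above construct LlamaForCausalLM from LlamaConfig.Llama3_2_1B_Instruct (or LlamaConfig.Llama_3_2_3B_Instruct) on the torch "meta" device and verify parameter shapes via PeekShape against the approved shape listings.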