From be1e428d41b5936903172855f7f30861ca7eb49a Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Fri, 27 Sep 2024 13:10:30 -0700 Subject: [PATCH] [GenAI] pack GenAI core package (#7246) * update * enable llama3_2 * fix tests * pack GenAI core --- .../Llama/{LLaMA3_1.cs => LlamaSample.cs} | 7 +- .../Microsoft.ML.GenAI.Samples/Program.cs | 5 +- .../Microsoft.ML.GenAI.Core.csproj | 2 +- src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs | 14 + .../LlamaForCausalLM.cs | 46 +++- .../Microsoft.ML.GenAI.LLaMA.csproj | 9 +- .../Module/LlamaModel.cs | 4 +- .../Config/meta-llama-3.2-1B-Instruct.json | 35 +++ .../Config/meta-llama-3.2-3B-Instruct.json | 35 +++ .../Microsoft.ML.GenAI.Mistral.csproj | 4 +- .../Microsoft.ML.GenAI.Phi.csproj | 4 +- ...2Tests.Llama_3_2_1b_ShapeTest.approved.txt | 146 ++++++++++ ...2Tests.Llama_3_2_3b_ShapeTest.approved.txt | 254 ++++++++++++++++++ .../LLaMA3_2Tests.cs | 46 ++++ .../Microsoft.ML.GenAI.LLaMA.Tests.csproj | 1 + 15 files changed, 591 insertions(+), 21 deletions(-) rename docs/samples/Microsoft.ML.GenAI.Samples/Llama/{LLaMA3_1.cs => LlamaSample.cs} (85%) create mode 100644 src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json create mode 100644 src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt create mode 100644 test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs similarity index 85% rename from docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs rename to docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs index 49fcdf5892..97248ed272 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LLaMA3_1.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/LlamaSample.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.GenAI.Samples.Llama; internal class LlamaSample { - public static async void Run() + public static async Task RunLlama(string weightFolder, string checkPointName = "model.safetensors.index.json") { var device = "cuda"; if (device == "cuda") @@ -24,10 +24,9 @@ public static async void Run() torch.InitializeDeviceType(DeviceType.CUDA); } - var defaultType = ScalarType.Float16; + var defaultType = ScalarType.BFloat16; torch.manual_seed(1); torch.set_default_dtype(defaultType); - var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct"; var configName = "config.json"; var originalWeightFolder = Path.Combine(weightFolder, "original"); @@ -35,7 +34,7 @@ public static async void Run() var stopWatch = System.Diagnostics.Stopwatch.StartNew(); stopWatch.Start(); var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder); - var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1); + var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, checkPointName: checkPointName, layersOnTargetDevice: 26, quantizeToInt8: true); var pipeline = new CausalLMPipeline(tokenizer, model, device); diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs index cf166c7552..769e9f0fbe 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Program.cs @@ -1,5 +1,4 @@ // 
See https://aka.ms/new-console-template for more information -using Microsoft.ML.GenAI.Samples.Mistral; -using Microsoft.ML.GenAI.Samples.Phi3Mini; +using Microsoft.ML.GenAI.Samples.Llama; -await Mistral_7B_Instruct.WeatherChatAsync(); +await LlamaSample.RunLlama(@"C:\Users\xiaoyuz\source\repos\Llama-3.2-3B-Instruct"); diff --git a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj index 64087de176..0486831b27 100644 --- a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj +++ b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj @@ -2,7 +2,7 @@ net6.0;net8.0 - false + true enable preview diff --git a/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs b/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs index a8a6985ee8..75bcd18571 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs @@ -45,11 +45,15 @@ static LlamaConfig() var llama3_1_8b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-8B-Instruct.json"); var llama3_1_70b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-70B-Instruct.json"); var llama3_1_405b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-405B-Instruct.json"); + var llama3_2_1b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-1B-Instruct.json"); + var llama3_2_3b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-3B-Instruct.json"); #pragma warning restore MSML_ParameterLocalVarName // Parameter or local variable name not standard Llama3_1_8B_Instruct = JsonSerializer.Deserialize(llama3_1_8b_content) ?? throw new ArgumentNullException(nameof(llama3_1_8b_content)); Llama3_1_70B_Instruct = JsonSerializer.Deserialize(llama3_1_70b_content) ?? throw new ArgumentNullException(nameof(llama3_1_70b_content)); Llama3_1_405B_Instruct = JsonSerializer.Deserialize(llama3_1_405b_content) ?? throw new ArgumentNullException(nameof(llama3_1_405b_content)); + Llama3_2_1B_Instruct = JsonSerializer.Deserialize(llama3_2_1b_content) ?? throw new ArgumentNullException(nameof(llama3_2_1b_content)); + Llama_3_2_3B_Instruct = JsonSerializer.Deserialize(llama3_2_3b_content) ?? throw new ArgumentNullException(nameof(llama3_2_3b_content)); } #pragma warning disable MSML_GeneralName // This name should be PascalCased @@ -67,6 +71,16 @@ static LlamaConfig() /// The llama-3.1-405B-Instruct configuration created from https://huggingface.co/meta-llama/Meta-Llama-3.1-405B. /// public static LlamaConfig Llama3_1_405B_Instruct { get; } + + /// + /// The llama-3.2-3B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct. + /// + public static LlamaConfig Llama_3_2_3B_Instruct { get; } + + /// + /// The llama-3.2-1B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct. 
+ /// + public static LlamaConfig Llama3_2_1B_Instruct { get; } #pragma warning restore MSML_GeneralName // This name should be PascalCased [JsonPropertyName("attention_bias")] diff --git a/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs b/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs index b7e038da1b..0384efda8a 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs @@ -8,6 +8,7 @@ using Microsoft.ML.GenAI.Core.Extension; using Microsoft.ML.GenAI.LLaMA.Module; using TorchSharp; +using TorchSharp.Modules; using TorchSharp.PyBridge; using static TorchSharp.torch; @@ -19,7 +20,7 @@ public class LlamaForCausalLM : nn.Module - - - + + + + + + diff --git a/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs b/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs index ec65128332..d8596a43ca 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs +++ b/src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs @@ -30,7 +30,7 @@ public LlamaModel(LlamaConfig config, string? device = null) this._paddingIdx = config.PadTokenId; this._vocabSize = config.VocabSize; var headDim = config.HiddenSize / config.NumAttentionHeads; - this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType, device: device); + this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType); this.layers = new ModuleList(); for (int i = 0; i < config.NumHiddenLayers; i++) @@ -47,6 +47,8 @@ public LlamaModel(LlamaConfig config, string? device = null) }; } + public Embedding Embedding => this.embed_tokens; + #pragma warning disable MSML_GeneralName // This name should be PascalCased public override CausalLMModelOutput forward(CausalLMModelInput input) #pragma warning restore MSML_GeneralName // This name should be PascalCased diff --git a/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json new file mode 100644 index 0000000000..b5d8a60c73 --- /dev/null +++ b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-1B-Instruct.json @@ -0,0 +1,35 @@ +{ + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json new file mode 100644 index 0000000000..91d931a367 --- /dev/null +++ b/src/Microsoft.ML.GenAI.LLaMA/Resource/Config/meta-llama-3.2-3B-Instruct.json @@ -0,0 +1,35 @@ +{ + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + 
"initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj index 896f47e5b7..6dbf9f1aa5 100644 --- a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj +++ b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj @@ -13,9 +13,7 @@ - - - + diff --git a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj index a813828d2f..b614d2f73a 100644 --- a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj +++ b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj @@ -13,9 +13,7 @@ - - - + diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt new file mode 100644 index 0000000000..42edde3277 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_1b_ShapeTest.approved.txt @@ -0,0 +1,146 @@ +0: model.embed_tokens.weight shape: [128256, 2048] +1: model.layers.0.input_layernorm.weight shape: [2048] +2: model.layers.0.mlp.down_proj.weight shape: [2048, 8192] +3: model.layers.0.mlp.gate_proj.weight shape: [8192, 2048] +4: model.layers.0.mlp.up_proj.weight shape: [8192, 2048] +5: model.layers.0.post_attention_layernorm.weight shape: [2048] +6: model.layers.0.self_attn.k_proj.weight shape: [512, 2048] +7: model.layers.0.self_attn.o_proj.weight shape: [2048, 2048] +8: model.layers.0.self_attn.q_proj.weight shape: [2048, 2048] +9: model.layers.0.self_attn.v_proj.weight shape: [512, 2048] +10: model.layers.1.input_layernorm.weight shape: [2048] +11: model.layers.1.mlp.down_proj.weight shape: [2048, 8192] +12: model.layers.1.mlp.gate_proj.weight shape: [8192, 2048] +13: model.layers.1.mlp.up_proj.weight shape: [8192, 2048] +14: model.layers.1.post_attention_layernorm.weight shape: [2048] +15: model.layers.1.self_attn.k_proj.weight shape: [512, 2048] +16: model.layers.1.self_attn.o_proj.weight shape: [2048, 2048] +17: model.layers.1.self_attn.q_proj.weight shape: [2048, 2048] +18: model.layers.1.self_attn.v_proj.weight shape: [512, 2048] +19: model.layers.10.input_layernorm.weight shape: [2048] +20: model.layers.10.mlp.down_proj.weight shape: [2048, 8192] +21: model.layers.10.mlp.gate_proj.weight shape: [8192, 2048] +22: model.layers.10.mlp.up_proj.weight shape: [8192, 2048] +23: model.layers.10.post_attention_layernorm.weight shape: [2048] +24: model.layers.10.self_attn.k_proj.weight shape: [512, 2048] +25: model.layers.10.self_attn.o_proj.weight shape: [2048, 2048] +26: model.layers.10.self_attn.q_proj.weight shape: [2048, 2048] +27: model.layers.10.self_attn.v_proj.weight shape: [512, 2048] +28: model.layers.11.input_layernorm.weight shape: [2048] +29: model.layers.11.mlp.down_proj.weight shape: [2048, 8192] +30: model.layers.11.mlp.gate_proj.weight shape: [8192, 2048] +31: 
model.layers.11.mlp.up_proj.weight shape: [8192, 2048] +32: model.layers.11.post_attention_layernorm.weight shape: [2048] +33: model.layers.11.self_attn.k_proj.weight shape: [512, 2048] +34: model.layers.11.self_attn.o_proj.weight shape: [2048, 2048] +35: model.layers.11.self_attn.q_proj.weight shape: [2048, 2048] +36: model.layers.11.self_attn.v_proj.weight shape: [512, 2048] +37: model.layers.12.input_layernorm.weight shape: [2048] +38: model.layers.12.mlp.down_proj.weight shape: [2048, 8192] +39: model.layers.12.mlp.gate_proj.weight shape: [8192, 2048] +40: model.layers.12.mlp.up_proj.weight shape: [8192, 2048] +41: model.layers.12.post_attention_layernorm.weight shape: [2048] +42: model.layers.12.self_attn.k_proj.weight shape: [512, 2048] +43: model.layers.12.self_attn.o_proj.weight shape: [2048, 2048] +44: model.layers.12.self_attn.q_proj.weight shape: [2048, 2048] +45: model.layers.12.self_attn.v_proj.weight shape: [512, 2048] +46: model.layers.13.input_layernorm.weight shape: [2048] +47: model.layers.13.mlp.down_proj.weight shape: [2048, 8192] +48: model.layers.13.mlp.gate_proj.weight shape: [8192, 2048] +49: model.layers.13.mlp.up_proj.weight shape: [8192, 2048] +50: model.layers.13.post_attention_layernorm.weight shape: [2048] +51: model.layers.13.self_attn.k_proj.weight shape: [512, 2048] +52: model.layers.13.self_attn.o_proj.weight shape: [2048, 2048] +53: model.layers.13.self_attn.q_proj.weight shape: [2048, 2048] +54: model.layers.13.self_attn.v_proj.weight shape: [512, 2048] +55: model.layers.14.input_layernorm.weight shape: [2048] +56: model.layers.14.mlp.down_proj.weight shape: [2048, 8192] +57: model.layers.14.mlp.gate_proj.weight shape: [8192, 2048] +58: model.layers.14.mlp.up_proj.weight shape: [8192, 2048] +59: model.layers.14.post_attention_layernorm.weight shape: [2048] +60: model.layers.14.self_attn.k_proj.weight shape: [512, 2048] +61: model.layers.14.self_attn.o_proj.weight shape: [2048, 2048] +62: model.layers.14.self_attn.q_proj.weight shape: [2048, 2048] +63: model.layers.14.self_attn.v_proj.weight shape: [512, 2048] +64: model.layers.15.input_layernorm.weight shape: [2048] +65: model.layers.15.mlp.down_proj.weight shape: [2048, 8192] +66: model.layers.15.mlp.gate_proj.weight shape: [8192, 2048] +67: model.layers.15.mlp.up_proj.weight shape: [8192, 2048] +68: model.layers.15.post_attention_layernorm.weight shape: [2048] +69: model.layers.15.self_attn.k_proj.weight shape: [512, 2048] +70: model.layers.15.self_attn.o_proj.weight shape: [2048, 2048] +71: model.layers.15.self_attn.q_proj.weight shape: [2048, 2048] +72: model.layers.15.self_attn.v_proj.weight shape: [512, 2048] +73: model.layers.2.input_layernorm.weight shape: [2048] +74: model.layers.2.mlp.down_proj.weight shape: [2048, 8192] +75: model.layers.2.mlp.gate_proj.weight shape: [8192, 2048] +76: model.layers.2.mlp.up_proj.weight shape: [8192, 2048] +77: model.layers.2.post_attention_layernorm.weight shape: [2048] +78: model.layers.2.self_attn.k_proj.weight shape: [512, 2048] +79: model.layers.2.self_attn.o_proj.weight shape: [2048, 2048] +80: model.layers.2.self_attn.q_proj.weight shape: [2048, 2048] +81: model.layers.2.self_attn.v_proj.weight shape: [512, 2048] +82: model.layers.3.input_layernorm.weight shape: [2048] +83: model.layers.3.mlp.down_proj.weight shape: [2048, 8192] +84: model.layers.3.mlp.gate_proj.weight shape: [8192, 2048] +85: model.layers.3.mlp.up_proj.weight shape: [8192, 2048] +86: model.layers.3.post_attention_layernorm.weight shape: [2048] +87: model.layers.3.self_attn.k_proj.weight 
shape: [512, 2048] +88: model.layers.3.self_attn.o_proj.weight shape: [2048, 2048] +89: model.layers.3.self_attn.q_proj.weight shape: [2048, 2048] +90: model.layers.3.self_attn.v_proj.weight shape: [512, 2048] +91: model.layers.4.input_layernorm.weight shape: [2048] +92: model.layers.4.mlp.down_proj.weight shape: [2048, 8192] +93: model.layers.4.mlp.gate_proj.weight shape: [8192, 2048] +94: model.layers.4.mlp.up_proj.weight shape: [8192, 2048] +95: model.layers.4.post_attention_layernorm.weight shape: [2048] +96: model.layers.4.self_attn.k_proj.weight shape: [512, 2048] +97: model.layers.4.self_attn.o_proj.weight shape: [2048, 2048] +98: model.layers.4.self_attn.q_proj.weight shape: [2048, 2048] +99: model.layers.4.self_attn.v_proj.weight shape: [512, 2048] +100: model.layers.5.input_layernorm.weight shape: [2048] +101: model.layers.5.mlp.down_proj.weight shape: [2048, 8192] +102: model.layers.5.mlp.gate_proj.weight shape: [8192, 2048] +103: model.layers.5.mlp.up_proj.weight shape: [8192, 2048] +104: model.layers.5.post_attention_layernorm.weight shape: [2048] +105: model.layers.5.self_attn.k_proj.weight shape: [512, 2048] +106: model.layers.5.self_attn.o_proj.weight shape: [2048, 2048] +107: model.layers.5.self_attn.q_proj.weight shape: [2048, 2048] +108: model.layers.5.self_attn.v_proj.weight shape: [512, 2048] +109: model.layers.6.input_layernorm.weight shape: [2048] +110: model.layers.6.mlp.down_proj.weight shape: [2048, 8192] +111: model.layers.6.mlp.gate_proj.weight shape: [8192, 2048] +112: model.layers.6.mlp.up_proj.weight shape: [8192, 2048] +113: model.layers.6.post_attention_layernorm.weight shape: [2048] +114: model.layers.6.self_attn.k_proj.weight shape: [512, 2048] +115: model.layers.6.self_attn.o_proj.weight shape: [2048, 2048] +116: model.layers.6.self_attn.q_proj.weight shape: [2048, 2048] +117: model.layers.6.self_attn.v_proj.weight shape: [512, 2048] +118: model.layers.7.input_layernorm.weight shape: [2048] +119: model.layers.7.mlp.down_proj.weight shape: [2048, 8192] +120: model.layers.7.mlp.gate_proj.weight shape: [8192, 2048] +121: model.layers.7.mlp.up_proj.weight shape: [8192, 2048] +122: model.layers.7.post_attention_layernorm.weight shape: [2048] +123: model.layers.7.self_attn.k_proj.weight shape: [512, 2048] +124: model.layers.7.self_attn.o_proj.weight shape: [2048, 2048] +125: model.layers.7.self_attn.q_proj.weight shape: [2048, 2048] +126: model.layers.7.self_attn.v_proj.weight shape: [512, 2048] +127: model.layers.8.input_layernorm.weight shape: [2048] +128: model.layers.8.mlp.down_proj.weight shape: [2048, 8192] +129: model.layers.8.mlp.gate_proj.weight shape: [8192, 2048] +130: model.layers.8.mlp.up_proj.weight shape: [8192, 2048] +131: model.layers.8.post_attention_layernorm.weight shape: [2048] +132: model.layers.8.self_attn.k_proj.weight shape: [512, 2048] +133: model.layers.8.self_attn.o_proj.weight shape: [2048, 2048] +134: model.layers.8.self_attn.q_proj.weight shape: [2048, 2048] +135: model.layers.8.self_attn.v_proj.weight shape: [512, 2048] +136: model.layers.9.input_layernorm.weight shape: [2048] +137: model.layers.9.mlp.down_proj.weight shape: [2048, 8192] +138: model.layers.9.mlp.gate_proj.weight shape: [8192, 2048] +139: model.layers.9.mlp.up_proj.weight shape: [8192, 2048] +140: model.layers.9.post_attention_layernorm.weight shape: [2048] +141: model.layers.9.self_attn.k_proj.weight shape: [512, 2048] +142: model.layers.9.self_attn.o_proj.weight shape: [2048, 2048] +143: model.layers.9.self_attn.q_proj.weight shape: [2048, 2048] +144: 
model.layers.9.self_attn.v_proj.weight shape: [512, 2048] +145: model.norm.weight shape: [2048] diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt new file mode 100644 index 0000000000..f26687dcb0 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Approvals/LLaMA3_2Tests.Llama_3_2_3b_ShapeTest.approved.txt @@ -0,0 +1,254 @@ +0: model.embed_tokens.weight shape: [128256, 3072] +1: model.layers.0.input_layernorm.weight shape: [3072] +2: model.layers.0.mlp.down_proj.weight shape: [3072, 8192] +3: model.layers.0.mlp.gate_proj.weight shape: [8192, 3072] +4: model.layers.0.mlp.up_proj.weight shape: [8192, 3072] +5: model.layers.0.post_attention_layernorm.weight shape: [3072] +6: model.layers.0.self_attn.k_proj.weight shape: [1024, 3072] +7: model.layers.0.self_attn.o_proj.weight shape: [3072, 3072] +8: model.layers.0.self_attn.q_proj.weight shape: [3072, 3072] +9: model.layers.0.self_attn.v_proj.weight shape: [1024, 3072] +10: model.layers.1.input_layernorm.weight shape: [3072] +11: model.layers.1.mlp.down_proj.weight shape: [3072, 8192] +12: model.layers.1.mlp.gate_proj.weight shape: [8192, 3072] +13: model.layers.1.mlp.up_proj.weight shape: [8192, 3072] +14: model.layers.1.post_attention_layernorm.weight shape: [3072] +15: model.layers.1.self_attn.k_proj.weight shape: [1024, 3072] +16: model.layers.1.self_attn.o_proj.weight shape: [3072, 3072] +17: model.layers.1.self_attn.q_proj.weight shape: [3072, 3072] +18: model.layers.1.self_attn.v_proj.weight shape: [1024, 3072] +19: model.layers.10.input_layernorm.weight shape: [3072] +20: model.layers.10.mlp.down_proj.weight shape: [3072, 8192] +21: model.layers.10.mlp.gate_proj.weight shape: [8192, 3072] +22: model.layers.10.mlp.up_proj.weight shape: [8192, 3072] +23: model.layers.10.post_attention_layernorm.weight shape: [3072] +24: model.layers.10.self_attn.k_proj.weight shape: [1024, 3072] +25: model.layers.10.self_attn.o_proj.weight shape: [3072, 3072] +26: model.layers.10.self_attn.q_proj.weight shape: [3072, 3072] +27: model.layers.10.self_attn.v_proj.weight shape: [1024, 3072] +28: model.layers.11.input_layernorm.weight shape: [3072] +29: model.layers.11.mlp.down_proj.weight shape: [3072, 8192] +30: model.layers.11.mlp.gate_proj.weight shape: [8192, 3072] +31: model.layers.11.mlp.up_proj.weight shape: [8192, 3072] +32: model.layers.11.post_attention_layernorm.weight shape: [3072] +33: model.layers.11.self_attn.k_proj.weight shape: [1024, 3072] +34: model.layers.11.self_attn.o_proj.weight shape: [3072, 3072] +35: model.layers.11.self_attn.q_proj.weight shape: [3072, 3072] +36: model.layers.11.self_attn.v_proj.weight shape: [1024, 3072] +37: model.layers.12.input_layernorm.weight shape: [3072] +38: model.layers.12.mlp.down_proj.weight shape: [3072, 8192] +39: model.layers.12.mlp.gate_proj.weight shape: [8192, 3072] +40: model.layers.12.mlp.up_proj.weight shape: [8192, 3072] +41: model.layers.12.post_attention_layernorm.weight shape: [3072] +42: model.layers.12.self_attn.k_proj.weight shape: [1024, 3072] +43: model.layers.12.self_attn.o_proj.weight shape: [3072, 3072] +44: model.layers.12.self_attn.q_proj.weight shape: [3072, 3072] +45: model.layers.12.self_attn.v_proj.weight shape: [1024, 3072] +46: model.layers.13.input_layernorm.weight shape: [3072] +47: model.layers.13.mlp.down_proj.weight shape: [3072, 8192] +48: model.layers.13.mlp.gate_proj.weight shape: [8192, 3072] +49: 
model.layers.13.mlp.up_proj.weight shape: [8192, 3072] +50: model.layers.13.post_attention_layernorm.weight shape: [3072] +51: model.layers.13.self_attn.k_proj.weight shape: [1024, 3072] +52: model.layers.13.self_attn.o_proj.weight shape: [3072, 3072] +53: model.layers.13.self_attn.q_proj.weight shape: [3072, 3072] +54: model.layers.13.self_attn.v_proj.weight shape: [1024, 3072] +55: model.layers.14.input_layernorm.weight shape: [3072] +56: model.layers.14.mlp.down_proj.weight shape: [3072, 8192] +57: model.layers.14.mlp.gate_proj.weight shape: [8192, 3072] +58: model.layers.14.mlp.up_proj.weight shape: [8192, 3072] +59: model.layers.14.post_attention_layernorm.weight shape: [3072] +60: model.layers.14.self_attn.k_proj.weight shape: [1024, 3072] +61: model.layers.14.self_attn.o_proj.weight shape: [3072, 3072] +62: model.layers.14.self_attn.q_proj.weight shape: [3072, 3072] +63: model.layers.14.self_attn.v_proj.weight shape: [1024, 3072] +64: model.layers.15.input_layernorm.weight shape: [3072] +65: model.layers.15.mlp.down_proj.weight shape: [3072, 8192] +66: model.layers.15.mlp.gate_proj.weight shape: [8192, 3072] +67: model.layers.15.mlp.up_proj.weight shape: [8192, 3072] +68: model.layers.15.post_attention_layernorm.weight shape: [3072] +69: model.layers.15.self_attn.k_proj.weight shape: [1024, 3072] +70: model.layers.15.self_attn.o_proj.weight shape: [3072, 3072] +71: model.layers.15.self_attn.q_proj.weight shape: [3072, 3072] +72: model.layers.15.self_attn.v_proj.weight shape: [1024, 3072] +73: model.layers.16.input_layernorm.weight shape: [3072] +74: model.layers.16.mlp.down_proj.weight shape: [3072, 8192] +75: model.layers.16.mlp.gate_proj.weight shape: [8192, 3072] +76: model.layers.16.mlp.up_proj.weight shape: [8192, 3072] +77: model.layers.16.post_attention_layernorm.weight shape: [3072] +78: model.layers.16.self_attn.k_proj.weight shape: [1024, 3072] +79: model.layers.16.self_attn.o_proj.weight shape: [3072, 3072] +80: model.layers.16.self_attn.q_proj.weight shape: [3072, 3072] +81: model.layers.16.self_attn.v_proj.weight shape: [1024, 3072] +82: model.layers.17.input_layernorm.weight shape: [3072] +83: model.layers.17.mlp.down_proj.weight shape: [3072, 8192] +84: model.layers.17.mlp.gate_proj.weight shape: [8192, 3072] +85: model.layers.17.mlp.up_proj.weight shape: [8192, 3072] +86: model.layers.17.post_attention_layernorm.weight shape: [3072] +87: model.layers.17.self_attn.k_proj.weight shape: [1024, 3072] +88: model.layers.17.self_attn.o_proj.weight shape: [3072, 3072] +89: model.layers.17.self_attn.q_proj.weight shape: [3072, 3072] +90: model.layers.17.self_attn.v_proj.weight shape: [1024, 3072] +91: model.layers.18.input_layernorm.weight shape: [3072] +92: model.layers.18.mlp.down_proj.weight shape: [3072, 8192] +93: model.layers.18.mlp.gate_proj.weight shape: [8192, 3072] +94: model.layers.18.mlp.up_proj.weight shape: [8192, 3072] +95: model.layers.18.post_attention_layernorm.weight shape: [3072] +96: model.layers.18.self_attn.k_proj.weight shape: [1024, 3072] +97: model.layers.18.self_attn.o_proj.weight shape: [3072, 3072] +98: model.layers.18.self_attn.q_proj.weight shape: [3072, 3072] +99: model.layers.18.self_attn.v_proj.weight shape: [1024, 3072] +100: model.layers.19.input_layernorm.weight shape: [3072] +101: model.layers.19.mlp.down_proj.weight shape: [3072, 8192] +102: model.layers.19.mlp.gate_proj.weight shape: [8192, 3072] +103: model.layers.19.mlp.up_proj.weight shape: [8192, 3072] +104: model.layers.19.post_attention_layernorm.weight shape: [3072] +105: 
model.layers.19.self_attn.k_proj.weight shape: [1024, 3072] +106: model.layers.19.self_attn.o_proj.weight shape: [3072, 3072] +107: model.layers.19.self_attn.q_proj.weight shape: [3072, 3072] +108: model.layers.19.self_attn.v_proj.weight shape: [1024, 3072] +109: model.layers.2.input_layernorm.weight shape: [3072] +110: model.layers.2.mlp.down_proj.weight shape: [3072, 8192] +111: model.layers.2.mlp.gate_proj.weight shape: [8192, 3072] +112: model.layers.2.mlp.up_proj.weight shape: [8192, 3072] +113: model.layers.2.post_attention_layernorm.weight shape: [3072] +114: model.layers.2.self_attn.k_proj.weight shape: [1024, 3072] +115: model.layers.2.self_attn.o_proj.weight shape: [3072, 3072] +116: model.layers.2.self_attn.q_proj.weight shape: [3072, 3072] +117: model.layers.2.self_attn.v_proj.weight shape: [1024, 3072] +118: model.layers.20.input_layernorm.weight shape: [3072] +119: model.layers.20.mlp.down_proj.weight shape: [3072, 8192] +120: model.layers.20.mlp.gate_proj.weight shape: [8192, 3072] +121: model.layers.20.mlp.up_proj.weight shape: [8192, 3072] +122: model.layers.20.post_attention_layernorm.weight shape: [3072] +123: model.layers.20.self_attn.k_proj.weight shape: [1024, 3072] +124: model.layers.20.self_attn.o_proj.weight shape: [3072, 3072] +125: model.layers.20.self_attn.q_proj.weight shape: [3072, 3072] +126: model.layers.20.self_attn.v_proj.weight shape: [1024, 3072] +127: model.layers.21.input_layernorm.weight shape: [3072] +128: model.layers.21.mlp.down_proj.weight shape: [3072, 8192] +129: model.layers.21.mlp.gate_proj.weight shape: [8192, 3072] +130: model.layers.21.mlp.up_proj.weight shape: [8192, 3072] +131: model.layers.21.post_attention_layernorm.weight shape: [3072] +132: model.layers.21.self_attn.k_proj.weight shape: [1024, 3072] +133: model.layers.21.self_attn.o_proj.weight shape: [3072, 3072] +134: model.layers.21.self_attn.q_proj.weight shape: [3072, 3072] +135: model.layers.21.self_attn.v_proj.weight shape: [1024, 3072] +136: model.layers.22.input_layernorm.weight shape: [3072] +137: model.layers.22.mlp.down_proj.weight shape: [3072, 8192] +138: model.layers.22.mlp.gate_proj.weight shape: [8192, 3072] +139: model.layers.22.mlp.up_proj.weight shape: [8192, 3072] +140: model.layers.22.post_attention_layernorm.weight shape: [3072] +141: model.layers.22.self_attn.k_proj.weight shape: [1024, 3072] +142: model.layers.22.self_attn.o_proj.weight shape: [3072, 3072] +143: model.layers.22.self_attn.q_proj.weight shape: [3072, 3072] +144: model.layers.22.self_attn.v_proj.weight shape: [1024, 3072] +145: model.layers.23.input_layernorm.weight shape: [3072] +146: model.layers.23.mlp.down_proj.weight shape: [3072, 8192] +147: model.layers.23.mlp.gate_proj.weight shape: [8192, 3072] +148: model.layers.23.mlp.up_proj.weight shape: [8192, 3072] +149: model.layers.23.post_attention_layernorm.weight shape: [3072] +150: model.layers.23.self_attn.k_proj.weight shape: [1024, 3072] +151: model.layers.23.self_attn.o_proj.weight shape: [3072, 3072] +152: model.layers.23.self_attn.q_proj.weight shape: [3072, 3072] +153: model.layers.23.self_attn.v_proj.weight shape: [1024, 3072] +154: model.layers.24.input_layernorm.weight shape: [3072] +155: model.layers.24.mlp.down_proj.weight shape: [3072, 8192] +156: model.layers.24.mlp.gate_proj.weight shape: [8192, 3072] +157: model.layers.24.mlp.up_proj.weight shape: [8192, 3072] +158: model.layers.24.post_attention_layernorm.weight shape: [3072] +159: model.layers.24.self_attn.k_proj.weight shape: [1024, 3072] +160: 
model.layers.24.self_attn.o_proj.weight shape: [3072, 3072] +161: model.layers.24.self_attn.q_proj.weight shape: [3072, 3072] +162: model.layers.24.self_attn.v_proj.weight shape: [1024, 3072] +163: model.layers.25.input_layernorm.weight shape: [3072] +164: model.layers.25.mlp.down_proj.weight shape: [3072, 8192] +165: model.layers.25.mlp.gate_proj.weight shape: [8192, 3072] +166: model.layers.25.mlp.up_proj.weight shape: [8192, 3072] +167: model.layers.25.post_attention_layernorm.weight shape: [3072] +168: model.layers.25.self_attn.k_proj.weight shape: [1024, 3072] +169: model.layers.25.self_attn.o_proj.weight shape: [3072, 3072] +170: model.layers.25.self_attn.q_proj.weight shape: [3072, 3072] +171: model.layers.25.self_attn.v_proj.weight shape: [1024, 3072] +172: model.layers.26.input_layernorm.weight shape: [3072] +173: model.layers.26.mlp.down_proj.weight shape: [3072, 8192] +174: model.layers.26.mlp.gate_proj.weight shape: [8192, 3072] +175: model.layers.26.mlp.up_proj.weight shape: [8192, 3072] +176: model.layers.26.post_attention_layernorm.weight shape: [3072] +177: model.layers.26.self_attn.k_proj.weight shape: [1024, 3072] +178: model.layers.26.self_attn.o_proj.weight shape: [3072, 3072] +179: model.layers.26.self_attn.q_proj.weight shape: [3072, 3072] +180: model.layers.26.self_attn.v_proj.weight shape: [1024, 3072] +181: model.layers.27.input_layernorm.weight shape: [3072] +182: model.layers.27.mlp.down_proj.weight shape: [3072, 8192] +183: model.layers.27.mlp.gate_proj.weight shape: [8192, 3072] +184: model.layers.27.mlp.up_proj.weight shape: [8192, 3072] +185: model.layers.27.post_attention_layernorm.weight shape: [3072] +186: model.layers.27.self_attn.k_proj.weight shape: [1024, 3072] +187: model.layers.27.self_attn.o_proj.weight shape: [3072, 3072] +188: model.layers.27.self_attn.q_proj.weight shape: [3072, 3072] +189: model.layers.27.self_attn.v_proj.weight shape: [1024, 3072] +190: model.layers.3.input_layernorm.weight shape: [3072] +191: model.layers.3.mlp.down_proj.weight shape: [3072, 8192] +192: model.layers.3.mlp.gate_proj.weight shape: [8192, 3072] +193: model.layers.3.mlp.up_proj.weight shape: [8192, 3072] +194: model.layers.3.post_attention_layernorm.weight shape: [3072] +195: model.layers.3.self_attn.k_proj.weight shape: [1024, 3072] +196: model.layers.3.self_attn.o_proj.weight shape: [3072, 3072] +197: model.layers.3.self_attn.q_proj.weight shape: [3072, 3072] +198: model.layers.3.self_attn.v_proj.weight shape: [1024, 3072] +199: model.layers.4.input_layernorm.weight shape: [3072] +200: model.layers.4.mlp.down_proj.weight shape: [3072, 8192] +201: model.layers.4.mlp.gate_proj.weight shape: [8192, 3072] +202: model.layers.4.mlp.up_proj.weight shape: [8192, 3072] +203: model.layers.4.post_attention_layernorm.weight shape: [3072] +204: model.layers.4.self_attn.k_proj.weight shape: [1024, 3072] +205: model.layers.4.self_attn.o_proj.weight shape: [3072, 3072] +206: model.layers.4.self_attn.q_proj.weight shape: [3072, 3072] +207: model.layers.4.self_attn.v_proj.weight shape: [1024, 3072] +208: model.layers.5.input_layernorm.weight shape: [3072] +209: model.layers.5.mlp.down_proj.weight shape: [3072, 8192] +210: model.layers.5.mlp.gate_proj.weight shape: [8192, 3072] +211: model.layers.5.mlp.up_proj.weight shape: [8192, 3072] +212: model.layers.5.post_attention_layernorm.weight shape: [3072] +213: model.layers.5.self_attn.k_proj.weight shape: [1024, 3072] +214: model.layers.5.self_attn.o_proj.weight shape: [3072, 3072] +215: model.layers.5.self_attn.q_proj.weight shape: 
[3072, 3072] +216: model.layers.5.self_attn.v_proj.weight shape: [1024, 3072] +217: model.layers.6.input_layernorm.weight shape: [3072] +218: model.layers.6.mlp.down_proj.weight shape: [3072, 8192] +219: model.layers.6.mlp.gate_proj.weight shape: [8192, 3072] +220: model.layers.6.mlp.up_proj.weight shape: [8192, 3072] +221: model.layers.6.post_attention_layernorm.weight shape: [3072] +222: model.layers.6.self_attn.k_proj.weight shape: [1024, 3072] +223: model.layers.6.self_attn.o_proj.weight shape: [3072, 3072] +224: model.layers.6.self_attn.q_proj.weight shape: [3072, 3072] +225: model.layers.6.self_attn.v_proj.weight shape: [1024, 3072] +226: model.layers.7.input_layernorm.weight shape: [3072] +227: model.layers.7.mlp.down_proj.weight shape: [3072, 8192] +228: model.layers.7.mlp.gate_proj.weight shape: [8192, 3072] +229: model.layers.7.mlp.up_proj.weight shape: [8192, 3072] +230: model.layers.7.post_attention_layernorm.weight shape: [3072] +231: model.layers.7.self_attn.k_proj.weight shape: [1024, 3072] +232: model.layers.7.self_attn.o_proj.weight shape: [3072, 3072] +233: model.layers.7.self_attn.q_proj.weight shape: [3072, 3072] +234: model.layers.7.self_attn.v_proj.weight shape: [1024, 3072] +235: model.layers.8.input_layernorm.weight shape: [3072] +236: model.layers.8.mlp.down_proj.weight shape: [3072, 8192] +237: model.layers.8.mlp.gate_proj.weight shape: [8192, 3072] +238: model.layers.8.mlp.up_proj.weight shape: [8192, 3072] +239: model.layers.8.post_attention_layernorm.weight shape: [3072] +240: model.layers.8.self_attn.k_proj.weight shape: [1024, 3072] +241: model.layers.8.self_attn.o_proj.weight shape: [3072, 3072] +242: model.layers.8.self_attn.q_proj.weight shape: [3072, 3072] +243: model.layers.8.self_attn.v_proj.weight shape: [1024, 3072] +244: model.layers.9.input_layernorm.weight shape: [3072] +245: model.layers.9.mlp.down_proj.weight shape: [3072, 8192] +246: model.layers.9.mlp.gate_proj.weight shape: [8192, 3072] +247: model.layers.9.mlp.up_proj.weight shape: [8192, 3072] +248: model.layers.9.post_attention_layernorm.weight shape: [3072] +249: model.layers.9.self_attn.k_proj.weight shape: [1024, 3072] +250: model.layers.9.self_attn.o_proj.weight shape: [3072, 3072] +251: model.layers.9.self_attn.q_proj.weight shape: [3072, 3072] +252: model.layers.9.self_attn.v_proj.weight shape: [1024, 3072] +253: model.norm.weight shape: [3072] diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs b/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs new file mode 100644 index 0000000000..6babd14910 --- /dev/null +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/LLaMA3_2Tests.cs @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; +using Microsoft.ML.GenAI.Core.Extension; +using TorchSharp; +using Xunit; + +namespace Microsoft.ML.GenAI.LLaMA.Tests; + +[Collection("NoParallelization")] +public class LLaMA3_2Tests +{ + public LLaMA3_2Tests() + { + if (Environment.GetEnvironmentVariable("HELIX_CORRELATION_ID") != null) + { + Approvals.UseAssemblyLocationForApprovedFiles(); + } + + torch.set_default_device("meta"); + } + + [Fact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("Approvals")] + public void Llama_3_2_1b_ShapeTest() + { + var model = new LlamaForCausalLM(LlamaConfig.Llama3_2_1B_Instruct); + var stateDictStr = model.PeekShape(); + Approvals.Verify(stateDictStr); + } + + [WindowsOnlyFact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("Approvals")] + public void Llama_3_2_3b_ShapeTest() + { + var model = new LlamaForCausalLM(LlamaConfig.Llama_3_2_3B_Instruct); + var stateDictStr = model.PeekShape(); + Approvals.Verify(stateDictStr); + } +} diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj index 1f5948ca76..d135f09bbb 100644 --- a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj @@ -27,6 +27,7 @@ +
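
Usage sketch (not part of the patch, for illustration only): the snippet below mirrors the LlamaSample.RunLlama entry point and the Program.cs call introduced above. It assumes the Microsoft.ML.GenAI.Core and Microsoft.ML.GenAI.LLaMA packages are referenced, that the Llama-3.2-3B-Instruct weight folder named in Program.cs exists locally, and that the pipeline's generic arguments are TiktokenTokenizer and LlamaForCausalLM (the rendered diff omits them); the generation step after pipeline construction is elided because that part of the sample is unchanged by this patch.

using System.IO;
using System.Threading.Tasks;
using Microsoft.ML.GenAI.Core;
using Microsoft.ML.GenAI.LLaMA;
using Microsoft.ML.Tokenizers;
using TorchSharp;

// Mirrors LlamaSample.RunLlama from this patch: load a Llama 3.2 checkpoint, keep 26
// transformer layers on the CUDA device, and quantize the remaining layers to int8.
async Task RunLlamaAsync(string weightFolder, string checkPointName = "model.safetensors.index.json")
{
    var device = "cuda";
    torch.InitializeDeviceType(DeviceType.CUDA);
    torch.manual_seed(1);
    torch.set_default_dtype(ScalarType.BFloat16);

    var configName = "config.json";
    var originalWeightFolder = Path.Combine(weightFolder, "original"); // tokenizer assets

    var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);
    var model = LlamaForCausalLM.FromPretrained(
        weightFolder,
        configName,
        checkPointName: checkPointName,
        layersOnTargetDevice: 26,
        quantizeToInt8: true);

    // Generic arguments are assumed; the sample's rendered diff does not show them.
    var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

    // Generation proceeds as in the unchanged remainder of LlamaSample.RunLlama.
}

// Entry point as updated in Program.cs by this patch.
await RunLlamaAsync(@"C:\Users\xiaoyuz\source\repos\Llama-3.2-3B-Instruct");

For a check that needs no weight files, the new LLaMA3_2Tests above construct LlamaForCausalLM from LlamaConfig.Llama3_2_1B_Instruct (or LlamaConfig.Llama_3_2_3B_Instruct) on the torch "meta" device and verify parameter shapes via PeekShape against the approved shape listings.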