Skip to content

Commit

Permalink
[GenAI] pack GenAI core package (#7246)
Browse files Browse the repository at this point in the history
* update

* enable llama3_2

* fix tests

* pack GenAI core
  • Loading branch information
LittleLittleCloud authored Sep 27, 2024
1 parent 817a77f commit be1e428
Show file tree
Hide file tree
Showing 15 changed files with 591 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,25 @@ namespace Microsoft.ML.GenAI.Samples.Llama;

internal class LlamaSample
{
public static async void Run()
public static async Task RunLlama(string weightFolder, string checkPointName = "model.safetensors.index.json")
{
var device = "cuda";
if (device == "cuda")
{
torch.InitializeDeviceType(DeviceType.CUDA);
}

var defaultType = ScalarType.Float16;
var defaultType = ScalarType.BFloat16;
torch.manual_seed(1);
torch.set_default_dtype(defaultType);
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct";
var configName = "config.json";
var originalWeightFolder = Path.Combine(weightFolder, "original");

Console.WriteLine("Loading Llama from huggingface model weight folder");
var stopWatch = System.Diagnostics.Stopwatch.StartNew();
stopWatch.Start();
var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);
var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1);
var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, checkPointName: checkPointName, layersOnTargetDevice: 26, quantizeToInt8: true);

var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

Expand Down
5 changes: 2 additions & 3 deletions docs/samples/Microsoft.ML.GenAI.Samples/Program.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// See https://aka.ms/new-console-template for more information
using Microsoft.ML.GenAI.Samples.Mistral;
using Microsoft.ML.GenAI.Samples.Phi3Mini;
using Microsoft.ML.GenAI.Samples.Llama;

await Mistral_7B_Instruct.WeatherChatAsync();
await LlamaSample.RunLlama(@"C:\Users\xiaoyuz\source\repos\Llama-3.2-3B-Instruct");
2 changes: 1 addition & 1 deletion src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<IsPackable>false</IsPackable>
<IsPackable>true</IsPackable>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
</PropertyGroup>
Expand Down
14 changes: 14 additions & 0 deletions src/Microsoft.ML.GenAI.LLaMA/LlamaConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,15 @@ static LlamaConfig()
var llama3_1_8b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-8B-Instruct.json");
var llama3_1_70b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-70B-Instruct.json");
var llama3_1_405b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.1-405B-Instruct.json");
var llama3_2_1b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-1B-Instruct.json");
var llama3_2_3b_content = Utils.GetEmbeddedResource("Microsoft.ML.GenAI.LLaMA.Resource.Config.meta-llama-3.2-3B-Instruct.json");
#pragma warning restore MSML_ParameterLocalVarName // Parameter or local variable name not standard

Llama3_1_8B_Instruct = JsonSerializer.Deserialize<LlamaConfig>(llama3_1_8b_content) ?? throw new ArgumentNullException(nameof(llama3_1_8b_content));
Llama3_1_70B_Instruct = JsonSerializer.Deserialize<LlamaConfig>(llama3_1_70b_content) ?? throw new ArgumentNullException(nameof(llama3_1_70b_content));
Llama3_1_405B_Instruct = JsonSerializer.Deserialize<LlamaConfig>(llama3_1_405b_content) ?? throw new ArgumentNullException(nameof(llama3_1_405b_content));
Llama3_2_1B_Instruct = JsonSerializer.Deserialize<LlamaConfig>(llama3_2_1b_content) ?? throw new ArgumentNullException(nameof(llama3_2_1b_content));
Llama_3_2_3B_Instruct = JsonSerializer.Deserialize<LlamaConfig>(llama3_2_3b_content) ?? throw new ArgumentNullException(nameof(llama3_2_3b_content));
}

#pragma warning disable MSML_GeneralName // This name should be PascalCased
Expand All @@ -67,6 +71,16 @@ static LlamaConfig()
/// The llama-3.1-405B-Instruct configuration created from https://huggingface.co/meta-llama/Meta-Llama-3.1-405B.
/// </summary>
public static LlamaConfig Llama3_1_405B_Instruct { get; }

// NOTE(review): this property is spelled "Llama_3_2_..." (underscore right after "Llama"), unlike the
// sibling properties Llama3_1_405B_Instruct and Llama3_2_1B_Instruct. It is public API, so the name is
// left as-is here; consider adding a consistently named alias rather than renaming.
/// <summary>
/// The llama-3.2-3B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct.
/// </summary>
public static LlamaConfig Llama_3_2_3B_Instruct { get; }

/// <summary>
/// The llama-3.2-1B-Instruct configuration created from https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct.
/// </summary>
public static LlamaConfig Llama3_2_1B_Instruct { get; }
#pragma warning restore MSML_GeneralName // This name should be PascalCased

[JsonPropertyName("attention_bias")]
Expand Down
46 changes: 43 additions & 3 deletions src/Microsoft.ML.GenAI.LLaMA/LlamaForCausalLM.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Microsoft.ML.GenAI.Core.Extension;
using Microsoft.ML.GenAI.LLaMA.Module;
using TorchSharp;
using TorchSharp.Modules;
using TorchSharp.PyBridge;
using static TorchSharp.torch;

Expand All @@ -19,7 +20,7 @@ public class LlamaForCausalLM : nn.Module<CausalLMModelInput, CausalLMModelOutpu
private readonly int _vocabSize;

#pragma warning disable MSML_PrivateFieldName // Private field name not in: _camelCase format
private readonly GenAILinear lm_head;
private readonly Linear lm_head;
private readonly LlamaModel model;
#pragma warning restore MSML_PrivateFieldName // Private field name not in: _camelCase format

Expand All @@ -30,9 +31,29 @@ public LlamaForCausalLM(LlamaConfig config, string? device = null)
_vocabSize = config.VocabSize;

model = new LlamaModel(config, device);
lm_head = new GenAILinear(config.HiddenSize, config.VocabSize, hasBias: false);

this.RegisterComponents();
// When tie word embeddings is true, lm_head shares the same weight as the embedding layer.
// Therefore, the lm_head weight is not initialized here;
// instead, it is loaded from the embedding layer after the model is loaded.
if (config.TieWordEmbeddings)
{
this.RegisterComponents();
lm_head = nn.Linear(config.HiddenSize, config.VocabSize, hasBias: false, dtype: config.DType);
}
else
{
lm_head = nn.Linear(config.HiddenSize, config.VocabSize, hasBias: false, dtype: config.DType);
this.RegisterComponents();
}

}

/// <summary>
/// Loads the embedding layer's state dict into <c>lm_head</c> and then moves <c>lm_head</c>
/// to the device of the embedding weight.
/// </summary>
private void TieWordEmbeddings()
{
    var embedding = model.Embedding;

    // Load the embedding layer's parameters into the output projection.
    lm_head.load_state_dict(embedding.state_dict());

    // Keep lm_head on the same device as the embedding weight it was loaded from.
    var embeddingDevice = embedding.weight!.device;
    lm_head.to(device: embeddingDevice);
}

#pragma warning disable MSML_GeneralName // This name should be PascalCased
Expand Down Expand Up @@ -61,6 +82,11 @@ public static LlamaForCausalLM FromPretrained(

model.LoadSafeTensors(modelFolder, checkPointName);
model = model.to(device);
if (modelConfig.TieWordEmbeddings)
{
model.TieWordEmbeddings();
}


return model;
}
Expand Down Expand Up @@ -107,8 +133,22 @@ public static LlamaForCausalLM FromPretrained(

model.LoadSafeTensors(modelFolder, checkPointName);

if (quantizeToInt8)
{
model.ToInt8QuantizeModule();
}
else if (quantizeToInt4)
{
model.ToInt4QuantizeModule();
}

model = model.ToDynamicLoadingModel(deviceMap, targetDevice);

if (modelConfig.TieWordEmbeddings)
{
model.TieWordEmbeddings();
}

torch.set_default_device(originalDefaultDevice);

return model;
Expand Down
9 changes: 6 additions & 3 deletions src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
</ItemGroup>

<ItemGroup>
<EmbeddedResource Include="Resource\Config\*.json" />
</ItemGroup>

<ItemGroup>
<None Remove="Resource\Config\meta-llama-3.2-1B-Instruct.json" />
<None Remove="Resource\Config\meta-llama-3.2-3B-Instruct.json" />
</ItemGroup>

</Project>
4 changes: 3 additions & 1 deletion src/Microsoft.ML.GenAI.LLaMA/Module/LlamaModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public LlamaModel(LlamaConfig config, string? device = null)
this._paddingIdx = config.PadTokenId;
this._vocabSize = config.VocabSize;
var headDim = config.HiddenSize / config.NumAttentionHeads;
this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType, device: device);
this.embed_tokens = nn.Embedding(config.VocabSize, config.HiddenSize, padding_idx: this._paddingIdx, dtype: config.DType);
this.layers = new ModuleList<LlamaDecoderLayer>();

for (int i = 0; i < config.NumHiddenLayers; i++)
Expand All @@ -47,6 +47,8 @@ public LlamaModel(LlamaConfig config, string? device = null)
};
}

/// <summary>
/// Gets the token embedding layer (<c>embed_tokens</c>) of this model.
/// </summary>
public Embedding Embedding
{
    get => this.embed_tokens;
}

#pragma warning disable MSML_GeneralName // This name should be PascalCased
public override CausalLMModelOutput forward(CausalLMModelInput input)
#pragma warning restore MSML_GeneralName // This name should be PascalCased
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"eos_token_id": [
128001,
128008,
128009
],
"head_dim": 64,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 8192,
"max_position_embeddings": 131072,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 16,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 32.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_max_position_embeddings": 8192,
"rope_type": "llama3"
},
"rope_theta": 500000.0,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"use_cache": true,
"vocab_size": 128256
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"eos_token_id": [
128001,
128008,
128009
],
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 3072,
"initializer_range": 0.02,
"intermediate_size": 8192,
"max_position_embeddings": 131072,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 24,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 32.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_max_position_embeddings": 8192,
"rope_type": "llama3"
},
"rope_theta": 500000.0,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"use_cache": true,
"vocab_size": 128256
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down
4 changes: 1 addition & 3 deletions src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" PrivateAssets="all" />
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
<ProjectReference Include="..\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down
Loading

0 comments on commit be1e428

Please sign in to comment.