From 5d966605e9429d864922859451d58f79f07f21d6 Mon Sep 17 00:00:00 2001
From: Andrei-Aksionov <58434077+Andrei-Aksionov@users.noreply.github.com>
Date: Wed, 24 Apr 2024 00:51:14 +0300
Subject: [PATCH] Phi (tests): create a class directly from HF (#1343)

---
 .gitignore                           |  2 -
 tests/test_convert_lit_checkpoint.py | 70 ++-------------------
 tests/test_model.py                  | 91 +++-------------------------
 3 files changed, 15 insertions(+), 148 deletions(-)

diff --git a/.gitignore b/.gitignore
index dbb8acd6ab..a2e84c57ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,5 +16,3 @@ checkpoints
 out
 wandb
 events.out.tfevents*
-
-tests/reference_models
diff --git a/tests/test_convert_lit_checkpoint.py b/tests/test_convert_lit_checkpoint.py
index f44609a4f1..ca4ee9881e 100644
--- a/tests/test_convert_lit_checkpoint.py
+++ b/tests/test_convert_lit_checkpoint.py
@@ -222,73 +222,13 @@ def test_against_original_open_llama_3b():
 
 
 @torch.inference_mode()
-def test_against_hf_phi_1_5():
-    wd = Path(__file__).parent.parent.absolute()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_1_5.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_1_5 import PhiForCausalLM
+@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
+def test_against_hf_phi(model_name):
+    from transformers.models.phi.configuration_phi import PhiConfig
+    from transformers.models.phi.modeling_phi import PhiForCausalLM
 
     ours_config = Config.from_name(
-        "phi-1_5", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
-    )
-    T = 5
-    theirs_config = PhiConfig(
-        vocab_size=ours_config.padded_vocab_size,
-        max_position_embeddings=ours_config.block_size,
-        hidden_size=ours_config.n_embd,
-        intermediate_size=ours_config.intermediate_size,
-        num_attention_heads=ours_config.n_head,
-        num_hidden_layers=ours_config.n_layer,
-        partial_rotary_factor=ours_config.rotary_percentage,
-    )
-
-    ours_model = GPT(ours_config)
-    ours_state_dict = ours_model.state_dict()
-    theirs_state_dict = {}
-    copy_weights_phi(ours_config, theirs_state_dict, ours_state_dict)
-    theirs_model = PhiForCausalLM(theirs_config)
-    # strict=False because we don't save the rotary embeddings inv frequency
-    keys = theirs_model.load_state_dict(theirs_state_dict, strict=False)
-    assert not keys.unexpected_keys
-    assert all("inv_freq" in k for k in keys.missing_keys)
-
-    # test end to end
-    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32)
-    assert x.size(1) == T
-    ours_y = ours_model(x)
-    theirs_y = theirs_model(x)["logits"]
-    torch.testing.assert_close(ours_y, theirs_y)
-
-
-@torch.inference_mode()
-def test_against_hf_phi_2():
-    wd = Path(__file__).parent.parent.absolute()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_2.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-2/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-2/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_2 import PhiForCausalLM
-
-    ours_config = Config.from_name(
-        "phi-2", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
+        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
     )
     T = 5
     theirs_config = PhiConfig(
diff --git a/tests/test_model.py b/tests/test_model.py
index 0537098342..7743c4f143 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -207,11 +207,11 @@ def test_against_original_open_llama_3b(device, dtype):
 @pytest.mark.parametrize(
     "ours_kwargs",
     [
-        {"name": "Llama-2-7b-hf"},
-        {"name": "CodeLlama-7b-hf"},
-        {"name": "Llama-2-70b-chat-hf", "n_query_groups": 1},
-        {"name": "Llama-3-8B"},
-        {"name": "Llama-3-8B-Instruct"}
+        {"name": "Llama-2-7b-hf"},
+        {"name": "CodeLlama-7b-hf"},
+        {"name": "Llama-2-70b-chat-hf", "n_query_groups": 1},
+        {"name": "Llama-3-8B"},
+        {"name": "Llama-3-8B-Instruct"},
     ],
 )
 @pytest.mark.parametrize(
@@ -267,6 +267,7 @@ def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype):
 
 
 @torch.inference_mode()
+@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
 @pytest.mark.parametrize(
     ("device", "dtype"),
     [
@@ -278,86 +279,14 @@ def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype):
         ),
     ],
 )
-def test_against_hf_phi_1_5(device, dtype):
-    wd = Path(__file__).parent.parent.resolve()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_1_5.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_1_5 import PhiForCausalLM
+def test_against_hf_phi(model_name, device, dtype):
+    from transformers.models.phi.configuration_phi import PhiConfig
+    from transformers.models.phi.modeling_phi import PhiForCausalLM
 
     torch.set_default_dtype(dtype)
 
     ours_config = Config.from_name(
-        "phi-1_5", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
-    )
-    T = 5
-    theirs_config = PhiConfig(
-        vocab_size=ours_config.padded_vocab_size,
-        max_position_embeddings=ours_config.block_size,
-        hidden_size=ours_config.n_embd,
-        intermediate_size=ours_config.intermediate_size,
-        num_attention_heads=ours_config.n_head,
-        num_hidden_layers=ours_config.n_layer,
-        partial_rotary_factor=ours_config.rotary_percentage,
-        torch_dtype=dtype,
-    )
-
-    theirs_model = PhiForCausalLM(theirs_config).to(device)
-    theirs_state_dict = theirs_model.state_dict()
-    state_dict = {}
-    copy_weights_phi(ours_config, {}, state_dict, theirs_state_dict)
-    ours_model = GPT(ours_config).to(device)
-    ours_model.load_state_dict(state_dict)
-
-    # test end to end
-    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32, device=device)
-    assert x.size(1) == T
-    ours_y = ours_model(x)
-    theirs_y = theirs_model(x)["logits"].to(dtype)  # HF converts logits to float
-    torch.testing.assert_close(ours_y, theirs_y)
-
-
-@torch.inference_mode()
-@pytest.mark.parametrize(
-    ("device", "dtype"),
-    [
-        (torch.device("cpu"), torch.float32),
-        pytest.param(
-            torch.device("cuda"),
-            torch.float16,
-            marks=[pytest.mark.xfail(raises=AssertionError, strict=False), RunIf(min_cuda_gpus=1)],
-        ),
-    ],
-)
-def test_against_hf_phi_2(device, dtype):
-    wd = Path(__file__).parent.parent.resolve()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_2.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-2/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-2/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_2 import PhiForCausalLM
-
-    torch.set_default_dtype(dtype)
-
-    ours_config = Config.from_name(
-        "phi-2", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
+        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
     )
     T = 5
     theirs_config = PhiConfig(
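
Note: the hunks above show the consolidated test only down to the `theirs_config = PhiConfig(` line; the remainder of its body is unchanged context carried over from the old per-model tests. For reference, below is a minimal sketch of the full consolidated test in tests/test_convert_lit_checkpoint.py, assembled from the removed and added lines above. The `GPT`, `Config`, and `copy_weights_phi` import paths are assumptions (the diff does not show the file's module-level imports), not something this patch specifies.

import pytest
import torch

# Assumed import locations -- the diff does not show this file's imports.
from litgpt import GPT, Config
from litgpt.scripts.convert_lit_checkpoint import copy_weights_phi


@torch.inference_mode()
@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
def test_against_hf_phi(model_name):
    # The reference implementation now ships with `transformers`, so nothing
    # has to be downloaded from the HF Hub at test time.
    from transformers.models.phi.configuration_phi import PhiConfig
    from transformers.models.phi.modeling_phi import PhiForCausalLM

    ours_config = Config.from_name(
        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
    )
    T = 5
    theirs_config = PhiConfig(
        vocab_size=ours_config.padded_vocab_size,
        max_position_embeddings=ours_config.block_size,
        hidden_size=ours_config.n_embd,
        intermediate_size=ours_config.intermediate_size,
        num_attention_heads=ours_config.n_head,
        num_hidden_layers=ours_config.n_layer,
        partial_rotary_factor=ours_config.rotary_percentage,
    )

    # Convert our weights into the HF layout, then compare logits end to end.
    ours_model = GPT(ours_config)
    ours_state_dict = ours_model.state_dict()
    theirs_state_dict = {}
    copy_weights_phi(ours_config, theirs_state_dict, ours_state_dict)
    theirs_model = PhiForCausalLM(theirs_config)
    # strict=False because we don't save the rotary embeddings inv frequency
    keys = theirs_model.load_state_dict(theirs_state_dict, strict=False)
    assert not keys.unexpected_keys
    assert all("inv_freq" in k for k in keys.missing_keys)

    # test end to end
    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32)
    assert x.size(1) == T
    ours_y = ours_model(x)
    theirs_y = theirs_model(x)["logits"]
    torch.testing.assert_close(ours_y, theirs_y)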