Phi (tests): create a class directly from HF (#1343)
Andrei-Aksionov authored Apr 23, 2024
1 parent a91b520 commit 5d96660
Showing 3 changed files with 15 additions and 148 deletions.
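The Phi tests previously downloaded Microsoft's reference modeling_phi.py / configuration_phi.py from the Hugging Face Hub into tests/reference_models at test time. They now instantiate PhiConfig and PhiForCausalLM directly from the installed transformers package, and the separate phi-1_5 / phi-2 tests are merged into a single test parametrized over model_name; the now-unneeded tests/reference_models entry is dropped from .gitignore.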
2 changes: 0 additions & 2 deletions .gitignore
@@ -16,5 +16,3 @@ checkpoints
 out
 wandb
 events.out.tfevents*
-
-tests/reference_models
70 changes: 5 additions & 65 deletions tests/test_convert_lit_checkpoint.py
@@ -222,73 +222,13 @@ def test_against_original_open_llama_3b():
 
 
 @torch.inference_mode()
-def test_against_hf_phi_1_5():
-    wd = Path(__file__).parent.parent.absolute()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_1_5.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_1_5 import PhiForCausalLM
+@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
+def test_against_hf_phi(model_name):
+    from transformers.models.phi.configuration_phi import PhiConfig
+    from transformers.models.phi.modeling_phi import PhiForCausalLM
 
     ours_config = Config.from_name(
-        "phi-1_5", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
-    )
-    T = 5
-    theirs_config = PhiConfig(
-        vocab_size=ours_config.padded_vocab_size,
-        max_position_embeddings=ours_config.block_size,
-        hidden_size=ours_config.n_embd,
-        intermediate_size=ours_config.intermediate_size,
-        num_attention_heads=ours_config.n_head,
-        num_hidden_layers=ours_config.n_layer,
-        partial_rotary_factor=ours_config.rotary_percentage,
-    )
-
-    ours_model = GPT(ours_config)
-    ours_state_dict = ours_model.state_dict()
-    theirs_state_dict = {}
-    copy_weights_phi(ours_config, theirs_state_dict, ours_state_dict)
-    theirs_model = PhiForCausalLM(theirs_config)
-    # strict=False because we don't save the rotary embeddings inv frequency
-    keys = theirs_model.load_state_dict(theirs_state_dict, strict=False)
-    assert not keys.unexpected_keys
-    assert all("inv_freq" in k for k in keys.missing_keys)
-
-    # test end to end
-    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32)
-    assert x.size(1) == T
-    ours_y = ours_model(x)
-    theirs_y = theirs_model(x)["logits"]
-    torch.testing.assert_close(ours_y, theirs_y)
-
-
-@torch.inference_mode()
-def test_against_hf_phi_2():
-    wd = Path(__file__).parent.parent.absolute()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_2.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-2/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-2/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_2 import PhiForCausalLM
-
-    ours_config = Config.from_name(
-        "phi-2", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
+        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
     )
     T = 5
     theirs_config = PhiConfig(
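Pieced together from the added lines and the body of the deleted test_against_hf_phi_1_5, the merged test in tests/test_convert_lit_checkpoint.py presumably reads as follows (the hunk is truncated at theirs_config = PhiConfig(...), so the tail is taken from the removed phi-1_5 body; module-level imports such as torch, pytest, Config, GPT, and copy_weights_phi are assumed from the surrounding test file):

@torch.inference_mode()
@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
def test_against_hf_phi(model_name):
    # instantiate the reference implementation directly from the installed
    # `transformers` package instead of files downloaded from the HF Hub
    from transformers.models.phi.configuration_phi import PhiConfig
    from transformers.models.phi.modeling_phi import PhiForCausalLM

    ours_config = Config.from_name(
        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
    )
    T = 5
    theirs_config = PhiConfig(
        vocab_size=ours_config.padded_vocab_size,
        max_position_embeddings=ours_config.block_size,
        hidden_size=ours_config.n_embd,
        intermediate_size=ours_config.intermediate_size,
        num_attention_heads=ours_config.n_head,
        num_hidden_layers=ours_config.n_layer,
        partial_rotary_factor=ours_config.rotary_percentage,
    )

    # convert our weights to the HF layout and load them into the HF model
    ours_model = GPT(ours_config)
    ours_state_dict = ours_model.state_dict()
    theirs_state_dict = {}
    copy_weights_phi(ours_config, theirs_state_dict, ours_state_dict)
    theirs_model = PhiForCausalLM(theirs_config)
    # strict=False because we don't save the rotary embeddings inv frequency
    keys = theirs_model.load_state_dict(theirs_state_dict, strict=False)
    assert not keys.unexpected_keys
    assert all("inv_freq" in k for k in keys.missing_keys)

    # test end to end: both models must produce identical logits
    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32)
    assert x.size(1) == T
    ours_y = ours_model(x)
    theirs_y = theirs_model(x)["logits"]
    torch.testing.assert_close(ours_y, theirs_y)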
91 changes: 10 additions & 81 deletions tests/test_model.py
@@ -207,11 +207,11 @@ def test_against_original_open_llama_3b(device, dtype):
 @pytest.mark.parametrize(
     "ours_kwargs",
     [
-        {"name": "Llama-2-7b-hf"},
-        {"name": "CodeLlama-7b-hf"},
-        {"name": "Llama-2-70b-chat-hf", "n_query_groups": 1},
-        {"name": "Llama-3-8B"},
-        {"name": "Llama-3-8B-Instruct"}
+        {"name": "Llama-2-7b-hf"},
+        {"name": "CodeLlama-7b-hf"},
+        {"name": "Llama-2-70b-chat-hf", "n_query_groups": 1},
+        {"name": "Llama-3-8B"},
+        {"name": "Llama-3-8B-Instruct"},
     ],
 )
 @pytest.mark.parametrize(
@@ -267,6 +267,7 @@ def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype):
 
 
 @torch.inference_mode()
+@pytest.mark.parametrize("model_name", ("phi-1_5", "phi-2"))
 @pytest.mark.parametrize(
     ("device", "dtype"),
     [
@@ -278,86 +279,14 @@ def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype):
         ),
     ],
 )
-def test_against_hf_phi_1_5(device, dtype):
-    wd = Path(__file__).parent.parent.resolve()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_1_5.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-1_5/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_1_5 import PhiForCausalLM
+def test_against_hf_phi(model_name, device, dtype):
+    from transformers.models.phi.configuration_phi import PhiConfig
+    from transformers.models.phi.modeling_phi import PhiForCausalLM
 
     torch.set_default_dtype(dtype)
 
     ours_config = Config.from_name(
-        "phi-1_5", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
-    )
-    T = 5
-    theirs_config = PhiConfig(
-        vocab_size=ours_config.padded_vocab_size,
-        max_position_embeddings=ours_config.block_size,
-        hidden_size=ours_config.n_embd,
-        intermediate_size=ours_config.intermediate_size,
-        num_attention_heads=ours_config.n_head,
-        num_hidden_layers=ours_config.n_layer,
-        partial_rotary_factor=ours_config.rotary_percentage,
-        torch_dtype=dtype,
-    )
-
-    theirs_model = PhiForCausalLM(theirs_config).to(device)
-    theirs_state_dict = theirs_model.state_dict()
-    state_dict = {}
-    copy_weights_phi(ours_config, {}, state_dict, theirs_state_dict)
-    ours_model = GPT(ours_config).to(device)
-    ours_model.load_state_dict(state_dict)
-
-    # test end to end
-    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32, device=device)
-    assert x.size(1) == T
-    ours_y = ours_model(x)
-    theirs_y = theirs_model(x)["logits"].to(dtype)  # HF converts logits to float
-    torch.testing.assert_close(ours_y, theirs_y)
-
-
-@torch.inference_mode()
-@pytest.mark.parametrize(
-    ("device", "dtype"),
-    [
-        (torch.device("cpu"), torch.float32),
-        pytest.param(
-            torch.device("cuda"),
-            torch.float16,
-            marks=[pytest.mark.xfail(raises=AssertionError, strict=False), RunIf(min_cuda_gpus=1)],
-        ),
-    ],
-)
-def test_against_hf_phi_2(device, dtype):
-    wd = Path(__file__).parent.parent.resolve()
-    workdir = wd / "tests" / "reference_models"
-    workdir.mkdir(parents=True, exist_ok=True)
-    file_paths = [workdir / "original_phi_2.py", workdir / "configuration_phi.py"]
-    urls = [
-        "https://huggingface.co/microsoft/phi-2/raw/main/modeling_phi.py",
-        "https://huggingface.co/microsoft/phi-2/raw/main/configuration_phi.py",
-    ]
-    for file_path, url in zip(file_paths, urls):
-        if not file_path.is_file():
-            urlretrieve(url=url, filename=file_path)
-
-    from reference_models.configuration_phi import PhiConfig
-    from reference_models.original_phi_2 import PhiForCausalLM
-
-    torch.set_default_dtype(dtype)
-
-    ours_config = Config.from_name(
-        "phi-2", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
+        model_name, padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
    )
    T = 5
    theirs_config = PhiConfig(
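The counterpart in tests/test_model.py runs the same comparison in the opposite direction, across a device/dtype matrix. A sketch of its distinctive tail, assuming the truncated portion mirrors the deleted phi-1_5 body (including torch_dtype=dtype in the PhiConfig):

    # note the reversed direction vs. test_convert_lit_checkpoint.py:
    # HF weights are converted into the local GPT layout here
    theirs_model = PhiForCausalLM(theirs_config).to(device)
    theirs_state_dict = theirs_model.state_dict()
    state_dict = {}
    copy_weights_phi(ours_config, {}, state_dict, theirs_state_dict)
    ours_model = GPT(ours_config).to(device)
    ours_model.load_state_dict(state_dict)

    # test end to end
    x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32, device=device)
    ours_y = ours_model(x)
    theirs_y = theirs_model(x)["logits"].to(dtype)  # HF converts logits to float
    torch.testing.assert_close(ours_y, theirs_y)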
