Fix neva tutorial
Signed-off-by: yaoyu-33 <[email protected]>
yaoyu-33 committed Jul 10, 2024
1 parent 2b2e62d commit b0e05a1
Showing 3 changed files with 132 additions and 145 deletions.
@@ -71,10 +71,10 @@ model:
   freeze: False
   model_type: llama_2 # Only support nvgpt or llama_2
   vision_encoder:
-    from_pretrained: "openai/clip-vit-large-patch14" # path or name
+    from_pretrained: "openai/clip-vit-large-patch14-336" # path or name
     from_hf: True
     patch_dim: 14
-    crop_size: [224, 224]
+    crop_size: [336, 336]
     hidden_size: 1024 # could be found from model but tricky in code
     vision_select_layer: -2 # default to the last layer
     class_token_length: 1
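
The two config edits go together: the clip-vit-large-patch14-336 checkpoint is trained on 336x336 crops, so crop_size has to follow from_pretrained, and with patch_dim: 14 the higher resolution also lengthens the visual token sequence. A quick sanity check of that arithmetic, as a standalone Python sketch (plain ViT patch math under the values above, not code from this commit):

# ViT patch arithmetic for the two CLIP encoders; patch_dim as in the config.
patch_dim = 14
for crop in (224, 336):
    per_side = crop // patch_dim           # 16 at 224 px, 24 at 336 px
    n_tokens = per_side ** 2               # 256 vs 576 patch tokens
    print(f"{crop}px crop -> {per_side}x{per_side} = {n_tokens} patch tokens")
# class_token_length: 1 adds one CLS token on top of the patch tokens.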
scripts/checkpoint_converters/convert_llava_hf_to_nemo.py (2 additions, 2 deletions)
@@ -292,7 +292,7 @@ def convert(args):
     batch_dict = hf_tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
     batch_dict_cuda = {k: v.cuda() for k, v in batch_dict.items()}
     hf_model = hf_model.cuda().eval()
-    model = model.eval()
+    model = model.cuda().eval()
 
     hf_outputs = hf_model(**batch_dict_cuda, output_hidden_states=True)
     ids = batch_dict_cuda['input_ids']
@@ -307,7 +307,7 @@ def convert(args):
     attn_mask, _, pos_ids = attn_mask_and_pos_ids
 
     outputs = model(
-        tokens=tokens, text_position_ids=pos_ids.cuda(), attention_mask=attn_mask.cuda(), labels=None
+        tokens=tokens.cuda(), text_position_ids=pos_ids.cuda(), attention_mask=attn_mask.cuda(), labels=None
     )
 
     hf_next_token = hf_outputs.logits[0, -1].argmax()
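
Both code changes fix the same class of bug: the Hugging Face model and batch_dict were already moved to CUDA, but the NeMo model and the tokens tensor were left on the CPU, and PyTorch raises a device-mismatch RuntimeError as soon as CPU and GPU tensors meet in a single op. A minimal sketch of the failure and the fix (standalone PyTorch with a toy nn.Linear standing in for the converted model; assumes a CUDA device is available):

import torch
import torch.nn as nn

model = nn.Linear(8, 8)          # toy stand-in for the converted NeMo model
tokens = torch.randn(1, 8)       # created on the CPU, like the script's tokens

model = model.cuda().eval()      # first half of the fix: weights to the GPU
try:
    model(tokens)                # input still on the CPU -> device mismatch
except RuntimeError as err:
    print(f"mismatch: {err}")

with torch.no_grad():
    out = model(tokens.cuda())   # second half: move the input as well
print(out.device)                # cuda:0

The same .cuda() pattern was already applied to pos_ids and attn_mask in the original call; the commit extends it to the model weights and the tokens tensor.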