Skip to content

Commit

Permalink
Refactor TiledSiglipImageProcessor creation into the multimodal utils module
Browse files Browse the repository at this point in the history
Signed-off-by: HuiyingLi <[email protected]>
  • Loading branch information
HuiyingLi committed Jul 6, 2024
1 parent d7ac921 commit 055aae4
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions nemo/collections/multimodal/parts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,8 @@ def expand2square(pil_img, background_color):


def create_image_processor(mm_cfg):
from nemo.collections.multimodal.models.multimodal_llm.neva.neva_model import TiledSiglipImageProcessor

if mm_cfg.vision_encoder.get("from_hf", False):
if "clip" in mm_cfg.vision_encoder.from_pretrained:
image_processor = CLIPImageProcessor.from_pretrained(
Expand All @@ -523,6 +525,11 @@ def create_image_processor(mm_cfg):
image_processor = SiglipImageProcessor.from_pretrained(
mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16
)
image_processor = TiledSiglipImageProcessor(image_processor,
grid_width = mm_cfg.vision_encoder.get("grid_width", 1),
grid_height = mm_cfg.vision_encoder.get("grid_height", 1),
max_upscale = mm_cfg.vision_encoder.get("max_upscale", 2.0),
)
else:
raise (ValueError("Currently only support CLIPImageProcessor and SiglipImageProcessor from Huggingface"))

Expand Down

0 comments on commit 055aae4

Please sign in to comment.