Skip to content

Commit

Permalink
Update the MODALITY_TRANSFORMS dict to include the new video modality keys and their mappings to transforms
Browse files Browse the repository at this point in the history
  • Loading branch information
kdu4108 committed Aug 1, 2024
1 parent b6f7747 commit cda245d
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions fourm/data/modality_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
ColorPaletteTransform,
SAMInstanceTokTransform,
SAMInstanceTransform,
VideoDescriptionTransform,
VideoDetectionTransform,
VideoRGBTransform,
VideoTokTransform,
VideoTranscriptTransform,
)
from fourm.models.decoder_embeddings import ImageTokenDecoderEmbedding, SequenceDecoderEmbedding
from fourm.models.encoder_embeddings import (
Expand Down Expand Up @@ -468,6 +473,15 @@
"tok_imagebind_global": TokTransform(),
# Other
"mask_valid": MaskTransform(mask_pool_size=1),
# Video
"video_rgb": VideoRGBTransform(imagenet_default_mean_and_std=True), # TODO: check parameters
"video_tok_rgb": VideoTokTransform(), # the tok_ prefix indicates it's a token representation
"video_tok_clip": VideoTokTransform(), # TODO: check parameters
"video_description": VideoDescriptionTransform(aligned_captions=True), # TODO: check parameters
"video_transcript": VideoTranscriptTransform(aligned_captions=True), # TODO: check parameters
"video_det": VideoDetectionTransform(
det_threshold=0.6, det_max_instances=None, bbox_order="dist_to_orig", coord_bins=1000, min_visibility=0.0
), # TODO: check parameters
}

MODALITY_TRANSFORMS_DIVAE = {
Expand Down

0 comments on commit cda245d

Please sign in to comment.