From 35d61f5cc94c4ede407d27105579a71a675378af Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 15 Nov 2023 16:28:29 +0200 Subject: [PATCH 1/2] Add `CLIPFeatureExtractor` (and tests) (#387) --- src/processors.js | 2 ++ tests/processors.test.js | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/processors.js b/src/processors.js index 7691cb413..dd7130156 100644 --- a/src/processors.js +++ b/src/processors.js @@ -509,6 +509,7 @@ export class ImageFeatureExtractor extends FeatureExtractor { } +export class CLIPFeatureExtractor extends ImageFeatureExtractor { } export class ConvNextFeatureExtractor extends ImageFeatureExtractor { } export class ViTFeatureExtractor extends ImageFeatureExtractor { } export class MobileViTFeatureExtractor extends ImageFeatureExtractor { } @@ -1538,6 +1539,7 @@ export class AutoProcessor { WhisperFeatureExtractor, ViTFeatureExtractor, MobileViTFeatureExtractor, + CLIPFeatureExtractor, ConvNextFeatureExtractor, BeitFeatureExtractor, DeiTFeatureExtractor, diff --git a/tests/processors.test.js b/tests/processors.test.js index fe594613e..9e5d09f18 100644 --- a/tests/processors.test.js +++ b/tests/processors.test.js @@ -38,6 +38,7 @@ describe('Processors', () => { beit: 'microsoft/beit-base-patch16-224-pt22k-ft22k', detr: 'facebook/detr-resnet-50', yolos: 'hustvl/yolos-small-300', + clip: 'openai/clip-vit-base-patch16', } const TEST_IMAGES = { @@ -171,7 +172,7 @@ describe('Processors', () => { it(MODELS.deit, async () => { const processor = await AutoProcessor.from_pretrained(m(MODELS.deit)) - { // Tests grayscale image + { const image = await load_image(TEST_IMAGES.tiger); const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image); @@ -187,7 +188,7 @@ describe('Processors', () => { it(MODELS.beit, async () => { const processor = await AutoProcessor.from_pretrained(m(MODELS.beit)) - { // Tests grayscale image + { const image = await load_image(TEST_IMAGES.tiger); const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image); @@ -204,7 +205,7 @@ describe('Processors', () => { it(MODELS.detr, async () => { const processor = await AutoProcessor.from_pretrained(m(MODELS.detr)) - { // Tests grayscale image + { const image = await load_image(TEST_IMAGES.tiger); const { pixel_values, original_sizes, reshaped_input_sizes, pixel_mask } = await processor(image); @@ -225,7 +226,7 @@ describe('Processors', () => { it(MODELS.yolos, async () => { const processor = await AutoProcessor.from_pretrained(m(MODELS.yolos)) - { // Tests grayscale image + { const image = await load_image(TEST_IMAGES.tiger); const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image); @@ -236,5 +237,22 @@ describe('Processors', () => { compare(reshaped_input_sizes, [[888, 1333]]); } }, MAX_TEST_EXECUTION_TIME); + + // CLIPFeatureExtractor + // - tests center crop (do_center_crop=true, crop_size=224) + it(MODELS.clip, async () => { + const processor = await AutoProcessor.from_pretrained(m(MODELS.clip)) + + { + const image = await load_image(TEST_IMAGES.tiger); + const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image); + + compare(pixel_values.dims, [1, 3, 224, 224]); + compare(avg(pixel_values.data), -0.06678297738282096); + + compare(original_sizes, [[408, 612]]); + compare(reshaped_input_sizes, [[224, 224]]); + } + }, MAX_TEST_EXECUTION_TIME); }); }); From 4e4148cb5ce7f4a9265f58b4eeb660c64bed0386 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 15 Nov 2023 17:51:33 +0200 Subject: [PATCH 2/2] Add support for Grouped Query Attention on Llama Model (#393) Resolves #388 --- src/models.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models.js b/src/models.js index b0a82cee0..9fabe8bdd 100644 --- a/src/models.js +++ b/src/models.js @@ -3035,9 +3035,9 @@ export class LlamaPreTrainedModel extends PreTrainedModel { // config doesn't contain pad_token_id, so we assume it is the eos_token_id this.config.pad_token_id = this.config.eos_token_id - this.num_heads = this.config.num_attention_heads + this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads this.num_layers = this.config.num_hidden_layers - this.dim_kv = this.config.hidden_size / this.num_heads; + this.dim_kv = this.config.hidden_size / this.config.num_attention_heads } } /**