From bf1412ee7cf2854eb9ada7b7506f7878e67e610c Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 1 Nov 2024 11:07:35 +0200 Subject: [PATCH] Add support for MobileLLM (#1003) --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + scripts/convert.py | 1 + src/configs.js | 1 + src/models.js | 10 ++++++++++ 5 files changed, 14 insertions(+) diff --git a/README.md b/README.md index 474b87f18..7185dbfe8 100644 --- a/README.md +++ b/README.md @@ -352,6 +352,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli. 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou. 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel. +1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra. 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam. 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index f1bcdad44..8e97152c7 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -67,6 +67,7 @@ 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli. 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou. 1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel. +1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra. 1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam. 1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. 1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. diff --git a/scripts/convert.py b/scripts/convert.py index bf9265e48..c94d55585 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -41,6 +41,7 @@ 'starcoder2', 'openelm', 'gemma', + 'mobilellm', # Encoder-decoder models 'whisper', diff --git a/src/configs.js b/src/configs.js index 4bc95cf80..1a15af4c1 100644 --- a/src/configs.js +++ b/src/configs.js @@ -91,6 +91,7 @@ function getNormalizedConfig(config) { mapping['hidden_size'] = 'hidden_size'; break; case 'llama': + case 'mobilellm': case 'granite': case 'cohere': case 'mistral': diff --git a/src/models.js b/src/models.js index d357a83e4..eca48dbd0 100644 --- a/src/models.js +++ b/src/models.js @@ -3810,6 +3810,14 @@ export class LlamaForCausalLM extends LlamaPreTrainedModel { } ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// MobileLLM models +export class MobileLLMPreTrainedModel extends PreTrainedModel { } +export class MobileLLMModel extends MobileLLMPreTrainedModel { } +export class MobileLLMForCausalLM extends MobileLLMPreTrainedModel { } +////////////////////////////////////////////////// + + ////////////////////////////////////////////////// // Granite models export class GranitePreTrainedModel extends PreTrainedModel { } @@ -6125,6 +6133,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]], ['codegen', ['CodeGenModel', CodeGenModel]], ['llama', ['LlamaModel', LlamaModel]], + ['mobilellm', ['MobileLLMModel', MobileLLMModel]], ['granite', ['GraniteModel', GraniteModel]], ['cohere', ['CohereModel', CohereModel]], ['gemma', ['GemmaModel', GemmaModel]], @@ -6214,6 +6223,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]], ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]], ['llama', ['LlamaForCausalLM', LlamaForCausalLM]], + ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]], ['granite', ['GraniteForCausalLM', GraniteForCausalLM]], ['cohere', ['CohereForCausalLM', CohereForCausalLM]], ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],