From 9a8c664c2c33858ca3c422373edc09349607362f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Wed, 6 Dec 2023 17:01:36 +0200 Subject: [PATCH] Documentation improvements (#299) * Add link to optimum docs for supported architectures Closes #288 * Refactor `SUPPORTED_MODELS` dict to include task * Update example model id * Update list of supported models * Update generate_tests.py * Remove requirement of `output_attentions` revision * Add demo site to examples section (closes #233) * Fix typo * Include examples in docs index * Update github issue templates * Create config.yml * Order supported models * Cleanup * Update 4_feature-request.yml --- .github/ISSUE_TEMPLATE/1_bug-report.md | 40 - .github/ISSUE_TEMPLATE/1_bug-report.yml | 51 + .github/ISSUE_TEMPLATE/2_feature-request.md | 26 - .github/ISSUE_TEMPLATE/2_new_model.yml | 40 + .github/ISSUE_TEMPLATE/3_new_pipeline.yml | 40 + .github/ISSUE_TEMPLATE/3_question.md | 10 - .github/ISSUE_TEMPLATE/4_feature-request.yml | 31 + .github/ISSUE_TEMPLATE/5_question.yml | 13 + .github/ISSUE_TEMPLATE/config.yml | 9 + README.md | 6 +- docs/snippets/1_quick-tour.snippet | 2 +- docs/snippets/3_examples.snippet | 2 + docs/snippets/4_custom-usage.snippet | 2 + docs/source/index.md | 8 + scripts/supported_models.py | 1154 ++++++++++-------- src/pipelines.js | 4 +- tests/generate_tests.py | 11 +- 17 files changed, 879 insertions(+), 570 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/1_bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/1_bug-report.yml delete mode 100644 .github/ISSUE_TEMPLATE/2_feature-request.md create mode 100644 .github/ISSUE_TEMPLATE/2_new_model.yml create mode 100644 .github/ISSUE_TEMPLATE/3_new_pipeline.yml delete mode 100644 .github/ISSUE_TEMPLATE/3_question.md create mode 100644 .github/ISSUE_TEMPLATE/4_feature-request.yml create mode 100644 .github/ISSUE_TEMPLATE/5_question.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/1_bug-report.md b/.github/ISSUE_TEMPLATE/1_bug-report.md deleted file mode 100644 index 62b3d08a2..000000000 --- a/.github/ISSUE_TEMPLATE/1_bug-report.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: "[Bug] Title goes here." -labels: bug -assignees: '' - ---- - -**Describe the bug** -*A clear and concise description of what the bug is.* - - - - -**How to reproduce** -*Steps or a minimal working example to reproduce the behavior* - - - - -**Expected behavior** -*A clear and concise description of what you expected to happen.* - - - -**Logs/screenshots** -*If applicable, add logs/screenshots to help explain your problem.* - -**Environment** -- Transformers.js version: -- Browser (if applicable): -- Operating system (if applicable): -- Other: - - -**Additional context** -*Add any other context about the problem here.* - - diff --git a/.github/ISSUE_TEMPLATE/1_bug-report.yml b/.github/ISSUE_TEMPLATE/1_bug-report.yml new file mode 100644 index 000000000..70ceedd96 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_bug-report.yml @@ -0,0 +1,51 @@ +name: "🐛 Bug Report" +description: Submit a bug report to help us improve transformers.js +labels: [ "bug" ] +body: + - type: textarea + id: system-info + attributes: + label: System Info + description: Please share your system info with us. If you are using other JS libraries/frameworks (e.g., React or Next.js), please include their versions too. + placeholder: transformers.js version, browser (if applicable), operating system, Node.js version, bundlers, ... + validations: + required: true + + - type: checkboxes + id: environment + attributes: + label: Environment + description: "The environment I am running in:" + options: + - label: "Website/web-app" + - label: "Browser extension" + - label: "Server-side (e.g., Node.js, Deno, Bun)" + - label: "Desktop app (e.g., Electron)" + - label: "Other (e.g., VSCode extension)" + + - type: textarea + id: description + validations: + required: true + attributes: + label: Description + description: A clear and concise description of the bug, as well as what you expected to happen. + + - type: textarea + id: reproduction + validations: + required: true + attributes: + label: Reproduction + description: | + Please provide a code sample that reproduces the problem you ran into. + If you have code snippets, error messages, stack traces please provide them here as well. + Important! Use [code tags](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting) to correctly format your code. + + placeholder: | + Steps to reproduce the behavior: + + 1. + 2. + 3. + diff --git a/.github/ISSUE_TEMPLATE/2_feature-request.md b/.github/ISSUE_TEMPLATE/2_feature-request.md deleted file mode 100644 index 235519115..000000000 --- a/.github/ISSUE_TEMPLATE/2_feature-request.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: Feature request -about: Suggest a new feature (e.g., model, pipeline, task) for this project -title: "[Feature request] Title goes here." -labels: enhancement -assignees: '' - ---- - -**Name of the feature** -*In general, the feature you want added should be supported by HuggingFace's [transformers](https://github.com/huggingface/transformers) library:* - - *If requesting a **model**, it must be listed [here](https://huggingface.co/docs/transformers/index#supported-models).* - - *If requesting a **pipeline**, it must be listed [here](https://huggingface.co/docs/transformers/main_classes/pipelines).* -- *If requesting a **task**, it must be listed [here](https://huggingface.co/tasks).* - - - - -**Reason for request** -*Why is it important that we add this feature? What is your intended use case? Remember, we are more likely to add support for models/pipelines/tasks that are popular (e.g., many downloads), or contain functionality that does not exist (e.g., new input type).* - - - - -**Additional context** -*Add any other context or screenshots about the feature request here.* diff --git a/.github/ISSUE_TEMPLATE/2_new_model.yml b/.github/ISSUE_TEMPLATE/2_new_model.yml new file mode 100644 index 000000000..5b7fe3014 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_new_model.yml @@ -0,0 +1,40 @@ +name: "🌟 New model addition" +description: Submit a proposal/request to implement a new model +labels: [ "New model" ] + +body: + - type: textarea + id: description-request + validations: + required: true + attributes: + label: Model description + description: | + Include important information about the model. + + - type: checkboxes + id: information-tasks + attributes: + label: Prerequisites + description: | + Please note that Transformers.js relies on the model first being supported in [🤗 Transformers](https://github.com/huggingface/transformers) and [🤗 Optimum](https://github.com/huggingface/optimum). If the model you are requesting is not yet supported by either of them, feel free to open up a model request there too. + options: + - label: "The model is supported in Transformers (i.e., listed [here](https://huggingface.co/docs/transformers/index#supported-models-and-frameworks))" + - label: "The model can be exported to ONNX with Optimum (i.e., listed [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview))" + + - type: textarea + id: additional-info + attributes: + label: Additional information + description: | + Please provide additional information about the model here. + If the model is already supported in Transformers, you can provide example Python code to help ensure the JavaScript implementation (and output) matches the original version. + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/3_new_pipeline.yml b/.github/ISSUE_TEMPLATE/3_new_pipeline.yml new file mode 100644 index 000000000..ff7b8bff7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_new_pipeline.yml @@ -0,0 +1,40 @@ +name: "🔧 New pipeline addition" +description: Submit a proposal/request to implement a new pipeline +labels: [ "New pipeline" ] + +body: + - type: textarea + id: description-request + validations: + required: true + attributes: + label: Pipeline description + description: | + Put any and all important information related to the pipeline. + + - type: checkboxes + id: information-tasks + attributes: + label: Prerequisites + description: | + Please note that Transformers.js relies on the pipeline first being supported in [🤗 Transformers](https://github.com/huggingface/transformers). If the pipeline you are requesting is not yet supported by Transformers, feel free to open up a feature request for it there too. + options: + - label: "The pipeline is supported in Transformers (i.e., listed [here](https://huggingface.co/docs/transformers/main_classes/pipelines))" + - label: "The task is listed [here](https://huggingface.co/tasks)" + + - type: textarea + id: additional-info + attributes: + label: Additional information + description: | + Please provide additional information about the pipeline here. + If the pipeline is already supported in Transformers, you can provide example Python code to help ensure the JavaScript implementation (and output) matches the original version. + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/3_question.md b/.github/ISSUE_TEMPLATE/3_question.md deleted file mode 100644 index d8beec23e..000000000 --- a/.github/ISSUE_TEMPLATE/3_question.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Ask a question -about: Ask a question about the library -title: "[Question] Title goes here." -labels: question -assignees: '' - ---- - - diff --git a/.github/ISSUE_TEMPLATE/4_feature-request.yml b/.github/ISSUE_TEMPLATE/4_feature-request.yml new file mode 100644 index 000000000..0ef12f408 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_feature-request.yml @@ -0,0 +1,31 @@ +name: "🚀 Feature request" +description: Submit a proposal/request for a new transformers.js feature +labels: [ "feature" ] +body: + - type: textarea + id: feature-request + validations: + required: true + attributes: + label: Feature request + description: | + A clear and concise description of the feature proposal. + If the feature is already part of the python [Transformers](https://github.com/huggingface/transformers) library, please provide relevant links or example usage. + + - type: textarea + id: motivation + validations: + required: true + attributes: + label: Motivation + description: | + Please outline the motivation for the proposal. Why is it important that we add this feature? What is your intended use case? + + - type: textarea + id: contribution + validations: + required: true + attributes: + label: Your contribution + description: | + Is there any way that you could help, e.g. by submitting a PR? diff --git a/.github/ISSUE_TEMPLATE/5_question.yml b/.github/ISSUE_TEMPLATE/5_question.yml new file mode 100644 index 000000000..2af3acbcf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_question.yml @@ -0,0 +1,13 @@ +name: "🙋 Question" +description: Ask a question about the library +labels: [ "question" ] + +body: + - type: textarea + id: question + validations: + required: true + attributes: + label: Question + description: | + Please enter your question here... diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..d071e5961 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,9 @@ +blank_issues_enabled: true +version: 2.1 +contact_links: + - name: Models on the Hugging Face Hub + url: https://huggingface.co/models?library=transformers.js + about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub + - name: Documentation + url: https://huggingface.co/docs/transformers.js + about: View the Transformers.js documentation diff --git a/README.md b/README.md index 831b3dcf0..2ad41220e 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ let out = await pipe('I love transformers!'); You can also use a different model by specifying the model id or path as the second argument to the `pipeline` function. For example: ```javascript // Use a different model for sentiment-analysis -let pipe = await pipeline('sentiment-analysis', 'nlptown/bert-base-multilingual-uncased-sentiment'); +let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment'); ``` @@ -125,6 +125,8 @@ Want to jump straight in? Get started with one of our sample applications/templa | Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](./examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) | | Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](./examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) | | Node.js | Sentiment analysis API | [code](./examples/node/) | +| Demo site | A collection of demos | [code](./examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) | + ## Custom usage @@ -176,6 +178,8 @@ bert-base-uncased/ └── model_quantized.onnx ``` +For the full list of supported architectures, see the [Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview). + ## Supported tasks/models diff --git a/docs/snippets/1_quick-tour.snippet b/docs/snippets/1_quick-tour.snippet index 2f2fa58d9..dec6b341f 100644 --- a/docs/snippets/1_quick-tour.snippet +++ b/docs/snippets/1_quick-tour.snippet @@ -40,5 +40,5 @@ let out = await pipe('I love transformers!'); You can also use a different model by specifying the model id or path as the second argument to the `pipeline` function. For example: ```javascript // Use a different model for sentiment-analysis -let pipe = await pipeline('sentiment-analysis', 'nlptown/bert-base-multilingual-uncased-sentiment'); +let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment'); ``` diff --git a/docs/snippets/3_examples.snippet b/docs/snippets/3_examples.snippet index 6af3da7ec..7b836ce72 100644 --- a/docs/snippets/3_examples.snippet +++ b/docs/snippets/3_examples.snippet @@ -15,3 +15,5 @@ Want to jump straight in? Get started with one of our sample applications/templa | Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](./examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) | | Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](./examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) | | Node.js | Sentiment analysis API | [code](./examples/node/) | +| Demo site | A collection of demos | [code](./examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) | + diff --git a/docs/snippets/4_custom-usage.snippet b/docs/snippets/4_custom-usage.snippet index fbfcdc717..3367b2685 100644 --- a/docs/snippets/4_custom-usage.snippet +++ b/docs/snippets/4_custom-usage.snippet @@ -44,3 +44,5 @@ bert-base-uncased/ ├── model.onnx └── model_quantized.onnx ``` + +For the full list of supported architectures, see the [Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview). diff --git a/docs/source/index.md b/docs/source/index.md index 03e496e60..1b94c115f 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -23,6 +23,14 @@ The documentation is organized into 4 sections: 3. **DEVELOPER GUIDES** show you how to use the library to achieve a specific goal. 4. **API REFERENCE** describes all classes and functions, as well as their available parameters and types. +## Examples + + +{ + "path": "../snippets/3_examples.snippet" +} + + ## Supported tasks/models Here is the list of all tasks and architectures currently supported by Transformers.js. diff --git a/scripts/supported_models.py b/scripts/supported_models.py index 7944c39c5..fca98bc9e 100644 --- a/scripts/supported_models.py +++ b/scripts/supported_models.py @@ -3,338 +3,435 @@ SUPPORTED_MODELS = { # NOTE: keys of `SUPPORTED_MODELS` are subsets of https://github.com/huggingface/optimum/blob/7f8e606689365931300ef5e6d3b20cb88771cb08/optimum/exporters/tasks.py#L281-L965 - 'audio-spectrogram-transformer': [ - 'MIT/ast-finetuned-audioset-10-10-0.4593', - 'MIT/ast-finetuned-audioset-16-16-0.442', - 'MIT/ast-finetuned-speech-commands-v2', - 'mtg-upf/discogs-maest-30s-pw-73e-ts', - ], - - 'albert': [ + 'albert': { # Masked language modelling - 'albert-base-v2', - 'albert-large-v2', + 'fill-mask': [ + 'albert-base-v2', + 'albert-large-v2', + ], # Feature extraction - 'sentence-transformers/paraphrase-albert-small-v2', - 'sentence-transformers/paraphrase-albert-base-v2', - ], - 'bart': [ + 'feature-extraction': [ + 'sentence-transformers/paraphrase-albert-small-v2', + 'sentence-transformers/paraphrase-albert-base-v2', + ], + }, + 'audio-spectrogram-transformer': { + # Audio classification + 'audio-classification': { + 'MIT/ast-finetuned-audioset-10-10-0.4593', + 'MIT/ast-finetuned-audioset-16-16-0.442', + 'MIT/ast-finetuned-speech-commands-v2', + 'mtg-upf/discogs-maest-30s-pw-73e-ts', + } + }, + 'bart': { # Summarization - 'sshleifer/distilbart-xsum-12-1', - 'sshleifer/distilbart-xsum-6-6', - 'sshleifer/distilbart-xsum-12-3', - 'sshleifer/distilbart-xsum-9-6', - 'sshleifer/distilbart-xsum-12-6', - 'sshleifer/distilbart-cnn-12-3', - 'sshleifer/distilbart-cnn-12-6', - 'sshleifer/distilbart-cnn-6-6', - 'facebook/bart-large-cnn', - 'facebook/bart-large-xsum', - + 'summarization': [ + 'sshleifer/distilbart-xsum-12-1', + 'sshleifer/distilbart-xsum-6-6', + 'sshleifer/distilbart-xsum-12-3', + 'sshleifer/distilbart-xsum-9-6', + 'sshleifer/distilbart-xsum-12-6', + 'sshleifer/distilbart-cnn-12-3', + 'sshleifer/distilbart-cnn-12-6', + 'sshleifer/distilbart-cnn-6-6', + 'facebook/bart-large-cnn', + 'facebook/bart-large-xsum', + ], # Zero-shot classification - 'facebook/bart-large-mnli', - ], - 'beit': [ + 'zero-shot-classification': { + 'facebook/bart-large-mnli', + }, + }, + 'beit': { # Image classification - 'microsoft/beit-base-patch16-224', - 'microsoft/beit-base-patch16-224-pt22k', - 'microsoft/beit-base-patch16-384', - 'microsoft/beit-base-patch16-224-pt22k-ft22k', - 'microsoft/beit-large-patch16-224', - 'microsoft/beit-large-patch16-224-pt22k', - 'microsoft/beit-large-patch16-512', - 'microsoft/beit-large-patch16-224-pt22k-ft22k', - 'microsoft/beit-large-patch16-384', - 'microsoft/dit-base-finetuned-rvlcdip', - 'microsoft/dit-large-finetuned-rvlcdip', - ], - 'bert': [ + 'image-classification': [ + 'microsoft/beit-base-patch16-224', + 'microsoft/beit-base-patch16-224-pt22k', + 'microsoft/beit-base-patch16-384', + 'microsoft/beit-base-patch16-224-pt22k-ft22k', + 'microsoft/beit-large-patch16-224', + 'microsoft/beit-large-patch16-224-pt22k', + 'microsoft/beit-large-patch16-512', + 'microsoft/beit-large-patch16-224-pt22k-ft22k', + 'microsoft/beit-large-patch16-384', + 'microsoft/dit-base-finetuned-rvlcdip', + 'microsoft/dit-large-finetuned-rvlcdip', + ], + }, + 'bert': { # Feature extraction - 'sentence-transformers/all-MiniLM-L6-v2', - 'sentence-transformers/all-MiniLM-L12-v2', - 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', - 'sentence-transformers/paraphrase-MiniLM-L6-v2', - 'sentence-transformers/paraphrase-MiniLM-L3-v2', - 'sentence-transformers/bert-base-nli-mean-tokens', - 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', - 'sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens', - 'sentence-transformers/LaBSE', - 'deepset/sentence_bert', - 'intfloat/e5-small', - 'intfloat/e5-small-v2', - 'intfloat/e5-base', - 'intfloat/e5-base-v2', - 'intfloat/e5-large', - 'intfloat/e5-large-v2', - 'intfloat/multilingual-e5-base', - 'thenlper/gte-small', - 'thenlper/gte-base', - 'thenlper/gte-large', - 'BAAI/bge-small-en', - 'BAAI/bge-base-en', - 'BAAI/bge-large-en', - 'allenai/scibert_scivocab_uncased', - 'SpanBERT/spanbert-large-cased', - 'SpanBERT/spanbert-base-cased', - 'cambridgeltl/SapBERT-from-PubMedBERT-fulltext', - 'indobenchmark/indobert-base-p1', - 'GanjinZero/UMLSBert_ENG', - 'DeepPavlov/rubert-base-cased', - 'monologg/kobert', + 'feature-extraction': [ + 'sentence-transformers/all-MiniLM-L6-v2', + 'sentence-transformers/all-MiniLM-L12-v2', + 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', + 'sentence-transformers/paraphrase-MiniLM-L6-v2', + 'sentence-transformers/paraphrase-MiniLM-L3-v2', + 'sentence-transformers/bert-base-nli-mean-tokens', + 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', + 'sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens', + 'sentence-transformers/LaBSE', + 'deepset/sentence_bert', + 'intfloat/e5-small', + 'intfloat/e5-small-v2', + 'intfloat/e5-base', + 'intfloat/e5-base-v2', + 'intfloat/e5-large', + 'intfloat/e5-large-v2', + 'intfloat/multilingual-e5-base', + 'thenlper/gte-small', + 'thenlper/gte-base', + 'thenlper/gte-large', + 'BAAI/bge-small-en', + 'BAAI/bge-base-en', + 'BAAI/bge-large-en', + 'BAAI/bge-large-en-v1.5', + 'BAAI/bge-base-en-v1.5', + 'BAAI/bge-small-en-v1.5', + 'BAAI/bge-large-zh-v1.5', + 'BAAI/bge-base-zh-v1.5', + 'BAAI/bge-small-zh-v1.5', + 'allenai/scibert_scivocab_uncased', + 'SpanBERT/spanbert-large-cased', + 'SpanBERT/spanbert-base-cased', + 'cambridgeltl/SapBERT-from-PubMedBERT-fulltext', + 'indobenchmark/indobert-base-p1', + 'GanjinZero/UMLSBert_ENG', + 'DeepPavlov/rubert-base-cased', + 'monologg/kobert', + ], # Text classification - 'nlptown/bert-base-multilingual-uncased-sentiment', - 'ProsusAI/finbert', - 'unitary/toxic-bert', + 'text-classification': [ + 'nlptown/bert-base-multilingual-uncased-sentiment', + 'ProsusAI/finbert', + 'unitary/toxic-bert', + 'BAAI/bge-reranker-large', + 'BAAI/bge-reranker-base', + ], # Token classification - 'Davlan/bert-base-multilingual-cased-ner-hrl', - 'ckiplab/bert-base-chinese-ner', - 'ckiplab/bert-base-chinese-ws', - 'ckiplab/bert-base-chinese-pos', - 'dslim/bert-base-NER', - 'dslim/bert-base-NER-uncased', + 'token-classification': [ + 'Davlan/bert-base-multilingual-cased-ner-hrl', + 'ckiplab/bert-base-chinese-ner', + 'ckiplab/bert-base-chinese-ws', + 'ckiplab/bert-base-chinese-pos', + 'dslim/bert-base-NER', + 'dslim/bert-base-NER-uncased', + ], # Masked language modelling - 'bert-base-uncased', - 'bert-base-cased', - 'bert-base-multilingual-uncased', - 'bert-base-multilingual-cased', - 'bert-base-chinese', - 'emilyalsentzer/Bio_ClinicalBERT', - ], - 'blenderbot': [ - # Text2text generation (TODO add conversational) - 'facebook/blenderbot-400M-distill', - # 'facebook/blenderbot-1B-distill', - ], - 'blenderbot-small': [ - # Text2text generation (TODO add conversational) - # 'facebook/blenderbot-90M', # DEPRECATED - 'facebook/blenderbot_small-90M', - ], - 'bloom': [ + 'fill-mask': [ + 'bert-base-uncased', + 'bert-base-cased', + 'bert-base-multilingual-uncased', + 'bert-base-multilingual-cased', + 'bert-base-chinese', + 'emilyalsentzer/Bio_ClinicalBERT', + ], + }, + 'blenderbot': { + # Text-to-text (TODO add conversational) + 'text2text-generation': [ + 'facebook/blenderbot-400M-distill', + # 'facebook/blenderbot-1B-distill', + ], + }, + 'blenderbot-small': { + # Text-to-text (TODO add conversational) + 'text2text-generation': [ + # 'facebook/blenderbot-90M', # DEPRECATED + 'facebook/blenderbot_small-90M', + ], + }, + 'bloom': { # Text generation - 'bigscience/bloom-560m', - 'bigscience/bloomz-560m', - ], - 'camembert': [ + 'text-generation': [ + 'bigscience/bloom-560m', + 'bigscience/bloomz-560m', + ], + }, + + 'camembert': { # Feature extraction - 'dangvantuan/sentence-camembert-large', + 'feature-extraction': [ + 'dangvantuan/sentence-camembert-large', + ], # Token classification - 'Jean-Baptiste/camembert-ner', - 'Jean-Baptiste/camembert-ner-with-dates', - 'pythainlp/thainer-corpus-v2-base-model', - 'gilf/french-camembert-postag-model', + 'token-classification': [ + 'Jean-Baptiste/camembert-ner', + 'Jean-Baptiste/camembert-ner-with-dates', + 'pythainlp/thainer-corpus-v2-base-model', + 'gilf/french-camembert-postag-model', + ], # Masked language modelling - 'camembert-base', - 'airesearch/wangchanberta-base-att-spm-uncased', - ], - 'clap': [ + 'fill-mask': [ + 'camembert-base', + 'airesearch/wangchanberta-base-att-spm-uncased', + ], + }, + 'clap': { # Zero-shot audio classification and feature extraction # (with and without `--split_modalities`) - 'laion/clap-htsat-unfused', - # TODO add 'laion/clap-htsat-fused', - - 'Xenova/tiny-random-ClapModel', - ], - 'clip': [ - # Zero-shot image classification and feature extraction + 'zero-shot-audio-classification': { + 'laion/clap-htsat-unfused', + # TODO add 'laion/clap-htsat-fused', + 'laion/larger_clap_general', + 'laion/larger_clap_music_and_speech', + # 'Xenova/tiny-random-ClapModel', + } + }, + 'clip': { + # Zero-shot image classification (and feature extraction) # (with and without `--split_modalities`) - 'openai/clip-vit-base-patch16', - 'openai/clip-vit-base-patch32', - 'openai/clip-vit-large-patch14', - 'openai/clip-vit-large-patch14-336', - ], - 'codegen': [ + 'zero-shot-image-classification': [ + 'openai/clip-vit-base-patch16', + 'openai/clip-vit-base-patch32', + 'openai/clip-vit-large-patch14', + 'openai/clip-vit-large-patch14-336', + ], + }, + 'codegen': { # Text generation - 'Salesforce/codegen-350M-mono', - 'Salesforce/codegen-350M-multi', - 'Salesforce/codegen-350M-nl', - ], - 'convnext':[ + 'text-generation': [ + 'Salesforce/codegen-350M-mono', + 'Salesforce/codegen-350M-multi', + 'Salesforce/codegen-350M-nl', + ], + }, + 'convnext': { # Image classification - 'facebook/convnext-tiny-224', - 'facebook/convnext-small-224', - 'facebook/convnext-base-224', - 'facebook/convnext-base-224-22k', - 'facebook/convnext-base-224-22k-1k', - 'facebook/convnext-base-384', - 'facebook/convnext-base-384-22k-1k', - 'facebook/convnext-large-224', - 'facebook/convnext-large-224-22k', - 'facebook/convnext-large-224-22k-1k', - 'facebook/convnext-large-384', - 'facebook/convnext-large-384-22k-1k', - 'facebook/convnext-xlarge-224-22k', - 'facebook/convnext-xlarge-224-22k-1k', - 'facebook/convnext-xlarge-384-22k-1k', - ], - 'convnextv2':[ + 'image-classification': [ + 'facebook/convnext-tiny-224', + 'facebook/convnext-small-224', + 'facebook/convnext-base-224', + 'facebook/convnext-base-224-22k', + 'facebook/convnext-base-224-22k-1k', + 'facebook/convnext-base-384', + 'facebook/convnext-base-384-22k-1k', + 'facebook/convnext-large-224', + 'facebook/convnext-large-224-22k', + 'facebook/convnext-large-224-22k-1k', + 'facebook/convnext-large-384', + 'facebook/convnext-large-384-22k-1k', + 'facebook/convnext-xlarge-224-22k', + 'facebook/convnext-xlarge-224-22k-1k', + 'facebook/convnext-xlarge-384-22k-1k', + ], + }, + 'convnextv2': { # Image classification - 'facebook/convnextv2-atto-1k-224', - 'facebook/convnextv2-femto-1k-224', - 'facebook/convnextv2-pico-1k-224', - 'facebook/convnextv2-tiny-1k-224', - 'facebook/convnextv2-tiny-22k-384', - 'facebook/convnextv2-tiny-22k-224', - 'facebook/convnextv2-nano-1k-224', - 'facebook/convnextv2-nano-22k-384', - 'facebook/convnextv2-base-22k-224', - 'facebook/convnextv2-base-1k-224', - 'facebook/convnextv2-base-22k-384', - 'facebook/convnextv2-large-22k-224', - 'facebook/convnextv2-large-1k-224', - 'facebook/convnextv2-large-22k-384', - # 'facebook/convnextv2-huge-22k-512', - # 'facebook/convnextv2-huge-1k-224', - # 'facebook/convnextv2-huge-22k-384', - # 'facebook/convnextv2-nano-22k-224', - ], - 'deberta': [ + 'image-classification': [ + 'facebook/convnextv2-atto-1k-224', + 'facebook/convnextv2-femto-1k-224', + 'facebook/convnextv2-pico-1k-224', + 'facebook/convnextv2-tiny-1k-224', + 'facebook/convnextv2-tiny-22k-384', + 'facebook/convnextv2-tiny-22k-224', + 'facebook/convnextv2-nano-1k-224', + 'facebook/convnextv2-nano-22k-384', + 'facebook/convnextv2-base-22k-224', + 'facebook/convnextv2-base-1k-224', + 'facebook/convnextv2-base-22k-384', + 'facebook/convnextv2-large-22k-224', + 'facebook/convnextv2-large-1k-224', + 'facebook/convnextv2-large-22k-384', + # 'facebook/convnextv2-huge-22k-512', + # 'facebook/convnextv2-huge-1k-224', + # 'facebook/convnextv2-huge-22k-384', + # 'facebook/convnextv2-nano-22k-224', + ], + }, + 'deberta': { # Zero-shot classification - 'cross-encoder/nli-deberta-base', - 'Narsil/deberta-large-mnli-zero-cls', - ], - 'deberta-v2': [ + 'zero-shot-classification': [ + 'cross-encoder/nli-deberta-base', + 'Narsil/deberta-large-mnli-zero-cls', + ], + }, + 'deberta-v2': { # Zero-shot classification - 'cross-encoder/nli-deberta-v3-xsmall', - 'cross-encoder/nli-deberta-v3-small', - 'cross-encoder/nli-deberta-v3-base', - 'cross-encoder/nli-deberta-v3-large', - 'MoritzLaurer/DeBERTa-v3-xsmall-mnli-fever-anli-ling-binary', - 'MoritzLaurer/DeBERTa-v3-base-mnli', - 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', - 'MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli', - 'MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', - 'sileod/deberta-v3-base-tasksource-nli', - 'sileod/deberta-v3-large-tasksource-nli', - ], - 'deit': [ + 'zero-shot-classification': [ + 'cross-encoder/nli-deberta-v3-xsmall', + 'cross-encoder/nli-deberta-v3-small', + 'cross-encoder/nli-deberta-v3-base', + 'cross-encoder/nli-deberta-v3-large', + 'MoritzLaurer/DeBERTa-v3-xsmall-mnli-fever-anli-ling-binary', + 'MoritzLaurer/DeBERTa-v3-base-mnli', + 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', + 'MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli', + 'MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', + 'sileod/deberta-v3-base-tasksource-nli', + 'sileod/deberta-v3-large-tasksource-nli', + ], + }, + 'deit': { # Image classification - 'facebook/deit-tiny-distilled-patch16-224', - 'facebook/deit-small-distilled-patch16-224', - 'facebook/deit-base-distilled-patch16-224', - 'facebook/deit-base-distilled-patch16-384', - ], - 'detr': [ + 'image-classification': [ + 'facebook/deit-tiny-distilled-patch16-224', + 'facebook/deit-small-distilled-patch16-224', + 'facebook/deit-base-distilled-patch16-224', + 'facebook/deit-base-distilled-patch16-384', + ], + }, + 'detr': { # Object detection - 'facebook/detr-resnet-50', - 'facebook/detr-resnet-101', + 'object-detection': [ + 'facebook/detr-resnet-50', + 'facebook/detr-resnet-101', + ], # Image segmentation - 'facebook/detr-resnet-50-panoptic', - ], - 'distilbert': [ + 'image-segmentation': [ + 'facebook/detr-resnet-50-panoptic', + ], + }, + 'distilbert': { # Feature extraction - 'sentence-transformers/multi-qa-distilbert-cos-v1', - 'sentence-transformers/distiluse-base-multilingual-cased-v1', - 'sentence-transformers/distiluse-base-multilingual-cased-v2', - 'sentence-transformers/distilbert-base-nli-mean-tokens', - 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', - 'sentence-transformers/msmarco-distilbert-base-v4', + 'feature-extraction': [ + 'sentence-transformers/multi-qa-distilbert-cos-v1', + 'sentence-transformers/distiluse-base-multilingual-cased-v1', + 'sentence-transformers/distiluse-base-multilingual-cased-v2', + 'sentence-transformers/distilbert-base-nli-mean-tokens', + 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', + 'sentence-transformers/msmarco-distilbert-base-v4', + ], # Text classification - 'distilbert-base-uncased-finetuned-sst-2-english', + 'text-classification': [ + 'distilbert-base-uncased-finetuned-sst-2-english', + ], # Question answering - 'distilbert-base-uncased-distilled-squad', - 'distilbert-base-cased-distilled-squad', + 'question-answering': [ + 'distilbert-base-uncased-distilled-squad', + 'distilbert-base-cased-distilled-squad', + ], # Zero-shot classification - 'typeform/distilbert-base-uncased-mnli', + 'zero-shot-classification': [ + 'typeform/distilbert-base-uncased-mnli', + ], # Token classification - 'Davlan/distilbert-base-multilingual-cased-ner-hrl', + 'token-classification': [ + 'Davlan/distilbert-base-multilingual-cased-ner-hrl', + ], # Masked language modelling - 'distilbert-base-uncased', - 'distilbert-base-cased', - ], - 'donut': [ # NOTE: also a `vision-encoder-decoder` + 'fill-mask': [ + 'distilbert-base-uncased', + 'distilbert-base-cased', + ], + }, + 'donut': { # NOTE: also a `vision-encoder-decoder` # Image-to-text - 'naver-clova-ix/donut-base-finetuned-cord-v2', - 'naver-clova-ix/donut-base-finetuned-zhtrainticket', + 'image-to-text': [ + 'naver-clova-ix/donut-base-finetuned-cord-v2', + 'naver-clova-ix/donut-base-finetuned-zhtrainticket', + ], # Document Question Answering - 'naver-clova-ix/donut-base-finetuned-docvqa', - ], - 'dpt': [ + 'document-question-answering': [ + 'naver-clova-ix/donut-base-finetuned-docvqa', + ], + }, + 'dpt': { # Depth estimation - 'Intel/dpt-hybrid-midas', - 'Intel/dpt-large', - ], - 'falcon': [ + 'depth-estimation': [ + 'Intel/dpt-hybrid-midas', + 'Intel/dpt-large', + ], + }, + 'falcon': { # Text generation - 'Rocketknight1/tiny-random-falcon-7b', - 'fxmarty/really-tiny-falcon-testing', - ], - 'glpn': [ + 'text-generation': [ + 'Rocketknight1/tiny-random-falcon-7b', + 'fxmarty/really-tiny-falcon-testing', + ], + }, + 'glpn': { # Depth estimation - 'vinvino02/glpn-kitti', - 'vinvino02/glpn-nyu', - ], - 'gpt_neo': [ + 'depth-estimation': [ + 'vinvino02/glpn-kitti', + 'vinvino02/glpn-nyu', + ], + }, + 'gpt_neo': { # Text generation - 'EleutherAI/gpt-neo-125M', - 'MBZUAI/LaMini-Neo-125M', - # 'MBZUAI/LaMini-Neo-1.3B', # TODO add - 'iliemihai/gpt-neo-romanian-125m', - ], - 'gpt_neox': [ + 'text-generation': [ + 'EleutherAI/gpt-neo-125M', + 'MBZUAI/LaMini-Neo-125M', + # 'MBZUAI/LaMini-Neo-1.3B', # TODO add + 'iliemihai/gpt-neo-romanian-125m', + ], + }, + 'gpt_neox': { # Text generation - 'EleutherAI/pythia-14m', - 'EleutherAI/pythia-31m', - 'EleutherAI/pythia-70m', - 'EleutherAI/pythia-70m-deduped', - 'EleutherAI/pythia-160m', - 'EleutherAI/pythia-160m-deduped', - 'EleutherAI/pythia-410m', - 'EleutherAI/pythia-410m-deduped', - ], - 'gpt2': [ + 'text-generation': [ + 'EleutherAI/pythia-14m', + 'EleutherAI/pythia-31m', + 'EleutherAI/pythia-70m', + 'EleutherAI/pythia-70m-deduped', + 'EleutherAI/pythia-160m', + 'EleutherAI/pythia-160m-deduped', + 'EleutherAI/pythia-410m', + 'EleutherAI/pythia-410m-deduped', + ], + }, + 'gpt2': { # Text generation - 'gpt2', - 'distilgpt2', - 'MBZUAI/LaMini-Cerebras-111M', - 'MBZUAI/LaMini-Cerebras-256M', - 'MBZUAI/LaMini-Cerebras-590M', - # 'MBZUAI/LaMini-Cerebras-1.3B', # TODO add - 'MBZUAI/LaMini-GPT-124M', - 'MBZUAI/LaMini-GPT-774M', - # 'MBZUAI/LaMini-GPT-1.5B', # TODO add - 'aisquared/dlite-v2-774m', - 'Locutusque/gpt2-large-conversational', - ], - 'gpt_bigcode': [ + 'text-generation': [ + 'gpt2', + 'distilgpt2', + 'MBZUAI/LaMini-Cerebras-111M', + 'MBZUAI/LaMini-Cerebras-256M', + 'MBZUAI/LaMini-Cerebras-590M', + # 'MBZUAI/LaMini-Cerebras-1.3B', # TODO add + 'MBZUAI/LaMini-GPT-124M', + 'MBZUAI/LaMini-GPT-774M', + # 'MBZUAI/LaMini-GPT-1.5B', # TODO add + 'aisquared/dlite-v2-774m', + 'Locutusque/gpt2-large-conversational', + ], + }, + 'gpt_bigcode': { # Text generation - 'bigcode/tiny_starcoder_py', - 'abacaj/starcoderbase-1b-sft', - # 'bigcode/starcoderbase-1b', # NOTE: This model is gated, so we ignore it when testing - ], - 'gptj': [ + 'text-generation': [ + 'bigcode/tiny_starcoder_py', + 'abacaj/starcoderbase-1b-sft', + # 'bigcode/starcoderbase-1b', # NOTE: This model is gated, so we ignore it when testing + ], + }, + 'gptj': { # Text generation - 'TabbyML/J-350M', - 'Milos/slovak-gpt-j-405M', - 'heegyu/kogpt-j-350m', - ], - 'herbert': [ + 'text-generation': [ + 'TabbyML/J-350M', + 'Milos/slovak-gpt-j-405M', + 'heegyu/kogpt-j-350m', + ], + }, + 'herbert': { # Feature extraction - 'allegro/herbert-base-cased', - 'allegro/herbert-large-cased', - ], - 'llama': [ + 'feature-extraction': [ + 'allegro/herbert-base-cased', + 'allegro/herbert-large-cased', + ], + }, + 'llama': { # Text generation - 'Xenova/llama2.c-stories15M', - 'Xenova/llama2.c-stories42M', - 'Xenova/llama2.c-stories110M', - 'RajuKandasamy/tamillama_tiny_30m', - 'JackFram/llama-68m', - 'JackFram/llama-160m', - ], + 'text-generation': [ + 'Xenova/llama2.c-stories15M', + 'Xenova/llama2.c-stories42M', + 'Xenova/llama2.c-stories110M', + 'RajuKandasamy/tamillama_tiny_30m', + 'JackFram/llama-68m', + 'JackFram/llama-160m', + ], + }, 'longt5': { # Text-to-text 'text2text-generation': [ @@ -356,265 +453,348 @@ 'voidful/long-t5-encodec-tglobal-base', ], }, - 'm2m_100': [ + 'm2m_100': { # Translation - 'facebook/nllb-200-distilled-600M', - 'facebook/m2m100_418M', - ], - 'marian': [ + 'translation': [ + 'facebook/nllb-200-distilled-600M', + 'facebook/m2m100_418M', + ], + }, + 'marian': { # Translation - f'Helsinki-NLP/opus-mt-{x}' - for x in SUPPORTED_HELSINKI_NLP_MODELS - ], - 'mbart': [ + 'translation': [ + f'Helsinki-NLP/opus-mt-{x}' + for x in SUPPORTED_HELSINKI_NLP_MODELS + ], + }, + 'mbart': { # Translation - 'facebook/mbart-large-50-many-to-many-mmt', - 'facebook/mbart-large-50-many-to-one-mmt', - 'facebook/mbart-large-50', - ], - 'mistral': [ + 'translation': [ + 'facebook/mbart-large-50-many-to-many-mmt', + 'facebook/mbart-large-50-many-to-one-mmt', + 'facebook/mbart-large-50', + ], + }, + 'mistral': { # Text generation - 'echarlaix/tiny-random-mistral', - ], - 'mobilebert': [ + 'text-generation': [ + 'echarlaix/tiny-random-mistral', + ], + }, + 'mobilebert': { # Zero-shot classification - 'typeform/mobilebert-uncased-mnli', + 'zero-shot-classification': [ + 'typeform/mobilebert-uncased-mnli', - # TODO: - # https://github.com/huggingface/optimum/issues/1027 - # 'google/mobilebert-uncased', - ], - 'mobilevit': [ + # TODO: + # https://github.com/huggingface/optimum/issues/1027 + # 'google/mobilebert-uncased', + ], + }, + 'mobilevit': { # Image classification - 'apple/mobilevit-small', - 'apple/mobilevit-x-small', - 'apple/mobilevit-xx-small', + 'image-classification': [ + 'apple/mobilevit-small', + 'apple/mobilevit-x-small', + 'apple/mobilevit-xx-small', + ], # TODO: Image segmentation - # 'apple/deeplabv3-mobilevit-small', - # 'apple/deeplabv3-mobilevit-x-small', - # 'apple/deeplabv3-mobilevit-xx-small', - ], - 'mpt': [ + # 'image-segmentation': [ + # 'apple/deeplabv3-mobilevit-small', + # 'apple/deeplabv3-mobilevit-x-small', + # 'apple/deeplabv3-mobilevit-xx-small', + # ], + }, + 'mpt': { # Text generation - 'efederici/ipt-350m', - ], - 'mpnet': [ + 'text-generation': [ + 'efederici/ipt-350m', + ], + }, + 'mpnet': { # Feature extraction - 'sentence-transformers/all-mpnet-base-v2', - 'sentence-transformers/nli-mpnet-base-v2', - 'sentence-transformers/paraphrase-mpnet-base-v2', - 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2', - 'sentence-transformers/multi-qa-mpnet-base-cos-v1', - 'sentence-transformers/multi-qa-mpnet-base-dot-v1', - ], - 'mt5': [ - 'google/mt5-small', - 'google/mt5-base', - ], - 'nougat': [ + 'feature-extraction': [ + 'sentence-transformers/all-mpnet-base-v2', + 'sentence-transformers/nli-mpnet-base-v2', + 'sentence-transformers/paraphrase-mpnet-base-v2', + 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2', + 'sentence-transformers/multi-qa-mpnet-base-cos-v1', + 'sentence-transformers/multi-qa-mpnet-base-dot-v1', + ], + }, + 'mt5': { + # Text-to-text + 'text2text-generation': [ + 'google/mt5-small', + 'google/mt5-base', + ], + }, + 'nougat': { # Image-to-text - 'facebook/nougat-small', - 'facebook/nougat-base', - ], - 'opt': [ + 'image-to-text': [ + 'facebook/nougat-small', + 'facebook/nougat-base', + ], + }, + 'opt': { # Text generation - 'facebook/opt-125m', - 'facebook/opt-350m', - # (TODO conversational) - 'PygmalionAI/pygmalion-350m', - ], - 'owlvit': [ + 'text-generation': [ + # Text generation + 'facebook/opt-125m', + 'facebook/opt-350m', + # (TODO conversational) + 'PygmalionAI/pygmalion-350m', + ], + }, + 'owlvit': { # Object detection (Zero-shot object detection) # NOTE: Exported with --batch_size 1 - 'google/owlvit-base-patch32', - 'google/owlvit-base-patch16', - 'google/owlvit-large-patch14', - ], - 'resnet': [ + 'zero-shot-object-detection': [ + 'google/owlvit-base-patch32', + 'google/owlvit-base-patch16', + 'google/owlvit-large-patch14', + ], + }, + 'resnet': { # Image classification - 'microsoft/resnet-18', - 'microsoft/resnet-26', - 'microsoft/resnet-34', - 'microsoft/resnet-50', - 'microsoft/resnet-101', - 'microsoft/resnet-152', - ], - 'roberta': [ - # Masked language modelling - 'roberta-base', - 'distilroberta-base', - + 'image-classification': [ + 'microsoft/resnet-18', + 'microsoft/resnet-26', + 'microsoft/resnet-34', + 'microsoft/resnet-50', + 'microsoft/resnet-101', + 'microsoft/resnet-152', + ], + }, + 'roberta': { # Feature extraction - 'sentence-transformers/all-distilroberta-v1', - 'sentence-transformers/all-roberta-large-v1', + 'feature-extraction': [ + 'sentence-transformers/all-distilroberta-v1', + 'sentence-transformers/all-roberta-large-v1', + ], # Text classification - 'roberta-large-mnli', + 'text-classification': [ + 'roberta-large-mnli', + ], # Token classification - 'julien-c/EsperBERTo-small-pos', - ], + 'token-classification': [ + 'julien-c/EsperBERTo-small-pos', + ], + + # Masked language modelling + 'fill-mask': [ + 'roberta-base', + 'distilroberta-base', + ], + }, # 'sam': [ # 'facebook/sam-vit-base', # 'facebook/sam-vit-large', # 'facebook/sam-vit-huge', # ], - 'speecht5': [ - # Text-to-speech - 'microsoft/speecht5_tts', - ], - 'squeezebert': [ + + 'speecht5': { + # Text-to-audio/Text-to-speech + 'text-to-audio': [ + 'microsoft/speecht5_tts', + ], + }, + 'squeezebert': { # Feature extraction - 'squeezebert/squeezebert-uncased', - 'squeezebert/squeezebert-mnli', - ], - 'swin': [ + 'feature-extraction': [ + 'squeezebert/squeezebert-uncased', + 'squeezebert/squeezebert-mnli', + ], + }, + 'swin': { # Image classification - 'microsoft/swin-tiny-patch4-window7-224', - 'microsoft/swin-base-patch4-window7-224', - 'microsoft/swin-large-patch4-window12-384-in22k', - 'microsoft/swin-base-patch4-window7-224-in22k', - 'microsoft/swin-base-patch4-window12-384-in22k', - 'microsoft/swin-base-patch4-window12-384', - 'microsoft/swin-large-patch4-window7-224', - 'microsoft/swin-small-patch4-window7-224', - 'microsoft/swin-large-patch4-window7-224-in22k', - 'microsoft/swin-large-patch4-window12-384', - ], - 'swin2sr': [ + 'image-classification': [ + 'microsoft/swin-tiny-patch4-window7-224', + 'microsoft/swin-base-patch4-window7-224', + 'microsoft/swin-large-patch4-window12-384-in22k', + 'microsoft/swin-base-patch4-window7-224-in22k', + 'microsoft/swin-base-patch4-window12-384-in22k', + 'microsoft/swin-base-patch4-window12-384', + 'microsoft/swin-large-patch4-window7-224', + 'microsoft/swin-small-patch4-window7-224', + 'microsoft/swin-large-patch4-window7-224-in22k', + 'microsoft/swin-large-patch4-window12-384', + ], + }, + 'swin2sr': { # Image-to-image (Super-resolution) - 'caidas/swin2SR-classical-sr-x2-64', - 'caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', - 'caidas/swin2SR-classical-sr-x4-64', - 'caidas/swin2SR-compressed-sr-x4-48', - 'caidas/swin2SR-lightweight-x2-64', + 'image-to-image': [ + 'caidas/swin2SR-classical-sr-x2-64', + 'caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', + 'caidas/swin2SR-classical-sr-x4-64', + 'caidas/swin2SR-compressed-sr-x4-48', + 'caidas/swin2SR-lightweight-x2-64', + ], # Feature extraction - 'hf-tiny-model-private/tiny-random-Swin2SRModel', - ], - 't5': [ - # Text-to-text (Translation/Summarization) - 't5-small', - 't5-base', - 'google/t5-v1_1-small', - 'google/t5-v1_1-base', - 'google/flan-t5-small', - 'google/flan-t5-base', - 'MBZUAI/LaMini-Flan-T5-77M', - 'MBZUAI/LaMini-Flan-T5-248M', - 'MBZUAI/LaMini-Flan-T5-783M', - 'MBZUAI/LaMini-T5-61M', - 'MBZUAI/LaMini-T5-223M', - 'MBZUAI/LaMini-T5-738M', + 'feature-extraction': [ + 'hf-tiny-model-private/tiny-random-Swin2SRModel', + ], + }, + 't5': { + # Translation/Summarization + ('translation', 'summarization'): [ + 't5-small', + 't5-base', + 'google/t5-v1_1-small', + 'google/t5-v1_1-base', + 'google/flan-t5-small', + 'google/flan-t5-base', + ], + + # Text-to-text + 'text2text-generation': [ + 'MBZUAI/LaMini-Flan-T5-77M', + 'MBZUAI/LaMini-Flan-T5-248M', + 'MBZUAI/LaMini-Flan-T5-783M', + 'MBZUAI/LaMini-T5-61M', + 'MBZUAI/LaMini-T5-223M', + 'MBZUAI/LaMini-T5-738M', + ], # Feature extraction - 'sentence-transformers/sentence-t5-large', - 'hkunlp/instructor-base', - 'hkunlp/instructor-large', - ], - 'trocr': [ # NOTE: also a `vision-encoder-decoder` - # Text-to-image - 'microsoft/trocr-small-printed', - 'microsoft/trocr-base-printed', - 'microsoft/trocr-small-handwritten', - 'microsoft/trocr-base-handwritten', - ], - 'vision-encoder-decoder': [ + 'feature-extraction': [ + 'sentence-transformers/sentence-t5-large', + 'hkunlp/instructor-base', + 'hkunlp/instructor-large', + ], + }, + 'trocr': { # NOTE: also a `vision-encoder-decoder` # Text-to-image - 'nlpconnect/vit-gpt2-image-captioning', - ], - 'vit': [ + 'text-to-image': [ + 'microsoft/trocr-small-printed', + 'microsoft/trocr-base-printed', + 'microsoft/trocr-small-handwritten', + 'microsoft/trocr-base-handwritten', + ], + }, + 'vision-encoder-decoder': { + # Image-to-text + 'image-to-text': [ + 'nlpconnect/vit-gpt2-image-captioning', + ], + }, + 'vit': { # Feature extraction - 'google/vit-base-patch16-224-in21k', - 'facebook/dino-vitb16', - 'facebook/dino-vits8', - 'facebook/dino-vitb8', - 'facebook/dino-vits16', - + 'feature-extraction': [ + 'google/vit-base-patch16-224-in21k', + 'facebook/dino-vitb16', + 'facebook/dino-vits8', + 'facebook/dino-vitb8', + 'facebook/dino-vits16', + ], # Image classification - 'google/vit-base-patch16-224', - ], - 'wav2vec2': [ + 'image-classification': [ + 'google/vit-base-patch16-224', + ], + }, + 'wav2vec2': { # Feature extraction # NOTE: requires --task feature-extraction - 'facebook/mms-300m', - 'facebook/mms-1b', + 'feature-extraction': [ + 'facebook/mms-300m', + 'facebook/mms-1b', + ], # Audio classification - 'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech', - 'superb/wav2vec2-base-superb-ks', - 'facebook/mms-lid-126', - 'facebook/mms-lid-256', - 'facebook/mms-lid-512', - 'facebook/mms-lid-1024', - 'facebook/mms-lid-2048', - 'facebook/mms-lid-4017', + 'audio-classification': [ + 'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech', + 'superb/wav2vec2-base-superb-ks', + 'facebook/mms-lid-126', + 'facebook/mms-lid-256', + 'facebook/mms-lid-512', + 'facebook/mms-lid-1024', + 'facebook/mms-lid-2048', + 'facebook/mms-lid-4017', + ], # Automatic speech recognition - 'jonatasgrosman/wav2vec2-large-xlsr-53-english', - 'facebook/wav2vec2-base-960h', - 'facebook/mms-1b-l1107', - 'facebook/mms-1b-all', - 'facebook/mms-1b-fl102', - ], - 'wavlm': [ + 'automatic-speech-recognition': [ + 'jonatasgrosman/wav2vec2-large-xlsr-53-english', + 'facebook/wav2vec2-base-960h', + 'facebook/mms-1b-l1107', + 'facebook/mms-1b-all', + 'facebook/mms-1b-fl102', + ], + }, + 'wavlm': { # Feature extraction - 'microsoft/wavlm-base', - 'microsoft/wavlm-base-plus', - 'microsoft/wavlm-large', - ], - 'whisper': [ + 'feature-extraction': [ + 'microsoft/wavlm-base', + 'microsoft/wavlm-base-plus', + 'microsoft/wavlm-large', + ], + }, + 'whisper': { # Automatic speech recognition - 'openai/whisper-tiny', - 'openai/whisper-tiny.en', - 'openai/whisper-base', - 'openai/whisper-base.en', - 'openai/whisper-small', - 'openai/whisper-small.en', - 'openai/whisper-medium', - 'openai/whisper-medium.en', - 'openai/whisper-large', - 'openai/whisper-large-v2', - 'NbAiLab/nb-whisper-tiny-beta', - 'NbAiLab/nb-whisper-base-beta', - 'NbAiLab/nb-whisper-small-beta', - 'NbAiLab/nb-whisper-medium-beta', - 'NbAiLab/nb-whisper-large-beta', - ], - 'xlm': [ - 'xlm-clm-ende-1024', - 'xlm-mlm-ende-1024', - 'xlm-clm-enfr-1024', - 'xlm-mlm-enfr-1024', - 'xlm-mlm-17-1280', - 'xlm-mlm-100-1280', - 'xlm-mlm-en-2048', - 'xlm-mlm-enro-1024', - 'xlm-mlm-tlm-xnli15-1024', - 'xlm-mlm-xnli15-1024', - ], - 'xlm-roberta': [ + 'automatic-speech-recognition': [ + 'openai/whisper-tiny', + 'openai/whisper-tiny.en', + 'openai/whisper-base', + 'openai/whisper-base.en', + 'openai/whisper-small', + 'openai/whisper-small.en', + 'openai/whisper-medium', + 'openai/whisper-medium.en', + 'openai/whisper-large', + 'openai/whisper-large-v2', + 'NbAiLab/nb-whisper-tiny-beta', + 'NbAiLab/nb-whisper-base-beta', + 'NbAiLab/nb-whisper-small-beta', + 'NbAiLab/nb-whisper-medium-beta', + 'NbAiLab/nb-whisper-large-beta', + ], + }, + 'xlm': { + # Masked language modelling + 'fill-mask': [ + 'xlm-clm-ende-1024', + 'xlm-mlm-ende-1024', + 'xlm-clm-enfr-1024', + 'xlm-mlm-enfr-1024', + 'xlm-mlm-17-1280', + 'xlm-mlm-100-1280', + 'xlm-mlm-en-2048', + 'xlm-mlm-enro-1024', + 'xlm-mlm-tlm-xnli15-1024', + 'xlm-mlm-xnli15-1024', + ], + }, + 'xlm-roberta': { # Masked language modelling - 'xlm-roberta-base' - ], - 'yolos': [ + 'fill-mask': [ + 'xlm-roberta-base' + ], + }, + 'yolos': { # Object detection - 'hustvl/yolos-tiny', - 'hustvl/yolos-small', - 'hustvl/yolos-base', - 'hustvl/yolos-small-dwr', - 'hustvl/yolos-small-300', - ] + 'object-detection': [ + # Object detection + 'hustvl/yolos-tiny', + 'hustvl/yolos-small', + 'hustvl/yolos-base', + 'hustvl/yolos-small-dwr', + 'hustvl/yolos-small-300', + ], + }, } def main(): - for model_type, model_ids in SUPPORTED_MODELS.items(): - print(f'# {model_type:=^80}') - for model_id in model_ids: - print( - f'python -m scripts.convert --quantize --model_id {model_id}') - print() + for model_type, tasks in SUPPORTED_MODELS.items(): + for task, model_ids in tasks.items(): + print(f'# {model_type:=^80}') + for model_id in model_ids: + print( + f'python -m scripts.convert --quantize --model_id {model_id}') + print() if __name__ == '__main__': diff --git a/src/pipelines.js b/src/pipelines.js index f3d802da5..7705cc190 100644 --- a/src/pipelines.js +++ b/src/pipelines.js @@ -1180,9 +1180,7 @@ export class ZeroShotAudioClassificationPipeline extends Pipeline { * **Example:** Transcribe English w/ word-level timestamps. * ```javascript * let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav'; - * let transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { - * revision: 'output_attentions', - * }); + * let transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'); * let output = await transcriber(url, { return_timestamps: 'word' }); * // { * // "text": " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.", diff --git a/tests/generate_tests.py b/tests/generate_tests.py index 6356e805d..8c44258a8 100644 --- a/tests/generate_tests.py +++ b/tests/generate_tests.py @@ -111,11 +111,18 @@ } +FLATTENED_SUPPORTED_MODELS = [ + (model_type, [ + model for task_models in tasks.values() for model in task_models + ]) for model_type, tasks in SUPPORTED_MODELS.items() +] + + def generate_tokenizer_tests(): results = {} - tokenizers_to_test = list(SUPPORTED_MODELS.items()) + \ + tokenizers_to_test = FLATTENED_SUPPORTED_MODELS + \ list(ADDITIONAL_TOKENIZERS_TO_TEST.items()) for model_type, tokenizer_names in tokenizers_to_test: @@ -180,7 +187,7 @@ def generate_tokenizer_tests(): def generate_config_tests(): results = {} - for model_type, config_names in SUPPORTED_MODELS.items(): + for model_type, config_names in FLATTENED_SUPPORTED_MODELS: print(f'Generating tests for {model_type}') for config_name in config_names: