
Optimizations #276

Merged
merged 44 commits into main from decoder-optimizations on Sep 8, 2023
Changes from 1 commit
Commits (44)
6885074
Add `CodeLlamaTokenizer`
xenova Sep 2, 2023
0ea55e9
Add `codellama` for testing
xenova Sep 2, 2023
1365fe0
Update default quantization settings
xenova Sep 2, 2023
ee754fa
Refactor `PretrainedModel`
xenova Sep 3, 2023
87ffc38
Remove unnecessary error message
xenova Sep 3, 2023
ddc9306
Update llama-code-tokenizer test
xenova Sep 3, 2023
1f36ebc
Add support for `GPTNeoX` models
xenova Sep 5, 2023
7f993d4
Fix `GPTNeoXPreTrainedModel` config
xenova Sep 5, 2023
4b45d9a
Add support for `GPTJ` models
xenova Sep 5, 2023
f060ead
Add support for `WavLM` models
xenova Sep 5, 2023
9444127
Update list of supported models
xenova Sep 5, 2023
2cda7e4
Add support for XLM models
xenova Sep 5, 2023
aa1b309
Add support for `ResNet` models
xenova Sep 5, 2023
087d173
Add support for `BeiT` models
xenova Sep 5, 2023
70196a6
Fix casing of `BeitModel`
xenova Sep 5, 2023
92078ab
Merge branch 'main' into decoder-optimizations
xenova Sep 5, 2023
674b868
Remove duplicate code
xenova Sep 5, 2023
f371586
Update variable name
xenova Sep 5, 2023
291eda5
Remove `ts-ignore`
xenova Sep 5, 2023
864271c
Remove unnecessary duplication
xenova Sep 5, 2023
7b12892
Update demo model sizes
xenova Sep 5, 2023
184cf4a
[demo] Update default summarization parameters
xenova Sep 5, 2023
3ce20c2
Update default quantization parameters for new models
xenova Sep 5, 2023
7170160
Remove duplication in mapping
xenova Sep 6, 2023
cff84ee
Update list of supported marian models
xenova Sep 6, 2023
18df52f
Add support for `CamemBERT` models
xenova Sep 6, 2023
f38cf9e
Add support for `MBart` models
xenova Sep 6, 2023
fc1426f
Add support for `OPT` models
xenova Sep 6, 2023
0e1fb97
Add `MBartTokenizer` and `MBart50Tokenizer`
xenova Sep 6, 2023
3ce23de
Add example of multilingual translation with MBart models
xenova Sep 6, 2023
f2fce14
Add `CamembertTokenizer`
xenova Sep 6, 2023
baa5869
Add support for `HerBERT` models
xenova Sep 7, 2023
3612822
Add support for `XLMTokenizer`
xenova Sep 7, 2023
a351359
Fix `fuse_unk` config
xenova Sep 7, 2023
fc1c176
Do not remove duplicate keys for `Unigram` models
xenova Sep 7, 2023
4c56699
Update HerBERT supported model text
xenova Sep 7, 2023
5080f7a
Update generate_tests.py
xenova Sep 7, 2023
a140648
Update list of supported models
xenova Sep 8, 2023
cdb4814
Use enum object instead of classes for model types
xenova Sep 8, 2023
fd238ee
Add link to issue
xenova Sep 8, 2023
68544a3
Update dependencies for unit tests
xenova Sep 8, 2023
fbe52aa
Add `sentencepiece` as a testing requirement
xenova Sep 8, 2023
2a4f44d
Add `protobuf` to test dependency
xenova Sep 8, 2023
05ed5ab
Remove duplicated models to test
xenova Sep 8, 2023
Update generate_tests.py
xenova committed Sep 7, 2023
commit 5080f7a0e27d47f67d6e5c9e77c4505cf67307a2
28 changes: 25 additions & 3 deletions tests/generate_tests.py
```diff
@@ -22,11 +22,23 @@
     ],
 }
 
 MODELS_TO_IGNORE = [
     # TODO: remove when https://github.com/huggingface/tokenizers/issues/251 is fixed
     'xlm',
+
+    # TODO: remove when https://github.com/huggingface/transformers/issues/26018 is fixed
+    'marian',
 ]
+
+TOKENIZERS_TO_IGNORE = [
+    # TODO: remove when https://github.com/huggingface/transformers/pull/25478 is merged
+    'facebook/m2m100_418M',
+]
+
+MAX_TESTS = {
+    'marian': 10,
+}
 
 TOKENIZER_TEST_DATA = {
     "shared": [
         "hello world",
```
```diff
@@ -96,6 +108,11 @@ def generate_tokenizer_tests():
         list(ADDITIONAL_TOKENIZERS_TO_TEST.items())
 
     for model_type, tokenizer_names in tokenizers_to_test:
+        if model_type in MODELS_TO_IGNORE:
+            continue
+        if model_type in MAX_TESTS:
+            tokenizer_names = tokenizer_names[:MAX_TESTS[model_type]]
+
         print(f'Generating tests for {model_type}')
         for tokenizer_name in tokenizer_names:
             if tokenizer_name in TOKENIZERS_TO_IGNORE:
```
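The hunk above adds two filters to the tokenizer-test loop: model types in `MODELS_TO_IGNORE` are skipped outright, and `MAX_TESTS` caps how many checkpoints are exercised for expensive families such as `marian`. A minimal standalone sketch of that pattern follows; the checkpoint names and the `select_tokenizers` helper are illustrative, not part of the PR:

```python
# Sketch of the skip-list + per-family cap pattern from generate_tests.py.
# The model types ('xlm', 'marian') mirror the PR; checkpoint names are made up.

MODELS_TO_IGNORE = ['xlm']          # families with known upstream bugs
MAX_TESTS = {'marian': 10}          # cap on checkpoints per expensive family

def select_tokenizers(tokenizers_to_test):
    """Yield (model_type, tokenizer_names) pairs surviving both filters."""
    for model_type, tokenizer_names in tokenizers_to_test:
        if model_type in MODELS_TO_IGNORE:
            continue
        if model_type in MAX_TESTS:
            tokenizer_names = tokenizer_names[:MAX_TESTS[model_type]]
        yield model_type, tokenizer_names

selected = dict(select_tokenizers([
    ('xlm', ['example-xlm-checkpoint']),
    ('marian', [f'example-marian-{i}' for i in range(25)]),
    ('bert', ['bert-base-uncased']),
]))
print(sorted(selected))  # → ['bert', 'marian']
```

Truncating the name list (rather than skipping the family) keeps some coverage for `marian` while bounding test-generation time.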
```diff
@@ -147,11 +164,16 @@ def generate_tokenizer_tests():
 def generate_config_tests():
     results = {}
     for model_type, config_names in SUPPORTED_MODELS.items():
         print(f'Generating tests for {model_type}')
+
         for config_name in config_names:
-            # Load config
-            config = AutoConfig.from_pretrained(config_name)
-
+            print(' -', config_name)
+            try:
+                # Load config
+                config = AutoConfig.from_pretrained(config_name)
+            except Exception:
+                # Something went wrong, skip this config
+                continue
             results[config_name] = config.to_dict()
 
     # TODO: Remove after https://github.com/huggingface/transformers/issues/23876 fixed
```
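The last hunk wraps each config load in `try`/`except` so one bad or unreachable checkpoint no longer aborts the whole test-generation run. A self-contained sketch of that skip-on-failure pattern is below; `load_config` and `KNOWN_CONFIGS` are hypothetical stand-ins for `AutoConfig.from_pretrained`, used here so the example runs without network access:

```python
# Sketch of the skip-on-failure loop from generate_config_tests().
# KNOWN_CONFIGS / load_config are illustrative stand-ins, not transformers APIs.

KNOWN_CONFIGS = {
    'bert-base-uncased': {'model_type': 'bert', 'hidden_size': 768},
}

def load_config(name):
    """Stand-in for AutoConfig.from_pretrained: raise on unknown checkpoints."""
    if name not in KNOWN_CONFIGS:
        raise OSError(f'{name} is not a valid model identifier')
    return KNOWN_CONFIGS[name]

def collect_configs(config_names):
    results = {}
    for config_name in config_names:
        try:
            config = load_config(config_name)
        except Exception:
            # Something went wrong, skip this config
            continue
        results[config_name] = config
    return results

results = collect_configs(['bert-base-uncased', 'does-not-exist'])
print(list(results))  # → ['bert-base-uncased']
```

Catching the broad `Exception` is a deliberate trade-off for a test generator: any failure mode (missing repo, parse error, network hiccup) just drops that one entry from `results`.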