From cc2e63f806863cfea6e3bff963b2419fa0c01a11 Mon Sep 17 00:00:00 2001 From: feipenghe Date: Tue, 26 Apr 2022 15:39:41 -0700 Subject: [PATCH] Move out dependency of rest modules in extra_requires in forte (#760) * import error for wikipedia * import error for transformers * import error for tqdm * add more import tests * remove pytorch installation at .github/workflows/main.yml * test backbone in an independent job at .github/workflows/main.yml * install pytest in .github/workflows/main.yml * remove RemoteProcessor that requires extra module at tests/forte/forte_backbone_test.py * add requests in remote extra require at setup.py * remove init import RemoteProcessor at forte/processors/misc/__init__.py * try-except ImportError for requests at forte/processors/misc/remote_processor.py * change relative importing path for RemoteProcessor at tests/forte/remote_processor_test.py * try-except ImportError for module requests at forte/processors/third_party/machine_translation_processor.py * try-except ImportError for module requests at forte/processors/data_augment/algorithms/character_flip_op.py * try-except ImportError for module requests at forte/processors/data_augment/algorithms/distribution_replacement_op.py * add requests requirement in forte[data_aug] in setup.py * try-except ImportError for module requests at forte/processors/data_augment/algorithms/typo_replacement_op.py * pylint: raise errors from errors and disable import-outside-toplevel * pylint: raise errors from errors and disable import-outside-toplevel * forte_backbone_test.py pytest -> regular running python script * move requests importing inside init for RemoteProcessor * revert remove init import RemoteProcessor at forte/processors/misc/__init__.py * remove tensorflow and pytorch version in the test_backbone job * simplify test_backbone job --- .github/workflows/main.yml | 32 ++- forte/datasets/wikipedia/dbpedia/db_utils.py | 11 +- .../wikipedia/dbpedia/dbpedia_datasets.py | 13 +- 
.../algorithms/character_flip_op.py | 12 +- .../algorithms/distribution_replacement_op.py | 12 +- .../algorithms/typo_replacement_op.py | 12 +- forte/processors/misc/remote_processor.py | 10 +- .../machine_translation_processor.py | 23 +- forte/trainer/ner_trainer.py | 10 +- setup.py | 12 +- tests/forte/forte_backbone_test.py | 205 +++++++++++++----- 11 files changed, 276 insertions(+), 76 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4781ec1a9..28be707b1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,13 +70,7 @@ jobs: # Simply keep the database file but remove the repo. cp stave/simple-backend/db.sqlite3 . rm -rf stave - - name: Test backbone Forte import test - run: | - pip install --progress-bar off torch==${{ matrix.torch-version }} - # Try to install Forte backbone only and test basic imports. - pip install --use-feature=in-tree-build --progress-bar off . - pytest tests/forte/forte_backbone_test.py - pip uninstall -y torch + - name: Install Forte run: | pip install --use-feature=in-tree-build --progress-bar off .[data_aug,ir,remote,audio_ext,stave,models,test,wikipedia,nlp,extractor] @@ -116,7 +110,29 @@ jobs: cd .. coverage run -m pytest tests/forte/notebooks fi - + test_backbone: + runs-on: ubuntu-latest + env: + python-version: 3.7 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ env.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ env.python-version }} + - name: Cache pip + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + - name: Test backbone Forte import test + run: | + # Try to install Forte backbone only and test basic imports. + pip install --use-feature=in-tree-build --progress-bar off . 
+ python tests/forte/forte_backbone_test.py test_modules: runs-on: ubuntu-latest diff --git a/forte/datasets/wikipedia/dbpedia/db_utils.py b/forte/datasets/wikipedia/dbpedia/db_utils.py index 9d736bb21..bff38deaa 100644 --- a/forte/datasets/wikipedia/dbpedia/db_utils.py +++ b/forte/datasets/wikipedia/dbpedia/db_utils.py @@ -21,8 +21,17 @@ import sys from typing import List, Dict, Tuple, Optional from urllib.parse import urlparse, parse_qs +from forte.utils import create_import_error_msg + +try: + import rdflib +except ImportError as e: + raise ImportError( + create_import_error_msg( + "rdflib", "wikipedia", "DBpedia dataset supports" + ) + ) from e -import rdflib dbpedia_prefix = "http://dbpedia.org/resource/" state_type = Tuple[rdflib.term.Node, rdflib.term.Node, rdflib.term.Node] diff --git a/forte/datasets/wikipedia/dbpedia/dbpedia_datasets.py b/forte/datasets/wikipedia/dbpedia/dbpedia_datasets.py index a82f1f23f..b5b340e0f 100644 --- a/forte/datasets/wikipedia/dbpedia/dbpedia_datasets.py +++ b/forte/datasets/wikipedia/dbpedia/dbpedia_datasets.py @@ -31,7 +31,7 @@ Optional, ) -import rdflib + from smart_open import open from forte.common import Resources @@ -49,6 +49,7 @@ state_type, ) from forte.processors.base import PackWriter +from forte.utils import create_import_error_msg from ft.onto.wikipedia import ( WikiPage, WikiSection, @@ -60,6 +61,16 @@ WikiCategory, ) +try: + import rdflib +except ImportError as e: + raise ImportError( + create_import_error_msg( + "rdflib", "wikipedia", "DBpedia dataset supports" + ) + ) from e + + __all__ = [ "DBpediaWikiReader", "WikiPackReader", diff --git a/forte/processors/data_augment/algorithms/character_flip_op.py b/forte/processors/data_augment/algorithms/character_flip_op.py index 5bd7fed02..ed29d8ce9 100644 --- a/forte/processors/data_augment/algorithms/character_flip_op.py +++ b/forte/processors/data_augment/algorithms/character_flip_op.py @@ -15,13 +15,14 @@ import random import json from typing import Tuple, Any, 
Dict, Union -import requests + from forte.data.ontology import Annotation from forte.common.configuration import Config from forte.processors.data_augment.algorithms.single_annotation_op import ( SingleAnnotationAugmentOp, ) +from forte.utils import create_import_error_msg __all__ = ["CharacterFlipOp"] @@ -45,9 +46,18 @@ class CharacterFlipOp(SingleAnnotationAugmentOp): """ def __init__(self, configs: Union[Config, Dict[str, Any]]) -> None: + try: + import requests # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + create_import_error_msg( + "requests", "data_aug", "data augment support" + ) + ) from e super().__init__(configs) self.dict_path = self.configs["dict_path"] + try: r = requests.get(self.dict_path) self.data = r.json() diff --git a/forte/processors/data_augment/algorithms/distribution_replacement_op.py b/forte/processors/data_augment/algorithms/distribution_replacement_op.py index 0b6060c40..c820f2d92 100644 --- a/forte/processors/data_augment/algorithms/distribution_replacement_op.py +++ b/forte/processors/data_augment/algorithms/distribution_replacement_op.py @@ -14,7 +14,7 @@ import json import random from typing import Tuple, Union, Dict, Any -import requests + from forte.common.configurable import Configurable from forte.common.configuration import Config from forte.data.ontology import Annotation @@ -22,6 +22,7 @@ SingleAnnotationAugmentOp, ) from forte.utils.utils import create_class_with_kwargs +from forte.utils import create_import_error_msg __all__ = [ "DistributionReplacementOp", @@ -64,6 +65,15 @@ def cofigure_sampler(self) -> None: used by the distribution replacement op. 
The sampler will be set according to the configuration values """ + try: + import requests # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + create_import_error_msg( + "requests", "data_aug", "data augment support" + ) + ) from e + try: if "data_path" in self.configs["sampler_config"]["kwargs"]: distribution_path = self.configs["sampler_config"]["kwargs"][ diff --git a/forte/processors/data_augment/algorithms/typo_replacement_op.py b/forte/processors/data_augment/algorithms/typo_replacement_op.py index ebccf6bef..48fb06aa1 100644 --- a/forte/processors/data_augment/algorithms/typo_replacement_op.py +++ b/forte/processors/data_augment/algorithms/typo_replacement_op.py @@ -17,12 +17,13 @@ import json from typing import Tuple, Union, Dict, Any -import requests + from forte.data.ontology import Annotation from forte.processors.data_augment.algorithms.single_annotation_op import ( SingleAnnotationAugmentOp, ) from forte.common.configuration import Config +from forte.utils import create_import_error_msg __all__ = [ "UniformTypoGenerator", @@ -50,6 +51,15 @@ class UniformTypoGenerator: """ def __init__(self, dict_path: str): + try: + import requests # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + create_import_error_msg( + "requests", "data_aug", "data augment support" + ) + ) from e + try: r = requests.get(dict_path) self.data = r.json() diff --git a/forte/processors/misc/remote_processor.py b/forte/processors/misc/remote_processor.py index 42b93360d..36aaa2d44 100644 --- a/forte/processors/misc/remote_processor.py +++ b/forte/processors/misc/remote_processor.py @@ -21,7 +21,6 @@ import json import logging from typing import Dict, Set, Any, Optional -import requests from forte.common import Resources, ProcessorConfigError from forte.common.configuration import Config @@ -29,6 +28,7 @@ from forte.processors.base import PackProcessor from forte.utils import create_import_error_msg + 
logger = logging.getLogger(__name__) __all__ = ["RemoteProcessor"] @@ -53,6 +53,14 @@ class RemoteProcessor(PackProcessor): def __init__(self): super().__init__() + try: + import requests # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + create_import_error_msg( + "requests", "remote", "Remote Processor" + ) + ) from e self._requests: Any = requests self._records: Optional[Dict[str, Set[str]]] = None self._expectation: Optional[Dict[str, Set[str]]] = None diff --git a/forte/processors/third_party/machine_translation_processor.py b/forte/processors/third_party/machine_translation_processor.py index 5b5007d9c..9e3aa2bf0 100644 --- a/forte/processors/third_party/machine_translation_processor.py +++ b/forte/processors/third_party/machine_translation_processor.py @@ -17,15 +17,26 @@ from typing import Dict, Any from urllib.parse import urlencode import os -import requests -from transformers import T5Tokenizer, T5ForConditionalGeneration + from forte.common.configuration import Config from forte.common.resources import Resources from forte.data.data_pack import DataPack from forte.data.multi_pack import MultiPack from forte.processors.base import MultiPackProcessor, PackProcessor +from forte.utils import create_import_error_msg from ft.onto.base_ontology import Document, Utterance +try: + from transformers import ( # pylint:disable=import-outside-toplevel + T5Tokenizer, + T5ForConditionalGeneration, + ) +except ImportError as err: + raise ImportError( + create_import_error_msg( + "transformers", "data_aug", "Machine Translator" + ) + ) from err __all__ = ["MicrosoftBingTranslator", "MachineTranslationProcessor"] @@ -80,6 +91,14 @@ def initialize(self, resources: Resources, configs: Config): self.out_pack_name = configs.out_pack_name def _process(self, input_pack: MultiPack): + try: + import requests # pylint: disable=import-outside-toplevel + except ImportError as e: + raise ImportError( + create_import_error_msg( + "requests", 
"data_aug", "data augment support" + ) + ) from e query = input_pack.get_pack(self.in_pack_name).text params = "?" + urlencode( { diff --git a/forte/trainer/ner_trainer.py b/forte/trainer/ner_trainer.py index 89cf93da9..57cb947ea 100644 --- a/forte/trainer/ner_trainer.py +++ b/forte/trainer/ner_trainer.py @@ -24,7 +24,7 @@ from typing import List, Tuple, Iterator, Optional, Dict import numpy as np -from tqdm import tqdm + from forte.common.configuration import Config from forte.common.resources import Resources @@ -34,6 +34,14 @@ from forte.utils import create_import_error_msg from ft.onto.base_ontology import Token, Sentence + +try: + from tqdm import tqdm +except ImportError as e: + raise ImportError( + create_import_error_msg("tqdm", "models", "models") + ) from e + try: import torch from torch.optim import SGD diff --git a/setup.py b/setup.py index f156772ab..326e9f98f 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,9 @@ sys.exit("Python>=3.6 is required by Forte.") version = {} -with open(os.path.join( - os.path.dirname(os.path.abspath(__file__)), "forte/version.py" -)) as fp: +with open( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "forte/version.py") +) as fp: exec(fp.read(), version) setuptools.setup( @@ -50,12 +50,10 @@ "transformers>=4.15.0", "nltk", "texar-pytorch>=0.1.4", + "requests", ], "ir": ["texar-pytorch>=0.1.4", "tensorflow>=1.15.0"], - "remote": [ - "fastapi>=0.65.2", - "uvicorn>=0.14.0", - ], + "remote": ["fastapi>=0.65.2", "uvicorn>=0.14.0", "requests"], "audio_ext": ["soundfile>=0.10.3"], "stave": ["stave>=0.0.1.dev12"], "models": [ diff --git a/tests/forte/forte_backbone_test.py b/tests/forte/forte_backbone_test.py index 11ab5496f..bc5a4512a 100644 --- a/tests/forte/forte_backbone_test.py +++ b/tests/forte/forte_backbone_test.py @@ -2,56 +2,157 @@ Test cases to ensure native Forte code can be imported with only backbone Forte library installed (without any extra imports). 
-Current nondepency packages: -- texar """ -import os -import unittest - - -class ForteImportTest(unittest.TestCase): - def test_basic_import(self): - from forte.data import DataPack - from forte.data import DataStore - from forte.data import SinglePackSelector - from forte.data import BaseIndex - - def test_import_data(self): - from forte.data.readers import TerminalReader - from forte.datasets.mrc.squad_reader import SquadReader - - def test_import_processors(self): - from forte.processors.writers import ( - PackIdJsonPackWriter, - ) - from forte.processors.nlp import ( - ElizaProcessor, - ) - from forte.processors.misc import ( - AnnotationRemover, - ) - from forte.processors.base import ( - BaseProcessor, - ) - from forte.processors.data_augment import ( - BaseDataAugmentProcessor, - ) - from forte.processors.ir.search_processor import ( - SearchProcessor, - ) - - def test_import_evaluator(self): - from forte.evaluation.ner_evaluator import ( - CoNLLNEREvaluator, - ) - from forte.evaluation.base import Evaluator - - def test_import_trainer(self): - from forte.trainer.base import BaseTrainer - - def test_import_forte_modules(self): - from forte.pipeline_component import PipelineComponent - from forte import Pipeline - from forte.process_job import ProcessJob, ProcessJobStatus - from forte.process_manager import ProcessManager - from forte.train_pipeline import TrainPipeline + + +def test_basic_import(): + from forte.data import DataPack + from forte.data import DataStore + from forte.data import SinglePackSelector + from forte.data import BaseIndex + + +def test_import_data(): + from forte.data.readers import ( + AGNewsReader, + ClassificationDatasetReader, + CoNLL03Reader, + ConllUDReader, + RawDataDeserializeReader, + RecursiveDirectoryDeserializeReader, + DirPackReader, + MultiPackDirectoryReader, + MultiPackDeserializerBase, + SinglePackReader, + HTMLReader, + LargeMovieReader, + TerminalReader, + RawPackReader, + RawMultiPackReader, + MSMarcoPassageReader, 
+ MultiPackSentenceReader, + MultiPackTerminalReader, + OntonotesReader, + OpenIEReader, + PlainTextReader, + ProdigyReader, + RACEMultiChoiceQAReader, + SemEvalTask8Reader, + SST2Reader, + StringReader, + ) + from forte.datasets.mrc.squad_reader import SquadReader + + +def test_import_processors(): + from forte.processors.writers import ( + PackIdJsonPackWriter, + ) + from forte.processors.nlp import ( + ElizaProcessor, + ) + from forte.processors.misc import ( + AnnotationRemover, + AttributeMasker, + DeleteOverlapEntry, + LowerCaserProcessor, + PeriodSentenceSplitter, + WhiteSpaceTokenizer, + Alphabet, + VocabularyProcessor, + ) + from forte.processors.base import ( + BaseProcessor, + ) + from forte.processors.data_augment import ( + BaseDataAugmentProcessor, + ) + from forte.processors.ir.search_processor import ( + SearchProcessor, + ) + + +def test_import_evaluator(): + from forte.evaluation.ner_evaluator import ( + CoNLLNEREvaluator, + ) + from forte.evaluation.base import Evaluator + + +def test_import_trainer(): + from forte.trainer.base import BaseTrainer + + +def test_import_forte_modules(): + from forte.pipeline_component import PipelineComponent + from forte import Pipeline + from forte.process_job import ProcessJob, ProcessJobStatus + from forte.process_manager import ProcessManager + from forte.train_pipeline import TrainPipeline + + +def test_import_base_data_aug(): + from forte.processors.data_augment import ( + BaseDataAugmentProcessor, + ReplacementDataAugmentProcessor, + ) + from forte.processors.data_augment.base_op_processor import ( + BaseOpProcessor, + ) + + from forte.processors.data_augment.algorithms.back_translation_op import ( + BackTranslationOp, + ) + + from forte.processors.data_augment.algorithms.back_translation_op import ( + BackTranslationOp, + ) + from forte.processors.data_augment.algorithms.base_data_augmentation_op import ( + BaseDataAugmentationOp, + ) + from forte.processors.data_augment.algorithms.character_flip_op import 
( + CharacterFlipOp, + ) + from forte.processors.data_augment.algorithms.dictionary_replacement_op import ( + DictionaryReplacementOp, + ) + from forte.processors.data_augment.algorithms.dictionary import ( + Dictionary, + ) + from forte.processors.data_augment.algorithms.distribution_replacement_op import ( + DistributionReplacementOp, + ) + from forte.processors.data_augment.algorithms.eda_ops import ( + RandomSwapDataAugmentOp, + RandomInsertionDataAugmentOp, + RandomDeletionDataAugmentOp, + ) + from forte.processors.data_augment.algorithms.sampler import ( + Sampler, + UniformSampler, + UnigramSampler, + ) + + from forte.processors.data_augment.algorithms.single_annotation_op import ( + SingleAnnotationAugmentOp, + ) + from forte.processors.data_augment.algorithms.text_replacement_op import ( + TextReplacementOp, + ) + from forte.processors.data_augment.algorithms.typo_replacement_op import ( + UniformTypoGenerator, + TypoReplacementOp, + ) + from forte.processors.data_augment.algorithms.word_splitting_op import ( + RandomWordSplitDataAugmentOp, + ) + + +if __name__ == "__main__": + test_basic_import() + test_import_data() + test_import_processors() + test_import_evaluator() + test_import_trainer() + test_import_forte_modules() + test_import_base_data_aug()