From ea8cd01467948d0d313884914cea85a0ba139876 Mon Sep 17 00:00:00 2001 From: panxuchen Date: Wed, 17 Apr 2024 20:04:11 +0800 Subject: [PATCH] auto generate sphinx doc --- .github/workflows/deploy_spinx_docs.yml | 28 +- data_juicer/analysis/__init__.py | 7 + data_juicer/config/__init__.py | 8 +- data_juicer/core/__init__.py | 8 + data_juicer/format/__init__.py | 13 + data_juicer/ops/__init__.py | 8 + data_juicer/ops/common/__init__.py | 11 + data_juicer/ops/common/helper_func.py | 4 +- data_juicer/ops/deduplicator/__init__.py | 15 + .../deduplicator/ray_document_deduplicator.py | 2 +- data_juicer/ops/filter/__init__.py | 86 +++++ data_juicer/ops/mapper/__init__.py | 93 +++++ .../ops/mapper/image_face_blur_mapper.py | 2 +- .../ops/mapper/video_face_blur_mapper.py | 2 +- data_juicer/ops/selector/__init__.py | 4 + docs/sphinx_doc/_templates/module.rst_t | 9 - docs/sphinx_doc/_templates/package.rst_t | 31 +- docs/sphinx_doc/_templates/toc.rst_t | 8 - docs/sphinx_doc/source/conf.py | 14 + .../source/data_juicer.analysis.rst | 52 --- docs/sphinx_doc/source/data_juicer.config.rst | 11 - docs/sphinx_doc/source/data_juicer.core.rst | 51 --- docs/sphinx_doc/source/data_juicer.format.rst | 67 ---- .../source/data_juicer.ops.common.rst | 19 - .../source/data_juicer.ops.deduplicator.rst | 43 --- .../source/data_juicer.ops.filter.rst | 330 ---------------- .../source/data_juicer.ops.mapper.rst | 363 ------------------ docs/sphinx_doc/source/data_juicer.ops.rst | 36 -- .../source/data_juicer.ops.selector.rst | 19 - docs/sphinx_doc/source/data_juicer.rst | 13 - docs/sphinx_doc/source/data_juicer.tools.rst | 2 - docs/sphinx_doc/source/data_juicer.utils.rst | 107 ------ docs/sphinx_doc/source/index.rst | 15 +- docs/sphinx_doc/source/modules.rst | 7 - 34 files changed, 303 insertions(+), 1185 deletions(-) delete mode 100644 docs/sphinx_doc/_templates/module.rst_t delete mode 100644 docs/sphinx_doc/_templates/toc.rst_t delete mode 100644 docs/sphinx_doc/source/data_juicer.analysis.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.config.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.core.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.format.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.common.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.deduplicator.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.filter.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.mapper.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.ops.selector.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.tools.rst delete mode 100644 docs/sphinx_doc/source/data_juicer.utils.rst delete mode 100644 docs/sphinx_doc/source/modules.rst diff --git a/.github/workflows/deploy_spinx_docs.yml b/.github/workflows/deploy_spinx_docs.yml index 27cff9c39..16c75a338 100644 --- a/.github/workflows/deploy_spinx_docs.yml +++ b/.github/workflows/deploy_spinx_docs.yml @@ -1,9 +1,13 @@ name: Deploy Sphinx documentation to Pages on: - release: - types: [published] - workflow_dispatch: + pull_request: + types: [opened, synchronize] + paths: + - 'docs/sphinx_doc/**/*' + push: + branches: + - main jobs: pages: @@ -19,14 +23,18 @@ jobs: run: | python -m pip install --upgrade pip pip install -v -e .[dev] - - id: deployment - uses: sphinx-notes/pages@v3 + - id: build + name: Build Documentation + run: | + cd docs/sphinx_doc + ./build_sphinx_doc.sh + - name: Upload Documentation + uses: actions/upload-artifact@v3 with: - documentation_path: ./docs/sphinx_doc/source - python_version: ${{ matrix.python-version }} - publish: false - requirements_path: ./environments/dev_requires.txt + name: SphinxDoc + path: 'docs/sphinx_doc/build' - uses: peaceiris/actions-gh-pages@v3 + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ${{ steps.deployment.outputs.artifact }} + publish_dir: 'docs/sphinx_doc/build/html' diff --git a/data_juicer/analysis/__init__.py b/data_juicer/analysis/__init__.py index 78db975a1..e4ae41aa8 100644 --- a/data_juicer/analysis/__init__.py +++ b/data_juicer/analysis/__init__.py @@ -1,2 +1,9 @@ from .column_wise_analysis import ColumnWiseAnalysis +from .diversity_analysis import DiversityAnalysis from .overall_analysis import OverallAnalysis + +__all__ = [ + 'ColumnWiseAnalysis', + 'DiversityAnalysis', + 'OverallAnalysis', +] diff --git a/data_juicer/config/__init__.py b/data_juicer/config/__init__.py index 853722dec..b33c6e755 100644 --- a/data_juicer/config/__init__.py +++ b/data_juicer/config/__init__.py @@ -1 +1,7 @@ -from .config import * # noqa: F401,F403 +from .config import export_config, init_configs, merge_config + +__all__ = [ + 'init_configs', + 'export_config', + 'merge_config', +] diff --git a/data_juicer/core/__init__.py b/data_juicer/core/__init__.py index cf712d21a..28a8c6d39 100644 --- a/data_juicer/core/__init__.py +++ b/data_juicer/core/__init__.py @@ -3,3 +3,11 @@ from .executor import Executor from .exporter import Exporter from .tracer import Tracer + +__all__ = [ + 'Analyser', + 'NestedDataset', + 'Executor', + 'Exporter', + 'Tracer', +] diff --git a/data_juicer/format/__init__.py b/data_juicer/format/__init__.py index cd2e10de0..e25ec9921 100644 --- a/data_juicer/format/__init__.py +++ b/data_juicer/format/__init__.py @@ -1,3 +1,16 @@ from . import (csv_formatter, json_formatter, mixture_formatter, parquet_formatter, text_formatter, tsv_formatter) +from .csv_formatter import CsvFormatter +from .formatter import LocalFormatter, RemoteFormatter +from .json_formatter import JsonFormatter from .load import load_formatter +from .mixture_formatter import MixtureFormatter +from .parquet_formatter import ParquetFormatter +from .text_formatter import TextFormatter +from .tsv_formatter import TsvFormatter + +__all__ = [ + 'load_formatter', 'JsonFormatter', 'LocalFormatter', 'RemoteFormatter', + 'TextFormatter', 'ParquetFormatter', 'CsvFormatter', 'TsvFormatter', + 'MixtureFormatter' +] diff --git a/data_juicer/ops/__init__.py b/data_juicer/ops/__init__.py index c35fc22bb..ae8256850 100644 --- a/data_juicer/ops/__init__.py +++ b/data_juicer/ops/__init__.py @@ -1,3 +1,11 @@ from . import deduplicator, filter, mapper, selector from .base_op import OPERATORS, Deduplicator, Filter, Mapper, Selector from .load import load_ops + +__all__ = [ + 'load_ops', + 'Filter', + 'Mapper', + 'Deduplicator', + 'Selector', +] diff --git a/data_juicer/ops/common/__init__.py b/data_juicer/ops/common/__init__.py index 1218b9b12..74e8dd33d 100644 --- a/data_juicer/ops/common/__init__.py +++ b/data_juicer/ops/common/__init__.py @@ -3,3 +3,14 @@ split_on_newline_tab_whitespace, split_on_whitespace, strip, words_augmentation, words_refinement) from .special_characters import SPECIAL_CHARACTERS + +__all__ = [ + 'get_sentences_from_document', + 'get_words_from_document', + 'merge_on_whitespace_tab_newline', + 'split_on_newline_tab_whitespace', + 'split_on_whitespace', + 'strip', + 'words_augmentation', + 'words_refinement', +] diff --git a/data_juicer/ops/common/helper_func.py b/data_juicer/ops/common/helper_func.py index c8a29bf8a..58e43d36f 100644 --- a/data_juicer/ops/common/helper_func.py +++ b/data_juicer/ops/common/helper_func.py @@ -134,8 +134,8 @@ def get_words_from_document( :param document: document that need to split words. :param token_func: function of tokenizer, if specified, the function - will be used for split document into different tokens. - :param new_line: whether to use `\\\\n' to split words. + will be used for split document into different tokens. + :param new_line: whether to use '\\\\n' to split words. :param tab: whether to use '\\\\t' to split words. :return: word list obtained from document """ diff --git a/data_juicer/ops/deduplicator/__init__.py b/data_juicer/ops/deduplicator/__init__.py index b95e91a80..69f73b361 100644 --- a/data_juicer/ops/deduplicator/__init__.py +++ b/data_juicer/ops/deduplicator/__init__.py @@ -2,3 +2,18 @@ document_simhash_deduplicator, image_deduplicator, ray_document_deduplicator, ray_image_deduplicator, ray_video_deduplicator, video_deduplicator) +from .document_deduplicator import DocumentDeduplicator +from .document_minhash_deduplicator import DocumentMinhashDeduplicator +from .document_simhash_deduplicator import DocumentSimhashDeduplicator +from .image_deduplicator import ImageDeduplicator +from .ray_basic_deduplicator import RayBasicDeduplicator +from .ray_document_deduplicator import RayDocumentDeduplicator +from .ray_image_deduplicator import RayImageDeduplicator +from .ray_video_deduplicator import RayVideoDeduplicator +from .video_deduplicator import VideoDeduplicator + +__all__ = [ + 'VideoDeduplicator', 'RayBasicDeduplicator', 'DocumentMinhashDeduplicator', + 'RayImageDeduplicator', 'RayDocumentDeduplicator', 'DocumentDeduplicator', + 'ImageDeduplicator', 'DocumentSimhashDeduplicator', 'RayVideoDeduplicator' +] diff --git a/data_juicer/ops/deduplicator/ray_document_deduplicator.py b/data_juicer/ops/deduplicator/ray_document_deduplicator.py index 9f8d6cd91..e12eb149f 100644 --- a/data_juicer/ops/deduplicator/ray_document_deduplicator.py +++ b/data_juicer/ops/deduplicator/ray_document_deduplicator.py @@ -29,7 +29,7 @@ def __init__(self, :param redis_port: the port of redis server :param lowercase: Whether to convert sample text to lower case :param ignore_non_character: Whether to ignore non-alphabet - characters, including whitespaces, digits, and punctuations + characters, including whitespaces, digits, and punctuations :param args: extra args :param kwargs: extra args. """ diff --git a/data_juicer/ops/filter/__init__.py b/data_juicer/ops/filter/__init__.py index ad6718268..056da04cd 100644 --- a/data_juicer/ops/filter/__init__.py +++ b/data_juicer/ops/filter/__init__.py @@ -18,5 +18,91 @@ video_nsfw_filter, video_ocr_area_ratio_filter, video_resolution_filter, video_tagging_from_frames_filter, video_watermark_filter, word_num_filter, word_repetition_filter) +from .alphanumeric_filter import AlphanumericFilter +from .audio_duration_filter import AudioDurationFilter +from .audio_nmf_snr_filter import AudioNMFSNRFilter +from .audio_size_filter import AudioSizeFilter +from .average_line_length_filter import AverageLineLengthFilter +from .character_repetition_filter import CharacterRepetitionFilter +from .flagged_words_filter import FlaggedWordFilter +from .image_aesthetics_filter import ImageAestheticsFilter +from .image_aspect_ratio_filter import ImageAspectRatioFilter +from .image_face_ratio_filter import ImageFaceRatioFilter +from .image_nsfw_filter import ImageNSFWFilter +from .image_shape_filter import ImageShapeFilter +from .image_size_filter import ImageSizeFilter +from .image_text_matching_filter import ImageTextMatchingFilter +from .image_text_similarity_filter import ImageTextSimilarityFilter +from .image_watermark_filter import ImageWatermarkFilter +from .language_id_score_filter import LanguageIDScoreFilter +from .maximum_line_length_filter import MaximumLineLengthFilter +from .perplexity_filter import PerplexityFilter +from .phrase_grounding_recall_filter import PhraseGroundingRecallFilter +from .special_characters_filter import SpecialCharactersFilter +from .specified_field_filter import SpecifiedFieldFilter +from .specified_numeric_field_filter import SpecifiedNumericFieldFilter +from .stopwords_filter import StopWordsFilter +from .suffix_filter import SuffixFilter +from .text_action_filter import TextActionFilter +from .text_entity_dependency_filter import TextEntityDependencyFilter +from .text_length_filter import TextLengthFilter +from .token_num_filter import TokenNumFilter +from .video_aesthetics_filter import VideoAestheticsFilter +from .video_aspect_ratio_filter import VideoAspectRatioFilter +from .video_duration_filter import VideoDurationFilter +from .video_frames_text_similarity_filter import \ + VideoFramesTextSimilarityFilter +from .video_motion_score_filter import VideoMotionScoreFilter +from .video_nsfw_filter import VideoNSFWFilter +from .video_ocr_area_ratio_filter import VideoOcrAreaRatioFilter +from .video_resolution_filter import VideoResolutionFilter +from .video_tagging_from_frames_filter import VideoTaggingFromFramesFilter +from .video_watermark_filter import VideoWatermarkFilter +from .word_num_filter import WordNumFilter +from .word_repetition_filter import WordRepetitionFilter + +__all__ = [ + 'ImageTextSimilarityFilter', + 'VideoAspectRatioFilter', + 'ImageTextMatchingFilter', + 'ImageNSFWFilter', + 'TokenNumFilter', + 'TextLengthFilter', + 'SpecifiedNumericFieldFilter', + 'AudioNMFSNRFilter', + 'VideoAestheticsFilter', + 'PerplexityFilter', + 'PhraseGroundingRecallFilter', + 'MaximumLineLengthFilter', + 'AverageLineLengthFilter', + 'SpecifiedFieldFilter', + 'VideoTaggingFromFramesFilter', + 'TextEntityDependencyFilter', + 'VideoResolutionFilter', + 'AlphanumericFilter', + 'ImageWatermarkFilter', + 'ImageAestheticsFilter', + 'AudioSizeFilter', + 'StopWordsFilter', + 'CharacterRepetitionFilter', + 'ImageShapeFilter', + 'VideoDurationFilter', + 'TextActionFilter', + 'VideoOcrAreaRatioFilter', + 'VideoNSFWFilter', + 'SpecialCharactersFilter', + 'VideoFramesTextSimilarityFilter', + 'ImageAspectRatioFilter', + 'AudioDurationFilter', + 'LanguageIDScoreFilter', + 'SuffixFilter', + 'ImageSizeFilter', + 'VideoWatermarkFilter', + 'WordNumFilter', + 'ImageFaceRatioFilter', + 'FlaggedWordFilter', + 'WordRepetitionFilter', + 'VideoMotionScoreFilter', +] # yapf: enable diff --git a/data_juicer/ops/mapper/__init__.py b/data_juicer/ops/mapper/__init__.py index 3166c5aae..90fc4898b 100644 --- a/data_juicer/ops/mapper/__init__.py +++ b/data_juicer/ops/mapper/__init__.py @@ -24,5 +24,98 @@ video_tagging_from_audio_mapper, video_tagging_from_frames_mapper, whitespace_normalization_mapper) +from .audio_ffmpeg_wrapped_mapper import AudioFFmpegWrappedMapper +from .chinese_convert_mapper import ChineseConvertMapper +from .clean_copyright_mapper import CleanCopyrightMapper +from .clean_email_mapper import CleanEmailMapper +from .clean_html_mapper import CleanHtmlMapper +from .clean_ip_mapper import CleanIpMapper +from .clean_links_mapper import CleanLinksMapper +from .expand_macro_mapper import ExpandMacroMapper +from .fix_unicode_mapper import FixUnicodeMapper +from .image_blur_mapper import ImageBlurMapper +from .image_captioning_from_gpt4v_mapper import ImageCaptioningFromGPT4VMapper +from .image_captioning_mapper import ImageCaptioningMapper +from .image_diffusion_mapper import ImageDiffusionMapper +from .image_face_blur_mapper import ImageFaceBlurMapper +from .nlpaug_en_mapper import NlpaugEnMapper +from .nlpcda_zh_mapper import NlpcdaZhMapper +from .punctuation_normalization_mapper import PunctuationNormalizationMapper +from .remove_bibliography_mapper import RemoveBibliographyMapper +from .remove_comments_mapper import RemoveCommentsMapper +from .remove_header_mapper import RemoveHeaderMapper +from .remove_long_words_mapper import RemoveLongWordsMapper +from .remove_non_chinese_character_mapper import \ + RemoveNonChineseCharacterlMapper +from .remove_repeat_sentences_mapper import RemoveRepeatSentencesMapper +from .remove_specific_chars_mapper import RemoveSpecificCharsMapper +from .remove_table_text_mapper import RemoveTableTextMapper +from .remove_words_with_incorrect_substrings_mapper import \ + RemoveWordsWithIncorrectSubstringsMapper +from .replace_content_mapper import ReplaceContentMapper +from .sentence_split_mapper import SentenceSplitMapper +from .video_captioning_from_audio_mapper import VideoCaptioningFromAudioMapper +from .video_captioning_from_frames_mapper import \ + VideoCaptioningFromFramesMapper +from .video_captioning_from_summarizer_mapper import \ + VideoCaptioningFromSummarizerMapper +from .video_captioning_from_video_mapper import VideoCaptioningFromVideoMapper +from .video_face_blur_mapper import VideoFaceBlurMapper +from .video_ffmpeg_wrapped_mapper import VideoFFmpegWrappedMapper +from .video_remove_watermark_mapper import VideoRemoveWatermarkMapper +from .video_resize_aspect_ratio_mapper import VideoResizeAspectRatioMapper +from .video_resize_resolution_mapper import VideoResizeResolutionMapper +from .video_split_by_duration_mapper import VideoSplitByDurationMapper +from .video_split_by_key_frame_mapper import VideoSplitByKeyFrameMapper +from .video_split_by_scene_mapper import VideoSplitBySceneMapper +from .video_tagging_from_audio_mapper import VideoTaggingFromAudioMapper +from .video_tagging_from_frames_mapper import VideoTaggingFromFramesMapper +from .whitespace_normalization_mapper import WhitespaceNormalizationMapper + +__all__ = [ + 'VideoCaptioningFromAudioMapper', + 'VideoTaggingFromAudioMapper', + 'ImageCaptioningFromGPT4VMapper', + 'PunctuationNormalizationMapper', + 'RemoveBibliographyMapper', + 'SentenceSplitMapper', + 'VideoSplitBySceneMapper', + 'CleanIpMapper', + 'CleanLinksMapper', + 'RemoveHeaderMapper', + 'RemoveTableTextMapper', + 'VideoRemoveWatermarkMapper', + 'RemoveRepeatSentencesMapper', + 'ImageDiffusionMapper', + 'ImageFaceBlurMapper', + 'VideoFFmpegWrappedMapper', + 'ChineseConvertMapper', + 'NlpcdaZhMapper', + 'ImageBlurMapper', + 'CleanCopyrightMapper', + 'RemoveNonChineseCharacterlMapper', + 'VideoSplitByKeyFrameMapper', + 'RemoveSpecificCharsMapper', + 'VideoResizeAspectRatioMapper', + 'CleanHtmlMapper', + 'WhitespaceNormalizationMapper', + 'VideoTaggingFromFramesMapper', + 'RemoveCommentsMapper', + 'ExpandMacroMapper', + 'ImageCaptioningMapper', + 'RemoveWordsWithIncorrectSubstringsMapper', + 'VideoCaptioningFromVideoMapper', + 'VideoCaptioningFromSummarizerMapper', + 'FixUnicodeMapper', + 'NlpaugEnMapper', + 'VideoCaptioningFromFramesMapper', + 'RemoveLongWordsMapper', + 'VideoResizeResolutionMapper', + 'CleanEmailMapper', + 'ReplaceContentMapper', + 'AudioFFmpegWrappedMapper', + 'VideoSplitByDurationMapper', + 'VideoFaceBlurMapper', +] # yapf: enable diff --git a/data_juicer/ops/mapper/image_face_blur_mapper.py b/data_juicer/ops/mapper/image_face_blur_mapper.py index e4ec8f4a6..8ba01b61b 100644 --- a/data_juicer/ops/mapper/image_face_blur_mapper.py +++ b/data_juicer/ops/mapper/image_face_blur_mapper.py @@ -33,7 +33,7 @@ def __init__(self, Initialization method. :param blur_type: Type of blur kernel, including - ['mean', 'box', 'gaussian']. + ['mean', 'box', 'gaussian']. :param radius: Radius of blur kernel. :param args: extra args :param kwargs: extra args diff --git a/data_juicer/ops/mapper/video_face_blur_mapper.py b/data_juicer/ops/mapper/video_face_blur_mapper.py index 17a3f6d54..a3abb233e 100644 --- a/data_juicer/ops/mapper/video_face_blur_mapper.py +++ b/data_juicer/ops/mapper/video_face_blur_mapper.py @@ -32,7 +32,7 @@ def __init__(self, Initialization method. :param blur_type: Type of blur kernel, including - ['mean', 'box', 'gaussian']. + ['mean', 'box', 'gaussian']. :param radius: Radius of blur kernel. :param args: extra args :param kwargs: extra args diff --git a/data_juicer/ops/selector/__init__.py b/data_juicer/ops/selector/__init__.py index cf0977321..c37998a9a 100644 --- a/data_juicer/ops/selector/__init__.py +++ b/data_juicer/ops/selector/__init__.py @@ -1 +1,5 @@ from . import frequency_specified_field_selector, topk_specified_field_selector +from .frequency_specified_field_selector import FrequencySpecifiedFieldSelector +from .topk_specified_field_selector import TopkSpecifiedFieldSelector + +__all__ = ['FrequencySpecifiedFieldSelector', 'TopkSpecifiedFieldSelector'] diff --git a/docs/sphinx_doc/_templates/module.rst_t b/docs/sphinx_doc/_templates/module.rst_t deleted file mode 100644 index 249027855..000000000 --- a/docs/sphinx_doc/_templates/module.rst_t +++ /dev/null @@ -1,9 +0,0 @@ -{%- if show_headings %} -{{- [basename, "module"] | join(' ') | e | heading }} - -{% endif -%} -.. automodule:: {{ qualname }} -{%- for option in automodule_options %} - :{{ option }}: -{%- endfor %} - diff --git a/docs/sphinx_doc/_templates/package.rst_t b/docs/sphinx_doc/_templates/package.rst_t index cae4ac115..2951c5530 100644 --- a/docs/sphinx_doc/_templates/package.rst_t +++ b/docs/sphinx_doc/_templates/package.rst_t @@ -5,6 +5,8 @@ {%- endfor %} {%- endmacro %} +{{- pkgname | heading }} + {%- macro toctree(docnames) -%} .. toctree:: :maxdepth: {{ maxdepth }} @@ -13,33 +15,4 @@ {%- endfor %} {%- endmacro %} -{%- if is_namespace %} -{{- [pkgname, "namespace"] | join(" ") | e | heading }} -{% else %} -{{- pkgname | e | heading }} -{% endif %} - -{%- if is_namespace %} -.. py:module:: {{ pkgname }} -{% endif %} - -{%- if modulefirst and not is_namespace %} {{ automodule(pkgname, automodule_options) }} -{% endif %} - -{%- if subpackages %} -{{ toctree(subpackages) }} -{% endif %} - -{%- if submodules %} -{% if separatemodules %} -{{ toctree(submodules) }} -{% else %} -{%- for submodule in submodules %} -{% if show_headings %} -{{- submodule | e | heading(2) }} -{% endif %} -{{ automodule(submodule, automodule_options) }} -{% endfor %} -{%- endif %} -{%- endif %} diff --git a/docs/sphinx_doc/_templates/toc.rst_t b/docs/sphinx_doc/_templates/toc.rst_t deleted file mode 100644 index f0877eeb2..000000000 --- a/docs/sphinx_doc/_templates/toc.rst_t +++ /dev/null @@ -1,8 +0,0 @@ -{{ header | heading }} - -.. toctree:: - :maxdepth: {{ maxdepth }} -{% for docname in docnames %} - {{ docname }} -{%- endfor %} - diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py index 2d91a83b0..8b5921558 100644 --- a/docs/sphinx_doc/source/conf.py +++ b/docs/sphinx_doc/source/conf.py @@ -24,10 +24,20 @@ extensions = [ 'sphinx.ext.autodoc', + "sphinx.ext.autosummary", 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', + "sphinx.ext.autosectionlabel", ] +# Prefix document path to section labels, otherwise autogenerated labels would +# look like 'heading' rather than 'path/to/file:heading' +autosectionlabel_prefix_document = True +autosummary_generate = True +autosummary_ignore_module_all = False + +autodoc_member_order = "bysource" + templates_path = ['_templates'] exclude_patterns = ['build'] @@ -36,6 +46,10 @@ html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme_options = { + "navigation_depth": 2, +} + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". diff --git a/docs/sphinx_doc/source/data_juicer.analysis.rst b/docs/sphinx_doc/source/data_juicer.analysis.rst deleted file mode 100644 index 2a053700f..000000000 --- a/docs/sphinx_doc/source/data_juicer.analysis.rst +++ /dev/null @@ -1,52 +0,0 @@ -data\_juicer.analysis -============================= - - - -data\_juicer.analysis.collector --------------------------------------- - -.. automodule:: data_juicer.analysis.collector - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.analysis.column\_wise\_analysis ---------------------------------------------------- - -.. automodule:: data_juicer.analysis.column_wise_analysis - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.analysis.diversity\_analysis ------------------------------------------------- - -.. automodule:: data_juicer.analysis.diversity_analysis - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.analysis.draw ---------------------------------- - -.. automodule:: data_juicer.analysis.draw - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.analysis.measure ------------------------------------- - -.. automodule:: data_juicer.analysis.measure - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.analysis.overall\_analysis ----------------------------------------------- - -.. automodule:: data_juicer.analysis.overall_analysis - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.config.rst b/docs/sphinx_doc/source/data_juicer.config.rst deleted file mode 100644 index c77412dc4..000000000 --- a/docs/sphinx_doc/source/data_juicer.config.rst +++ /dev/null @@ -1,11 +0,0 @@ -data\_juicer.config -=========================== - - -data\_juicer.config.config ---------------------------------- - -.. automodule:: data_juicer.config.config - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.core.rst b/docs/sphinx_doc/source/data_juicer.core.rst deleted file mode 100644 index 4d2ea7df6..000000000 --- a/docs/sphinx_doc/source/data_juicer.core.rst +++ /dev/null @@ -1,51 +0,0 @@ -data\_juicer.core -========================= - - -data\_juicer.core.analyser ---------------------------------- - -.. automodule:: data_juicer.core.analyser - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.core.data ------------------------------ - -.. automodule:: data_juicer.core.data - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.core.executor ---------------------------------- - -.. automodule:: data_juicer.core.executor - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.core.exporter ---------------------------------- - -.. automodule:: data_juicer.core.exporter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.core.ray\_executor --------------------------------------- - -.. automodule:: data_juicer.core.ray_executor - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.core.tracer -------------------------------- - -.. automodule:: data_juicer.core.tracer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.format.rst b/docs/sphinx_doc/source/data_juicer.format.rst deleted file mode 100644 index 837bac07e..000000000 --- a/docs/sphinx_doc/source/data_juicer.format.rst +++ /dev/null @@ -1,67 +0,0 @@ -data\_juicer.format -=========================== - - -data\_juicer.format.csv\_formatter ------------------------------------------ - -.. automodule:: data_juicer.format.csv_formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.formatter ------------------------------------- - -.. automodule:: data_juicer.format.formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.json\_formatter ------------------------------------------- - -.. automodule:: data_juicer.format.json_formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.load -------------------------------- - -.. automodule:: data_juicer.format.load - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.mixture\_formatter ---------------------------------------------- - -.. automodule:: data_juicer.format.mixture_formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.parquet\_formatter ---------------------------------------------- - -.. automodule:: data_juicer.format.parquet_formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.text\_formatter ------------------------------------------- - -.. automodule:: data_juicer.format.text_formatter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.format.tsv\_formatter ------------------------------------------ - -.. automodule:: data_juicer.format.tsv_formatter - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.common.rst b/docs/sphinx_doc/source/data_juicer.ops.common.rst deleted file mode 100644 index db392dd3d..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.common.rst +++ /dev/null @@ -1,19 +0,0 @@ -data\_juicer.ops.common -======================= - - -data\_juicer.ops.common.helper\_func -------------------------------------------- - -.. automodule:: data_juicer.ops.common.helper_func - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.common.special\_characters --------------------------------------------------- - -.. automodule:: data_juicer.ops.common.special_characters - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.deduplicator.rst b/docs/sphinx_doc/source/data_juicer.ops.deduplicator.rst deleted file mode 100644 index d4fa76e5f..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.deduplicator.rst +++ /dev/null @@ -1,43 +0,0 @@ -data\_juicer.ops.deduplicator -===================================== - - -data\_juicer.ops.deduplicator.document\_deduplicator ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.deduplicator.document_deduplicator - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.deduplicator.document\_minhash\_deduplicator --------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.deduplicator.document_minhash_deduplicator - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.deduplicator.document\_simhash\_deduplicator --------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.deduplicator.document_simhash_deduplicator - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.deduplicator.image\_deduplicator --------------------------------------------------------- - -.. automodule:: data_juicer.ops.deduplicator.image_deduplicator - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.deduplicator.video\_deduplicator --------------------------------------------------------- - -.. automodule:: data_juicer.ops.deduplicator.video_deduplicator - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.filter.rst b/docs/sphinx_doc/source/data_juicer.ops.filter.rst deleted file mode 100644 index 1817861f7..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.filter.rst +++ /dev/null @@ -1,330 +0,0 @@ -data\_juicer.ops.filter -=============================== - -data\_juicer.ops.filter.alphanumeric\_filter ---------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.alphanumeric_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.audio\_duration\_filter ------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.audio_duration_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.audio\_nmf\_snr\_filter ------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.audio_nmf_snr_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.audio\_size\_filter --------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.audio_size_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.average\_line\_length\_filter ------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.average_line_length_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.character\_repetition\_filter ------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.character_repetition_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.flagged\_words\_filter ------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.flagged_words_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_aesthetics\_filter --------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_aesthetics_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_aspect\_ratio\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.image_aspect_ratio_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_face\_ratio\_filter -------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_face_ratio_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_nsfw\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.image_nsfw_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_shape\_filter ---------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_shape_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_size\_filter --------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_size_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_text\_matching\_filter ------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_text_matching_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_text\_similarity\_filter --------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.image_text_similarity_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.image\_watermark\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.image_watermark_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.language\_id\_score\_filter ----------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.language_id_score_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.maximum\_line\_length\_filter ------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.maximum_line_length_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.perplexity\_filter -------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.perplexity_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.phrase\_grounding\_recall\_filter ----------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.phrase_grounding_recall_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.special\_characters\_filter ----------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.special_characters_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.specified\_field\_filter -------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.specified_field_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.specified\_numeric\_field\_filter ----------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.specified_numeric_field_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.stopwords\_filter ------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.stopwords_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.suffix\_filter ---------------------------------------------- - -.. automodule:: data_juicer.ops.filter.suffix_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.text\_action\_filter ---------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.text_action_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.text\_entity\_dependency\_filter ---------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.text_entity_dependency_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.text\_length\_filter ---------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.text_length_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.token\_num\_filter -------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.token_num_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_aesthetics\_filter --------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_aesthetics_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_aspect\_ratio\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.video_aspect_ratio_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_duration\_filter ------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_duration_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_frames\_text\_similarity\_filter ----------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_frames_text_similarity_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_motion\_score\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.video_motion_score_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_nsfw\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.video_nsfw_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_ocr\_area\_ratio\_filter --------------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_ocr_area_ratio_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_resolution\_filter --------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_resolution_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_watermark\_filter ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.filter.video_watermark_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.video\_tagging\_from\_frames\_filter --------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.video_tagging_from_frames_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.word\_num\_filter ------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.word_num_filter - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.filter.word\_repetition\_filter -------------------------------------------------------- - -.. automodule:: data_juicer.ops.filter.word_repetition_filter - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.mapper.rst b/docs/sphinx_doc/source/data_juicer.ops.mapper.rst deleted file mode 100644 index 4e8fc7158..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.mapper.rst +++ /dev/null @@ -1,363 +0,0 @@ -data\_juicer.ops.mapper -=============================== - - -data\_juicer.ops.mapper.audio\_ffmpeg\_wrapped\_mapper -------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.audio_ffmpeg_wrapped_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.chinese\_convert\_mapper -------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.chinese_convert_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.clean\_copyright\_mapper -------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.clean_copyright_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.clean\_email\_mapper ---------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.clean_email_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.clean\_html\_mapper --------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.clean_html_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.clean\_ip\_mapper ------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.clean_ip_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.clean\_links\_mapper ---------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.clean_links_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.expand\_macro\_mapper ----------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.expand_macro_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.fix\_unicode\_mapper ---------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.fix_unicode_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.generate\_caption\_mapper --------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.generate_caption_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.gpt4v\_generate\_mapper ------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.gpt4v_generate_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.image\_blur\_mapper --------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.image_blur_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.image\_captioning\_from\_gpt4v\_mapper ---------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.image_captioning_from_gpt4v_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.image\_captioning\_mapper --------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.image_captioning_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.image\_diffusion\_mapper -------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.image_diffusion_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.image\_face\_blur\_mapper -------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.image_face_blur_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.nlpaug\_en\_mapper -------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.nlpaug_en_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.nlpcda\_zh\_mapper -------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.nlpcda_zh_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.punctuation\_normalization\_mapper ------------------------------------------------------------------ - -.. automodule:: data_juicer.ops.mapper.punctuation_normalization_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_bibliography\_mapper ------------------------------------------------------------ - -.. automodule:: data_juicer.ops.mapper.remove_bibliography_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_comments\_mapper -------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_comments_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_header\_mapper ------------------------------------------------------ - -.. automodule:: data_juicer.ops.mapper.remove_header_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_long\_words\_mapper ----------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_long_words_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_non\_chinese\_character\_mapper ----------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_non_chinese_character_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_repeat\_sentences\_mapper ----------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_repeat_sentences_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_specific\_chars\_mapper --------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_specific_chars_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_table\_text\_mapper ----------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_table_text_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.remove\_words\_with\_incorrect\_substrings\_mapper ---------------------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.remove_words_with_incorrect_substrings_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.replace\_content\_mapper -------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.replace_content_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.sentence\_split\_mapper ------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.sentence_split_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_captioning\_from\_audio\_mapper ---------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_captioning_from_audio_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_captioning\_from\_frames\_mapper ---------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_captioning_from_frames_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_captioning\_from\_summarizer\_mapper ---------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_captioning_from_summarizer_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_captioning\_from\_video\_mapper ---------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_captioning_from_video_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_face\_blur\_mapper -------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_face_blur_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_ffmpeg\_wrapped\_mapper -------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_ffmpeg_wrapped_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_remove\_watermark\_mapper -------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_remove_watermark_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_resize\_aspect\_ratio\_mapper -------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_resize_aspect_ratio_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_resize\_resolution\_mapper ----------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_resize_resolution_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_split\_by\_duration\_mapper ------------------------------------------------------------------ - -.. automodule:: data_juicer.ops.mapper.video_split_by_duration_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_split\_by\_key\_frame\_mapper -------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_split_by_key_frame_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_split\_by\_scene\_mapper --------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_split_by_scene_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_tagging\_from\_audio\_mapper ------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_tagging_from_audio_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.video\_tagging\_from\_frames\_mapper -------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.video_tagging_from_frames_mapper - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.mapper.whitespace\_normalization\_mapper ----------------------------------------------------------------- - -.. automodule:: data_juicer.ops.mapper.whitespace_normalization_mapper - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.rst b/docs/sphinx_doc/source/data_juicer.ops.rst deleted file mode 100644 index 404379fe7..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.rst +++ /dev/null @@ -1,36 +0,0 @@ -data\_juicer.ops -======================== - -.. toctree:: - :maxdepth: 4 - - data_juicer.ops.common - data_juicer.ops.deduplicator - data_juicer.ops.filter - data_juicer.ops.mapper - data_juicer.ops.selector - - -data\_juicer.ops.base\_op --------------------------------- - -.. automodule:: data_juicer.ops.base_op - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.load ----------------------------- - -.. automodule:: data_juicer.ops.load - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.op\_fusion ----------------------------------- - -.. automodule:: data_juicer.ops.op_fusion - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.ops.selector.rst b/docs/sphinx_doc/source/data_juicer.ops.selector.rst deleted file mode 100644 index 64ef25e6b..000000000 --- a/docs/sphinx_doc/source/data_juicer.ops.selector.rst +++ /dev/null @@ -1,19 +0,0 @@ -data\_juicer.ops.selector -================================= - - -data\_juicer.ops.selector.frequency\_specified\_field\_selector ----------------------------------------------------------------------- - -.. automodule:: data_juicer.ops.selector.frequency_specified_field_selector - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.ops.selector.topk\_specified\_field\_selector ------------------------------------------------------------------ - -.. automodule:: data_juicer.ops.selector.topk_specified_field_selector - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/data_juicer.rst b/docs/sphinx_doc/source/data_juicer.rst deleted file mode 100644 index 076b6f360..000000000 --- a/docs/sphinx_doc/source/data_juicer.rst +++ /dev/null @@ -1,13 +0,0 @@ -data\_juicer -============ - -.. toctree:: - :maxdepth: 4 - - data_juicer.analysis - data_juicer.config - data_juicer.core - data_juicer.format - data_juicer.ops - data_juicer.tools - data_juicer.utils diff --git a/docs/sphinx_doc/source/data_juicer.tools.rst b/docs/sphinx_doc/source/data_juicer.tools.rst deleted file mode 100644 index 6d25049a6..000000000 --- a/docs/sphinx_doc/source/data_juicer.tools.rst +++ /dev/null @@ -1,2 +0,0 @@ -data\_juicer.tools -================== diff --git a/docs/sphinx_doc/source/data_juicer.utils.rst b/docs/sphinx_doc/source/data_juicer.utils.rst deleted file mode 100644 index 280e8db01..000000000 --- a/docs/sphinx_doc/source/data_juicer.utils.rst +++ /dev/null @@ -1,107 +0,0 @@ -data\_juicer.utils -================== - - -data\_juicer.utils.asset\_utils --------------------------------------- - -.. automodule:: data_juicer.utils.asset_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.availability\_utils ---------------------------------------------- - -.. automodule:: data_juicer.utils.availability_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.cache\_utils --------------------------------------- - -.. automodule:: data_juicer.utils.cache_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.ckpt\_utils -------------------------------------- - -.. automodule:: data_juicer.utils.ckpt_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.compress ----------------------------------- - -.. automodule:: data_juicer.utils.compress - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.constant ----------------------------------- - -.. automodule:: data_juicer.utils.constant - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.file\_utils -------------------------------------- - -.. automodule:: data_juicer.utils.file_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.fingerprint\_utils --------------------------------------------- - -.. automodule:: data_juicer.utils.fingerprint_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.logger\_utils ---------------------------------------- - -.. automodule:: data_juicer.utils.logger_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.mm\_utils ------------------------------------ - -.. automodule:: data_juicer.utils.mm_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.model\_utils --------------------------------------- - -.. automodule:: data_juicer.utils.model_utils - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.registry ----------------------------------- - -.. automodule:: data_juicer.utils.registry - :members: - :undoc-members: - :show-inheritance: - -data\_juicer.utils.unittest\_utils ------------------------------------------ - -.. automodule:: data_juicer.utils.unittest_utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/source/index.rst b/docs/sphinx_doc/source/index.rst index 9c098d834..78f525425 100644 --- a/docs/sphinx_doc/source/index.rst +++ b/docs/sphinx_doc/source/index.rst @@ -8,10 +8,19 @@ Welcome to data-juicer's documentation! .. toctree:: :maxdepth: 2 - :caption: References: - -.. include:: modules.rst + :glob: + :caption: Data-Juicer API Reference + data_juicer.core + data_juicer.ops + data_juicer.ops.filter + data_juicer.ops.mapper + data_juicer.ops.deduplicator + data_juicer.ops.selector + data_juicer.ops.common + data_juicer.analysis + data_juicer.config + data_juicer.format Indices and tables ================== diff --git a/docs/sphinx_doc/source/modules.rst b/docs/sphinx_doc/source/modules.rst deleted file mode 100644 index 2845759f3..000000000 --- a/docs/sphinx_doc/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -data_juicer -=========== - -.. toctree:: - :maxdepth: 4 - - data_juicer