diff --git a/python/paddle/base/dataset.py b/python/paddle/base/dataset.py index 099dba1e6d755..533fb69a6621b 100644 --- a/python/paddle/base/dataset.py +++ b/python/paddle/base/dataset.py @@ -30,9 +30,8 @@ class DatasetFactory: Example: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") """ def __init__(self): @@ -51,9 +50,8 @@ def create_dataset(self, datafeed_class="QueueDataset"): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() """ try: dataset = globals()[datafeed_class]() @@ -87,9 +85,9 @@ def set_pipe_command(self, pipe_command): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_pipe_command("python my_script.py") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_pipe_command("python my_script.py") Args: pipe_command(str): pipe command @@ -104,9 +102,9 @@ def set_so_parser_name(self, so_parser_name): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_so_parser_name("./abc.so") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_so_parser_name("./abc.so") Args: pipe_command(str): pipe command @@ -121,9 +119,9 @@ def set_rank_offset(self, rank_offset): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_rank_offset("rank_offset") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_rank_offset("rank_offset") Args: rank_offset(str): rank_offset's name @@ -183,9 +181,9 @@ def set_batch_size(self, batch_size): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_batch_size(128) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_batch_size(128) Args: batch_size(int): batch size @@ -200,9 +198,9 @@ def set_pv_batch_size(self, pv_batch_size): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_pv_batch(128) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_pv_batch_size(128) Args: pv_batch_size(int): pv batch size @@ -216,9 +214,9 @@ def set_thread(self, thread_num): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_thread(12) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_thread(12) Args: thread_num(int): thread num @@ -233,9 +231,9 @@ def set_filelist(self, filelist): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_filelist(['a.txt', 'b.txt']) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_filelist(['a.txt', 'b.txt']) Args: filelist(list): file list @@ -253,9 +251,12 @@ def set_use_var(self, var_list): Examples: .. 
code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_use_var([data, label]) + >>> import paddle + >>> import paddle.base as base + >>> paddle.enable_static() + >>> dataset = base.DatasetFactory().create_dataset() + >>> data = paddle.static.data(name="data", shape=[None, 10, 10], dtype="int64") + >>> label = paddle.static.data(name="label", shape=[None, 1], dtype="int64", lod_level=1) + >>> dataset.set_use_var([data, label]) Args: var_list(list): variable list @@ -286,9 +287,9 @@ def set_hdfs_config(self, fs_name, fs_ugi): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_hdfs_config("my_fs_name", "my_fs_ugi") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_hdfs_config("my_fs_name", "my_fs_ugi") Args: fs_name(str): fs name @@ -303,9 +304,9 @@ def set_download_cmd(self, download_cmd): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - dataset.set_download_cmd("./read_from_afs") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> dataset.set_download_cmd("./read_from_afs") Args: download_cmd(str): customized download command @@ -347,9 +348,9 @@ def desc(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset() - print(dataset.desc()) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset() + >>> print(dataset.desc()) Returns: A string message @@ -465,9 +466,9 @@ def set_queue_num(self, queue_num): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_queue_num(12) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_queue_num(12) """ self.is_user_set_queue_num = True @@ -487,9 +488,9 @@ def set_parse_ins_id(self, parse_ins_id): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_ins_id(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_parse_ins_id(True) """ self.parse_ins_id = parse_ins_id @@ -508,9 +509,9 @@ def set_parse_content(self, parse_content): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_content(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_parse_content(True) """ self.parse_content = parse_content @@ -525,9 +526,9 @@ def set_parse_logkey(self, parse_logkey): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_logkey(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_parse_logkey(True) """ self.parse_logkey = parse_logkey @@ -542,9 +543,9 @@ def _set_trainer_num(self, trainer_num): Examples: ..
code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset._set_trainer_num(1) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset._set_trainer_num(1) """ self.trainer_num = trainer_num @@ -563,9 +564,9 @@ def set_merge_by_sid(self, merge_by_sid): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_sid(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_merge_by_sid(True) """ self.merge_by_sid = merge_by_sid @@ -580,9 +581,9 @@ def set_enable_pv_merge(self, enable_pv_merge): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_enable_pv_merge(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_enable_pv_merge(True) """ self.enable_pv_merge = enable_pv_merge @@ -595,12 +596,13 @@ def preprocess_instance(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.preprocess_instance() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.preprocess_instance() """ self.dataset.preprocess_instance() @@ -613,12 +615,13 @@ def set_current_phase(self, current_phase): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.set_current_phase(1) + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.set_current_phase(1) """ self.dataset.set_current_phase(current_phase) @@ -630,14 +633,15 @@ def postprocess_instance(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.preprocess_instance() - exe.train_from_dataset(dataset) - dataset.postprocess_instance() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.preprocess_instance() + >>> exe.train_from_dataset(dataset) + >>> dataset.postprocess_instance() """ self.dataset.postprocess_instance() @@ -656,9 +660,9 @@ def set_fleet_send_batch_size(self, fleet_send_batch_size=1024): Examples: .. 
code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fleet_send_batch_size(800) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_fleet_send_batch_size(800) """ self.fleet_send_batch_size = fleet_send_batch_size @@ -677,9 +681,9 @@ def set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fleet_send_sleep_seconds(2) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_fleet_send_sleep_seconds(2) """ self.fleet_send_sleep_seconds = fleet_send_sleep_seconds @@ -699,9 +703,9 @@ def set_merge_by_lineid(self, merge_size=2): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_lineid() + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_merge_by_lineid() """ self.dataset.set_merge_by_lineid(merge_size) @@ -740,10 +744,10 @@ def set_date(self, date): Examples: .. code-block:: python - import paddle.base as base + >>> import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_date("20211111") + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_date("20211111") """ year = int(date[:4]) month = int(date[4:6]) @@ -765,12 +769,12 @@ def load_into_memory(self, is_shuffle=False): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() """ self._prepare_to_run() if not self.use_ps_gpu: @@ -793,13 +797,13 @@ def preload_into_memory(self, thread_num=None): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.preload_into_memory() - dataset.wait_preload_done() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.preload_into_memory() + >>> dataset.wait_preload_done() """ self._prepare_to_run() if thread_num is None: @@ -819,13 +823,13 @@ def wait_preload_done(self): Examples: .. 
code-block:: python - # required: skiptest - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.preload_into_memory() - dataset.wait_preload_done() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.preload_into_memory() + >>> dataset.wait_preload_done() """ self.dataset.wait_preload_done() self.dataset.destroy_preload_readers() @@ -841,13 +845,13 @@ def local_shuffle(self): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.local_shuffle() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.local_shuffle() """ self.dataset.local_shuffle() @@ -865,14 +869,14 @@ def global_shuffle(self, fleet=None, thread_num=12): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.global_shuffle(fleet) + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.global_shuffle(fleet) Args: fleet(Fleet): fleet singleton. Default None. @@ -927,18 +931,18 @@ def release_memory(self): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.global_shuffle(fleet) - exe = base.Executor(base.CPUPlace()) - exe.run(base.default_startup_program()) - exe.train_from_dataset(base.default_main_program(), dataset) - dataset.release_memory() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.global_shuffle(fleet) + >>> exe = base.Executor(base.CPUPlace()) + >>> exe.run(base.default_startup_program()) + >>> exe.train_from_dataset(base.default_main_program(), dataset) + >>> dataset.release_memory() """ self.dataset.release_memory() @@ -957,12 +961,13 @@ def get_pv_data_size(self): Examples: .. 
code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - print dataset.get_pv_data_size() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> print(dataset.get_pv_data_size()) """ return self.dataset.get_pv_data_size() @@ -991,14 +996,14 @@ def get_memory_data_size(self, fleet=None): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - print dataset.get_memory_data_size(fleet) + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> print(dataset.get_memory_data_size(fleet)) """ import numpy as np @@ -1035,15 +1040,15 @@ def get_shuffle_data_size(self, fleet=None): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.global_shuffle(fleet) - print dataset.get_shuffle_data_size(fleet) + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() + >>> dataset.global_shuffle(fleet) + >>> print(dataset.get_shuffle_data_size(fleet)) """ import numpy as np @@ -1082,20 +1087,20 @@ def set_graph_config(self, config): Examples: .. code-block:: python - # required: skiptest - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - graph_config = {"walk_len": 24, - "walk_degree": 10, - "once_sample_startid_len": 80000, - "sample_times_one_chunk": 5, - "window": 3, - "debug_mode": 0, - "batch_size": 800, - "meta_path": "cuid2clk-clk2cuid;cuid2conv-conv2cuid;clk2cuid-cuid2clk;clk2cuid-cuid2conv", - "gpu_graph_training": 1} - dataset.set_graph_config(graph_config) + >>> # doctest: +SKIP + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> graph_config = {"walk_len": 24, + ... "walk_degree": 10, + ... "once_sample_startid_len": 80000, + ... "sample_times_one_chunk": 5, + ... "window": 3, + ... "debug_mode": 0, + ... "batch_size": 800, + ... "meta_path": "cuid2clk-clk2cuid;cuid2conv-conv2cuid;clk2cuid-cuid2clk;clk2cuid-cuid2conv", + ... 
"gpu_graph_training": 1} + >>> dataset.set_graph_config(graph_config) """ self.proto_desc.graph_config.walk_degree = config.get("walk_degree", 1) @@ -1145,10 +1150,10 @@ def set_pass_id(self, pass_id): Examples: .. code-block:: python - import paddle.base as base - pass_id = 0 - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_pass_id(pass_id) + >>> import paddle.base as base + >>> pass_id = 0 + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_pass_id(pass_id) """ self.pass_id = pass_id self.dataset.set_pass_id(pass_id) @@ -1163,9 +1168,9 @@ def get_pass_id(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - pass_id = dataset.get_pass_id() + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> pass_id = dataset.get_pass_id() """ return self.pass_id @@ -1183,8 +1188,8 @@ class QueueDataset(DatasetBase): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("QueueDataset") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("QueueDataset") """ @@ -1224,9 +1229,10 @@ def local_shuffle(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("QueueDataset") - dataset.local_shuffle() + >>> # doctest: +SKIP('NotImplementedError will be raised.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("QueueDataset") + >>> dataset.local_shuffle() Raises: NotImplementedError: QueueDataset does not support local shuffle @@ -1250,10 +1256,10 @@ def global_shuffle(self, fleet=None): Examples: .. code-block:: python - import paddle.base as base - from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet - dataset = base.DatasetFactory().create_dataset("QueueDataset") - #dataset.global_shuffle(fleet) + >>> import paddle.base as base + >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet + >>> dataset = base.DatasetFactory().create_dataset("QueueDataset") + >>> #dataset.global_shuffle(fleet) Raises: NotImplementedError: QueueDataset does not support global shuffle @@ -1272,8 +1278,8 @@ class FileInstantDataset(DatasetBase): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory.create_dataset("FileInstantDataset") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory.create_dataset("FileInstantDataset") """ def __init__(self): @@ -1312,8 +1318,8 @@ class BoxPSDataset(InMemoryDataset): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") """ def __init__(self): @@ -1342,9 +1348,9 @@ def begin_pass(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") - dataset.begin_pass() + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> dataset.begin_pass() """ self.boxps.begin_pass() @@ -1355,9 +1361,9 @@ def end_pass(self, need_save_delta): Examples: .. 
code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") - dataset.end_pass(True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> dataset.end_pass(True) """ self.boxps.end_pass(need_save_delta) @@ -1368,12 +1374,13 @@ def wait_preload_done(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.preload_into_memory() - dataset.wait_preload_done() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.preload_into_memory() + >>> dataset.wait_preload_done() """ self.boxps.wait_feed_pass_done() @@ -1383,11 +1390,12 @@ def load_into_memory(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.load_into_memory() """ self._prepare_to_run() self.boxps.load_into_memory() @@ -1398,11 +1406,12 @@ def preload_into_memory(self): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("BoxPSDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.preload_into_memory() + >>> # doctest: +SKIP('Depends on external files.') + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") + >>> filelist = ["a.txt", "b.txt"] + >>> dataset.set_filelist(filelist) + >>> dataset.preload_into_memory() """ self._prepare_to_run() self.boxps.preload_into_memory() @@ -1427,11 +1436,13 @@ def slots_shuffle(self, slots): slots(list[string]): the set of slots(string) to do slots shuffle. Examples: - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_lineid() - #suppose there is a slot 0 - dataset.slots_shuffle(['0']) + .. code-block:: python + + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_merge_by_lineid() + >>> #suppose there is a slot 0 + >>> dataset.slots_shuffle(['0']) """ slots_set = set(slots) self.boxps.slots_shuffle(slots_set)