diff --git a/docs/api/paddle/distributed/QueueDataset_cn.rst b/docs/api/paddle/distributed/QueueDataset_cn.rst index 78315d9bbea..6039fa4e4d6 100644 --- a/docs/api/paddle/distributed/QueueDataset_cn.rst +++ b/docs/api/paddle/distributed/QueueDataset_cn.rst @@ -14,10 +14,7 @@ QueueyDataset 是流式处理数据使用 Dataset 类。与 InmemoryDataset 继 代码示例 :::::::::::: -.. code-block:: python - - import paddle - dataset = paddle.distributed.QueueDataset() +COPY-FROM: paddle.distributed.QueueDataset 方法 :::::::::::: @@ -49,7 +46,7 @@ None。 **代码示例** -.. code-block:: python +.. code-block:: text import paddle @@ -108,42 +105,7 @@ set_filelist(filelist) **代码示例** -.. code-block:: python - - import paddle - import os - - paddle.enable_static() - - with open("test_queue_dataset_run_a.txt", "w") as f: - data = "2 1 2 2 5 4 2 2 7 2 1 3\n" - data += "2 6 2 2 1 4 2 2 4 2 2 3\n" - data += "2 5 2 2 9 9 2 2 7 2 1 3\n" - data += "2 7 2 2 1 9 2 3 7 2 5 3\n" - f.write(data) - with open("test_queue_dataset_run_b.txt", "w") as f: - data = "2 1 2 2 5 4 2 2 7 2 1 3\n" - data += "2 6 2 2 1 4 2 2 4 2 2 3\n" - data += "2 5 2 2 9 9 2 2 7 2 1 3\n" - data += "2 7 2 2 1 9 2 3 7 2 5 3\n" - f.write(data) - dataset = paddle.distributed.QueueDataset() - slots = ["slot1", "slot2", "slot3", "slot4"] - slots_vars = [] - for slot in slots: - var = paddle.static.data( - name=slot, shape=[None, 1], dtype="int64", lod_level=1) - slots_vars.append(var) - dataset.init( - batch_size=1, - thread_num=2, - input_type=1, - pipe_command="cat", - use_var=slots_vars) - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - os.remove("./test_queue_dataset_run_a.txt") - os.remove("./test_queue_dataset_run_b.txt") +COPY-FROM: paddle.distributed.QueueDataset.set_filelist **参数** diff --git a/docs/api/paddle/distributed/fleet/PaddleCloudRoleMaker_cn.rst b/docs/api/paddle/distributed/fleet/PaddleCloudRoleMaker_cn.rst index 314ccbddf31..1d52a20a71c 100644 --- a/docs/api/paddle/distributed/fleet/PaddleCloudRoleMaker_cn.rst +++ 
b/docs/api/paddle/distributed/fleet/PaddleCloudRoleMaker_cn.rst @@ -12,23 +12,7 @@ PaddleCloudRoleMaker 是基于从环境变量中获取分布式相关信息进 代码示例 :::::::::::: -.. code-block:: python - - import os - import paddle.distributed.fleet as fleet - - os.environ["PADDLE_PSERVER_NUMS"] = "2" - os.environ["PADDLE_TRAINERS_NUM"] = "2" - - os.environ["POD_IP"] = "127.0.0.1" - os.environ["PADDLE_PORT"] = "36001" - os.environ["TRAINING_ROLE"] = "PSERVER" - os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ - "127.0.0.1:36001,127.0.0.2:36001" - - os.environ["PADDLE_TRAINER_ID"] = "0" - - fleet.PaddleCloudRoleMaker(is_collective=False) +COPY-FROM: paddle.distributed.fleet.PaddleCloudRoleMaker 方法 :::::::::::: @@ -45,7 +29,7 @@ string **代码示例** -.. code-block:: python +.. code-block:: text import paddle.distributed.fleet as fleet role = fleet.PaddleCloudRoleMaker(is_collective=False) diff --git a/docs/api/paddle/distributed/fleet/UserDefinedRoleMaker_cn.rst b/docs/api/paddle/distributed/fleet/UserDefinedRoleMaker_cn.rst index a9d1cea57b2..1210310a4a4 100644 --- a/docs/api/paddle/distributed/fleet/UserDefinedRoleMaker_cn.rst +++ b/docs/api/paddle/distributed/fleet/UserDefinedRoleMaker_cn.rst @@ -12,16 +12,7 @@ UserDefinedRoleMaker 是基于从用户自定义的参数中获取分布式相 代码示例 :::::::::::: -.. code-block:: python - - import paddle.distributed.fleet as fleet - from paddle.distributed.fleet.base.role_maker import Role - - fleet.UserDefinedRoleMaker( - current_id=0, - role=Role.SERVER, - worker_num=2, - server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"]) +COPY-FROM: paddle.distributed.fleet.UserDefinedRoleMaker 方法 :::::::::::: @@ -38,15 +29,13 @@ string **代码示例** -.. code-block:: python +.. 
code-block:: text import paddle.distributed.fleet as fleet from paddle.distributed.fleet.base.role_maker import Role - role = fleet.UserDefinedRoleMaker( current_id=0, role=Role.SERVER, worker_num=2, server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"]) - role.to_string() diff --git a/docs/api/paddle/distributed/fleet/UtilBase_cn.rst b/docs/api/paddle/distributed/fleet/UtilBase_cn.rst index ca91716f667..ad75c494e1c 100644 --- a/docs/api/paddle/distributed/fleet/UtilBase_cn.rst +++ b/docs/api/paddle/distributed/fleet/UtilBase_cn.rst @@ -24,39 +24,7 @@ Numpy.array|None:一个和 `input` 形状一致的 numpy 数组或 None。 **代码示例** -.. code-block:: python - - # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . - import paddle.distributed.fleet as fleet - from paddle.distributed.fleet import PaddleCloudRoleMaker - import sys - import numpy as np - import os - - os.environ["PADDLE_WITH_GLOO"] = "2" - - def train(): - role = PaddleCloudRoleMaker( - is_collective=False, - init_gloo=True, - path="./tmp_gloo") - fleet.init(role) - - if fleet.is_server(): - input = [1, 2] - output = fleet.util.all_reduce(input, "sum", "server") - print(output) - # [2, 4] - elif fleet.is_worker(): - input = np.array([3, 4]) - output = fleet.util.all_reduce(input, "sum", "worker") - print(output) - # [6, 8] - output = fleet.util.all_reduce(input, "sum", "all") - print(output) - # [8, 12] - if __name__ == "__main__": - train() +COPY-FROM: paddle.distributed.fleet.UtilBase.all_reduce barrier(comm_world="worker") ''''''''' @@ -68,35 +36,7 @@ barrier(comm_world="worker") **代码示例** -.. code-block:: python - - # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . 
- - import paddle.distributed.fleet as fleet - from paddle.distributed.fleet import PaddleCloudRoleMaker - import sys - import os - - os.environ["PADDLE_WITH_GLOO"] = "2" - - def train(): - role = PaddleCloudRoleMaker( - is_collective=False, - init_gloo=True, - path="./tmp_gloo") - fleet.init(role) - - if fleet.is_server(): - fleet.util.barrier("server") - print("all server arrive here") - elif fleet.is_worker(): - fleet.util.barrier("worker") - print("all server arrive here") - fleet.util.barrier("all") - print("all servers and workers arrive here") - - if __name__ == "__main__": - train() +COPY-FROM: paddle.distributed.fleet.UtilBase.barrier all_gather(input, comm_world="worker") ''''''''' @@ -113,39 +53,7 @@ all_gather(input, comm_world="worker") **代码示例** -.. code-block:: python - - # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . - import paddle.distributed.fleet as fleet - from paddle.distributed.fleet import PaddleCloudRoleMaker - import sys - import os - - os.environ["PADDLE_WITH_GLOO"] = "2" - - def train(): - role = PaddleCloudRoleMaker( - is_collective=False, - init_gloo=True, - path="./tmp_gloo") - fleet.init(role) - - if fleet.is_server(): - input = fleet.server_index() - output = fleet.util.all_gather(input, "server") - print(output) - # output = [0, 1] - elif fleet.is_worker(): - input = fleet.worker_index() - output = fleet.util.all_gather(input, "worker") - # output = [0, 1] - print(output) - output = fleet.util.all_gather(input, "all") - print(output) - # output = [0, 1, 0, 1] - - if __name__ == "__main__": - train() +COPY-FROM: paddle.distributed.fleet.UtilBase.all_gather get_file_shard(files) ''''''''' @@ -166,23 +74,7 @@ get_file_shard(files) **代码示例** -.. 
code-block:: python - - import paddle.distributed.fleet as fleet - import paddle.distributed.fleet.base.role_maker as role_maker - - role = role_maker.UserDefinedRoleMaker( - is_collective=False, - init_gloo=False, - current_id=0, - role=role_maker.Role.WORKER, - worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"], - server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"]) - fleet.init(role) - - files = fleet.util.get_file_shard(["file1", "file2", "file3"]) - print(files) - # files = ["file1", "file2"] +COPY-FROM: paddle.distributed.fleet.UtilBase.get_file_shard print_on_rank(message, rank_id) ''''''''' @@ -196,18 +88,4 @@ print_on_rank(message, rank_id) **代码示例** -.. code-block:: python - - import paddle.distributed.fleet as fleet - import paddle.distributed.fleet.base.role_maker as role_maker - - role = role_maker.UserDefinedRoleMaker( - is_collective=False, - init_gloo=False, - current_id=0, - role=role_maker.Role.WORKER, - worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"], - server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"]) - fleet.init(role) - - fleet.util.print_on_rank("I'm worker 0", 0) +COPY-FROM: paddle.distributed.fleet.UtilBase.print_on_rank diff --git a/docs/api/paddle/distributed/fleet/utils/HDFSClient_cn.rst b/docs/api/paddle/distributed/fleet/utils/HDFSClient_cn.rst index 3e562756f1c..7d17447ead1 100644 --- a/docs/api/paddle/distributed/fleet/utils/HDFSClient_cn.rst +++ b/docs/api/paddle/distributed/fleet/utils/HDFSClient_cn.rst @@ -4,27 +4,25 @@ HDFSClient ------------------------------- .. py:class:: paddle.distributed.fleet.utils.HDFSClient -一个HADOOP文件系统工具类。 +一个 HADOOP 文件系统工具类。 参数 :::::::::::: - - **hadoop_home** (str):HADOOP HOME地址。 - - **configs** (dict): HADOOP文件系统配置。需包含 `fs.default.name` 和 `hadoop.job.ugi` 这两个字段。 + - **hadoop_home** (str):HADOOP HOME 地址。 + - **configs** (dict): HADOOP 文件系统配置。需包含 `fs.default.name` 和 `hadoop.job.ugi` 这两个字段。 代码示例 :::::::::::: -.. code-block:: python +.. 
code-block:: text from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" - configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.ls_dir("hdfs:/test_hdfs_client") @@ -36,24 +34,22 @@ ls_dir(fs_path) **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **返回** - - Tuple,一个包含所有子目录和文件名的2-Tuple,格式形如:([subdirname1, subdirname1, ...], [filename1, filename2, ...])。 + - Tuple,一个包含所有子目录和文件名的 2-Tuple,格式形如:([subdirname1, subdirname2, ...], [filename1, filename2, ...])。 **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) subdirs, files = client.ls_dir("hdfs:/test_hdfs_client") @@ -63,43 +59,39 @@ mkdirs(fs_path) **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.mkdirs("hdfs:/test_hdfs_client") delete(fs_path) ''''''''' -删除HADOOP文件(或目录)。 +删除 HADOOP 文件(或目录)。 **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **代码示例** -.. code-block:: python +.. 
code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.delete("hdfs:/test_hdfs_client") @@ -109,7 +101,7 @@ is_file(fs_path) **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **返回** @@ -117,16 +109,14 @@ is_file(fs_path) **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) ret = client.is_file("hdfs:/test_hdfs_client") @@ -136,7 +126,7 @@ is_dir(fs_path) **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **返回** @@ -144,16 +134,14 @@ is_dir(fs_path) **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) ret = client.is_file("hdfs:/test_hdfs_client") @@ -163,7 +151,7 @@ is_exist(fs_path) **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **返回** @@ -171,105 +159,97 @@ is_exist(fs_path) **代码示例** -.. code-block:: python +.. 
code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) ret = client.is_exist("hdfs:/test_hdfs_client") upload(local_path, fs_path) ''''''''' -上传本地文件至HADOOP文件系统。 +上传本地文件至 HADOOP 文件系统。 **参数** - **local_path** (str):本地文件路径。 - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.upload("test_hdfs_client", "hdfs:/test_hdfs_client") download(fs_path, local_path) ''''''''' -下载HADOOP文件至本地文件系统。 +下载 HADOOP 文件至本地文件系统。 **参数** - **local_path** (str):本地文件路径。 - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.download("hdfs:/test_hdfs_client", "./") touch(fs_path, exist_ok=True) ''''''''' -创建一个HADOOP文件。 +创建一个 HADOOP 文件。 **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 - **exist_ok** (bool):路径已存在时程序是否报错。若 `exist_ok = True`,则直接返回,反之则抛出文件存在的异常,默认不抛出异常。 **代码示例** -.. code-block:: python +.. 
code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.touch("hdfs:/test_hdfs_client") mv(fs_src_path, fs_dst_path, overwrite=False) ''''''''' -HADOOP系统文件移动。 +HADOOP 系统文件移动。 **参数** - **fs_src_path** (str):移动前源文件路径名。 - **fs_dst_path** (str):移动后目标文件路径名。 - **overwrite** (bool):若目标文件已存在,是否删除进行重写,默认不重写并抛出异常。 - + **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient @@ -278,17 +258,16 @@ HADOOP系统文件移动。 "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) client.mv("hdfs:/test_hdfs_client", "hdfs:/test_hdfs_client2") list_dirs(fs_path) ''''''''' -列出HADOOP文件路径下所有的子目录。 +列出 HADOOP 文件路径下所有的子目录。 **参数** - - **fs_path** (str): HADOOP文件路径。 + - **fs_path** (str): HADOOP 文件路径。 **返回** @@ -296,18 +275,13 @@ list_dirs(fs_path) **代码示例** -.. code-block:: python +.. code-block:: text from paddle.distributed.fleet.utils import HDFSClient - hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { "fs.default.name": "hdfs://xxx.hadoop.com:54310", "hadoop.job.ugi": "hello,hello123" } - client = HDFSClient(hadoop_home, configs) subdirs = client.list_dirs("hdfs:/test_hdfs_client") - - -