diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index dc6d27f33e236..e863620ed305e 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -67,28 +67,39 @@ class Conll05st(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import Conll05st - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, pred_idx, mark, label): - return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) - - - conll05st = Conll05st() - - for i in range(10): - pred_idx, mark, label= conll05st[i][-3:] - pred_idx = paddle.to_tensor(pred_idx) - mark = paddle.to_tensor(mark) - label = paddle.to_tensor(label) - - model = SimpleNet() - pred_idx, mark, label= model(pred_idx, mark, label) - print(pred_idx, mark, label) + >>> import paddle + >>> from paddle.text.datasets import Conll05st + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, pred_idx, mark, label): + ... return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) + + + >>> conll05st = Conll05st() + + >>> for i in range(10): + ... pred_idx, mark, label= conll05st[i][-3:] + ... pred_idx = paddle.to_tensor(pred_idx) + ... mark = paddle.to_tensor(mark) + ... label = paddle.to_tensor(label) + ... + ... model = SimpleNet() + ... pred_idx, mark, label= model(pred_idx, mark, label) + ... print(pred_idx.item(), mark.item(), label.item()) + >>> # doctest: +SKIP('label will change') + 65840 5 1991 + 92560 5 3686 + 99120 5 457 + 121960 5 3945 + 4774 5 2378 + 14973 5 1938 + 36921 5 1090 + 26908 5 2329 + 62965 5 2968 + 97755 5 2674 """ @@ -334,10 +345,11 @@ def get_dict(self): .. code-block:: python - from paddle.text.datasets import Conll05st + >>> from paddle.text.datasets import Conll05st + + >>> conll05st = Conll05st() + >>> word_dict, predicate_dict, label_dict = conll05st.get_dict() - conll05st = Conll05st() - word_dict, predicate_dict, label_dict = conll05st.get_dict() """ return self.word_dict, self.predicate_dict, self.label_dict @@ -349,9 +361,10 @@ def get_embedding(self): .. code-block:: python - from paddle.text.datasets import Conll05st + >>> from paddle.text.datasets import Conll05st + + >>> conll05st = Conll05st() + >>> emb_file = conll05st.get_embedding() - conll05st = Conll05st() - emb_file = conll05st.get_embedding() """ return self.emb_file diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index d34d2e8ecf1d1..0e4430fdfbee8 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -47,27 +47,38 @@ class Imdb(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import Imdb - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, doc, label): - return paddle.sum(doc), label - - - imdb = Imdb(mode='train') - - for i in range(10): - doc, label = imdb[i] - doc = paddle.to_tensor(doc) - label = paddle.to_tensor(label) - - model = SimpleNet() - image, label = model(doc, label) - print(doc.shape, label.shape) + >>> # doctest: +TIMEOUT(75) + >>> import paddle + >>> from paddle.text.datasets import Imdb + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, doc, label): + ... return paddle.sum(doc), label + + + >>> imdb = Imdb(mode='train') + + >>> for i in range(10): + ... doc, label = imdb[i] + ... doc = paddle.to_tensor(doc) + ... label = paddle.to_tensor(label) + ... + ... model = SimpleNet() + ... image, label = model(doc, label) + ... print(doc.shape, label.shape) + [121] [1] + [115] [1] + [386] [1] + [471] [1] + [585] [1] + [206] [1] + [221] [1] + [324] [1] + [166] [1] + [598] [1] """ diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py index 5aead1c2d9cf5..b70486b9c71df 100644 --- a/python/paddle/text/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -47,27 +47,37 @@ class Imikolov(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import Imikolov - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, src, trg): - return paddle.sum(src), paddle.sum(trg) - - - imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) - - for i in range(10): - src, trg = imikolov[i] - src = paddle.to_tensor(src) - trg = paddle.to_tensor(trg) - - model = SimpleNet() - src, trg = model(src, trg) - print(src.shape, trg.shape) + >>> import paddle + >>> from paddle.text.datasets import Imikolov + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, src, trg): + ... return paddle.sum(src), paddle.sum(trg) + + + >>> imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) + + >>> for i in range(10): + ... src, trg = imikolov[i] + ... src = paddle.to_tensor(src) + ... trg = paddle.to_tensor(trg) + ... + ... model = SimpleNet() + ... src, trg = model(src, trg) + ... print(src.item(), trg.item()) + 2076 2075 + 2076 2075 + 675 674 + 4 3 + 464 463 + 2076 2075 + 865 864 + 2076 2075 + 2076 2075 + 1793 1792 """ diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index 00eee8e7784f3..55572d33f8387 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -113,28 +113,39 @@ class Movielens(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import Movielens - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, category, title, rating): - return paddle.sum(category), paddle.sum(title), paddle.sum(rating) - - - movielens = Movielens(mode='train') - - for i in range(10): - category, title, rating = movielens[i][-3:] - category = paddle.to_tensor(category) - title = paddle.to_tensor(title) - rating = paddle.to_tensor(rating) - - model = SimpleNet() - category, title, rating = model(category, title, rating) - print(category.shape, title.shape, rating.shape) + >>> # doctest: +TIMEOUT(75) + >>> import paddle + >>> from paddle.text.datasets import Movielens + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, category, title, rating): + ... return paddle.sum(category), paddle.sum(title), paddle.sum(rating) + + + >>> movielens = Movielens(mode='train') + + >>> for i in range(10): + ... category, title, rating = movielens[i][-3:] + ... category = paddle.to_tensor(category) + ... title = paddle.to_tensor(title) + ... rating = paddle.to_tensor(rating) + ... + ... model = SimpleNet() + ... category, title, rating = model(category, title, rating) + ... print(category.shape, title.shape, rating.shape) + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] + [] [] [] """ diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index dee0e3c98bca6..42854bc81902c 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -58,28 +58,38 @@ class UCIHousing(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import UCIHousing - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, feature, target): - return paddle.sum(feature), target - - paddle.disable_static() - - uci_housing = UCIHousing(mode='train') - - for i in range(10): - feature, target = uci_housing[i] - feature = paddle.to_tensor(feature) - target = paddle.to_tensor(target) - - model = SimpleNet() - feature, target = model(feature, target) - print(feature.numpy().shape, target.numpy()) + >>> import paddle + >>> from paddle.text.datasets import UCIHousing + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, feature, target): + ... return paddle.sum(feature), target + + >>> paddle.disable_static() + + >>> uci_housing = UCIHousing(mode='train') + + >>> for i in range(10): + ... feature, target = uci_housing[i] + ... feature = paddle.to_tensor(feature) + ... target = paddle.to_tensor(target) + ... + ... model = SimpleNet() + ... feature, target = model(feature, target) + ... print(feature.shape, target.numpy()) + [] [24.] + [] [21.6] + [] [34.7] + [] [33.4] + [] [36.2] + [] [28.7] + [] [22.9] + [] [27.1] + [] [16.5] + [] [18.9] """ diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index 3b109ba3736b0..a2d7c9ebe5871 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -61,27 +61,37 @@ class WMT14(Dataset): .. code-block:: python - import paddle - from paddle.text.datasets import WMT14 - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, src_ids, trg_ids, trg_ids_next): - return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) - - wmt14 = WMT14(mode='train', dict_size=50) - - for i in range(10): - src_ids, trg_ids, trg_ids_next = wmt14[i] - src_ids = paddle.to_tensor(src_ids) - trg_ids = paddle.to_tensor(trg_ids) - trg_ids_next = paddle.to_tensor(trg_ids_next) - - model = SimpleNet() - src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) - print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) + >>> import paddle + >>> from paddle.text.datasets import WMT14 + + >>> class SimpleNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... + ... def forward(self, src_ids, trg_ids, trg_ids_next): + ... return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) + + >>> wmt14 = WMT14(mode='train', dict_size=50) + + >>> for i in range(10): + ... src_ids, trg_ids, trg_ids_next = wmt14[i] + ... src_ids = paddle.to_tensor(src_ids) + ... trg_ids = paddle.to_tensor(trg_ids) + ... trg_ids_next = paddle.to_tensor(trg_ids_next) + ... + ... model = SimpleNet() + ... src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) + ... print(src_ids.item(), trg_ids.item(), trg_ids_next.item()) + 91 38 39 + 123 81 82 + 556 229 230 + 182 26 27 + 447 242 243 + 116 110 111 + 403 288 289 + 258 221 222 + 136 34 35 + 281 136 137 """ @@ -196,9 +206,10 @@ def get_dict(self, reverse=False): .. code-block:: python - from paddle.text.datasets import WMT14 - wmt14 = WMT14(mode='train', dict_size=50) - src_dict, trg_dict = wmt14.get_dict() + >>> from paddle.text.datasets import WMT14 + >>> wmt14 = WMT14(mode='train', dict_size=50) + >>> src_dict, trg_dict = wmt14.get_dict() + """ src_dict, trg_dict = self.src_dict, self.trg_dict if reverse: