Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[xdoctest][task 144-149] reformat example code with google style in text/datasets/* #56579

Merged
merged 2 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 41 additions & 28 deletions python/paddle/text/datasets/conll05.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,39 @@ class Conll05st(Dataset):

.. code-block:: python

import paddle
from paddle.text.datasets import Conll05st

class SimpleNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

def forward(self, pred_idx, mark, label):
return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)


conll05st = Conll05st()

for i in range(10):
pred_idx, mark, label= conll05st[i][-3:]
pred_idx = paddle.to_tensor(pred_idx)
mark = paddle.to_tensor(mark)
label = paddle.to_tensor(label)

model = SimpleNet()
pred_idx, mark, label= model(pred_idx, mark, label)
print(pred_idx, mark, label)
>>> import paddle
>>> from paddle.text.datasets import Conll05st

>>> class SimpleNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
...
... def forward(self, pred_idx, mark, label):
... return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)


>>> conll05st = Conll05st()

>>> for i in range(10):
... pred_idx, mark, label= conll05st[i][-3:]
... pred_idx = paddle.to_tensor(pred_idx)
... mark = paddle.to_tensor(mark)
... label = paddle.to_tensor(label)
...
... model = SimpleNet()
... pred_idx, mark, label= model(pred_idx, mark, label)
... print(pred_idx.item(), mark.item(), label.item())
>>> # doctest: +SKIP('label will change')
65840 5 1991
92560 5 3686
99120 5 457
121960 5 3945
4774 5 2378
14973 5 1938
36921 5 1090
26908 5 2329
62965 5 2968
97755 5 2674

"""

Expand Down Expand Up @@ -334,10 +345,11 @@ def get_dict(self):

.. code-block:: python

from paddle.text.datasets import Conll05st
>>> from paddle.text.datasets import Conll05st

>>> conll05st = Conll05st()
>>> word_dict, predicate_dict, label_dict = conll05st.get_dict()

conll05st = Conll05st()
word_dict, predicate_dict, label_dict = conll05st.get_dict()
"""
return self.word_dict, self.predicate_dict, self.label_dict

Expand All @@ -349,9 +361,10 @@ def get_embedding(self):

.. code-block:: python

from paddle.text.datasets import Conll05st
>>> from paddle.text.datasets import Conll05st

>>> conll05st = Conll05st()
>>> emb_file = conll05st.get_embedding()

conll05st = Conll05st()
emb_file = conll05st.get_embedding()
"""
return self.emb_file
53 changes: 32 additions & 21 deletions python/paddle/text/datasets/imdb.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

@megemini 为啥 timeout 了还是 successful 呢?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@megemini 为啥 timeout 了还是 successful 呢?

因为 #56573 这个 PR 还没合入 ~~~ 哈哈哈哈

旧代码的逻辑一直是 timeout 只作为提示;我觉得这样不合适,所以在 #56573 里干脆改为 failed、timeout、nocode 都算错误~

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nocode 的意思是不是就是说旧格式呢?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nocode 的意思是不是就是说旧格式呢?

nocode 目前有两种情况:

  • 旧格式的代码:旧格式代码在 xdoctest 看来只是一堆描述文字,所以会被判定为 nocode。
  • 语法错误:xdoctest 会 catch 语法错误,但是直接 pass,最后的结果与 nocode 一样。

所以,具体情况还要看日志,不过,以上两种情况应该都是不合理的,所以最终应该认定为检查失败。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

所以,具体情况还要看日志,不过,以上两种情况应该都是不合理的,所以最终应该认定为检查失败。

好的了解

@ooooo-create 调整一下两个 timeout 示例的时间吧

Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,38 @@ class Imdb(Dataset):

.. code-block:: python

import paddle
from paddle.text.datasets import Imdb

class SimpleNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

def forward(self, doc, label):
return paddle.sum(doc), label


imdb = Imdb(mode='train')

for i in range(10):
doc, label = imdb[i]
doc = paddle.to_tensor(doc)
label = paddle.to_tensor(label)

model = SimpleNet()
image, label = model(doc, label)
print(doc.shape, label.shape)
>>> # doctest: +TIMEOUT(75)
>>> import paddle
>>> from paddle.text.datasets import Imdb

>>> class SimpleNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
...
... def forward(self, doc, label):
... return paddle.sum(doc), label


>>> imdb = Imdb(mode='train')

>>> for i in range(10):
... doc, label = imdb[i]
... doc = paddle.to_tensor(doc)
... label = paddle.to_tensor(label)
...
... model = SimpleNet()
... image, label = model(doc, label)
... print(doc.shape, label.shape)
[121] [1]
[115] [1]
[386] [1]
[471] [1]
[585] [1]
[206] [1]
[221] [1]
[324] [1]
[166] [1]
[598] [1]

"""

Expand Down
52 changes: 31 additions & 21 deletions python/paddle/text/datasets/imikolov.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,37 @@ class Imikolov(Dataset):

.. code-block:: python

import paddle
from paddle.text.datasets import Imikolov

class SimpleNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

def forward(self, src, trg):
return paddle.sum(src), paddle.sum(trg)


imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2)

for i in range(10):
src, trg = imikolov[i]
src = paddle.to_tensor(src)
trg = paddle.to_tensor(trg)

model = SimpleNet()
src, trg = model(src, trg)
print(src.shape, trg.shape)
>>> import paddle
>>> from paddle.text.datasets import Imikolov

>>> class SimpleNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
...
... def forward(self, src, trg):
... return paddle.sum(src), paddle.sum(trg)


>>> imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2)

>>> for i in range(10):
... src, trg = imikolov[i]
... src = paddle.to_tensor(src)
... trg = paddle.to_tensor(trg)
...
... model = SimpleNet()
... src, trg = model(src, trg)
... print(src.item(), trg.item())
2076 2075
2076 2075
675 674
4 3
464 463
2076 2075
865 864
2076 2075
2076 2075
1793 1792

"""

Expand Down
55 changes: 33 additions & 22 deletions python/paddle/text/datasets/movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,28 +113,39 @@ class Movielens(Dataset):

.. code-block:: python

import paddle
from paddle.text.datasets import Movielens

class SimpleNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

def forward(self, category, title, rating):
return paddle.sum(category), paddle.sum(title), paddle.sum(rating)


movielens = Movielens(mode='train')

for i in range(10):
category, title, rating = movielens[i][-3:]
category = paddle.to_tensor(category)
title = paddle.to_tensor(title)
rating = paddle.to_tensor(rating)

model = SimpleNet()
category, title, rating = model(category, title, rating)
print(category.shape, title.shape, rating.shape)
>>> # doctest: +TIMEOUT(75)
>>> import paddle
>>> from paddle.text.datasets import Movielens

>>> class SimpleNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
...
... def forward(self, category, title, rating):
... return paddle.sum(category), paddle.sum(title), paddle.sum(rating)


>>> movielens = Movielens(mode='train')

>>> for i in range(10):
... category, title, rating = movielens[i][-3:]
... category = paddle.to_tensor(category)
... title = paddle.to_tensor(title)
... rating = paddle.to_tensor(rating)
...
... model = SimpleNet()
... category, title, rating = model(category, title, rating)
... print(category.shape, title.shape, rating.shape)
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []
[] [] []

"""

Expand Down
54 changes: 32 additions & 22 deletions python/paddle/text/datasets/uci_housing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,38 @@ class UCIHousing(Dataset):

.. code-block:: python

import paddle
from paddle.text.datasets import UCIHousing

class SimpleNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

def forward(self, feature, target):
return paddle.sum(feature), target

paddle.disable_static()

uci_housing = UCIHousing(mode='train')

for i in range(10):
feature, target = uci_housing[i]
feature = paddle.to_tensor(feature)
target = paddle.to_tensor(target)

model = SimpleNet()
feature, target = model(feature, target)
print(feature.numpy().shape, target.numpy())
>>> import paddle
>>> from paddle.text.datasets import UCIHousing

>>> class SimpleNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
...
... def forward(self, feature, target):
... return paddle.sum(feature), target

>>> paddle.disable_static()

>>> uci_housing = UCIHousing(mode='train')

>>> for i in range(10):
... feature, target = uci_housing[i]
... feature = paddle.to_tensor(feature)
... target = paddle.to_tensor(target)
...
... model = SimpleNet()
... feature, target = model(feature, target)
... print(feature.shape, target.numpy())
[] [24.]
[] [21.6]
[] [34.7]
[] [33.4]
[] [36.2]
[] [28.7]
[] [22.9]
[] [27.1]
[] [16.5]
[] [18.9]

"""

Expand Down
Loading