Skip to content

Commit

Permalink
Merge branch 'main' into carmocca/tinystories-debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
carmocca authored Mar 27, 2024
2 parents a4efeda + af8a39d commit 3b525d5
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions tests/data/test_tinystories.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,12 +7,13 @@
from torch.utils._pytree import tree_map


-def fake_chunk(path, data):
-    def fn(_):
-        for story in data:
-            yield torch.tensor(story)
+def tokenize(data):
+    for story in data:
+        yield torch.tensor(story)

+
-    optimize(fn=fn, inputs=[None] * len(data), output_dir=str(path), num_workers=1, chunk_bytes="200MB")
+def fake_chunk(path, data):
+    optimize(fn=tokenize, inputs=[data] * len(data), output_dir=str(path), num_workers=1, chunk_bytes="200MB")


@pytest.mark.xfail(raises=IndexError, strict=True) # requires https://github.com/Lightning-AI/litdata/pull/77
Expand Down

0 comments on commit 3b525d5

Please sign in to comment.