Skip to content

Commit

Permalink
fix default compression
Browse files (browse the repository at this point in the history)
  • Loading branch information
guipenedo committed Dec 26, 2024
1 parent 61d4f86 commit 47379fd
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/datatrove/pipeline/writers/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import random
import tempfile
import time
-from typing import Callable
+from typing import Callable, Literal

from huggingface_hub import (
CommitOperationAdd,
Expand Down Expand Up @@ -31,7 +31,7 @@ def __init__(
private: bool = True,
local_working_dir: DataFolderLike | None = None,
output_filename: str = None,
-compression: str | None = None,
+compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] | None = "snappy",
adapter: Callable = None,
cleanup: bool = True,
expand_metadata: bool = True,
Expand Down
2 changes: 1 addition & 1 deletion src/datatrove/pipeline/writers/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(
self,
output_folder: DataFolderLike,
output_filename: str = None,
-compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] | None = None,
+compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] | None = "snappy",
adapter: Callable = None,
batch_size: int = 1000,
expand_metadata: bool = False,
Expand Down

0 comments on commit 47379fd

Please sign in to comment.