Skip to content

Commit

Permalink
fix for requeueing code and change minhash default
Browse files Browse the repository at this point in the history
  • Loading branch information
guipenedo committed May 6, 2024
1 parent d56d3c5 commit 22c739e
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/datatrove/executor/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
def requeue_handler(signum, _frame):
signame = signal.Signals(signum).name
logger.warning(f"Received signal {signum} ({signame}). Requeueing and exiting...")
- subprocess.run(["scontrol", "requeue", "${SLURM_JOB_ID}"])
+ subprocess.run(["scontrol", "requeue", os.environ.get("SLURM_JOB_ID")])
sys.exit(15)


Expand Down
2 changes: 1 addition & 1 deletion src/datatrove/pipeline/dedup/minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class MinhashConfig:
num_buckets: int = 14
hashes_per_bucket: int = 8

- use_64bit_hashes: bool = False
+ use_64bit_hashes: bool = True
seed: int = 1

norm_config: TextNormConfig = field(default_factory=TextNormConfig)
Expand Down

0 comments on commit 22c739e

Please sign in to comment.