From 22c739eb955f7cd0a8d4c14a67c7dfaae2a5619e Mon Sep 17 00:00:00 2001 From: guipenedo Date: Mon, 6 May 2024 12:19:03 +0200 Subject: [PATCH] fix for requeueing code and change minhash default --- src/datatrove/executor/slurm.py | 2 +- src/datatrove/pipeline/dedup/minhash.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datatrove/executor/slurm.py b/src/datatrove/executor/slurm.py index 17495721..91c86b71 100644 --- a/src/datatrove/executor/slurm.py +++ b/src/datatrove/executor/slurm.py @@ -25,7 +25,7 @@ def requeue_handler(signum, _frame): signame = signal.Signals(signum).name logger.warning(f"Received signal {signum} ({signame}). Requeueing and exiting...") - subprocess.run(["scontrol", "requeue", "${SLURM_JOB_ID}"]) + subprocess.run(["scontrol", "requeue", os.environ.get("SLURM_JOB_ID")]) sys.exit(15) diff --git a/src/datatrove/pipeline/dedup/minhash.py b/src/datatrove/pipeline/dedup/minhash.py index 69be1780..5d2035b3 100644 --- a/src/datatrove/pipeline/dedup/minhash.py +++ b/src/datatrove/pipeline/dedup/minhash.py @@ -50,7 +50,7 @@ class MinhashConfig: num_buckets: int = 14 hashes_per_bucket: int = 8 - use_64bit_hashes: bool = False + use_64bit_hashes: bool = True seed: int = 1 norm_config: TextNormConfig = field(default_factory=TextNormConfig)