diff --git a/.gitignore b/.gitignore
index 1153c526..74d550bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ external
 *.egg-info
 notebooks
 final_outputs
-.cache*
\ No newline at end of file
+.cache*
+data_subset/**
\ No newline at end of file
diff --git a/commands.txt b/commands.txt
new file mode 100644
index 00000000..3c702478
--- /dev/null
+++ b/commands.txt
@@ -0,0 +1,24 @@
+# .bashrc
+export PATH=$PATH:~/.local/bin
+
+export XRT_TPU_CONFIG="localservice;0;localhost:51011"
+
+export XLA_USE_BF16=0
+
+export TPU_NUM_DEVICES=8
+
+export HF_DATASETS_CACHE=/dev/shm/cache
+
+# data
+gcloud auth login
+gsutil -m cp -r gs://trc-transfer-data/sentence/data/eval.pth data/
+
+# cleanup
+pkill -e python3
+(until no more)
+or
+watch -n1 pkill -e python3
+
+# for debugging:
+
+os.environ["PJRT_DEVICE"] = "None"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..bc523862
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+transformers==4.29.2
+accelerate==0.19.0
+datasets
+pysbd
+wandb
+h5py
+nltk
+spacy
+ersatz
+iso-639
+scikit-learn==1.2.2
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 00000000..b9603cef
--- /dev/null
+++ b/run.sh
@@ -0,0 +1 @@
+python3 $HOME/transformers/examples/pytorch/xla_spawn.py --num_cores ${TPU_NUM_DEVICES} wtpsplit/train/train.py $1
\ No newline at end of file
diff --git a/wtpsplit/evaluation/__init__.py b/wtpsplit/evaluation/__init__.py
index 1a8194ee..0dc83874 100644
--- a/wtpsplit/evaluation/__init__.py
+++ b/wtpsplit/evaluation/__init__.py
@@ -1,5 +1,6 @@
 import subprocess
 import unicodedata
+import os
 
 import numpy as np
 import regex as re
@@ -69,7 +70,7 @@ def train_mixture(lang_code, original_train_x, train_y, n_subsample=None, featur
 
         train_x = train_x.float()
 
-        clf = linear_model.LogisticRegression(max_iter=10_000)
+        clf = linear_model.LogisticRegression(max_iter=10_000, random_state=0)
         clf.fit(train_x, train_y)
         preds = clf.predict_proba(train_x)[:, 1]
 
@@ -229,6 +230,14 @@ def ersatz_sentencize(
 ):
     if lang_code not in ERSATZ_LANGUAGES:
         raise LanguageError(f"ersatz does not support {lang_code}")
+    
+    # check if infile parent dir exists, if not, create it
+    if not os.path.exists(os.path.dirname(infile)):
+        os.makedirs(os.path.dirname(infile))
+    # check if outfile parent dir exists, if not, create it
+    if not os.path.exists(os.path.dirname(outfile)):
+        os.makedirs(os.path.dirname(outfile))
+        
     open(infile, "w").write(text)
 
     subprocess.check_output(
diff --git a/wtpsplit/evaluation/intrinsic.py b/wtpsplit/evaluation/intrinsic.py
index 1bdfb9ed..f4a6aab1 100644
--- a/wtpsplit/evaluation/intrinsic.py
+++ b/wtpsplit/evaluation/intrinsic.py
@@ -32,9 +32,9 @@ class Args:
     #        }
     #    }
     # }
-    eval_data_path: str = "data/eval_new.pth"
+    eval_data_path: str = "data/eval.pth"
     valid_text_path: str = None#"data/sentence/valid.parquet"
-    device: str = "cuda"
+    device: str = "xla:1"
     block_size: int = 512
     stride: int = 64
     batch_size: int = 32
@@ -70,6 +70,7 @@ def load_or_compute_logits(args, model, eval_data, valid_data=None, max_n_train_
                     block_size=args.block_size,
                     batch_size=args.batch_size,
                     pad_last_batch=True,
+                    verbose=True,
                 )[0]
                 lang_group.create_dataset("valid", data=valid_logits)
 
@@ -92,6 +93,7 @@ def load_or_compute_logits(args, model, eval_data, valid_data=None, max_n_train_
                         block_size=args.block_size,
                         batch_size=args.batch_size,
                         pad_last_batch=True,
+                        verbose=True,
                     )[0]
                     test_labels = get_labels(lang_code, test_sentences, after_space=False)
 
diff --git a/wtpsplit/train/evaluate.py b/wtpsplit/train/evaluate.py
index 9cb21e35..4fd6bbe4 100644
--- a/wtpsplit/train/evaluate.py
+++ b/wtpsplit/train/evaluate.py
@@ -74,6 +74,7 @@ def evaluate_sentence(
         stride=stride,
         block_size=block_size,
         batch_size=batch_size,
+        verbose=True,
     )[0]
 
     true_end_indices = np.cumsum(np.array([len(s) for s in sentences])) + np.arange(len(sentences)) * len(separator)