#!/usr/bin/env bash
#
# make_splits.sh - write the train/validation list files for one named split.
#
# Provenance: recovered from a whitespace-collapsed git patch (commit
# 41c02b8de384, misko, 2024-12-19, "add in script to make splits") that
# created spf/scripts/make_splits.sh.
#
# Files written into the splits directory, one path per line:
#   <name>_full.txt       every <wallarray root>/*/*.zarr entry, shuffled
#   <name>_notcircle.txt  lines of _full.txt that do not contain "rx_circle"
#   <name>_train.txt      first n% of _notcircle.txt, then the nov*.zarr rovers
#   <name>_val.txt        lines of _full.txt containing "rx_circle", then the
#                         rest of _notcircle.txt, then the dec*.zarr rovers
#
# The shuffle is unseeded, so every run produces a different split.
#
# Optional environment overrides (defaults are the original hard-coded values):
#   SPLITS_DIR      output directory                  (/mnt/md2/splits)
#   SPLIT_NAME      file-name prefix                  (dec19)
#   TRAIN_PERCENT   integer 0-100 share for training  (90)
#   WALLARRAY_ROOT  wall-array dataset root           (/mnt/ssd/2d_wallarray_v2_data)
#   ROVER_ROOT      merged rover dataset root         (/mnt/ssd/rovers/merged)
#
# Exit status: 0 on success, 1 on missing inputs or I/O failure, 2 on a bad
# TRAIN_PERCENT.

# `set -e` is deliberately not used: every step below is checked explicitly
# and failures are reported through main's return status.
set -uo pipefail

# Print a diagnostic on stderr.
err() {
  printf 'make_splits: %s\n' "$*" >&2
}

# Succeed when $1 names an existing entry (including a dangling symlink).
# Used on the first element of a glob expansion: an unmatched glob is left as
# the literal pattern, which does not exist.
glob_matched() {
  [[ -e "$1" || -L "$1" ]]
}

# grep wrapper: "no line selected" (status 1) is not an error here, only real
# grep failures (status > 1) are.
# Arguments: passed straight to grep.
select_lines() {
  local status=0
  grep "$@" || status=$?
  (( status <= 1 ))
}

# Split a list file by line count.
# Arguments:
#   $1 - list file to split
#   $2 - integer percentage (0-100) of lines that go to training
#   $3 - training file; overwritten with the first floor($2 % of lines) lines
#   $4 - validation file; the remaining lines are appended to it
# Returns: 0 on success, 1 if reading or writing fails.
split_list() {
  local list=$1 percent=$2 train_out=$3 val_out=$4
  local total cutoff

  total=$(wc -l < "$list") || return 1
  cutoff=$(( percent * total / 100 ))

  head -n "$cutoff" -- "$list" > "$train_out" || return 1
  tail -n "$(( total - cutoff ))" -- "$list" >> "$val_out" || return 1
}

# Build all split files. Reads the environment overrides listed in the file
# header; takes no arguments. All inputs are validated before the first
# output file is touched, so a missing dataset never leaves partial splits.
main() {
  local splits=${SPLITS_DIR:-/mnt/md2/splits}
  local name=${SPLIT_NAME:-dec19}
  local n=${TRAIN_PERCENT:-90} # 90% for train
  local wallarray_root=${WALLARRAY_ROOT:-/mnt/ssd/2d_wallarray_v2_data}
  local rover_root=${ROVER_ROOT:-/mnt/ssd/rovers/merged}

  if ! [[ "$n" =~ ^[0-9]+$ ]] || (( 10#$n > 100 )); then
    err "TRAIN_PERCENT must be an integer from 0 to 100, got '${n}'"
    return 2
  fi
  n=$(( 10#$n )) # force base 10 so values such as "08" are not read as octal

  # The directory is not created on demand: a missing one usually means the
  # target volume is not mounted.
  if [[ ! -d "$splits" || ! -w "$splits" ]]; then
    err "output directory '${splits}' is missing or not writable"
    return 1
  fi

  # Let the shell expand the globs instead of parsing `ls` output.
  local -a wallarray rover_train rover_val
  wallarray=("$wallarray_root"/*/*.zarr)
  rover_train=("$rover_root"/nov*.zarr)
  rover_val=("$rover_root"/dec*.zarr)

  if ! glob_matched "${wallarray[0]}"; then
    err "no entries match ${wallarray_root}/*/*.zarr"
    return 1
  fi
  if ! glob_matched "${rover_train[0]}"; then
    err "no entries match ${rover_root}/nov*.zarr"
    return 1
  fi
  if ! glob_matched "${rover_val[0]}"; then
    err "no entries match ${rover_root}/dec*.zarr"
    return 1
  fi

  local prefix="${splits%/}/${name}"
  local full="${prefix}_full.txt"
  local notcircle="${prefix}_notcircle.txt"
  local train="${prefix}_train.txt"
  local val="${prefix}_val.txt"

  if ! printf '%s\n' "${wallarray[@]}" | shuf > "$full"; then
    err "failed to write ${full}"
    return 1
  fi

  # Every rx_circle recording is held out for validation.
  if ! select_lines -F -- rx_circle "$full" > "$val"; then
    err "failed to write ${val}"
    return 1
  fi
  if ! select_lines -v -F -- rx_circle "$full" > "$notcircle"; then
    err "failed to write ${notcircle}"
    return 1
  fi

  # First n% of the shuffled non-circle list trains, the rest validates.
  if ! split_list "$notcircle" "$n" "$train" "$val"; then
    err "failed to split ${notcircle} into ${train} and ${val}"
    return 1
  fi

  # Rover recordings: nov* runs go to training, dec* runs to validation.
  if ! printf '%s\n' "${rover_train[@]}" >> "$train"; then
    err "failed to append rover runs to ${train}"
    return 1
  fi
  if ! printf '%s\n' "${rover_val[@]}" >> "$val"; then
    err "failed to append rover runs to ${val}"
    return 1
  fi
}

main