forked from TreeMaker/TreeMaker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
resubCondor.sh
executable file
·74 lines (60 loc) · 1.84 KB
/
resubCondor.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
#helper function
#contains LIST ITEM
#  LIST - string of names separated by IFS whitespace
#  ITEM - name to look for
#returns 0 if ITEM is exactly equal to one of the names in LIST, 1 otherwise
contains(){
	local haystack=$1 needle=$2
	local -a words=()
	local word
	#split LIST on IFS into an array; unlike an unquoted "for ... in $1", read
	#does not pathname-expand the words. -d '' reads across embedded newlines;
	#read then returns non-zero at end of input, which is expected here.
	read -r -d '' -a words <<< "$haystack" || true
	for word in "${words[@]}"; do
		#the right-hand side is quoted so ITEM is compared literally and is not
		#interpreted as a glob pattern
		[[ "$word" == "$needle" ]] && return 0
	done
	return 1
}
#assumptions in this script:
#all .jdl and .condor files are in the current directory
#all .jdl files are named jobExecCondor_JOBNAME.jdl
#all .condor files are named JOBNAME_$(Cluster).condor
#one .jdl file per JOBNAME (i.e. only "Queue 1" is used)
#positional arguments (an empty or missing argument falls back to its default):
# $1 -> TIME:    timestamp used below to select condor logs newer than it
# $2 -> OUTNAME: file the resubmission commands are written to
#old default for TIME was 24 hrs ago:
#TIME=$(date "--date=$(date) -1 day" +"%Y-%m-%d %H:%M")
#default is beginning of time
TIME="${1:-1970-01-01 00:00}"
#default is resub.sh
OUTNAME="${2:-resub.sh}"
#setup resub script
#write the shebang line followed by a blank line, truncating any previous file.
#OUTNAME is quoted everywhere below so that a name containing whitespace does
#not cause an "ambiguous redirect"; stop early if the file cannot be written.
if ! printf '%s\n' '#!/bin/bash' '' > "${OUTNAME}"; then
	echo "Cannot write resubmission script: ${OUTNAME}" >&2
	exit 1
fi
#keep track of jobs that have been checked (whitespace-separated JOBNAMEs)
joblist=""
#search for "return value" in condor logs newer than TIME - denotes finished job
#or "abort" - denotes removed job
#grep is run via find -exec so it is never started without file operands: when
#no log is newer than TIME, a bare "grep -l PATTERN" would block reading stdin.
#the logs are still passed to find as a glob so they are visited in glob order;
#the ./ prefix keeps names starting with "-" from being parsed as options.
while IFS= read -r file; do
	#skip job if it finished successfully - return value 0
	if grep -qw "return value 0" -- "${file}"; then
		continue
	fi
	#JOBNAME is the log file name with the last "_"-separated field removed
	#(i.e. without the trailing _$(Cluster).condor)
	logname=${file##*/}
	base=${logname%_*}
	#skip job if it has already been checked
	if contains "$joblist" "$base"; then
		continue
	fi
	#check for newer logs that succeeded
	newerstatus=""
	while IFS= read -r newerfile; do
		#the glob below also matches logs of other jobs whose name merely starts
		#with "${base}_"; only a log whose own JOBNAME equals base counts
		newername=${newerfile##*/}
		if [[ "${newername%_*}" == "$base" ]]; then
			newerstatus=${newerfile}
		fi
	done < <(find ./"${base}"_*.condor -newer "${file}" -exec grep -lw "return value 0" {} +)
	#if none were found, the job failed
	if [[ -z "$newerstatus" ]]; then
		#optional: output return value for failed job as comment in resub script
		#echo "#"$(grep -w "return value" "${file}") >> "${OUTNAME}"
		echo "condor_submit jobExecCondor_${base}.jdl" >> "${OUTNAME}"
	fi
	#append jobname to list
	joblist="${base} ${joblist}"
done < <(find ./*.condor -newermt "${TIME}" -exec grep -l "return value\|abort" {} +)
echo "Job resubmission script created: ${OUTNAME}"
chmod +x -- "${OUTNAME}"