diff --git a/earlGrey b/earlGrey index 48913dd..d40b8fe 100644 --- a/earlGrey +++ b/earlGrey @@ -205,14 +205,14 @@ calcDivRL() mergeRep() { mkdir ${OUTDIR}/${species}_mergedRepeats/looseMerge - ${SCRIPT_DIR}/rcMergeRepeatsLoose -f $genome -s $species -d ${OUTDIR}/${species}_mergedRepeats/looseMerge -u ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).out -q ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).tbl -t $ProcNum -b ${dict} + ${SCRIPT_DIR}/rcMergeRepeatsLoose -f $genome -s $species -d ${OUTDIR}/${species}_mergedRepeats/looseMerge -u ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).out -q ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).tbl -t $ProcNum -b ${dict} -m $margin if [ -f "${OUTDIR}/${species}_mergedRepeats/looseMerge/${species}.filteredRepeats.bed" ]; then awk '{OFS="\t"}{print $1, $2, $3, $4, $5, $6, $7, $8, toupper($9)}' ${OUTDIR}/${species}_mergedRepeats/looseMerge/${species}.filteredRepeats.gff > ${OUTDIR}/${species}_mergedRepeats/looseMerge/${species}.filteredRepeats.gff.1 && mv ${OUTDIR}/${species}_mergedRepeats/looseMerge/${species}.filteredRepeats.gff{.1,} fi if [ ! -f "${OUTDIR}/${species}_mergedRepeats/looseMerge/${species}.filteredRepeats.bed" ]; then echo "ERROR: loose merge defragmentation failed, trying strict merge..." cd ${OUTDIR}/${species}_mergedRepeats/ - ${SCRIPT_DIR}/rcMergeRepeats -f $genome -s $species -d ${OUTDIR}/${species}_mergedRepeats/ -u ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).out -q ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).tbl -t $ProcNum -b ${dict} + ${SCRIPT_DIR}/rcMergeRepeats -f $genome -s $species -d ${OUTDIR}/${species}_mergedRepeats/ -u ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).out -q ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).tbl -t $ProcNum -b ${dict} -m $margin if [ ! -f "${OUTDIR}/${species}_mergedRepeats/${species}.filteredRepeats.bed" ]; then echo "ERROR: strict merge also failed, check ${OUTDIR}/${species}_RepeatMasker_Against_Custom_Library/$(basename $genome).out looks as expected" exit 2 diff --git a/scripts/rcMergeRepeats b/scripts/rcMergeRepeats index 6e9c7ec..051aa6e 100644 --- a/scripts/rcMergeRepeats +++ b/scripts/rcMergeRepeats @@ -2,11 +2,11 @@ usage() { - echo "Usage: rcMergeRepeats -f genome.fasta -s species -d output_directory -u RepeatMasker.out -q RepeatMasker.tbl -c repeatcraft.cfg -t ThreadNum -b dictionary | -h" + echo "Usage: rcMergeRepeats -f genome.fasta -s species -d output_directory -u RepeatMasker.out -q RepeatMasker.tbl -c repeatcraft.cfg -t ThreadNum -b dictionary -m yes|no | -h" } -while getopts f:s:d:u:q:t:b:h option +while getopts f:s:d:u:q:t:b:m:h option do case "${option}" in @@ -17,6 +17,7 @@ while getopts f:s:d:u:q:t:b:h option q) table=${OPTARG};; t) threads=${OPTARG};; b) dict=${OPTARG};; + m) cutoff=${OPTARG};; h) usage; exit;; esac done @@ -57,7 +58,7 @@ mergeRepeats() sort -s -k1,1 ${dir}/${spe}.rmerge.gff > ${dir}/${spe}.rmerge.gff.sorted #filter-gff overlap -v --progress -s 1 -d -t -c length -a length ${dir}/${spe}.rmerge.gff.sorted ${dir}/${spe}.rmerge.gff.filtered Rscript ${SCRIPT_DIR}/filteringOverlappingRepeats.R ${dir}/${spe}.rmerge.gff.sorted ${dir}/${spe}.rmerge.gff.filtered - Rscript ${SCRIPT_DIR}/mergeRepeats.R ${dir}/${spe}.rmerge.gff.filtered ${dir}/${spe}.mergedRepeats.bed $genSize ${dir}/${spe}.mergedRepeats.revisedTable ${dir}/${spe}.filteredRepeats.bed ${dir}/${spe}.filteredRepeats.summary $margin + Rscript ${SCRIPT_DIR}/mergeRepeats.R ${dir}/${spe}.rmerge.gff.filtered ${dir}/${spe}.mergedRepeats.bed $genSize ${dir}/${spe}.mergedRepeats.revisedTable ${dir}/${spe}.filteredRepeats.bed ${dir}/${spe}.filteredRepeats.summary $cutoff } diff --git a/scripts/rcMergeRepeatsLoose b/scripts/rcMergeRepeatsLoose index 67f77c2..680f402 100644 --- a/scripts/rcMergeRepeatsLoose +++ b/scripts/rcMergeRepeatsLoose @@ -2,11 +2,11 @@ usage() { - echo "Usage: rcMergeRepeats -f genome.fasta -s species -d output_directory -u RepeatMasker.out -q RepeatMasker.tbl -c repeatcraft.cfg -t ThreadNum -b dictionary | -h" + echo "Usage: rcMergeRepeats -f genome.fasta -s species -d output_directory -u RepeatMasker.out -q RepeatMasker.tbl -c repeatcraft.cfg -t ThreadNum -b dictionary -m yes|no | -h" } -while getopts f:s:d:u:q:t:b:h option +while getopts f:s:d:u:q:t:b:m:h option do case "${option}" in @@ -17,6 +17,7 @@ while getopts f:s:d:u:q:t:b:h option q) table=${OPTARG};; t) threads=${OPTARG};; b) dict=${OPTARG};; + m) cutoff=${OPTARG};; h) usage; exit;; esac done @@ -56,7 +57,7 @@ mergeRepeats() { sort -s -k1,1 ${dir}/${spe}.rmerge.gff > ${dir}/${spe}.rmerge.gff.sorted Rscript ${SCRIPT_DIR}/filteringOverlappingRepeats.R ${dir}/${spe}.rmerge.gff.sorted ${dir}/${spe}.rmerge.gff.filtered - Rscript ${SCRIPT_DIR}/mergeRepeats.R ${dir}/${spe}.rmerge.gff.filtered ${dir}/${spe}.mergedRepeats.bed $genSize ${dir}/${spe}.mergedRepeats.revisedTable ${dir}/${spe}.filteredRepeats.bed ${dir}/${spe}.filteredRepeats.summary $margin + Rscript ${SCRIPT_DIR}/mergeRepeats.R ${dir}/${spe}.rmerge.gff.filtered ${dir}/${spe}.mergedRepeats.bed $genSize ${dir}/${spe}.mergedRepeats.revisedTable ${dir}/${spe}.filteredRepeats.bed ${dir}/${spe}.filteredRepeats.summary $cutoff }