You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
## Create one directory where we put all genes
## found among all genomes. The outputs are
## unaligned fasta files.
##
## Uses PROJECTDIR, SRCDIR and GENESDIR, which must be set beforehand.
## Everything inside the braces is skipped when PROJECTDIR cannot be
## entered, so that neither find nor the in-place sed below ever acts
## relative to an unintended working directory.
cd "${PROJECTDIR}" && {
  ## Collect every directory below ./out as a NUL-delimited list, so that
  ## a directory name containing whitespace or glob characters still
  ## reaches gather_genes.pl as exactly one argument.
  genome_dirs=()
  while IFS= read -r -d '' genome_dir; do
    genome_dirs+=("${genome_dir}")
  done < <(find out -mindepth 1 -type d -print0)
  perl "${SRCDIR}"/gather_genes.pl -o "${GENESDIR}" "${genome_dirs[@]}"

  ## Remove hmmer description from fasta headers.
  ## (The expression has no address: on every line of every file, the
  ## text from the first space onwards is dropped.)
  sed -i 's/ .*//' "${GENESDIR}"/*.fas
}
## Add outgroup (optional)
# Note: this step was used for a version of the Jarvis data.
#
# Need to look for
# "ACACH Rifleman Acanthisitta_chloris"
# "MANVI Golden-collared_Manakin Manacus_vitellinus"
#
# Also: adjust the file ending for the reference data
# (now: '.sate.removed.intron.noout.aligned-allgap.filtered.fas')
#
# For every gene file ${GENESDIR}/<nr>.fas this writes
# ${ALIDIR}/<nr>.outgrp.input, which holds the output of the outgroup
# pipeline (run on the reference file of the same <nr>) followed by the
# gene file itself, passed through fasta_unwrap.pl and fasta_wrap.pl.
#
# Uses PROJECTDIR, ALIDIR, GENESDIR, REFERENCEDIR and SRCDIR; grepfasta.pl
# must be on PATH. Nothing is done if PROJECTDIR cannot be entered or
# ALIDIR cannot be created.
cd "${PROJECTDIR}" && mkdir -p "${ALIDIR}" && {
  # Temporary list with the two outgroup labels, one per line.
  outg="${ALIDIR}/tmp.outgroups.txt"
  printf '%s\n' ACACH MANVI > "${outg}"

  for f in "${GENESDIR}"/*.fas ; do
    # When no gene file exists the glob stays a literal pattern; skip it
    # instead of producing a bogus '*.outgrp.input' file.
    [[ -e "${f}" ]] || continue

    nr=$(basename "${f}" .fas)
    reffas="${REFERENCEDIR}/fasta_files/${nr}.sate.removed.intron.noout.aligned-allgap.filtered.fas"
    #reffas="${REFERENCEDIR}/fasta_files/${nr}.fas"

    # grepfasta.pl receives the label list and the reference file
    # (presumably it extracts the matching records -- confirm against
    # the tool); empty lines are deleted before remove_gaps_in_fasta.pl.
    outgseq="${ALIDIR}/tmp.${nr}.outgrp.seq"
    grepfasta.pl -f "${outg}" "${reffas}" \
      | sed '/^$/d' \
      | "${SRCDIR}/remove_gaps_in_fasta.pl" > "${outgseq}"

    # Outgroup sequences go first, then the gene's own sequences.
    aliin="${ALIDIR}/${nr}.outgrp.input"
    cat "${outgseq}" "${f}" \
      | "${SRCDIR}/fasta_unwrap.pl" \
      | "${SRCDIR}/fasta_wrap.pl" > "${aliin}"

    rm "${outgseq}"
  done

  rm "${outg}"
}
## Align gene files
# Try mafft (MAFFT v7.310)
#
# Runs mafft on every *.input file in ALIDIR and writes the result next to
# it as <name>.mafft.ali. Uses ALIDIR and NCPU (passed to --thread).
# The loop is skipped entirely if ALIDIR cannot be entered, so the glob is
# never evaluated in some other directory.
cd "${ALIDIR}" && {
  time for f in *.input ; do
    # An unmatched glob stays literal; do not hand '*.input' to mafft.
    [[ -e "${f}" ]] || continue
    # ${f} comes from a glob in the current directory and therefore has no
    # directory part, so only the suffix needs to be swapped.
    aliout="${ALIDIR}/${f%.input}.mafft.ali"
    mafft --auto --thread "${NCPU}" "${f}" > "${aliout}"
  done
}
# real 55m22,108s
# user 231m26,448s
# sys 48m37,191s
# Run iqtree once per *.mafft.ali-odseq-filtered.degap.fas file in ALIDIR.
# The working directory is TREEDIR and the output prefix given to -pre is
# the relative name "iqtree.<basename without .fas>".
# Uses TREEDIR, ALIDIR and NCPU (upper bound for -nt AUTO via -ntmax).
#
# NOTE(review): no step visible here creates the
# '*.mafft.ali-odseq-filtered.degap.fas' files (the alignment step writes
# '*.mafft.ali') -- confirm where the filtering/degapping happens.
#
# Nothing is run if TREEDIR cannot be created or entered; otherwise iqtree
# would write its output into whatever directory happens to be current.
mkdir -p "${TREEDIR}" && cd "${TREEDIR}" && {
  time for f in "${ALIDIR}"/*.mafft.ali-odseq-filtered.degap.fas ; do
    # An unmatched glob stays literal; skip it rather than calling iqtree
    # on a file that does not exist.
    [[ -e "${f}" ]] || continue
    g=$(basename "${f}" .fas)
    # Progress output: alignment name, then the output prefix.
    printf '%s\n' "${g}"
    printf '%s\n' "iqtree.${g}"
    iqtree -s "${f}" \
      -nt AUTO \
      -ntmax "${NCPU}" \
      -m TEST \
      -pre "iqtree.${g}"
  done
}
# real 1360m25,347s
# user 2766m8,775s
# sys 7m35,787s
## ASTRAL III
# Concatenate all '*.mafft.ali-odseq-filtered.degap.treefile' files from
# TREEDIR into one file inside ASTRALDIR and run astral on it.
# Uses ASTRALDIR and TREEDIR. Nothing is run if ASTRALDIR cannot be
# created or entered.
mkdir -p "${ASTRALDIR}" && cd "${ASTRALDIR}" && {
  indata=all.mafft.ali-odseq-filtered.degap.trees
  outdata=all.mafft.ali-odseq-filtered.degap.astral

  # Expand the glob once into an array so the "no tree files" case can be
  # detected before anything is written.
  treefiles=("${TREEDIR}"/*.mafft.ali-odseq-filtered.degap.treefile)
  if [[ -e "${treefiles[0]}" ]]; then
    cat "${treefiles[@]}" > "${indata}"
    time astral -i "${indata}" -o "${outdata}"
  else
    # Without this check cat would fail, leave an empty input file behind
    # and astral would still be started on it.
    printf 'No tree files found in %s\n' "${TREEDIR}" >&2
    false  # leave a non-zero status for the caller
  fi
}
# real 0m18,594s
# user 0m28,208s
# sys 0m0,312s