# create and move to a working dir
# NOTE(review): the commands that create and enter the working directory are
# not visible in this excerpt; the relative paths below require the current
# directory to sit next to ../rnd1 and ../rnd3.
# Copy every GFF and FASTA file from ../rnd3 into the current directory.
cp \
  ../rnd3/*.gff \
  ../rnd3/*.fa \
  .
# Copy the repeats, est2genome and protein2genome GFF files from ../rnd1.
cp \
  ../rnd1/agloss-rnd1_repeats.gff \
  ../rnd1/agloss-rnd1_est2genome.gff \
  ../rnd1/agloss-rnd1_protein2genome.gff \
  .
# NOTE(review): the backslash below is followed by a space, so the shell reads
# it as an escaped space (a one-character argument) and not as a line
# continuation. The rename expression and the file arguments are not visible
# in this excerpt — confirm against the full script before running.
rename \ # Perl version, not Linux util
# map new ids to MAKER names
# NUM_SEQS holds the number of distinct attribute values found in column 9 of
# agloss_noSeq.gff. The pipeline:
#   1. drops every line that starts with '#',
#   2. keeps the 9th tab-delimited field,
#   3. puts each ';'-separated attribute on its own line,
#   4. keeps the second '='-delimited field of each of those lines,
#   5. de-duplicates the result and counts the remaining lines.
# NOTE(review): the consumer of NUM_SEQS is not visible in this excerpt.
NUM_SEQS=$(grep -Ev '^#' agloss_noSeq.gff \
  | cut -d $'\t' -f 9 | tr ';' '\n' \
  | cut -d '=' -f 2 | sort -u | wc -l)
# NOTE(review): the option line below is a fragment. The command it belongs to
# is not visible in this excerpt, and its trailing backslash splices the
# comment line that follows onto it, so any further arguments are missing
# here — confirm against the full script.
--prefix=Albula-glossodonta \
# rename based on new ids
# Each input is first copied to a "*_renamed" sibling (cp -f overwrites an
# existing copy); the copy, not the original, is then handed to the
# map_*_ids tool together with identifiers_map.tsv.
# NOTE(review): FASTA and GFF are set outside this excerpt (presumably by
# enclosing loops), and identifiers_map.tsv is presumably written by the
# fragmentary command above — confirm both.
cp -f "${FASTA}" "${FASTA%.fa}_renamed.fa"
map_fasta_ids identifiers_map.tsv "${FASTA%.fa}_renamed.fa"
cp -f "${GFF}" "${GFF%.gff}_renamed.gff"
map_gff_ids identifiers_map.tsv "${GFF%.gff}_renamed.gff"
# prep for functional annotation
# NOTE(review): only the -logfile option is visible here. The command it
# belongs to (presumably the BLAST database build, going by the log file
# name) and its other arguments are not shown in this excerpt — confirm.
-logfile uniprot_sprot_makeblastdb.log
# do the alignment for func. annot.
# Options: query proteins_renamed.fa against the database at
# /path/to/swissprot/uniprot_sprot and write the hits to
# proteins-x-uniprotSprot_fmt6.tsv.
# NOTE(review): the command name these options belong to is not visible in
# this excerpt, and THREADS is set elsewhere — confirm both.
# THREADS is quoted so that an empty or unset value is passed as an empty
# argument instead of letting -num_threads consume "-out" as its value.
-query proteins_renamed.fa \
-db /path/to/swissprot/uniprot_sprot \
-num_threads "${THREADS}" \
-out proteins-x-uniprotSprot_fmt6.tsv
# update the fasta and gff files with func. annots.
for FASTA in *_renamed.fa
/path/to/swissprot/unitprot_sprot.fa \
proteins-x-uniprotSprot_fmt6.tsv \
> ${FASTA%.fa}_putative-function.fa
/path/to/swissprot/unitprot_sprot.fa \
proteins-x-uniprotSprot_fmt6.tsv \
> ${GFF%.gff}_putative-function.gff
# run interproscan for more func. annots.
# NOTE(review): only the output option is visible here; the command it
# belongs to and its input arguments are not shown in this excerpt — confirm.
-o proteins-interproscan.tsv
# update the gff files with interproscan results
# Loops over withSeq_renamed_putative-function.gff and
# noSeq_renamed_putative-function.gff (brace expansion), passing
# proteins-interproscan.tsv and redirecting the result to a "*_domain-added"
# sibling of each input.
# NOTE(review): this excerpt is missing the do/done keywords, the command name
# and its per-file argument. The loop names also carry no "agloss_" prefix,
# unlike agloss_noSeq.gff used earlier — confirm the intended file names.
# The redirect target is quoted so it always expands to a single word.
for GFF in {with,no}Seq_renamed_putative-function.gff
proteins-interproscan.tsv \
> "${GFF%.gff}_domain-added.gff"
# Loops over withSeq_renamed.gff and noSeq_renamed.gff (brace expansion),
# passing proteins-interproscan.tsv and redirecting the result to a
# "*_visible-iprscan-domains" sibling of each input.
# NOTE(review): this excerpt is missing the do/done keywords, the command name
# and its per-file argument — confirm against the full script.
# The stem and suffix of the output name must be one word: with a space
# between them the output is written to the bare stem and the suffix is passed
# to the command as an extra argument.
for GFF in {with,no}Seq_renamed.gff
proteins-interproscan.tsv \
> "${GFF%.gff}_visible-iprscan-domains.gff"