# Step 1: run TransDecoder to determine likely coding regions.
# Install TransDecoder with mamba.
mamba install transdecoder
# Placeholder: directory that holds the TransDecoder executables.
transdecoder_dir=/Pathtotransdecoder
# Both stages take the same transcript FASTA; LongOrfs runs first, then Predict.
"${transdecoder_dir}/TransDecoder.LongOrfs" -t trinity.fasta
"${transdecoder_dir}/TransDecoder.Predict" -t trinity.fasta
# The output needed downstream is the predicted-peptide file,
# trinity.fasta.transdecoder.pep.
# Step 2: start the annotation process with Trinotate. This can be run in
# parallel with the differential-expression (DE) analysis.
# Make sure the environment with the tools installed is activated.
mamba activate trinityenv
# Compile the sequence databases; this only needs to be done once.
/Path_to_trinotate/Build_Trinotate_Boilerplate_SQLite_db.pl Trinotate
# Once completed you will have the following files (per the Trinotate docs):
#   Trinotate.sqlite, uniprot_sprot.pep, Pfam-A.hmm.gz
# Prepare the protein database for BLAST searches; set the working directory
# to the location of those files first.
makeblastdb -in uniprot_sprot.pep -dbtype prot
# Uncompress the Pfam database and build the index files that hmmscan needs.
gunzip Pfam-A.hmm.gz
hmmpress Pfam-A.hmm
# Step 3: search the assembly against the databases to find annotations.
# Best run on an HPC: the blastx search can take up to a few days.
# CPU threads used by every search below; override with THREADS=<n>.
threads="${THREADS:-12}"
# Transcript FASTA to annotate; override with TRANSCRIPTS=<file>. The default
# is the same file that was given to TransDecoder.
transcripts="${TRANSCRIPTS:-trinity.fasta}"
# Transcript nucleotide sequences against the SwissProt protein database.
blastx -query "${transcripts}" -db uniprot_sprot.pep \
  -num_threads "${threads}" -max_target_seqs 1 -outfmt 6 -evalue 1e-3 \
  > blastx.outfmt6
# TransDecoder-predicted peptides against the same database.
blastp -query trinity.fasta.transdecoder.pep -db uniprot_sprot.pep \
  -num_threads "${threads}" -max_target_seqs 1 -outfmt 6 -evalue 1e-3 \
  > blastp.outfmt6
# Identify protein domains (optional); run this on the HPC as well if possible.
hmmscan --cpu "${threads}" --domtblout TrinotatePFAM.out Pfam-A.hmm \
  trinity.fasta.transdecoder.pep > pfam.log
# Step 4: load the search results into the Trinotate SQLite database.
# NOTE(review): the transcript FASTA and gene/transcript map are spelled
# "Trinity.fasta*" here, while the peptide file is
# "trinity.fasta.transdecoder.pep"; on a case-sensitive filesystem confirm
# that both spellings exist.
Trinotate Trinotate.sqlite init \
  --gene_trans_map Trinity.fasta.gene_trans_map \
  --transcript_fasta Trinity.fasta \
  --transdecoder_pep trinity.fasta.transdecoder.pep
# Load the SwissProt BLAST hits (peptide and transcript searches).
Trinotate Trinotate.sqlite LOAD_swissprot_blastp blastp.outfmt6
Trinotate Trinotate.sqlite LOAD_swissprot_blastx blastx.outfmt6
# Load the Pfam domain entries.
Trinotate Trinotate.sqlite LOAD_pfam TrinotatePFAM.out
# Write the annotation report. Optional report flags (see the Trinotate
# documentation) go directly after "report".
Trinotate Trinotate.sqlite report > trinotate_annotation_report.xls
# Extract GO assignments per gene and save them as an annotations file.
# The input is the report written by the "Trinotate ... report" command.
/path_to_trinity_install/bin/extract_GO_assignments_from_Trinotate_xls.pl \
  --Trinotate_xls trinotate_annotation_report.xls \
  -G --include_ancestral_terms \
  > go_annotations.txt
# Get a gene lengths file; this can be used for downstream analysis.
# Placeholder: directory that holds Trinity's misc utility scripts.
trinity_misc=/path_to_trinity_install/opt/trinity-2.13.2/util/misc
# Write the sequence lengths for the transcript FASTA.
"${trinity_misc}/fasta_seq_length.pl" Trinity.fasta > Trinity.fasta.seq_lens
# Build the gene lengths table from the gene/transcript map, the lengths file
# written above, and the expression matrix. All inputs are read from the
# working directory so the step does not depend on one machine's mount point.
"${trinity_misc}/TPM_weighted_gene_length.py" \
  --gene_trans_map Trinity.fasta.gene_trans_map \
  --trans_lengths Trinity.fasta.seq_lens \
  --TPM_matrix kallisto.isoform.TMM.EXPR.matrix > Trinity.gene_lengths.txt