# Command log: BLAST database generation, the "Ulrich top100" BLAST runs and
# preparation of 21-nt fragments of the CPB (LDEC) mRNAs.
# fasta2blastdb, ablastplus and sumablastplus are not defined here; they are
# presumably site-local wrappers available on the analysis host -- confirm.

#######################################
# Drop every FASTA record whose sequence contains an N (or n).
# Works on whole records, so a header never survives without its sequence
# and a header that merely contains the letter N is left untouched
# (line-wise `grep -v 'N'` got both of these wrong).
# Records with an empty sequence and lines preceding the first header are
# dropped as well.
# Arguments: none
# Inputs:    FASTA on stdin (single- or multi-line sequences)
# Outputs:   filtered FASTA on stdout
#######################################
drop_records_with_n() {
  awk '
    # Print the buffered record if it has a sequence free of N/n.
    function emit_record() {
      if (header != "" && seq != "" && seq !~ /[Nn]/) {
        print header
        printf "%s", body
      }
    }
    /^>/ { emit_record(); header = $0; seq = ""; body = ""; next }
         { seq = seq $0; body = body $0 "\n" }
    END  { emit_record() }
  '
}

# db generation
fasta2blastdb -prot 8non_target_insects_prot.fasta non_target_insects_prot
fasta2blastdb -nucl LDEC_mRNA.fna CPB_genome_nucl
fasta2blastdb -prot LDEC_proteins.faa CPB_genome_prot

# Ulrich_top100 blast runs
# NOTE(review): the database 'Ld_mRNAs_ESTs_patents_nucl' is not built in this
# log; presumably it already exists -- confirm.
ablastplus -now -use 'blastn' -cpu 6 -db 'Ld_mRNAs_ESTs_patents_nucl' \
  -besthits '100' -expect '0.0000000001' \
  -remark 'Tctop100cds__Ld_nucl' -log 'Tctop100cds__Ld_nucl.log' \
  'Biomart_Tc_top100CDS.fasta' 'Tctop100cds__Ld_nucl.output' \
  -exec 'sumablastplus -empties -maxhits 100 Tctop100cds__Ld_nucl.output Tctop100cds__Ld_nucl.tsv'

ablastplus -now -use 'blastn' -cpu 6 -db 'CPB_genome_nucl' \
  -besthits '100' -expect '0.0000000001' \
  -remark 'Tctop100cds__CPB_genome_nucl' -log 'Tctop100cds__CPB_genome_nucl.log' \
  'Biomart_Tc_top100CDS.fasta' 'Tctop100cds__CPB_genome_nucl.output' \
  -exec 'sumablastplus -empties -maxhits 100 Tctop100cds__CPB_genome_nucl.output Tctop100cds__CPB_genome_nucl.tsv'

ablastplus -now -use 'blastp' -cpu 6 -db 'CPB_genome_prot' \
  -besthits '100' -expect '0.0000000001' \
  -remark 'Tctop100cds__CPB_genome_prot' -log 'Tctop100cds__CPB_genome_prot.log' \
  'Biomart_Tc_top100pept.fasta' 'Tctop100cds__CPB_genome_prot.output' \
  -exec 'sumablastplus -empties -maxhits 100 Tctop100cds__CPB_genome_prot.output Tctop100cds__CPB_genome_prot.tsv'

# Splitting CPB genes into 21-nt fragments (window 21, overlap 20) for the
# BLAST against the non-target insects.
splitter -sequence ../genomes_blast/tmp/mp1406_insects+potato/DATA/LDEC_mRNA.fna \
  -outseq LDEC_mRNA_splitter.fasta -size 21 -overlap 20

# Removing 21-nt sequences with Ns in them (per the original note, no single
# Ns are present in the sequences). The earlier `grep -v 'N'` left header-only
# (empty) records behind, and `-B 1` did not help; filter whole records instead.
drop_records_with_n < LDEC_mRNA_splitter.fasta > LDEC_mRNA_splitter_N-removed.fasta
#######################################
# Print every sequence line of a FASTA file that still contains N (or n).
# Header lines (starting with '>') are skipped, so an N in a sequence name or
# description is not reported as a leftover ambiguous base.
# Arguments: $1 - path to the FASTA file to inspect
# Outputs:   offending sequence lines on stdout; nothing if the file is clean
#######################################
list_n_sequence_lines() {
  awk '!/^>/ && /[Nn]/' "$1"
}

# E-value cutoff for the ortholog searches, shared by both blastp runs below.
# The original note describes it as "E cutoff 10-E90".
ORTHOLOG_EVALUE='0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001'

# Test if Ns were removed (expected: no output).
list_n_sequence_lines LDEC_mRNA_splitter_N-removed.fasta

# --> after this check the blastn below was run (per the original note, on ibis2)
ablastplus -mail marko.petek@nib.si -use 'blastn' -cpu 8 \
  -db '../blast/non_targetinsects_cds' -besthits '1' -expect '0.01' \
  -remark 'LDEC_mRNAs_splitter vs nontarget_insect_RNAs' \
  -log 'LDEC_mRNA_splitter.log' \
  'LDEC_mRNA_splitter_N-removed.fasta' 'LDEC_mRNA_splitter.output' \
  -exec 'sumablastplus -maxhits 1 LDEC_mRNA_splitter.output LDEC_mRNA_splitter.tsv'

## blastP to find Dm and Tc orthologs
ablastplus -mail marko.petek@nib.si -use 'blastp' -cpu 8 \
  -db '/DATA/blastdb/301-insects/Tribolium_castaneum-pep_v3' \
  -besthits '1' -expect "$ORTHOLOG_EVALUE" \
  -remark 'Tc_orthologs' -log 'LDEC_Tc_orthologs.log' \
  '/DATA/workspace/markop/blast/LDEC_proteins.faa' 'LDEC_Tc_orthologs.output' \
  -exec 'sumablastplus -maxhits 1 LDEC_Tc_orthologs.output LDEC_Tc_orthologs.tsv'

ablastplus -mail marko.petek@nib.si -use 'blastp' -cpu 8 \
  -db '/DATA/blastdb/301-insects/Drosophila_melanogaster-pep_v6' \
  -besthits '1' -expect "$ORTHOLOG_EVALUE" \
  -remark 'Dm_orthologs' -log 'LDEC_Dm_orthologs.log' \
  '/DATA/workspace/markop/blast/LDEC_proteins.faa' 'LDEC_Dm_orthologs.output' \
  -exec 'sumablastplus -maxhits 1 LDEC_Dm_orthologs.output LDEC_Dm_orthologs.tsv'