(bio)-informatics, data processing and visualization

Wednesday, April 25, 2012

batch extraction of Pfam HMM domains

Batch run of Pfam HMM domain searches - one hmmsearch run per model against a large fasta file with protein sequences:

Pfam-A.hmm - file with Pfam HMM models
Pfam-A.names.IDs - file with Pfam model names (one name per line)
Lsat_CDS_BGI_V4_Prot.aa - fasta file with protein sequences


head Pfam-A.names.IDs
1-cysPrx_C
120_Rick_ant
14-3-3
2-Hacid_dh
2-Hacid_dh_C
2-oxoacid_dh
2-ph_phosp
2CSK_N
2C_adapt
2Fe-2S_Ferredox


# Split Pfam-A.hmm into one file per model: _hmm_files_/<name>.hmm
# Optional speed-up: run `hmmfetch --index Pfam-A.hmm` once beforehand so
# each lookup uses the SSI index instead of scanning the whole file.
mkdir -p _hmm_files_   # -p: do not fail when the directory already exists

# read -r keeps backslashes literal; default IFS still trims stray blanks
while read -r line; do
  [ -n "$line" ] || continue   # a blank line would otherwise create ".hmm"
  hmmfetch Pfam-A.hmm "$line" > "_hmm_files_/$line.hmm" \
    || echo "hmmfetch failed for: $line" >&2
  printf '%s\n' "$line"        # progress: name of the model just processed
done < Pfam-A.names.IDs


# Search every extracted model against the protein set (E-value cutoff 1e-20).
# Each report is written to
#   _hmm_out_e20_/<model>.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
mkdir -p _hmm_out_e20_   # -p: do not fail when the directory already exists

# The trailing & runs the whole loop as one background job.
for hmm_path in _hmm_files_/*.hmm; do
  [ -e "$hmm_path" ] || continue   # glob matched nothing: skip the literal pattern
  hmm_name=${hmm_path##*/}         # drop the directory part, keep the .hmm suffix
  hmmsearch -E 1e-20 "$hmm_path" Lsat_CDS_BGI_V4_Prot.aa \
    > "_hmm_out_e20_/$hmm_name.vs.Lsat_CDS_BGI_V4_Prot.e20"
done &


ls -l _hmm_files_ | head
-rw-r--r--+ 1 akozik akozik  118664 Apr 25 12:37 120_Rick_ant.hmm
-rw-r--r--+ 1 akozik akozik  109882 Apr 25 12:37 14-3-3.hmm
-rw-r--r--+ 1 akozik akozik   19555 Apr 25 12:37 1-cysPrx_C.hmm
-rw-r--r--+ 1 akozik akozik   71642 Apr 25 12:37 2_5_RNA_ligase2.hmm
-rw-r--r--+ 1 akozik akozik   18154 Apr 25 12:37 2C_adapt.hmm
-rw-r--r--+ 1 akozik akozik   68415 Apr 25 12:37 2CSK_N.hmm
-rw-r--r--+ 1 akozik akozik   16791 Apr 25 12:37 2Fe-2S_Ferredox.hmm
-rw-r--r--+ 1 akozik akozik   83202 Apr 25 12:37 2-Hacid_dh_C.hmm
-rw-r--r--+ 1 akozik akozik   62452 Apr 25 12:37 2-Hacid_dh.hmm

ls -l _hmm_out_e20_ | head
-rw-r--r--+ 1 akozik akozik    1870 Apr 25 13:29 120_Rick_ant.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik   32467 Apr 25 13:29 14-3-3.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik    1879 Apr 25 13:29 1-cysPrx_C.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik    1879 Apr 25 13:29 2_5_RNA_ligase2.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik    1850 Apr 25 13:29 2C_adapt.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik    1871 Apr 25 13:29 2CSK_N.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik    1881 Apr 25 13:29 2Fe-2S_Ferredox.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik   30616 Apr 25 13:29 2-Hacid_dh_C.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20
-rw-r--r--+ 1 akozik akozik   25730 Apr 25 13:29 2-Hacid_dh.hmm.vs.Lsat_CDS_BGI_V4_Prot.e20

No comments:

Post a Comment