Commit 14cce39a authored by Gustaf Ahdritz
Browse files

Add data pipeline test

parent f9dc8b15
This diff is collapsed.
# STOCKHOLM 1.0
#=GF ID query-i1
#=GF AU jackhmmer (HMMER 3.3.2)
#=GS MGYP000406148242/1-68 DE [subseq from] PL=00 UP=0 BIOMES=0101000000000
#=GS MGYP000119383271/47-117 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000430010134/3-69 DE [subseq from] PL=00 UP=0 BIOMES=0000110000000
#=GS MGYP000184282189/1-71 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000372988949/3-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000222615028/3-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000384795733/25-88 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000680660046/4-73 DE [subseq from] PL=00 UP=0 BIOMES=0000110000000
#=GS MGYP000586297297/4-70 DE [subseq from] PL=00 UP=0 BIOMES=0000110000000
#=GS MGYP000526302968/5-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000081082088/4-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000172493671/1-71 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000694390052/2-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000246175980/4-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000358235060/4-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000635416234/5-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000656061151/3-65 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000718018739/4-64 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000234420019/4-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000689530757/1-71 DE [subseq from] PL=00 UP=0 BIOMES=0000100000000
#=GS MGYP000266820214/24-89 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000190165740/1-71 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP000589249599/4-69 DE [subseq from] PL=00 UP=0 BIOMES=0000110000000
#=GS MGYP000048618675/3-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000377290797/1-69 DE [subseq from] PL=00 UP=1 BIOMES=0110000000000
#=GS MGYP000697367932/3-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000747506700/4-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000255037255/6-64 DE [subseq from] PL=10 UP=0 BIOMES=0000101000000
#=GS MGYP000602985373/3-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000420186793/4-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000452617499/5-64 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000119404247/1-68 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000134149386/3-60 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000461455637/26-91 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000119389418/96-161 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000546988737/26-93 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000624371167/1-68 DE [subseq from] PL=00 UP=0 BIOMES=0101000000000
#=GS MGYP000650157322/5-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000246214200/7-73 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000113479303/34-96 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000187226991/3-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000381848663/3-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000066325489/28-89 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP000013251582/4-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000499794189/19-84 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000555816272/4-69 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP000653248377/3-70 DE [subseq from] PL=00 UP=0 BIOMES=0110000000000
#=GS MGYP000113511630/3-70 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP001057101778/4-69 DE [subseq from] PL=00 UP=0 BIOMES=1000000000000
#=GS MGYP000210824545/3-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000676742083/9-64 DE [subseq from] PL=10 UP=0 BIOMES=0000101000000
#=GS MGYP000545010933/4-70 DE [subseq from] PL=00 UP=0 BIOMES=0000110000000
#=GS MGYP000541064880/3-68 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP000541064880/99-161 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP000729801087/3-52 DE [subseq from] PL=10 UP=0 BIOMES=0000101000000
#=GS MGYP000715079888/40-96 DE [subseq from] PL=10 UP=0 BIOMES=0000101000000
#=GS MGYP000033872322/3-43 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000464421157/4-69 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
query MAAHKGAEHHHK-AAEHHEQAAKHHHAAAEHHEKGE-HEQAAHHADTAYAHHKHAEEHAAQAAKHD-AEHHAPKPH
MGYP000406148242/1-68 MATHKGAESHKK-AAEHHTTAAKHHTEAAKSHESGN-HEKAAHHAHTATAHGKHASDHSDDAAKTY-ASEH-----
#=GR MGYP000406148242/1-68 PP 899*********.***********************.***************************98.8877.....
MGYP000119383271/47-117 MATHKGTEHHKK-AAEHHELAAKHHREAAKLHEAGS-HEKAAHHAQIAAGHGLHAVYHTEEATKHH-ADEHTGK--
#=GR MGYP000119383271/47-117 PP 899*********.***********************.*****************************.**99866..
MGYP000430010134/3-69 ---KKAAEHHRK-AAEHHQNAAKHHNAAAESHEAGN-HEKAAHHAHTAHGHHTQAGEHGGEAAKAH-RDEHGQ---
#=GR MGYP000430010134/3-69 PP ...699******.***********************.***************************88.877765...
MGYP000184282189/1-71 MPKHEGAEHHKK-AAEHHEKAAQHHKEAAKHHEEGR-HETAGHHAYVAHGHHLTAIQHSEEAAKYH-SQQHGEK--
#=GR MGYP000184282189/1-71 PP 568*********.***********************.****************************9.9999876..
MGYP000372988949/3-70 ---KKAAEHHLK-AAEHHEHAARHHKEAAKHHQAGS-YEKAAHHAHTARAHAEHADEHAVEAAKAH-AEEHGSK--
#=GR MGYP000372988949/3-70 PP ...699******.***********************.*****************************.**99865..
MGYP000222615028/3-68 ---KKAVEHHHK-AAEHHEHAARHHKEAAKHHEAGK-HETAAHHAHLARGHHEHAMHHAAEAAKAH-VEDHG----
#=GR MGYP000222615028/3-68 PP ...6899*****.***********************.***************************99.99986....
MGYP000384795733/25-88 ----SGSQQHDA-AAQHYEEAARHHRQAAKHYQASR-HEKAAHHAQLGYAHHLYAEQHAAEAAKAH-AKNH-----
#=GR MGYP000384795733/25-88 PP ....6999****.***********************.***************************99.9998.....
MGYP000680660046/4-73 -STHKGAEHHKE-AAAHHKKAAEHHLAAAEHHEAGD-HEKAGHHAHVAHGHHLNAVHHAEEAGKHHGAEHSGP---
#=GR MGYP000680660046/4-73 PP .57*********.***********************.**************************9752788777...
MGYP000586297297/4-70 ----QAAEHHQK-AAEHHEHAARHHREAAAHHEEGN-HETAAHHAHTAQGHLHHATHHASEAAKHH-VEHHGNK--
#=GR MGYP000586297297/4-70 PP ....689*****.***********************.*****************************.****977..
MGYP000526302968/5-69 -----REEHHLK-AAEHHEHAAKHHLAAAEHHAGGD-HEKAGHHAHVAHGHSTHAEHHAEEASKHT-ANHDAA---
#=GR MGYP000526302968/5-69 PP .....469999*.***********************.*****************************.***985...
MGYP000081082088/4-68 ----QAAEHHHK-AAEHHEHAARHHKEAAKHHEAGK-HETAAHHAHLARGHHEHAMHHAAEAAKAH-IQDHG----
#=GR MGYP000081082088/4-68 PP ....689*****.***********************.**************************977.66664....
MGYP000172493671/1-71 MTKHEGAEHHKQ-AAQQHQDAARHHLEAAKHHEAGA-HEKAGHHAHIAYGHHLQATHHAEEAAKHH-AMQHGDK--
#=GR MGYP000172493671/1-71 PP 678*********.***********************.*****************************.*999876..
MGYP000694390052/2-70 --SHAAAEHHKK-AAEHHEHAARHHQEAAKHHEAGN-HEKAAHHAHVAHGHHVHAVEHAEHAAKHH-AETHGAK--
#=GR MGYP000694390052/2-70 PP ..699*******.***********************.*****************************.**99865..
MGYP000246175980/4-68 ----QAAEHHHK-AAEHHEHAARHHKEAAKHHEAGK-HETAAHHAHLARGHHVHAMHHAGEAAKAH-IEDHG----
#=GR MGYP000246175980/4-68 PP ....689*****.***********************.***************************88.88885....
MGYP000358235060/4-70 ----QAAEHHGK-AAEHHEHAARHHREAANHHEAGD-HQQAAHHAHTAQGHLHHATHHSAEAAKLH-VEHHGHK--
#=GR MGYP000358235060/4-70 PP ....689*****.***********************.*****************************.****877..
MGYP000635416234/5-68 -----VADHHHK-AAEHHERAAKHHREAATHYESDR-HETAAHHAHMAHGHHQHAVHHASEAAKAH-IEHHD----
#=GR MGYP000635416234/5-68 PP .....489****.***********************.*****************************.****6....
MGYP000656061151/3-65 ---KKAAEHHRK-AAEHHEHAARHHKEAAKHHDAGA-HEKAAHHAHTAHAHHLHATHFADEAAKAH-AD-------
#=GR MGYP000656061151/3-65 PP ...699******.***********************.**************************977.75.......
MGYP000718018739/4-64 -----GAKHHNA-AAQHYEEAARHHRKAAELYQCGH-HEKVSHHANLASGHPLHAKQHAEEAAKAL-IE-------
#=GR MGYP000718018739/4-64 PP .....99*****.***********************.**************************976.55.......
MGYP000234420019/4-70 ----AAAEHHRK-AAEHHEHAARHHEEAAEHHESGA-HETAAHHAHSAQGHTHHALYHASEAAKEH-AEHHGDK--
#=GR MGYP000234420019/4-70 PP ....479*****.***********************.*****************************.****875..
MGYP000689530757/1-71 MPTHTGAEHHRK-AAEHHQLAAKHHLEAAKLHDAGS-HEKAAHHSEIAAGHGHHAVYHTEEATKQH-ADMNAEK--
#=GR MGYP000689530757/1-71 PP 578*********.***********************.****************************9.9999877..
MGYP000266820214/24-89 ---KKAAEHHLK-AAEHHEHAARHHKEAAKHHQAGS-HEKAAHHAHTARAHEEHAEFHSAEAAKAH-GQEHG----
#=GR MGYP000266820214/24-89 PP ...699******.***********************.**************************977.77775....
MGYP000190165740/1-71 MARHEGAEHHKQ-AAEHHQHAARHHLEAAKHHEAGA-HEKAGHHAHIAQGHHLHAIHHAEEAAKHH-AAQHGDK--
#=GR MGYP000190165740/1-71 PP 799*********.***********************.*****************************.*999876..
MGYP000589249599/4-69 ----QAAEHHTK-AAEHHQHAARHHLEAAKHHEAGR-HEAAGHHAHLAHGHHQHATHHASEAAKSH-IEHHGK---
#=GR MGYP000589249599/4-69 PP ....689*****.***********************.*****************************.****75...
MGYP000048618675/3-70 ---KKASEHHRK-AAEHHKLAATHHEEAAAHYDKGN-HEKAAHHAHVAHGHTLHATHYAAEAAKMH-VEEHGSK--
#=GR MGYP000048618675/3-70 PP ...6899*****.***********************.***************************99.9999866..
MGYP000377290797/1-69 MSDHAGVEHYHK-AAEHHEHAARHHREAAKHHEEGN-HEKAAHHAHSAHGHASHAQHHHTEASRHH-AEHHG----
#=GR MGYP000377290797/1-69 PP 678*********.***********************.*****************************.****7....
MGYP000697367932/3-70 ---KKASEHHRK-AAEHHKLAATHHEEAAAHHDKGN-YEKAAHHAHVAHGHTHHATYHAAEAAKIH-AEDYGSK--
#=GR MGYP000697367932/3-70 PP ...6899*****.***********************.***************************99.9988765..
MGYP000747506700/4-68 ----QAAEHHHK-AAEHHEHAALHHKEAAKHHEAGK-HEMAAHHAHLARAHHEHAMHHAVEAVKAH-LQDHG----
#=GR MGYP000747506700/4-68 PP ....689*****.***********************.**************************977.76664....
MGYP000255037255/6-64 ---SKIAEHHTK-AAEHHETAAQHHREAAKHHEAGS-IEKAAHHAQVAYGHGAHAWNYQEEAAK------------
#=GR MGYP000255037255/6-64 PP ...5789*****.***********************.******************999999998............
MGYP000602985373/3-68 ---KKAVEHHNK-AAEHHEHAARHHKEAAKHHEAGK-HETAGHHAHLARGHQEHAMHHSAEAAKAH-IEDHS----
#=GR MGYP000602985373/3-68 PP ...6899*****.***********************.***************************99.98886....
MGYP000420186793/4-69 ----QAAEHHLK-AAEHHEHAAHHHKEAAKHHQGGS-HEKAAHHAHTARGHHEHAQHHAAEAAKAH-AQEHGN---
#=GR MGYP000420186793/4-69 PP ....689*****.***********************.***************************99.999975...
MGYP000452617499/5-64 -----AAAHHLK-AVEHHEHAARHHREAAKHHEAGN-HEKAAHHAHLAHGHHLHATEYAGEAAKAH-I--------
#=GR MGYP000452617499/5-64 PP .....678999*.***********************.**************************965.5........
MGYP000119404247/1-68 MAGHKIHEHHEK-AADHHEHAAKHHREAAKHHKAGD-HEKAAHHSKVAHGHHLHATEHHDEASKKH-AEDH-----
#=GR MGYP000119404247/1-68 PP 799*********.***********************.***************************99.9998.....
MGYP000134149386/3-60 ---KKATEHHRK-AAEHHEHAARHHKEAAKHHEAGK-HETAAHHAHLARGHQERAAQQAAEAA-------------
#=GR MGYP000134149386/3-60 PP ...6899*****.***********************.***********************998.............
MGYP000461455637/26-91 -----AAKHHDL-AAQHYEEAARHHREAAQDYQSGR-HEKASHHAHLAYAHHLHAEQHAEEAAKAH-IKNHLDD--
#=GR MGYP000461455637/26-91 PP .....589****.***********************.***************************99.9999765..
MGYP000119389418/96-161 ---KQAAEHHRK-AAEHHEHAARHHKEAAKHHEAGK-HETAAHHAHLARAHHEVATHHAVEAAKAH-LEEHG----
#=GR MGYP000119389418/96-161 PP ...5689*****.***********************.***************************88.88775....
MGYP000546988737/26-93 ---EKAAEHHEK-AAEHNERAAQHHREAAKHHEEGH-HETAGHHAQIAHGHHLNATHHSEEAAKHH-AQQHGEK--
#=GR MGYP000546988737/26-93 PP ...589******.***********************.*****************************.****876..
MGYP000624371167/1-68 MAKHPGADYHRM-AAEHHEKAALHHKKAAEYYEAGN-LKKAAIHAELAAVFHKQADEHVYNKQEEI-DVHH-----
#=GR MGYP000624371167/1-68 PP 799*********.***********************.*********************98877665.5566.....
MGYP000650157322/5-70 -----ATEHHRR-AAEHHEHSAKHHKAVADHHEAGN-HEKAGHHASVAEGHLNHASHHAEEASKHH-AADHGHK--
#=GR MGYP000650157322/5-70 PP .....579****.***********************.*****************************.9999765..
MGYP000246214200/7-73 ----KIAEHHAQ-AAQHHEKAAEHHKEAAKHYGTGA-VEKGAHHAQVAQGHAVHAEYHADEAAKAH-AEHHAGK--
#=GR MGYP000246214200/7-73 PP ....779*****.***********************.*****************************.****976..
MGYP000113479303/34-96 --NHKGIENHRK-AAKHHEEAAKHHHDAAKHHEAGN-HDKACESTVKAHGHHCLASDHMREVSKQH-A--------
#=GR MGYP000113479303/34-96 PP ..5*********.***********************.**********************9999875.5........
MGYP000187226991/3-69 ---KKAADHHKQ-AAEHHTHAAKHHTEAARHHESGN-HEKAAHHAHSSRAHASQADDHAEQAAKAH-MDEHGK---
#=GR MGYP000187226991/3-69 PP ...689******.***********************.***************************88.888865...
MGYP000381848663/3-69 ---KKAAEHHHK-ASEHHTHAARHHSEAAKHHEGGH-HEKAAHHAHTARAHALHSRHHSDEAAKMH-GEEHGK---
#=GR MGYP000381848663/3-69 PP ...699******.***********************.***************************99.999876...
MGYP000066325489/28-89 ----KTIANHKQ-AARHHMEAAKHHMEAARHHEEGN-HEKAAHSTLLAYGHHTIAGEFVSDDAKHH-AQ-------
#=GR MGYP000066325489/28-89 PP ....56678999.***********************.********************999999988.75.......
MGYP000013251582/4-69 ----EAANHHKQ-AAEHHEHAARHHHEAAKHHLAGN-HEKAAHHAHLAHGHHVHATEHAENAAKEH-VKAHGA---
#=GR MGYP000013251582/4-69 PP ....57889999.***********************.***************************99.888865...
MGYP000499794189/19-84 ---NDAAEHHRK-AAEHHEHAAAHHREAAEHHANGN-HEKAAHHAHIAHGHGLHAAHHAGEATKHH-ANTHG----
#=GR MGYP000499794189/19-84 PP ...5689*****.***********************.*****************************.*9986....
MGYP000555816272/4-69 -----EAAHHHKQAAEHHEHAARHHHEAAKHHEAGN-HEKAAHHAHLAHAHHVLAAEHAENAAKEH-LKAHGT---
#=GR MGYP000555816272/4-69 PP .....4555554399*********************.***************************99.888865...
MGYP000653248377/3-70 ---KKAAEHHKK-ASEHLTHAARHHGEAAKHHEAGS-HEKAAHHAHTARAHIIHGRGHAEEAVKAH-AEEHGKK--
#=GR MGYP000653248377/3-70 PP ...699******.***********************.*****************************.**99865..
MGYP000113511630/3-70 ---KKAAEHHRK-AAEHHKHAAGHHEEAAAHHDKGN-HEKAAHHAHVAHGHTLHAAHHAEEAAKAH-VEEHGSK--
#=GR MGYP000113511630/3-70 PP ...699******.***********************.***************************99.9999866..
MGYP001057101778/4-69 ---DKIIEHHRS-AADHHEKAAQHHREAAKHHASDS-HEKAAHHAHSAHGHSAHATHHAGEASKHH-AEHHG----
#=GR MGYP001057101778/4-69 PP ...5678*****.***********************.*****************************.****6....
MGYP000210824545/3-69 ---KKAAESHKK-ASEHLTHAARHHTEAAKHHETGQ-HEKAAHHAHIARAHATHAREHSENAAKAH-LEEHGK---
#=GR MGYP000210824545/3-69 PP ...689******.***********************.***************************99.999976...
MGYP000676742083/9-64 ------RDEHNK-AAEHHENAAKAHRSAAEHHGKGD-HAKGKQHADTAKQHSQTAHQHTDQAHS------------
#=GR MGYP000676742083/9-64 PP ......5789**.***********************.**********************99854............
MGYP000545010933/4-70 --KHPSTEHHTS-AAEEHDNASRHHRAAAKNYEEGK-HETAAHHAHSASGHSSNARDQAEEASRKH-AKQHG----
#=GR MGYP000545010933/4-70 PP ..58999*****.***********************.*************************9888.88775....
MGYP000541064880/3-68 -AEHNAAEHHGF-AAHHHQRAAQFHREASRHYEAGKDYAHAAHQALVAHGHALLAIDHGNEAGKYY-AG-------
#=GR MGYP000541064880/3-68 PP .789********.*********************963789***********************997.64.......
MGYP000541064880/99-161 ------SEHHAA-AADDHEQAAQHHAQAAKHLNEKD-YELAAHEAQLAHRHAHYSIFHDDEAAKHH-VEHYG----
#=GR MGYP000541064880/99-161 PP ......69****.***********************.**************999************.***86....
MGYP000729801087/3-52 ---KKVAEHHLK-AAEHLEHAARHHKEAAKHHEAGN-HEKAAHHAHIARAHHEHA---------------------
#=GR MGYP000729801087/3-52 PP ...5889*****.***********************.*****************7.....................
MGYP000715079888/40-96 -----SAEYHKK-AANCHYEAAKHHNIAAKHHEAGN-HKKASEYALKAYWYHCLASEAEKEDVK------------
#=GR MGYP000715079888/40-96 PP .....69*****.***********************.***************998876655555............
MGYP000033872322/3-43 ---KKAAEHHRK-AAEHHEHAARHHKEAAKHHDAGA-HEKAAHHAH------------------------------
#=GR MGYP000033872322/3-43 PP ...699******.***********************.*******96..............................
MGYP000464421157/4-69 ----EAAEHHKH-AAEHLTHAARHHSEAAKHHEAGQ-HEKAAHHAHLAHGHQEHASEHAVEAAKKH-IEAHGN---
#=GR MGYP000464421157/4-69 PP ....689*****.***********************.***************************99.999875...
#=GC PP_cons 7887889*****.***********************.**************************999.9998766..
#=GC RF xxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxx
//
Query query
Match_columns 73
No_of_seqs 55 out of 57
Neff 2.88591
Searched_HMMs 80799
Date Thu Dec 30 19:40:02 2021
Command /home/ga122/openfold/lib/conda/envs/openfold_venv/bin/hhsearch -i /tmp/tmpedq9nsbw/query.a3m -o /tmp/tmpedq9nsbw/output.hhr -maxseq 1000000 -d /data/ga122/alphafold/pdb70/pdb70
No Hit Prob E-value P-value Score SS Cols Query HMM Template HMM
1 1HF9_B ATPASE INHIBITOR (MITOC 7.5 3.8E+02 0.0047 16.2 0.0 22 7-28 10-31 (41)
2 2CRB_A nuclear receptor bindin 6.4 4.7E+02 0.0058 18.0 0.0 20 11-30 32-51 (97)
3 4ZEY_A nuclear receptor bindin 6.3 4.7E+02 0.0059 17.3 0.0 20 11-30 26-45 (84)
4 3U8V_A Metal-binding protein s 4.1 8.1E+02 0.01 17.3 0.0 32 15-46 50-81 (93)
5 1PSM_A SPAM-H1 (RESIDUES 90 - 1.9 2.1E+03 0.026 13.4 0.0 18 11-28 14-31 (38)
6 5KC1_F Autophagy-related prote 1.5 2.7E+03 0.033 16.9 0.0 17 12-28 25-41 (226)
7 5KC1_J Autophagy-related prote 1.5 2.7E+03 0.033 16.9 0.0 17 12-28 25-41 (226)
8 3ZEE_A PARTITIONING DEFECTIVE 1.1 3.8E+03 0.046 12.5 0.0 15 58-72 30-44 (84)
9 4I6P_A Partitioning defective 1.0 4.3E+03 0.054 12.4 0.0 16 57-72 32-47 (88)
10 2Q2K_A Hypothetical protein/DN 1.0 4.3E+03 0.054 13.4 0.0 17 56-72 54-70 (70)
No 1
>1HF9_B ATPASE INHIBITOR (MITOCHONDRIAL); ATPASE INHIBITOR, F1 ATPASE INHIBITOR; NMR {BOS TAURUS} SCOP: h.4.8.1
Probab=7.51 E-value=3.8e+02 Score=16.19 Aligned_cols=22 Identities=23% Similarity=0.398 Sum_probs=13.1 Template_Neff=4.500
Q query 7 AEHHHKAAEHHEQAAKHHHAAA 28 (73)
Q Consensus 7 aEhH~kAAeHHe~AA~HH~eAA 28 (73)
.++|++-++.|+.|.+-|++-.
T Consensus 10 I~~He~qIk~heeaI~RHk~~i 31 (41)
T 1HF9_B 10 ISHHAKEIERLQKEIERHKQSI 31 (41)
T ss_dssp HHHHHHHHHHHHHHHHHHHHHH
T ss_pred HHHHHHHHHHHHHHHHHHHHHH
Confidence 3456666666666666665543
No 2
>2CRB_A nuclear receptor binding factor 2; NRBF-2, MIT domain, helix bundle; NMR {Mus musculus} SCOP: a.7.16.1
Probab=6.35 E-value=4.7e+02 Score=18.01 Aligned_cols=20 Identities=20% Similarity=0.165 Sum_probs=11.0 Template_Neff=5.700
Q query 11 HKAAEHHEQAAKHHHAAAEH 30 (73)
Q Consensus 11 ~kAAeHHe~AA~HH~eAAkH 30 (73)
..|++-|.+|++...+|.+-
T Consensus 32 ~~Aie~H~kAA~~f~~A~~~ 51 (97)
T 2CRB_A 32 EEAISCHRKATTYLSEAMKL 51 (97)
T ss_dssp HHHHHHHHHHHHHHHHHHTT
T ss_pred HHHHHHHHHHHHHHHHHHHc
Confidence 34555556666555555543
No 3
>4ZEY_A nuclear receptor binding factor 2; Structural Genomics, Joint Center for; HET: SO4, MSE; 1.5A {Homo sapiens}
Probab=6.31 E-value=4.7e+02 Score=17.29 Aligned_cols=20 Identities=25% Similarity=0.200 Sum_probs=10.8 Template_Neff=6.200
Q query 11 HKAAEHHEQAAKHHHAAAEH 30 (73)
Q Consensus 11 ~kAAeHHe~AA~HH~eAAkH 30 (73)
..|++-|.+|+....+|.+-
T Consensus 26 ~~A~e~H~~AA~~f~~A~~~ 45 (84)
T 4ZEY_A 26 EEAISCHKKAAAYLSEAMKL 45 (84)
T ss_dssp HHHHHHHHHHHHHHHHHHTT
T ss_pred HHHHHHHHHHHHHHHHHHHh
Confidence 44555555555555555543
No 4
>3U8V_A Metal-binding protein smbP; four helical bundle, metal chaperone; 1.9A {Nitrosomonas europaea}
Probab=4.11 E-value=8.1e+02 Score=17.29 Aligned_cols=32 Identities=34% Similarity=0.433 Sum_probs=15.8 Template_Neff=4.400
Q query 15 EHHEQAAKHHHAAAEHHEKGEHEQAAHHADTA 46 (73)
Q Consensus 15 eHHe~AA~HH~eAAkHheaG~HekAahhAh~A 46 (73)
+|-..+.++-.+|.++-..|+-+.|..++-.|
T Consensus 50 ~H~~~aik~LeeAI~hgk~ghad~A~kha~~A 81 (93)
T 3U8V_A 50 THVGHGIKHLEDAIKHGEEGHVGVATKHAQEA 81 (93)
T ss_dssp CHHHHHHHHHHHHHHHHHTTCHHHHHHHHHHH
T ss_pred hHHHHHHHHHHHHHHHHHcCcHHHHHHHHHHH
Confidence 34444455555555555555555544444433
No 5
>1PSM_A SPAM-H1 (RESIDUES 90 - 127; POLYMORPHIC ANTIGEN; NMR {Plasmodium falciparum} SCOP: j.18.1.1
Probab=1.89 E-value=2.1e+03 Score=13.41 Aligned_cols=18 Identities=39% Similarity=0.433 Sum_probs=8.0 Template_Neff=1.300
Q query 11 HKAAEHHEQAAKHHHAAA 28 (73)
Q Consensus 11 ~kAAeHHe~AA~HH~eAA 28 (73)
.+|++--|+|++.=.+|+
T Consensus 14 e~aa~dae~a~k~ae~a~ 31 (38)
T 1PSM_A 14 EQAAKDAENASKEAEEAA 31 (38)
T ss_dssp HSTTTTTTHHHHHTTTTT
T ss_pred HHHHHHHHHHHHHHHHHH
Confidence 444444444444444443
No 6
>5KC1_F Autophagy-related protein 38; Atg38, coiled-coil, dimerization, NRBF2, autophagy; HET: NO3, NH4, EDO, NA; 2.2A {Saccharomyces cerevisiae}
Probab=1.52 E-value=2.7e+03 Score=16.87 Aligned_cols=17 Identities=12% Similarity=0.040 Sum_probs=0.0 Template_Neff=5.100
Q query 12 KAAEHHEQAAKHHHAAA 28 (73)
Q Consensus 12 kAAeHHe~AA~HH~eAA 28 (73)
.|++-|.+|++.-.+|.
T Consensus 25 eAie~h~kAAe~l~~a~ 41 (226)
T 5KC1_F 25 NAKAKYQEAIEVLGPQN 41 (226)
T ss_dssp -----------------
T ss_pred HHHHHHHHHHHHHHHHH
Confidence 34444444444444443
No 7
>5KC1_J Autophagy-related protein 38; Atg38, coiled-coil, dimerization, NRBF2, autophagy; HET: NA, NO3, EDO, NH4; 2.2A {Saccharomyces cerevisiae}
Probab=1.52 E-value=2.7e+03 Score=16.87 Aligned_cols=17 Identities=12% Similarity=0.040 Sum_probs=0.0 Template_Neff=5.100
Q query 12 KAAEHHEQAAKHHHAAA 28 (73)
Q Consensus 12 kAAeHHe~AA~HH~eAA 28 (73)
.|++-|.+|++.-.+|.
T Consensus 25 eAie~h~kAAe~l~~a~ 41 (226)
T 5KC1_J 25 NAKAKYQEAIEVLGPQN 41 (226)
T ss_dssp -----------------
T ss_pred HHHHHHHHHHHHHHHHH
Confidence 34444444444444443
No 8
>3ZEE_A PARTITIONING DEFECTIVE 3 HOMOLOG; CELL CYCLE; 6.1A {RATTUS NORVEGICUS}
Probab=1.14 E-value=3.8e+03 Score=12.49 Aligned_cols=15 Identities=27% Similarity=0.237 Sum_probs=7.4 Template_Neff=7.600
Q query 58 AQAAKHDAEHHAPKP 72 (73)
Q Consensus 58 ~eAak~ha~~H~~kp 72 (73)
.+|.+.|....+.+|
T Consensus 30 ~~a~~Ry~~~~~~~~ 44 (84)
T 3ZEE_A 30 QQAVTRYRKAVAKDP 44 (84)
T ss_dssp HHHHHHHHHHHCSSS
T ss_pred HHHHHHHHHHcCCCc
Confidence 455555555544433
No 9
>4I6P_A Partitioning defective 3 homolog; PB1 like motif, DUF3534, Cell; 2.9A {Rattus norvegicus}
Probab=1.01 E-value=4.3e+03 Score=12.37 Aligned_cols=16 Identities=25% Similarity=0.206 Sum_probs=0.0 Template_Neff=7.500
Q query 57 AAQAAKHDAEHHAPKP 72 (73)
Q Consensus 57 a~eAak~ha~~H~~kp 72 (73)
+.+|.+.|....+.+|
T Consensus 32 ~~~a~~Ry~~~~~~~~ 47 (88)
T 4I6P_A 32 IQQAVTRYRKAVAKDP 47 (88)
T ss_dssp HHHHHHHHHHHHCCCT
T ss_pred HHHHHHHHHHHcCCCc
No 10
>2Q2K_A Hypothetical protein/DNA Complex; protein-DNA, partition, segregation, parB, DNA; HET: EPE; 3.0A {Staphylococcus aureus}
Probab=1.01 E-value=4.3e+03 Score=13.41 Aligned_cols=17 Identities=24% Similarity=0.343 Sum_probs=0.0 Template_Neff=1.100
Q query 56 HAAQAAKHDAEHHAPKP 72 (73)
Q Consensus 56 Ha~eAak~ha~~H~~kp 72 (73)
|-.||-+.|.++-|..|
T Consensus 54 hireal~ryiee~g~~p 70 (70)
T 2Q2K_A 54 HIREALRRYIEEIGENP 70 (70)
T ss_dssp HHHHHHHHHHHHCCHHC
T ss_pred HHHHHHHHHHHHHCCCC
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pickle
import shutil
import torch
import numpy as np
import unittest
from openfold.data.data_pipeline import DataPipeline
from openfold.data.templates import TemplateHitFeaturizer
from openfold.model.embedders import (
InputEmbedder,
RecyclingEmbedder,
TemplateAngleEmbedder,
TemplatePairEmbedder,
)
import tests.compare_utils as compare_utils
# The reference-implementation stack is optional: jax and haiku are imported
# only inside this branch, so the module can still be imported when
# compare_utils.alphafold_is_installed() is false.
# NOTE(review): `alphafold`, `jax` and `hk` are not referenced in the visible
# part of this file -- confirm whether they are still needed here.
if compare_utils.alphafold_is_installed():
    alphafold = compare_utils.import_alphafold()
    import jax
    import haiku as hk
class TestDataPipeline(unittest.TestCase):
    """Checks OpenFold's data pipeline against precomputed AlphaFold features."""

    @compare_utils.skip_unless_alphafold_installed()
    def test_fasta_compare(self):
        """Compare ``DataPipeline.process_fasta`` output with a pickled
        AlphaFold feature dictionary.

        The MSA and its deletion matrix are compared up to a permutation of
        rows; every other feature present in the AlphaFold dictionary must be
        element-wise equal to the OpenFold feature of the same name.
        """
        # AlphaFold runs the alignments and feature processing at the same
        # time, taking forever. As such, we precompute AlphaFold's features
        # using scripts/generate_alphafold_feature_dict.py and the default
        # databases.
        # NOTE: pickle.load can execute arbitrary code; this must only ever
        # point at the trusted fixture checked into the repository.
        with open("tests/test_data/alphafold_feature_dict.pickle", "rb") as fp:
            alphafold_feature_dict = pickle.load(fp)

        template_featurizer = TemplateHitFeaturizer(
            mmcif_dir="tests/test_data/mmcifs",
            max_template_date="2021-12-20",
            max_hits=20,
            kalign_binary_path=shutil.which("kalign"),
            _zero_center_positions=False,
        )

        data_pipeline = DataPipeline(
            template_featurizer=template_featurizer,
        )

        openfold_feature_dict = data_pipeline.process_fasta(
            "tests/test_data/short.fasta",
            "tests/test_data/alignments"
        )

        # Expose the OpenFold mask under the plural key as well, so that a
        # lookup by that name in the final loop below succeeds.
        openfold_feature_dict["template_all_atom_masks"] = (
            openfold_feature_dict["template_all_atom_mask"]
        )

        # Features that get a bespoke comparison and are skipped by the
        # generic equality loop at the end.
        checked = set()

        # AlphaFold and OpenFold process their MSAs in slightly different
        # orders, which we compensate for below.
        m_a = alphafold_feature_dict["msa"]
        m_o = openfold_feature_dict["msa"]

        # A row permutation cannot change the shape. Checking it first also
        # keeps the broadcasted comparison below well-defined.
        self.assertEqual(
            m_a.shape, m_o.shape, msg="MSA shapes differ"
        )

        # The first row of both MSAs should be the same, no matter what
        self.assertTrue(
            np.all(m_a[0, :] == m_o[0, :]),
            msg="first MSA row differs",
        )

        # Each row of each MSA should appear exactly once somewhere in its
        # counterpart. matching_rows[i, j] is True iff AlphaFold row i equals
        # OpenFold row j.
        matching_rows = np.all(
            (m_a[:, None, ...] == m_o[None, :, ...]), axis=-1
        )
        self.assertTrue(
            np.all(np.sum(matching_rows, axis=-1) == 1),
            msg="an AlphaFold MSA row does not match exactly one OpenFold row",
        )
        # The reverse direction: without it, surplus or duplicated OpenFold
        # rows would go unnoticed.
        self.assertTrue(
            np.all(np.sum(matching_rows, axis=0) == 1),
            msg="an OpenFold MSA row does not match exactly one AlphaFold row",
        )
        checked.add("msa")

        # The corresponding rows of the deletion matrix should also be equal.
        # matching_idx[i] is the OpenFold row index paired with AlphaFold
        # row i.
        matching_idx = np.argmax(matching_rows, axis=-1)
        rearranged_o_dmi = openfold_feature_dict["deletion_matrix_int"]
        rearranged_o_dmi = rearranged_o_dmi[matching_idx, :]
        self.assertTrue(
            np.all(
                alphafold_feature_dict["deletion_matrix_int"] ==
                rearranged_o_dmi
            ),
            msg="deletion_matrix_int differs after row re-ordering",
        )
        checked.add("deletion_matrix_int")

        # Remaining features have to be precisely equal. Each one runs in its
        # own subTest so that a single run reports every mismatching feature
        # instead of stopping at the first.
        for name, expected in alphafold_feature_dict.items():
            if name in checked:
                continue
            with self.subTest(feature=name):
                self.assertIn(name, openfold_feature_dict)
                self.assertTrue(
                    np.all(expected == openfold_feature_dict[name]),
                    msg="feature {!r} differs from the AlphaFold "
                        "reference".format(name),
                )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment