Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
e7eadc44
Unverified
Commit
e7eadc44
authored
Jul 16, 2023
by
Dingquan Yu
Committed by
GitHub
Jul 16, 2023
Browse files
Merge pull request #4 from dingquanyu/multimer-dataloader
created Multimer dataloader and datamodule classes
parents
585136e4
dbc0b085
Changes
123
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
217 additions
and
4 deletions
+217
-4
tests/test_data/alignments/5kc1_L/uniref90_hits.sto
tests/test_data/alignments/5kc1_L/uniref90_hits.sto
+135
-0
tests/test_multimer_datamodule.py
tests/test_multimer_datamodule.py
+80
-0
tests/test_permutation.py
tests/test_permutation.py
+2
-4
No files found.
tests/test_data/alignments/5kc1_L/uniref90_hits.sto
0 → 100644
View file @
e7eadc44
# STOCKHOLM 1.0
#=GS UniRef90_Q05789/1-226 DE [subseq from] Autophagy-related protein 38 n=9 Tax=Saccharomyces TaxID=4930 RepID=ATG38_YEAST
#=GS UniRef90_A0A0L8REU8/1-163 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit KXD1 n=2 Tax=Saccharomyces TaxID=4930 RepID=A0A0L8REU8_SACEU
#=GS UniRef90_J4TVV6/1-140 DE [subseq from] YLR211C-like protein n=2 Tax=Saccharomyces TaxID=4930 RepID=J4TVV6_SACK1
#=GS UniRef90_A0A4C2E9F3/1-218 DE [subseq from] AIP3 domain-containing protein n=1 Tax=Zygosaccharomyces mellis TaxID=42258 RepID=A0A4C2E9F3_9SACH
#=GS UniRef90_A0A7H9B602/1-223 DE [subseq from] TPR_REGION domain-containing protein n=1 Tax=Zygotorulaspora mrakii TaxID=42260 RepID=A0A7H9B602_ZYGMR
#=GS UniRef90_A0A1Q3A3G1/1-218 DE [subseq from] TPR_REGION domain-containing protein n=3 Tax=Zygosaccharomyces rouxii TaxID=4956 RepID=A0A1Q3A3G1_ZYGRO
#=GS UniRef90_G8ZRG8/11-233 DE [subseq from] TPR_REGION domain-containing protein n=1 Tax=Torulaspora delbrueckii (strain ATCC 10662 / CBS 1146 / NBRC 0425 / NCYC 2629 / NRRL Y-866) TaxID=1076872 RepID=G8ZRG8_TORDC
#=GS UniRef90_A0A8H2VE82/1-77 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania barnettii TaxID=61262 RepID=A0A8H2VE82_9SACH
#=GS UniRef90_A0A8H2VE82/111-251 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania barnettii TaxID=61262 RepID=A0A8H2VE82_9SACH
#=GS UniRef90_A0A1S7HZI1/1-209 DE [subseq from] YLR211C n=3 Tax=Zygosaccharomyces TaxID=4953 RepID=A0A1S7HZI1_9SACH
#=GS UniRef90_G0VIQ7/1-229 DE [subseq from] Autophagy-related protein 28 n=1 Tax=Naumovozyma castellii (strain ATCC 76901 / BCRC 22586 / CBS 4309 / NBRC 1992 / NRRL Y-12630) TaxID=1064592 RepID=G0VIQ7_NAUCC
#=GS UniRef90_G8JWZ7/8-217 DE [subseq from] WGS project CCBQ000000000 data, contig 00015 n=1 Tax=Eremothecium cymbalariae (strain CBS 270.75 / DBVPG 7215 / KCTC 17166 / NRRL Y-17582) TaxID=931890 RepID=G8JWZ7_ERECY
#=GS UniRef90_J7RGP5/1-232 DE [subseq from] Conserved protein n=1 Tax=Kazachstania naganishii (strain ATCC MYA-139 / BCRC 22969 / CBS 8797 / KCTC 17520 / NBRC 10181 / NCYC 3082 / Yp74L-3) TaxID=1071383 RepID=J7RGP5_KAZNA
#=GS UniRef90_I2H5W4/1-218 DE [subseq from] Uncharacterized protein n=1 Tax=Tetrapisispora blattae (strain ATCC 34711 / CBS 6284 / DSM 70876 / NBRC 10599 / NRRL Y-10934 / UCD 77-7) TaxID=1071380 RepID=I2H5W4_TETBL
#=GS UniRef90_A0A1X7R3T5/1-77 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania saulgeensis TaxID=1789683 RepID=A0A1X7R3T5_9SACH
#=GS UniRef90_A0A1X7R3T5/113-251 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania saulgeensis TaxID=1789683 RepID=A0A1X7R3T5_9SACH
#=GS UniRef90_Q6FNV0/1-224 DE [subseq from] Autophagy-related protein 14 n=2 Tax=Candida glabrata TaxID=5478 RepID=Q6FNV0_CANGA
#=GS UniRef90_A0A7G3ZDQ2/21-153 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit KXD1 n=1 Tax=Torulaspora globosa TaxID=48254 RepID=A0A7G3ZDQ2_9SACH
#=GS UniRef90_G0W9C3/1-91 DE [subseq from] t-SNARE coiled-coil homology domain-containing protein n=1 Tax=Naumovozyma dairenensis (strain ATCC 10597 / BCRC 20456 / CBS 421 / NBRC 0211 / NRRL Y-12639) TaxID=1071378 RepID=G0W9C3_NAUDC
#=GS UniRef90_G0W9C3/135-265 DE [subseq from] t-SNARE coiled-coil homology domain-containing protein n=1 Tax=Naumovozyma dairenensis (strain ATCC 10597 / BCRC 20456 / CBS 421 / NBRC 0211 / NRRL Y-12639) TaxID=1071378 RepID=G0W9C3_NAUDC
#=GS UniRef90_W0TGX9/3-203 DE [subseq from] Uncharacterized protein YLR211C n=1 Tax=Kluyveromyces marxianus (strain DMKU3-1042 / BCC 29191 / NBRC 104275) TaxID=1003335 RepID=W0TGX9_KLUMD
#=GS UniRef90_A0A109UYX3/3-207 DE [subseq from] HDL254Cp n=1 Tax=Eremothecium sinecaudum TaxID=45286 RepID=A0A109UYX3_9SACH
#=GS UniRef90_A0A7H9HPF6/24-146 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit BLI1 n=1 Tax=Torulaspora sp. CBS 2947 TaxID=2792677 RepID=A0A7H9HPF6_9SACH
#=GS UniRef90_A0A0A8L4X0/3-203 DE [subseq from] WGS project CCBQ000000000 data, contig 00043 n=1 Tax=Kluyveromyces dobzhanskii CBS 2104 TaxID=1427455 RepID=A0A0A8L4X0_9SACH
#=GS UniRef90_Q6CT11/5-203 DE [subseq from] KLLA0C16291p n=2 Tax=Kluyveromyces lactis TaxID=28985 RepID=Q6CT11_KLULA
#=GS UniRef90_Q75A45/4-157 DE [subseq from] ADR074Cp n=2 Tax=Eremothecium TaxID=33170 RepID=Q75A45_ASHGO
#=GS UniRef90_A0A0P1KV75/3-79 DE [subseq from] LAQU0S08e01640g1_1 n=1 Tax=Lachancea quebecensis TaxID=1654605 RepID=A0A0P1KV75_9SACH
#=GS UniRef90_A0A0P1KV75/92-192 DE [subseq from] LAQU0S08e01640g1_1 n=1 Tax=Lachancea quebecensis TaxID=1654605 RepID=A0A0P1KV75_9SACH
#=GS UniRef90_A7TPA7/1-172 DE [subseq from] Protein of centriole 5 n=1 Tax=Vanderwaltozyma polyspora (strain ATCC 22028 / DSM 70294 / BCRC 21397 / CBS 2163 / NBRC 10782 / NRRL Y-8283 / UCD 57-17) TaxID=436907 RepID=A7TPA7_VANPO
#=GS UniRef90_G8BYP4/3-115 DE [subseq from] MIT domain-containing protein n=1 Tax=Tetrapisispora phaffii (strain ATCC 24235 / CBS 4417 / NBRC 1672 / NRRL Y-8282 / UCD 70-5) TaxID=1071381 RepID=G8BYP4_TETPH
#=GS UniRef90_G8BYP4/107-262 DE [subseq from] MIT domain-containing protein n=1 Tax=Tetrapisispora phaffii (strain ATCC 24235 / CBS 4417 / NBRC 1672 / NRRL Y-8282 / UCD 70-5) TaxID=1071381 RepID=G8BYP4_TETPH
#=GS UniRef90_H2B2A0/1-223 DE [subseq from] MIT domain-containing protein n=1 Tax=Kazachstania africana (strain ATCC 22294 / BCRC 22015 / CBS 2517 / CECT 1963 / NBRC 1671 / NRRL Y-8276) TaxID=1071382 RepID=H2B2A0_KAZAF
#=GS UniRef90_C5E235/17-63 DE [subseq from] KLTH0H01826p n=1 Tax=Lachancea thermotolerans (strain ATCC 56472 / CBS 6340 / NRRL Y-8284) TaxID=559295 RepID=C5E235_LACTC
#=GS UniRef90_C5E235/77-177 DE [subseq from] KLTH0H01826p n=1 Tax=Lachancea thermotolerans (strain ATCC 56472 / CBS 6340 / NRRL Y-8284) TaxID=559295 RepID=C5E235_LACTC
#=GS UniRef90_A0A1G4K626/3-95 DE [subseq from] LAFA_0G22980g1_1 n=1 Tax=Lachancea sp. CBS 6924 TaxID=433476 RepID=A0A1G4K626_9SACH
#=GS UniRef90_A0A1G4K626/86-193 DE [subseq from] LAFA_0G22980g1_1 n=1 Tax=Lachancea sp. CBS 6924 TaxID=433476 RepID=A0A1G4K626_9SACH
#=GS UniRef90_A0A0C7MRN9/1-74 DE [subseq from] LALA0S05e08306g1_1 n=1 Tax=Lachancea lanzarotensis TaxID=1245769 RepID=A0A0C7MRN9_9SACH
#=GS UniRef90_A0A0C7MRN9/136-192 DE [subseq from] LALA0S05e08306g1_1 n=1 Tax=Lachancea lanzarotensis TaxID=1245769 RepID=A0A0C7MRN9_9SACH
#=GS UniRef90_A0A1G4JQ11/94-179 DE [subseq from] LANO_0E01750g1_1 n=1 Tax=Lachancea nothofagi CBS 11611 TaxID=1266666 RepID=A0A1G4JQ11_9SACH
#=GS UniRef90_A0A1G4JSN1/17-210 DE [subseq from] LAMI_0E15368g1_1 n=1 Tax=Lachancea mirantina TaxID=1230905 RepID=A0A1G4JSN1_9SACH
#=GS UniRef90_A0A061AL66/205-266 DE [subseq from] CYFA0S01e18734g1_1 n=2 Tax=Cyberlindnera fabianii TaxID=36022 RepID=A0A061AL66_CYBFA
#=GS UniRef90_A0A642V060/155-278 DE [subseq from] Protein phosphatase 1 regulatory subunit 12B n=1 Tax=Trichomonascus ciferrii TaxID=44093 RepID=A0A642V060_9ASCO
query MSTLAEVYTIIEDAEQECRKGDFTNAKAKYQEAIEVLGP--QNEN---LS---QN--KLSSDVTQAIDLLKQDITAKIQELELL---I-----EKQSS-EE-NN--I-G-MVNNN---M--LIGSVILNN--KSPIN-GIS--NA-RN-WD-N-P-AY--Q-D-T----LSPI----NDPLLMSIL-NRL--QFNL---N
UniRef90_Q05789/1-226 MSTLAEVYTIIEDAEQECRKGDFTNAKAKYQEAIEVLGP--QNEN---LS---QN--KLSSDVTQAIDLLKQDITAKIQELELL---I-----EKQSS-EE-NN--I-G-MVNNN---M--LIGSVILNN--KSPIN-GIS--NA-RN-WD-N-P-AY--Q-D-T----LSPI----NDPLLMSIL-NRL--QFNL---N
UniRef90_A0A0L8REU8/1-163 -------------------------------------------------------------------------MTAKIQELELL---I-----DKRSP-EE-NS--I-G-MLNNN---M--LIGSVILNN--KTSIN-GVG--NT-RN-WD-N-S-IY--Q-N-S----LNPL----NDPILISIL-NRL--QFNL---N
UniRef90_J4TVV6/1-140 --------------------------------------------------------------------------------------------------------------MVKNN---M--LIGTVILNN--KASIN-GVG--NT-RN-WD-N-S-VY--Q-D-T----LNPI----NDPVLISIL-NRL--QSNL---N
UniRef90_A0A4C2E9F3/1-218 MSQLIEVYTCIDDAENKTRKGNLTESLSCYKRAMELLN------G---IG---CQ--GVSAEIIHAIQLLRQDIDARIKELESL---I-----EDQKP-VSTTA--V-G-AVTKN--GS--LTNSTISN-----------A--KT-RN-WD-N-P-RSLGS-S-V----MGPS----GDPLLASIF-GKL--QVNL---V
UniRef90_A0A7H9B602/1-223 MKKLAEVYSNIDGAEQQSRKGDYLGAIKEYKKALDILDR--SGKS---SE---EQDVGLSHEVTRALDLLQDDIQAKIRELESL---V-----EVQRP-EE-SK---------NS---S--TVGSLW-NA--SSMSNQTVV--KT-RT-LE-G---SL--N-G-T----MGLM----MDPLLVSLI-NKL--QVNF---I
UniRef90_A0A1Q3A3G1/1-218 MSQLIEVYNCIDGAENKTRKGDLSESLSSYKHAIELLN------G---LE---CQ--GVSLEIIHAIQLLRQDIDARTKELESL---I-----EGQKP-IS-A---A-A-VAAAV---A--KNGSLA------SSTG-STA--KT-RG-WD-S-P-RN--L-N-S---PAMGP----GDPLLASIF-GKL--QANL---V
UniRef90_G8ZRG8/11-233 MTSLGEAYKYIDGAEQESRNGNLSEALEKYRHALDNLIV--KEEG---AE---EV--RLGPEVVEAIKLLRQDINDRVRDIEAL---V-----ELQRP-T----------TARST---S--LSGTILTNL--NSSSQ-NVA--SV-RP-WD-V-V-RG--G-N-P----AAGM---QVDPLLLKIL-NKL--QNEI---I
UniRef90_A0A8H2VE82/1-77 MSTLCNVYKLIETAEQETKRGNLQNSIIYYKETLKEINE--ITDN---IE---ES--GLSNNVIEAVQLLRKDVSQTIYDIQNV---L-----HV---------------------------------------------------------------------------------------------------------
UniRef90_A0A8H2VE82/111-251 ------------------------------------------------------------------------------------------------------N-------GINQN---F---LGSVYLRM--NPSVM-QPG--GA-RM-WE-N-I-IS--N-D-K----LIPN----NDPLFLGII-NKL--QSNV---I
UniRef90_A0A1S7HZI1/1-209 MSQLIDVYNSINTAEKQTRNGDLPEALKQYKHALALLA------N---LK---CQ--GTSSEIVHALELLRQDIDSRIKELESL---Q-----ERRNP-MT-PK--N-G-AVRNS---S------V------------SLA--KT-RS-WD-N-S-RN--MGS-G----LGSS----SDPLLISIL-GKL--QSNL---I
UniRef90_G0VIQ7/1-229 MSSLKEVYDLISQAELETRNGNLNVSINKYKNALSKTNC--LLKM---LK---SE--DVENDVTDAILMLRKDISKTIFELEDL---V-----SKQRP--D-SK--V-G-TVKNP---T--MLSSLAINP--SMNVL-GF----S-KT-WD-S-T-VN--T-N-TNLERINPY----NDPILRSIT-DKL--QTNL---L
UniRef90_G8JWZ7/8-217 --TSVQIYEHIEIAEQHCIERNYQNASKEYEHVLEQLES--LVKD---LA--------LNGDLKRAIMLLKEDIELKVKELEQW---D-----QRKQP-TP-A---------NTT---Q-----GLSTNN--KSS--------PG-RV-ID-M-N-MH--L-N-N----ANPI----TDPFLASII-NKL--QTNI---L
UniRef90_J7RGP5/1-232 MSHLRQTYQLIENAEQQIKRGNLNESLKYYRQSVNEINK--VILR---LNSEHPD--EVNDEVIESIEILKRDVSQTMFDLENF---I-----KAQRA-VS-KGSTVKN-SINMN------MMGSMLLSI--KPSMN-NVN--RP-SP-TSEQ-S-GD--G-E-G----NNFV----SDPILTGIL-NKL--QTNLFALT
UniRef90_I2H5W4/1-218 MSDLNEIYDLIRNAEQLTQRNDLLGALKKYREVETTIGSCKRKQK---LQ---GN-SDLDESVIEAIELLQEDISLRIREIESL---T-----GNQR------P--I-S-VGNNS---K--AM--SLLNS--WLPNN-NNS--II-NG-ID---------F-N-K----SSMV----TDPLLISII-DKL--KINI---L
UniRef90_A0A1X7R3T5/1-77 MPTLCNVYKLIETAEQETKRGNLQNSIIYYKETLKEINE--ITDK---IE---ES--GLSKNVIEAVQLLRKDISQTIYDIQNV---L-----PT---------------------------------------------------------------------------------------------------------
UniRef90_A0A1X7R3T5/113-251 ---------------------------------------------------------------------------------------------------------------INQN---F---LGSVYLRM--NPSVM-QPG--GA-KM-WE-T-S-MN--N-D-K----LIPN----NDPLFLGIV-NKL--QSNI---I
UniRef90_Q6FNV0/1-224 MSDLNTVYEHIRRAEQKCRSGDLSDALDLYVSALEGLDL--KSPN---FD---LQ--GLDDTIIEAVKLLRDDIQLRIKELQIC---V-----IESSK-DE-KD--S-----KNN---K--ASMALVQSS--AGNNN-NLA--AS-KY-WD-S-T-RN-IT-DIT----SSTF----IDPYTSSML-SKL--QNSM---V
UniRef90_A0A7G3ZDQ2/21-153 ----------------------------------------------------------------------------------------------------------------------------STLLGM--NSSMQ-PMS--KT-RS-WD-G-GRSG--N-D-T----TGLQ----TEPFLGRIL-NKL--QSNL---T
UniRef90_G0W9C3/1-91 MTTLNDIYELIDDAEEESRKGNITNSVRKYKETVKSLEE--LIKS---TK---RE--DINNEILVALDMLKKDISKTICELDKL---LT---IQRQQQQQQ-QH--N-G-QV----------------------------------------------------------------------------------------
UniRef90_G0W9C3/135-265 -------------------------------------------------------------------------------------------------------------------------------NYN--NAMTN-SMN--NA-NVERI-N-P-FN--D-V-V----IRTI----TDNLKMNLL-ESIKGQYHL---L
UniRef90_W0TGX9/3-203 -SELNNVFECIEDAESFIKSNKVVVAIQEYRKAIKQLDY--INET---------E--DLPENVQYAVTLLHDDILLRVKELGVL---Q-----EVQSN-SE-SS--E-S-GS--N---S--SISRFV-------------S--DG-SL-YP-N-G-----N-S-------VLI----SDPLLLSIT-SKL--ENNV---M
UniRef90_A0A109UYX3/3-207 --DLINVYAHIEAGEQYSRDSNYLGTVKEYNKALEKILQ--LEDS---VE---VN--E---GLKDAIGLLKQDLLVKIKELQHL---Q-----QKRPN-NA-AN--S-A-TVTAV---M--NSGTV--------RVQ-GSV--NE----GG-N-----------S----I-SI----SDPFLASIV-NKL--HMNI---L
UniRef90_A0A7H9HPF6/24-146 ------------------------------------------------------------------------------------------------------------------------------------NSSMQ-GMS--KT-RS-WD-G-R-NG--N-E-T----SGLQ----MEPFLGRIL-SKL--QSNL---T
UniRef90_A0A0A8L4X0/3-203 -SELNNVYETIDDAESYIKSNKISPAIEEFEKASKQLDI--IN------S---ME--SLPGNIHNAIVLLREDIDIRVKELGML---Q-----EAQST-SE-SS------EVGSN---S--SLSRFVTDG--SLYHN-G-----------------------N-S----LS-L----ADPLLLSIT-SKL--ENN-----
UniRef90_Q6CT11/5-203 ---LNNVYETIDDAESYIKRNKLSRAIELFENASKQLDA--IN------S---ME--SLPENIHNAIVLLREDIDARVKELGML---Q-----EAQST-SE-SS------EIGSN---S--SLSRFMMDG--SFYHN-G-------------N-S-L--------------FL----TDPLLSSIT-SKL--ENNV---I
UniRef90_Q75A45/4-157 ---------------------------------------------------------------------LREDIQIKIKELEQW---E-----LRQPS-AG-LG--A-G-LSPNR--------------K--NSPVR-GPE--AT-----------SH--L-S-N----GVPI----ADPFLASII-NKL--QTNI---L
UniRef90_A0A0P1KV75/3-79 --DLTEVYKLIESAEVQLRDRKFSESIETYRKSVEAIDK--LETQ---HE---SV--AVVGDVEQALTLLKKDLDRKIQEIKVL---G-----K-LSA------------------------------------------------------------------------------------------------------
UniRef90_A0A0P1KV75/92-192 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------QEPLMTYRL-SGF--QNNI---L
UniRef90_A7TPA7/1-172 --------------------------------------------------------------------MLKSDINLRVRELDTL---I-----TLKSS--S-NG--L-K-ASRNL---P--MMSSVILNN--NSSTN-PAE--SL----WN-N-N-NC--G-A---------N----LDPFLNSML-NKF--QHNL---E
UniRef90_G8BYP4/3-115 --ELLKVYELINNAERESQKGLYAKARSIYEEILDYILD--DNRNAITLD---LQ--KVGSKVGEAVELLVEDVKLRIRELDTL---IGIRNLQKPSS-EH-KE--L-NNSYNNN---S--LQKDGENSN--ITPLN---------------------------------------------------------------
UniRef90_G8BYP4/107-262 ---------------------------------------------------------------------------------------------------EN-SN--I-T-PLNKRKQYN--MLNSVILNN--GSIIN-PGE--SI----WN-I-N---------S----NKLI----ADPSLISIF-NKF--QSNL---T
UniRef90_H2B2A0/1-223 MITLKDIYILIENAEQQSRKGKVKKAISIYKVACKEIDQ--LLGS---IE---DD--GVDQDIITAVALLKRNIMQTVRDLENF---L-----RRTNF-DF-LN--S-D-SIANN---TPPSTANTIPSNLRNDNLN-SMF--QSTKL-MT-NSL-AM--W-E-Q----DQPIDNFSSDPFLITLL-NRL--QNTL---L
UniRef90_C5E235/17-63 ----------------------------KSAEAINQLEI--QHKS---IA---N-----VGDLKQALNLLKKDLNRKIREIQIL---G------KFS-------------------------------------------------------------------------------------------------------
UniRef90_C5E235/77-177 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HEPMAAYRL-SGF--QNNI---L
UniRef90_A0A1G4K626/3-95 --ELKDIYELIANAELKVREHNFEESINVYLETVKLIEE-FESKN---KG---FD--Q-VEDVKTAIELLKIDIESKVWELQQLNLRA-----KTPAA-KP-DN--V-A-AVKNP---M---------------------------------------------------------------------------------
UniRef90_A0A1G4K626/86-193 ------------------------------------------------------------------------------------------------------------------------------------------------------------------D-N----VAAV----KNPMTTTESPQTF--VENV---L
UniRef90_A0A0C7MRN9/1-74 MSELKDIYELIANAELKVREHNFEESINVYQETVKLIEE--LESK---NE---GF--KQIEDVKTAIDLLKIDIEGKISELQQL--------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0C7MRN9/136-192 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1G4JQ11/94-179 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NI---L
UniRef90_A0A1G4JSN1/17-210 ----------LEAAQLLLRDRRYDKATIMYERALKSTQT--LQEI---SE---KE--QDSEHVRQALSLVGDDIQNRVKELKLL---------E-QAA-QE-----V-A-QDSQN---Q--S-------D--RSEIT-AMTALRE-KS-WT-D-S-NF--A----------TV----IETYRTSLL-NLL--KVQL---E
UniRef90_A0A061AL66/205-266 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A642V060/155-278 ---------------------------------------------------------------------------------------------------------------------------------S--KYPIG-SMS--SA-GT-SE-S-FYVV--P-N-T----SSLT----YEELITENA-SLR--QLIN---K
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx..xxxx...xx...xx..xxxxxxxxxxxxxxxxxxxxxxxxxxx...x.....xxxxx.xx.xx..x.x.xxxxx...x..xxxxxxxxx..xxxxx.xxx..xx.xx.xx.x.x.xx..x.x.x....xxxx....xxxxxxxxx.xxx..xxxx...x
query N----D-I--QLKT----E--G-GKN----S--K-N-S-E-----------M-----K------INL-RLEQFKKELVLYEQKKFKEYGMKIDEITKENKKLANEIGRLRERWDSLVESAKQRRDKQKN
UniRef90_Q05789/1-226 N----D-I--QLKT----E--G-GKN----S--K-N-S-E-----------M-----K------INL-RLEQFKKELVLYEQKKFKEYGMKIDEITKENKKLANEIGRLRERWDSLVESAKQRRDKQKN
UniRef90_A0A0L8REU8/1-163 N----D-M--ESKV----K--G-VKN----P--N-N-L-E-----------M-----K------TNL-RLEQFKKELVLYEQNKFKEYGMKIDQIAKENKKLSNEIGRLRERWDSLVESAKQRRDKQRN
UniRef90_J4TVV6/1-140 N----D-I--QLKV----E--G-EKI----P--K-N-S-E-----------M-----K------INL-RLEQFMKELVLYEEKKFKEYGIKIDQVTKENKKLSNEIGRLRERWDSLVESAKQRRDKQQN
UniRef90_A0A4C2E9F3/1-218 N----S-VRNQFKE----E--D-PHV----V--N-E-L-D-----------N-----N------VRQ-QLVRFKKELGLYEQKKSKDYNIRLEQVINENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_A0A7H9B602/1-223 N----A-VSEELKR----T--E-SHD----I--Q-S-I-E-----------S-----Q------VKQ-QIGRFKKELGMYEQKKIKEYNLRLDQAIKENKKLSNQIVKLRERWDSLVESAKQRRNKKQ-
UniRef90_A0A1Q3A3G1/1-218 N----S-ICEQFKE----E--D-PHI----V--G-E-V-E-----------N-----R------VRQ-QLVRFKKELGLYEQKKSKDYSVRFEQAINENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_G8ZRG8/11-233 A----K-L--EEKIV--EK--D-YHK----G--Q-S-I-E-----------S-----I------VNQ-SLMQFGKDLAIYEQKNYKEFNARLEKANNENKKLSNQIVKLRERWDSLVESAKQRRTRQQ-
UniRef90_A0A8H2VE82/1-77 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A8H2VE82/111-251 L----S-V--SNQTKDIQQ--N-EEG----T--Q-S-I-E-----------E-----K------IQQ-HVEQFRKEVSWYEQKKYEEYENKIQEIERENRKLNLQVDRLKQRWDSLVESAKERKKK---
UniRef90_A0A1S7HZI1/1-209 N----S-I--SEQC---KE--D-PQV----L--K-E-L-D-----------G-----R------IAQ-QFSQFRKELALYEQKKSKDYNARLEQAIAENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_G0VIQ7/1-229 NLVSDN-V--QVFQ----K--G-DKE----EFLQ-T-I-T-----------F-----A------VEQ-NFDIFRKELGFYEQKKFTEYDSNLENALKENKKLTNQISKLKERWDSLVESARQKKK----
UniRef90_G8JWZ7/8-217 Q----V-L--TQQF----A--GEAKT----A--G-Q-K-E-----------VS---CL------VTQ-QIAQFQKEIAIFEQRKFREYDTKMDQLIKENKKLSNQVVRLKDRWDSLVESAKQKRNQQE-
UniRef90_J7RGP5/1-232 A----D-T--KGVG----V--D-GRG----T--K-N-A-N-----------L-----E------VSH-HIEQFKRELSWYEQKKFSEYDSRLERTRKENRKLLQEVEKLKDRWNNLVESAKQRRNR---
UniRef90_I2H5W4/1-218 M----K-L--NDEL----E--G-EKT----E--A-G-SRE-----------F-----N------ITQ-QFNQFNKELLMYEQKKFNEYNLNLEQLAKENRKLSKQIIKLKERWDSLVASAKEKRNRQ--
UniRef90_A0A1X7R3T5/1-77 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1X7R3T5/113-251 M----S-T--SNQF----K--N-GQHTVRGT--Q-A-I-E-----------E-----E------IQQ-HVEQFRKEVSWYEQKRFEEYENRMKELEMENKKLNLQVDRLKQRWDSLVESAKERKKK---
UniRef90_Q6FNV0/1-224 -----D-L--IKEA----K--N-QKT----D--V-N-D-L-----------V-----S------TALFQIDQFKKEMLIYEQRRTREYQIKVEHLNKDVKKLSSQNSKLKERWDSLVESARQRKNRQQ-
UniRef90_A0A7G3ZDQ2/21-153 L----R-IEERMKD----K--D-GKN----G--A-S-V-E-----------A-----I------VNQ-QLVQFRKDLAVYEQRKVREYTSRLEQATKENRKLSNQIVRLRERWDSLVESAKQRRIRQQ-
UniRef90_G0W9C3/1-91 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_G0W9C3/135-265 N----GNAKDERKN----D--E-DEF----I--K-Q-L-N-----------T-----T------FES-QFDMFRKELGFYEQKKFSEYDTNLDNLIKENKKLLNQIVKLRERWDSLVESAKQRRNK---
UniRef90_W0TGX9/3-203 R----L-I--NASE----D--P-GSV----S--K-T-E--------------------------IMQ-QFSQFKRELTVYEQKKSKDYEGKMEQVIKENKKLSNQVNRLKERWDSLVESAKQKRNQQ--
UniRef90_A0A109UYX3/3-207 Q----S-L--SQLS----G--I-QVD----K--T-E-L-E-----------Q-----L------LMY-QIKNLEKEIALFEQRKFREYDSKMEQLIKENKRLSNQVLRLKDRWDSLVESARQKRNQQ--
UniRef90_A0A7H9HPF6/24-146 T----R-I--EERI----K----DKD----G--V-S-V-E-----------S-----I------VNQ-QLVQFRKDLAVYEQRKIREYTSRLEQANKENRKLSHQIVRLRERWDSLVESAKQRRIRQQ-
UniRef90_A0A0A8L4X0/3-203 -----I-V--RLIT----S--T--QT----D--K-V-N-K-----------T-----D------VVQ-QFAQYRRELTMYEQKKSKDYEARLEQIMKENKKLLNQVNRLKDRWDSLVESAKQKRNQQQ-
UniRef90_Q6CT11/5-203 R----S-I--TSKQ-------T-DKV----I--K-N---------------------E------VAQ-QFAQFRRELSVYEQKKSRDYEAKSEQVMKENKKLLNQVNRLKERWDSLVESAKQKRNQQQ-
UniRef90_Q75A45/4-157 Q----T-L--SQRL----A--GEGKP----M-GKQE-L-E-----------A-----V------VSP-QMTQFQKEMTVFEQRKFREYDSKMDQLLKENRKLSNQVIRLKDRWDSLVESAKQKRNQQE-
UniRef90_A0A0P1KV75/3-79 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0P1KV75/92-192 A----I-V--RGKT----DPKS-GVS----A--S-E-I-E-----------N-----S------ISK-EVSQLFKSFAFIDQQKFKEYDNKVEQLVRENRKLTGQIAKLKERWDSLVESARQKRNQQN-
UniRef90_A7TPA7/1-172 L----S-L--MDTL----K--S-GSN----S--S-NNN-DKKVNHNVQDVSL-----K------ISE-QMSQFKKELRIYEQQKCKEFQLRMEQALAENKKLSNQIVKLRERWDSLVESAKQKRNRQR-
UniRef90_G8BYP4/3-115 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_G8BYP4/107-262 K----S-LSDQLLN----S--N-EMN----D--S-N-S-KT--------TSIVAQQIKYIVAKAIED-ELSSFEKELCVYENKKCKEYQIKLNRSSEENKRLNKQIMKLRERWDGLVESAKQKKLR---
UniRef90_H2B2A0/1-223 -----D-I---------------TKT----S--D-H-V-E-----------E-----D------VSQ-QFQLFKRDLIWYEQKKFSDFNNHIKKMNDEKKKLENQITRQKELWEGLVENVKAK------
UniRef90_C5E235/17-63 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_C5E235/77-177 A----I-V--RGKT----DPKS-GVS----A--S-E-I-E-----------N-----S------ISK-EVSQLFKSFALIDQQKFKEYDNKVEQLVRENRKLTGQIVKLKERWDSLVESARQKRNQQN-
UniRef90_A0A1G4K626/3-95 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1G4K626/86-193 N----L-V--RSKT----ELKP-GAS----V--H-D-L-E-----------T-----G------IAA-ETTQLLRGLSWVDQQRSKEYEARIEELCTENKQLTTQIHKLKERWDSLVESARQKRNQQN-
UniRef90_A0A0C7MRN9/1-74 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0C7MRN9/136-192 ----------------------------------------------------------------------AQLLQGLSWVGQQRSKEYEAKIEQMCAENKQLTTQIHKLKERWESLVESARQKRNQQ--
UniRef90_A0A1G4JQ11/94-179 N----A-V--RAKT----DLKP-GIS----L--Y-E-L-E-----------N-----S------VSR-ELSQLLQGVSLVDQQKFKEYEFKIEQLHRENKQLTSQINKLKERWDSLVESARLKRNLQ--
UniRef90_A0A1G4JSN1/17-210 T----D-S--RFST-----------------------L-E-----------D-----I------VNQ-NVISLLSDMGMLEQRRVAQYEAKVEHLARENKKMSNQIVKLKERWGSLVESARQKRKQEK-
UniRef90_A0A061AL66/205-266 -----------------------------------------------------------------KH-YLSTLKREINQQESQIKKDYENKIEQLYKDNKRLEQQVGSLKSRWDALVESAKKRREDQK-
UniRef90_A0A642V060/155-278 T----S-I--QLQA----H--E-IAS----R--K-Q-K-D-----------------A------IKN-GLIQLKNELTAKENARNKEHDAELEKLKGENDKLKIQIGRLKSRWDELKESARKRREDE--
#=GC RF x....x.x..xxxx....x..x.xxx....x..x.x.x.x...........x.....x......xxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
tests/test_multimer_datamodule.py
0 → 100644
View file @
e7eadc44
# Copyright 2021 AlQuraishi Laboratory
# Dingquan Yu @ EMBL-Hamburg Kosinski group
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
pathlib
import
Path
import
shutil
import
pickle
import
torch
import
torch.nn
as
nn
import
numpy
as
np
from
functools
import
partial
import
unittest
from
openfold.utils.tensor_utils
import
tensor_tree_map
from
openfold.config
import
model_config
from
openfold.data.data_modules
import
OpenFoldMultimerDataModule
,
OpenFoldDataModule
from
openfold.model.model
import
AlphaFold
from
openfold.utils.loss
import
AlphaFoldMultimerLoss
from
tests.config
import
consts
import
logging
logger
=
logging
.
getLogger
(
__name__
)
import
os
class TestMultimerDataModule(unittest.TestCase):
    """Exercise the multimer data module end to end.

    One training sample is loaded through ``OpenFoldMultimerDataModule``,
    pushed through an ``AlphaFold`` model, and handed to
    ``AlphaFoldMultimerLoss``.
    """

    def setUp(self):
        """Build the data module, the model, and the multimer loss.

        The data pipeline is configured from the ``model_1_multimer_v3``
        preset; the model and loss are configured from ``consts.model``.
        """
        self.config = model_config(
            "model_1_multimer_v3",
            train=True,
            low_prec=True,
        )

        # Fixture paths are resolved against the current working directory.
        working_dir = os.getcwd()

        def under_cwd(relative_path):
            return os.path.join(working_dir, relative_path)

        release_dates_cache = under_cwd("tests/test_data/mmcif_cache.json")
        mmcif_dir = under_cwd("tests/test_data/mmcifs")
        alignment_dir = under_cwd("tests/test_data/alignments/")
        mmcif_data_cache = under_cwd("tests/test_data/train_mmcifs_cache.json")
        chain_data_cache = under_cwd(
            "tests/test_data/train_chain_data_cache.json"
        )

        self.data_module = OpenFoldMultimerDataModule(
            config=self.config.data,
            batch_seed=42,
            train_epoch_len=100,
            # NOTE(review): absolute, site-specific template directory.
            template_mmcif_dir="/g/alphafold/AlphaFold_DBs/2.3.0/pdb_mmcif/mmcif_files/",
            template_release_dates_cache_path=release_dates_cache,
            max_template_date="2500-01-01",
            train_data_dir=mmcif_dir,
            train_alignment_dir=alignment_dir,
            # shutil.which returns None when kalign is not on PATH.
            kalign_binary_path=shutil.which('kalign'),
            train_mmcif_data_cache_path=mmcif_data_cache,
            train_chain_data_cache_path=chain_data_cache,
        )

        # Model / loss configuration.
        model_cfg = model_config(consts.model, train=True)
        self.c = model_cfg
        # Original author's note: this value needs to be overwritten in the
        # multimer loss config.
        model_cfg.loss.masked_msa.num_classes = 22
        # A handful of Evoformer blocks is enough for this test.
        model_cfg.model.evoformer_stack.no_blocks = 4
        # Original author's note: avoids having to set up deepspeed here.
        model_cfg.model.evoformer_stack.blocks_per_ckpt = None

        self.model = AlphaFold(model_cfg)
        self.multimer_loss = AlphaFoldMultimerLoss(model_cfg.loss)

    def testPrepareData(self):
        # Prepare the data module, then fetch the sample at index 1.
        data_module = self.data_module
        data_module.prepare_data()
        data_module.setup()

        features, labels = data_module.train_dataset[1]

        def prepend_batch_dim(tensor):
            return tensor.unsqueeze(0)

        # Insert a new leading dimension on every tensor in the feature tree.
        features = tensor_tree_map(prepend_batch_dim, features)

        # NOTE(review): the source layout was flattened, so the loss call is
        # assumed to sit inside the no_grad block -- confirm against the repo.
        with torch.no_grad():
            out = self.model(features)
            self.multimer_loss(out, (features, labels))
\ No newline at end of file
tests/test_permutation.py
View file @
e7eadc44
# Copyright 2021 AlQuraishi Laboratory
#
#
Dingquan Yu @ EMBL-Hamburg Kosinski group
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
...
...
@@ -27,14 +27,12 @@ from tests.config import consts
import
logging
logger
=
logging
.
getLogger
(
__name__
)
import
os
import
io
,
contextlib
from
tests.data_utils
import
(
random_template_feats
,
random_extra_msa_feats
,
random_affines_vector
,
random_affines_4x4
random_affines_vector
)
from
openfold.utils.rigid_utils
import
(
Rotation
,
Rigid
,
)
...
...
Prev
1
…
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment