Unverified Commit e7eadc44 authored by Dingquan Yu's avatar Dingquan Yu Committed by GitHub
Browse files

Merge pull request #4 from dingquanyu/multimer-dataloader

created Multimer dataloader and datamodule classes
parents 585136e4 dbc0b085
# STOCKHOLM 1.0
#=GS UniRef90_Q05789/1-226 DE [subseq from] Autophagy-related protein 38 n=9 Tax=Saccharomyces TaxID=4930 RepID=ATG38_YEAST
#=GS UniRef90_A0A0L8REU8/1-163 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit KXD1 n=2 Tax=Saccharomyces TaxID=4930 RepID=A0A0L8REU8_SACEU
#=GS UniRef90_J4TVV6/1-140 DE [subseq from] YLR211C-like protein n=2 Tax=Saccharomyces TaxID=4930 RepID=J4TVV6_SACK1
#=GS UniRef90_A0A4C2E9F3/1-218 DE [subseq from] AIP3 domain-containing protein n=1 Tax=Zygosaccharomyces mellis TaxID=42258 RepID=A0A4C2E9F3_9SACH
#=GS UniRef90_A0A7H9B602/1-223 DE [subseq from] TPR_REGION domain-containing protein n=1 Tax=Zygotorulaspora mrakii TaxID=42260 RepID=A0A7H9B602_ZYGMR
#=GS UniRef90_A0A1Q3A3G1/1-218 DE [subseq from] TPR_REGION domain-containing protein n=3 Tax=Zygosaccharomyces rouxii TaxID=4956 RepID=A0A1Q3A3G1_ZYGRO
#=GS UniRef90_G8ZRG8/11-233 DE [subseq from] TPR_REGION domain-containing protein n=1 Tax=Torulaspora delbrueckii (strain ATCC 10662 / CBS 1146 / NBRC 0425 / NCYC 2629 / NRRL Y-866) TaxID=1076872 RepID=G8ZRG8_TORDC
#=GS UniRef90_A0A8H2VE82/1-77 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania barnettii TaxID=61262 RepID=A0A8H2VE82_9SACH
#=GS UniRef90_A0A8H2VE82/111-251 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania barnettii TaxID=61262 RepID=A0A8H2VE82_9SACH
#=GS UniRef90_A0A1S7HZI1/1-209 DE [subseq from] YLR211C n=3 Tax=Zygosaccharomyces TaxID=4953 RepID=A0A1S7HZI1_9SACH
#=GS UniRef90_G0VIQ7/1-229 DE [subseq from] Autophagy-related protein 28 n=1 Tax=Naumovozyma castellii (strain ATCC 76901 / BCRC 22586 / CBS 4309 / NBRC 1992 / NRRL Y-12630) TaxID=1064592 RepID=G0VIQ7_NAUCC
#=GS UniRef90_G8JWZ7/8-217 DE [subseq from] WGS project CCBQ000000000 data, contig 00015 n=1 Tax=Eremothecium cymbalariae (strain CBS 270.75 / DBVPG 7215 / KCTC 17166 / NRRL Y-17582) TaxID=931890 RepID=G8JWZ7_ERECY
#=GS UniRef90_J7RGP5/1-232 DE [subseq from] Conserved protein n=1 Tax=Kazachstania naganishii (strain ATCC MYA-139 / BCRC 22969 / CBS 8797 / KCTC 17520 / NBRC 10181 / NCYC 3082 / Yp74L-3) TaxID=1071383 RepID=J7RGP5_KAZNA
#=GS UniRef90_I2H5W4/1-218 DE [subseq from] Uncharacterized protein n=1 Tax=Tetrapisispora blattae (strain ATCC 34711 / CBS 6284 / DSM 70876 / NBRC 10599 / NRRL Y-10934 / UCD 77-7) TaxID=1071380 RepID=I2H5W4_TETBL
#=GS UniRef90_A0A1X7R3T5/1-77 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania saulgeensis TaxID=1789683 RepID=A0A1X7R3T5_9SACH
#=GS UniRef90_A0A1X7R3T5/113-251 DE [subseq from] Uncharacterized protein n=1 Tax=Kazachstania saulgeensis TaxID=1789683 RepID=A0A1X7R3T5_9SACH
#=GS UniRef90_Q6FNV0/1-224 DE [subseq from] Autophagy-related protein 14 n=2 Tax=Candida glabrata TaxID=5478 RepID=Q6FNV0_CANGA
#=GS UniRef90_A0A7G3ZDQ2/21-153 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit KXD1 n=1 Tax=Torulaspora globosa TaxID=48254 RepID=A0A7G3ZDQ2_9SACH
#=GS UniRef90_G0W9C3/1-91 DE [subseq from] t-SNARE coiled-coil homology domain-containing protein n=1 Tax=Naumovozyma dairenensis (strain ATCC 10597 / BCRC 20456 / CBS 421 / NBRC 0211 / NRRL Y-12639) TaxID=1071378 RepID=G0W9C3_NAUDC
#=GS UniRef90_G0W9C3/135-265 DE [subseq from] t-SNARE coiled-coil homology domain-containing protein n=1 Tax=Naumovozyma dairenensis (strain ATCC 10597 / BCRC 20456 / CBS 421 / NBRC 0211 / NRRL Y-12639) TaxID=1071378 RepID=G0W9C3_NAUDC
#=GS UniRef90_W0TGX9/3-203 DE [subseq from] Uncharacterized protein YLR211C n=1 Tax=Kluyveromyces marxianus (strain DMKU3-1042 / BCC 29191 / NBRC 104275) TaxID=1003335 RepID=W0TGX9_KLUMD
#=GS UniRef90_A0A109UYX3/3-207 DE [subseq from] HDL254Cp n=1 Tax=Eremothecium sinecaudum TaxID=45286 RepID=A0A109UYX3_9SACH
#=GS UniRef90_A0A7H9HPF6/24-146 DE [subseq from] Biogenesis of lysosome-related organelles complex 1 subunit BLI1 n=1 Tax=Torulaspora sp. CBS 2947 TaxID=2792677 RepID=A0A7H9HPF6_9SACH
#=GS UniRef90_A0A0A8L4X0/3-203 DE [subseq from] WGS project CCBQ000000000 data, contig 00043 n=1 Tax=Kluyveromyces dobzhanskii CBS 2104 TaxID=1427455 RepID=A0A0A8L4X0_9SACH
#=GS UniRef90_Q6CT11/5-203 DE [subseq from] KLLA0C16291p n=2 Tax=Kluyveromyces lactis TaxID=28985 RepID=Q6CT11_KLULA
#=GS UniRef90_Q75A45/4-157 DE [subseq from] ADR074Cp n=2 Tax=Eremothecium TaxID=33170 RepID=Q75A45_ASHGO
#=GS UniRef90_A0A0P1KV75/3-79 DE [subseq from] LAQU0S08e01640g1_1 n=1 Tax=Lachancea quebecensis TaxID=1654605 RepID=A0A0P1KV75_9SACH
#=GS UniRef90_A0A0P1KV75/92-192 DE [subseq from] LAQU0S08e01640g1_1 n=1 Tax=Lachancea quebecensis TaxID=1654605 RepID=A0A0P1KV75_9SACH
#=GS UniRef90_A7TPA7/1-172 DE [subseq from] Protein of centriole 5 n=1 Tax=Vanderwaltozyma polyspora (strain ATCC 22028 / DSM 70294 / BCRC 21397 / CBS 2163 / NBRC 10782 / NRRL Y-8283 / UCD 57-17) TaxID=436907 RepID=A7TPA7_VANPO
#=GS UniRef90_G8BYP4/3-115 DE [subseq from] MIT domain-containing protein n=1 Tax=Tetrapisispora phaffii (strain ATCC 24235 / CBS 4417 / NBRC 1672 / NRRL Y-8282 / UCD 70-5) TaxID=1071381 RepID=G8BYP4_TETPH
#=GS UniRef90_G8BYP4/107-262 DE [subseq from] MIT domain-containing protein n=1 Tax=Tetrapisispora phaffii (strain ATCC 24235 / CBS 4417 / NBRC 1672 / NRRL Y-8282 / UCD 70-5) TaxID=1071381 RepID=G8BYP4_TETPH
#=GS UniRef90_H2B2A0/1-223 DE [subseq from] MIT domain-containing protein n=1 Tax=Kazachstania africana (strain ATCC 22294 / BCRC 22015 / CBS 2517 / CECT 1963 / NBRC 1671 / NRRL Y-8276) TaxID=1071382 RepID=H2B2A0_KAZAF
#=GS UniRef90_C5E235/17-63 DE [subseq from] KLTH0H01826p n=1 Tax=Lachancea thermotolerans (strain ATCC 56472 / CBS 6340 / NRRL Y-8284) TaxID=559295 RepID=C5E235_LACTC
#=GS UniRef90_C5E235/77-177 DE [subseq from] KLTH0H01826p n=1 Tax=Lachancea thermotolerans (strain ATCC 56472 / CBS 6340 / NRRL Y-8284) TaxID=559295 RepID=C5E235_LACTC
#=GS UniRef90_A0A1G4K626/3-95 DE [subseq from] LAFA_0G22980g1_1 n=1 Tax=Lachancea sp. CBS 6924 TaxID=433476 RepID=A0A1G4K626_9SACH
#=GS UniRef90_A0A1G4K626/86-193 DE [subseq from] LAFA_0G22980g1_1 n=1 Tax=Lachancea sp. CBS 6924 TaxID=433476 RepID=A0A1G4K626_9SACH
#=GS UniRef90_A0A0C7MRN9/1-74 DE [subseq from] LALA0S05e08306g1_1 n=1 Tax=Lachancea lanzarotensis TaxID=1245769 RepID=A0A0C7MRN9_9SACH
#=GS UniRef90_A0A0C7MRN9/136-192 DE [subseq from] LALA0S05e08306g1_1 n=1 Tax=Lachancea lanzarotensis TaxID=1245769 RepID=A0A0C7MRN9_9SACH
#=GS UniRef90_A0A1G4JQ11/94-179 DE [subseq from] LANO_0E01750g1_1 n=1 Tax=Lachancea nothofagi CBS 11611 TaxID=1266666 RepID=A0A1G4JQ11_9SACH
#=GS UniRef90_A0A1G4JSN1/17-210 DE [subseq from] LAMI_0E15368g1_1 n=1 Tax=Lachancea mirantina TaxID=1230905 RepID=A0A1G4JSN1_9SACH
#=GS UniRef90_A0A061AL66/205-266 DE [subseq from] CYFA0S01e18734g1_1 n=2 Tax=Cyberlindnera fabianii TaxID=36022 RepID=A0A061AL66_CYBFA
#=GS UniRef90_A0A642V060/155-278 DE [subseq from] Protein phosphatase 1 regulatory subunit 12B n=1 Tax=Trichomonascus ciferrii TaxID=44093 RepID=A0A642V060_9ASCO
query MSTLAEVYTIIEDAEQECRKGDFTNAKAKYQEAIEVLGP--QNEN---LS---QN--KLSSDVTQAIDLLKQDITAKIQELELL---I-----EKQSS-EE-NN--I-G-MVNNN---M--LIGSVILNN--KSPIN-GIS--NA-RN-WD-N-P-AY--Q-D-T----LSPI----NDPLLMSIL-NRL--QFNL---N
UniRef90_Q05789/1-226 MSTLAEVYTIIEDAEQECRKGDFTNAKAKYQEAIEVLGP--QNEN---LS---QN--KLSSDVTQAIDLLKQDITAKIQELELL---I-----EKQSS-EE-NN--I-G-MVNNN---M--LIGSVILNN--KSPIN-GIS--NA-RN-WD-N-P-AY--Q-D-T----LSPI----NDPLLMSIL-NRL--QFNL---N
UniRef90_A0A0L8REU8/1-163 -------------------------------------------------------------------------MTAKIQELELL---I-----DKRSP-EE-NS--I-G-MLNNN---M--LIGSVILNN--KTSIN-GVG--NT-RN-WD-N-S-IY--Q-N-S----LNPL----NDPILISIL-NRL--QFNL---N
UniRef90_J4TVV6/1-140 --------------------------------------------------------------------------------------------------------------MVKNN---M--LIGTVILNN--KASIN-GVG--NT-RN-WD-N-S-VY--Q-D-T----LNPI----NDPVLISIL-NRL--QSNL---N
UniRef90_A0A4C2E9F3/1-218 MSQLIEVYTCIDDAENKTRKGNLTESLSCYKRAMELLN------G---IG---CQ--GVSAEIIHAIQLLRQDIDARIKELESL---I-----EDQKP-VSTTA--V-G-AVTKN--GS--LTNSTISN-----------A--KT-RN-WD-N-P-RSLGS-S-V----MGPS----GDPLLASIF-GKL--QVNL---V
UniRef90_A0A7H9B602/1-223 MKKLAEVYSNIDGAEQQSRKGDYLGAIKEYKKALDILDR--SGKS---SE---EQDVGLSHEVTRALDLLQDDIQAKIRELESL---V-----EVQRP-EE-SK---------NS---S--TVGSLW-NA--SSMSNQTVV--KT-RT-LE-G---SL--N-G-T----MGLM----MDPLLVSLI-NKL--QVNF---I
UniRef90_A0A1Q3A3G1/1-218 MSQLIEVYNCIDGAENKTRKGDLSESLSSYKHAIELLN------G---LE---CQ--GVSLEIIHAIQLLRQDIDARTKELESL---I-----EGQKP-IS-A---A-A-VAAAV---A--KNGSLA------SSTG-STA--KT-RG-WD-S-P-RN--L-N-S---PAMGP----GDPLLASIF-GKL--QANL---V
UniRef90_G8ZRG8/11-233 MTSLGEAYKYIDGAEQESRNGNLSEALEKYRHALDNLIV--KEEG---AE---EV--RLGPEVVEAIKLLRQDINDRVRDIEAL---V-----ELQRP-T----------TARST---S--LSGTILTNL--NSSSQ-NVA--SV-RP-WD-V-V-RG--G-N-P----AAGM---QVDPLLLKIL-NKL--QNEI---I
UniRef90_A0A8H2VE82/1-77 MSTLCNVYKLIETAEQETKRGNLQNSIIYYKETLKEINE--ITDN---IE---ES--GLSNNVIEAVQLLRKDVSQTIYDIQNV---L-----HV---------------------------------------------------------------------------------------------------------
UniRef90_A0A8H2VE82/111-251 ------------------------------------------------------------------------------------------------------N-------GINQN---F---LGSVYLRM--NPSVM-QPG--GA-RM-WE-N-I-IS--N-D-K----LIPN----NDPLFLGII-NKL--QSNV---I
UniRef90_A0A1S7HZI1/1-209 MSQLIDVYNSINTAEKQTRNGDLPEALKQYKHALALLA------N---LK---CQ--GTSSEIVHALELLRQDIDSRIKELESL---Q-----ERRNP-MT-PK--N-G-AVRNS---S------V------------SLA--KT-RS-WD-N-S-RN--MGS-G----LGSS----SDPLLISIL-GKL--QSNL---I
UniRef90_G0VIQ7/1-229 MSSLKEVYDLISQAELETRNGNLNVSINKYKNALSKTNC--LLKM---LK---SE--DVENDVTDAILMLRKDISKTIFELEDL---V-----SKQRP--D-SK--V-G-TVKNP---T--MLSSLAINP--SMNVL-GF----S-KT-WD-S-T-VN--T-N-TNLERINPY----NDPILRSIT-DKL--QTNL---L
UniRef90_G8JWZ7/8-217 --TSVQIYEHIEIAEQHCIERNYQNASKEYEHVLEQLES--LVKD---LA--------LNGDLKRAIMLLKEDIELKVKELEQW---D-----QRKQP-TP-A---------NTT---Q-----GLSTNN--KSS--------PG-RV-ID-M-N-MH--L-N-N----ANPI----TDPFLASII-NKL--QTNI---L
UniRef90_J7RGP5/1-232 MSHLRQTYQLIENAEQQIKRGNLNESLKYYRQSVNEINK--VILR---LNSEHPD--EVNDEVIESIEILKRDVSQTMFDLENF---I-----KAQRA-VS-KGSTVKN-SINMN------MMGSMLLSI--KPSMN-NVN--RP-SP-TSEQ-S-GD--G-E-G----NNFV----SDPILTGIL-NKL--QTNLFALT
UniRef90_I2H5W4/1-218 MSDLNEIYDLIRNAEQLTQRNDLLGALKKYREVETTIGSCKRKQK---LQ---GN-SDLDESVIEAIELLQEDISLRIREIESL---T-----GNQR------P--I-S-VGNNS---K--AM--SLLNS--WLPNN-NNS--II-NG-ID---------F-N-K----SSMV----TDPLLISII-DKL--KINI---L
UniRef90_A0A1X7R3T5/1-77 MPTLCNVYKLIETAEQETKRGNLQNSIIYYKETLKEINE--ITDK---IE---ES--GLSKNVIEAVQLLRKDISQTIYDIQNV---L-----PT---------------------------------------------------------------------------------------------------------
UniRef90_A0A1X7R3T5/113-251 ---------------------------------------------------------------------------------------------------------------INQN---F---LGSVYLRM--NPSVM-QPG--GA-KM-WE-T-S-MN--N-D-K----LIPN----NDPLFLGIV-NKL--QSNI---I
UniRef90_Q6FNV0/1-224 MSDLNTVYEHIRRAEQKCRSGDLSDALDLYVSALEGLDL--KSPN---FD---LQ--GLDDTIIEAVKLLRDDIQLRIKELQIC---V-----IESSK-DE-KD--S-----KNN---K--ASMALVQSS--AGNNN-NLA--AS-KY-WD-S-T-RN-IT-DIT----SSTF----IDPYTSSML-SKL--QNSM---V
UniRef90_A0A7G3ZDQ2/21-153 ----------------------------------------------------------------------------------------------------------------------------STLLGM--NSSMQ-PMS--KT-RS-WD-G-GRSG--N-D-T----TGLQ----TEPFLGRIL-NKL--QSNL---T
UniRef90_G0W9C3/1-91 MTTLNDIYELIDDAEEESRKGNITNSVRKYKETVKSLEE--LIKS---TK---RE--DINNEILVALDMLKKDISKTICELDKL---LT---IQRQQQQQQ-QH--N-G-QV----------------------------------------------------------------------------------------
UniRef90_G0W9C3/135-265 -------------------------------------------------------------------------------------------------------------------------------NYN--NAMTN-SMN--NA-NVERI-N-P-FN--D-V-V----IRTI----TDNLKMNLL-ESIKGQYHL---L
UniRef90_W0TGX9/3-203 -SELNNVFECIEDAESFIKSNKVVVAIQEYRKAIKQLDY--INET---------E--DLPENVQYAVTLLHDDILLRVKELGVL---Q-----EVQSN-SE-SS--E-S-GS--N---S--SISRFV-------------S--DG-SL-YP-N-G-----N-S-------VLI----SDPLLLSIT-SKL--ENNV---M
UniRef90_A0A109UYX3/3-207 --DLINVYAHIEAGEQYSRDSNYLGTVKEYNKALEKILQ--LEDS---VE---VN--E---GLKDAIGLLKQDLLVKIKELQHL---Q-----QKRPN-NA-AN--S-A-TVTAV---M--NSGTV--------RVQ-GSV--NE----GG-N-----------S----I-SI----SDPFLASIV-NKL--HMNI---L
UniRef90_A0A7H9HPF6/24-146 ------------------------------------------------------------------------------------------------------------------------------------NSSMQ-GMS--KT-RS-WD-G-R-NG--N-E-T----SGLQ----MEPFLGRIL-SKL--QSNL---T
UniRef90_A0A0A8L4X0/3-203 -SELNNVYETIDDAESYIKSNKISPAIEEFEKASKQLDI--IN------S---ME--SLPGNIHNAIVLLREDIDIRVKELGML---Q-----EAQST-SE-SS------EVGSN---S--SLSRFVTDG--SLYHN-G-----------------------N-S----LS-L----ADPLLLSIT-SKL--ENN-----
UniRef90_Q6CT11/5-203 ---LNNVYETIDDAESYIKRNKLSRAIELFENASKQLDA--IN------S---ME--SLPENIHNAIVLLREDIDARVKELGML---Q-----EAQST-SE-SS------EIGSN---S--SLSRFMMDG--SFYHN-G-------------N-S-L--------------FL----TDPLLSSIT-SKL--ENNV---I
UniRef90_Q75A45/4-157 ---------------------------------------------------------------------LREDIQIKIKELEQW---E-----LRQPS-AG-LG--A-G-LSPNR--------------K--NSPVR-GPE--AT-----------SH--L-S-N----GVPI----ADPFLASII-NKL--QTNI---L
UniRef90_A0A0P1KV75/3-79 --DLTEVYKLIESAEVQLRDRKFSESIETYRKSVEAIDK--LETQ---HE---SV--AVVGDVEQALTLLKKDLDRKIQEIKVL---G-----K-LSA------------------------------------------------------------------------------------------------------
UniRef90_A0A0P1KV75/92-192 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------QEPLMTYRL-SGF--QNNI---L
UniRef90_A7TPA7/1-172 --------------------------------------------------------------------MLKSDINLRVRELDTL---I-----TLKSS--S-NG--L-K-ASRNL---P--MMSSVILNN--NSSTN-PAE--SL----WN-N-N-NC--G-A---------N----LDPFLNSML-NKF--QHNL---E
UniRef90_G8BYP4/3-115 --ELLKVYELINNAERESQKGLYAKARSIYEEILDYILD--DNRNAITLD---LQ--KVGSKVGEAVELLVEDVKLRIRELDTL---IGIRNLQKPSS-EH-KE--L-NNSYNNN---S--LQKDGENSN--ITPLN---------------------------------------------------------------
UniRef90_G8BYP4/107-262 ---------------------------------------------------------------------------------------------------EN-SN--I-T-PLNKRKQYN--MLNSVILNN--GSIIN-PGE--SI----WN-I-N---------S----NKLI----ADPSLISIF-NKF--QSNL---T
UniRef90_H2B2A0/1-223 MITLKDIYILIENAEQQSRKGKVKKAISIYKVACKEIDQ--LLGS---IE---DD--GVDQDIITAVALLKRNIMQTVRDLENF---L-----RRTNF-DF-LN--S-D-SIANN---TPPSTANTIPSNLRNDNLN-SMF--QSTKL-MT-NSL-AM--W-E-Q----DQPIDNFSSDPFLITLL-NRL--QNTL---L
UniRef90_C5E235/17-63 ----------------------------KSAEAINQLEI--QHKS---IA---N-----VGDLKQALNLLKKDLNRKIREIQIL---G------KFS-------------------------------------------------------------------------------------------------------
UniRef90_C5E235/77-177 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HEPMAAYRL-SGF--QNNI---L
UniRef90_A0A1G4K626/3-95 --ELKDIYELIANAELKVREHNFEESINVYLETVKLIEE-FESKN---KG---FD--Q-VEDVKTAIELLKIDIESKVWELQQLNLRA-----KTPAA-KP-DN--V-A-AVKNP---M---------------------------------------------------------------------------------
UniRef90_A0A1G4K626/86-193 ------------------------------------------------------------------------------------------------------------------------------------------------------------------D-N----VAAV----KNPMTTTESPQTF--VENV---L
UniRef90_A0A0C7MRN9/1-74 MSELKDIYELIANAELKVREHNFEESINVYQETVKLIEE--LESK---NE---GF--KQIEDVKTAIDLLKIDIEGKISELQQL--------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0C7MRN9/136-192 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1G4JQ11/94-179 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NI---L
UniRef90_A0A1G4JSN1/17-210 ----------LEAAQLLLRDRRYDKATIMYERALKSTQT--LQEI---SE---KE--QDSEHVRQALSLVGDDIQNRVKELKLL---------E-QAA-QE-----V-A-QDSQN---Q--S-------D--RSEIT-AMTALRE-KS-WT-D-S-NF--A----------TV----IETYRTSLL-NLL--KVQL---E
UniRef90_A0A061AL66/205-266 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A642V060/155-278 ---------------------------------------------------------------------------------------------------------------------------------S--KYPIG-SMS--SA-GT-SE-S-FYVV--P-N-T----SSLT----YEELITENA-SLR--QLIN---K
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx..xxxx...xx...xx..xxxxxxxxxxxxxxxxxxxxxxxxxxx...x.....xxxxx.xx.xx..x.x.xxxxx...x..xxxxxxxxx..xxxxx.xxx..xx.xx.xx.x.x.xx..x.x.x....xxxx....xxxxxxxxx.xxx..xxxx...x
query N----D-I--QLKT----E--G-GKN----S--K-N-S-E-----------M-----K------INL-RLEQFKKELVLYEQKKFKEYGMKIDEITKENKKLANEIGRLRERWDSLVESAKQRRDKQKN
UniRef90_Q05789/1-226 N----D-I--QLKT----E--G-GKN----S--K-N-S-E-----------M-----K------INL-RLEQFKKELVLYEQKKFKEYGMKIDEITKENKKLANEIGRLRERWDSLVESAKQRRDKQKN
UniRef90_A0A0L8REU8/1-163 N----D-M--ESKV----K--G-VKN----P--N-N-L-E-----------M-----K------TNL-RLEQFKKELVLYEQNKFKEYGMKIDQIAKENKKLSNEIGRLRERWDSLVESAKQRRDKQRN
UniRef90_J4TVV6/1-140 N----D-I--QLKV----E--G-EKI----P--K-N-S-E-----------M-----K------INL-RLEQFMKELVLYEEKKFKEYGIKIDQVTKENKKLSNEIGRLRERWDSLVESAKQRRDKQQN
UniRef90_A0A4C2E9F3/1-218 N----S-VRNQFKE----E--D-PHV----V--N-E-L-D-----------N-----N------VRQ-QLVRFKKELGLYEQKKSKDYNIRLEQVINENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_A0A7H9B602/1-223 N----A-VSEELKR----T--E-SHD----I--Q-S-I-E-----------S-----Q------VKQ-QIGRFKKELGMYEQKKIKEYNLRLDQAIKENKKLSNQIVKLRERWDSLVESAKQRRNKKQ-
UniRef90_A0A1Q3A3G1/1-218 N----S-ICEQFKE----E--D-PHI----V--G-E-V-E-----------N-----R------VRQ-QLVRFKKELGLYEQKKSKDYSVRFEQAINENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_G8ZRG8/11-233 A----K-L--EEKIV--EK--D-YHK----G--Q-S-I-E-----------S-----I------VNQ-SLMQFGKDLAIYEQKNYKEFNARLEKANNENKKLSNQIVKLRERWDSLVESAKQRRTRQQ-
UniRef90_A0A8H2VE82/1-77 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A8H2VE82/111-251 L----S-V--SNQTKDIQQ--N-EEG----T--Q-S-I-E-----------E-----K------IQQ-HVEQFRKEVSWYEQKKYEEYENKIQEIERENRKLNLQVDRLKQRWDSLVESAKERKKK---
UniRef90_A0A1S7HZI1/1-209 N----S-I--SEQC---KE--D-PQV----L--K-E-L-D-----------G-----R------IAQ-QFSQFRKELALYEQKKSKDYNARLEQAIAENKKLSNQILKLRGRWDSLVESAKQRRSRQ--
UniRef90_G0VIQ7/1-229 NLVSDN-V--QVFQ----K--G-DKE----EFLQ-T-I-T-----------F-----A------VEQ-NFDIFRKELGFYEQKKFTEYDSNLENALKENKKLTNQISKLKERWDSLVESARQKKK----
UniRef90_G8JWZ7/8-217 Q----V-L--TQQF----A--GEAKT----A--G-Q-K-E-----------VS---CL------VTQ-QIAQFQKEIAIFEQRKFREYDTKMDQLIKENKKLSNQVVRLKDRWDSLVESAKQKRNQQE-
UniRef90_J7RGP5/1-232 A----D-T--KGVG----V--D-GRG----T--K-N-A-N-----------L-----E------VSH-HIEQFKRELSWYEQKKFSEYDSRLERTRKENRKLLQEVEKLKDRWNNLVESAKQRRNR---
UniRef90_I2H5W4/1-218 M----K-L--NDEL----E--G-EKT----E--A-G-SRE-----------F-----N------ITQ-QFNQFNKELLMYEQKKFNEYNLNLEQLAKENRKLSKQIIKLKERWDSLVASAKEKRNRQ--
UniRef90_A0A1X7R3T5/1-77 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1X7R3T5/113-251 M----S-T--SNQF----K--N-GQHTVRGT--Q-A-I-E-----------E-----E------IQQ-HVEQFRKEVSWYEQKRFEEYENRMKELEMENKKLNLQVDRLKQRWDSLVESAKERKKK---
UniRef90_Q6FNV0/1-224 -----D-L--IKEA----K--N-QKT----D--V-N-D-L-----------V-----S------TALFQIDQFKKEMLIYEQRRTREYQIKVEHLNKDVKKLSSQNSKLKERWDSLVESARQRKNRQQ-
UniRef90_A0A7G3ZDQ2/21-153 L----R-IEERMKD----K--D-GKN----G--A-S-V-E-----------A-----I------VNQ-QLVQFRKDLAVYEQRKVREYTSRLEQATKENRKLSNQIVRLRERWDSLVESAKQRRIRQQ-
UniRef90_G0W9C3/1-91 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_G0W9C3/135-265 N----GNAKDERKN----D--E-DEF----I--K-Q-L-N-----------T-----T------FES-QFDMFRKELGFYEQKKFSEYDTNLDNLIKENKKLLNQIVKLRERWDSLVESAKQRRNK---
UniRef90_W0TGX9/3-203 R----L-I--NASE----D--P-GSV----S--K-T-E--------------------------IMQ-QFSQFKRELTVYEQKKSKDYEGKMEQVIKENKKLSNQVNRLKERWDSLVESAKQKRNQQ--
UniRef90_A0A109UYX3/3-207 Q----S-L--SQLS----G--I-QVD----K--T-E-L-E-----------Q-----L------LMY-QIKNLEKEIALFEQRKFREYDSKMEQLIKENKRLSNQVLRLKDRWDSLVESARQKRNQQ--
UniRef90_A0A7H9HPF6/24-146 T----R-I--EERI----K----DKD----G--V-S-V-E-----------S-----I------VNQ-QLVQFRKDLAVYEQRKIREYTSRLEQANKENRKLSHQIVRLRERWDSLVESAKQRRIRQQ-
UniRef90_A0A0A8L4X0/3-203 -----I-V--RLIT----S--T--QT----D--K-V-N-K-----------T-----D------VVQ-QFAQYRRELTMYEQKKSKDYEARLEQIMKENKKLLNQVNRLKDRWDSLVESAKQKRNQQQ-
UniRef90_Q6CT11/5-203 R----S-I--TSKQ-------T-DKV----I--K-N---------------------E------VAQ-QFAQFRRELSVYEQKKSRDYEAKSEQVMKENKKLLNQVNRLKERWDSLVESAKQKRNQQQ-
UniRef90_Q75A45/4-157 Q----T-L--SQRL----A--GEGKP----M-GKQE-L-E-----------A-----V------VSP-QMTQFQKEMTVFEQRKFREYDSKMDQLLKENRKLSNQVIRLKDRWDSLVESAKQKRNQQE-
UniRef90_A0A0P1KV75/3-79 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0P1KV75/92-192 A----I-V--RGKT----DPKS-GVS----A--S-E-I-E-----------N-----S------ISK-EVSQLFKSFAFIDQQKFKEYDNKVEQLVRENRKLTGQIAKLKERWDSLVESARQKRNQQN-
UniRef90_A7TPA7/1-172 L----S-L--MDTL----K--S-GSN----S--S-NNN-DKKVNHNVQDVSL-----K------ISE-QMSQFKKELRIYEQQKCKEFQLRMEQALAENKKLSNQIVKLRERWDSLVESAKQKRNRQR-
UniRef90_G8BYP4/3-115 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_G8BYP4/107-262 K----S-LSDQLLN----S--N-EMN----D--S-N-S-KT--------TSIVAQQIKYIVAKAIED-ELSSFEKELCVYENKKCKEYQIKLNRSSEENKRLNKQIMKLRERWDGLVESAKQKKLR---
UniRef90_H2B2A0/1-223 -----D-I---------------TKT----S--D-H-V-E-----------E-----D------VSQ-QFQLFKRDLIWYEQKKFSDFNNHIKKMNDEKKKLENQITRQKELWEGLVENVKAK------
UniRef90_C5E235/17-63 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_C5E235/77-177 A----I-V--RGKT----DPKS-GVS----A--S-E-I-E-----------N-----S------ISK-EVSQLFKSFALIDQQKFKEYDNKVEQLVRENRKLTGQIVKLKERWDSLVESARQKRNQQN-
UniRef90_A0A1G4K626/3-95 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A1G4K626/86-193 N----L-V--RSKT----ELKP-GAS----V--H-D-L-E-----------T-----G------IAA-ETTQLLRGLSWVDQQRSKEYEARIEELCTENKQLTTQIHKLKERWDSLVESARQKRNQQN-
UniRef90_A0A0C7MRN9/1-74 ---------------------------------------------------------------------------------------------------------------------------------
UniRef90_A0A0C7MRN9/136-192 ----------------------------------------------------------------------AQLLQGLSWVGQQRSKEYEAKIEQMCAENKQLTTQIHKLKERWESLVESARQKRNQQ--
UniRef90_A0A1G4JQ11/94-179 N----A-V--RAKT----DLKP-GIS----L--Y-E-L-E-----------N-----S------VSR-ELSQLLQGVSLVDQQKFKEYEFKIEQLHRENKQLTSQINKLKERWDSLVESARLKRNLQ--
UniRef90_A0A1G4JSN1/17-210 T----D-S--RFST-----------------------L-E-----------D-----I------VNQ-NVISLLSDMGMLEQRRVAQYEAKVEHLARENKKMSNQIVKLKERWGSLVESARQKRKQEK-
UniRef90_A0A061AL66/205-266 -----------------------------------------------------------------KH-YLSTLKREINQQESQIKKDYENKIEQLYKDNKRLEQQVGSLKSRWDALVESAKKRREDQK-
UniRef90_A0A642V060/155-278 T----S-I--QLQA----H--E-IAS----R--K-Q-K-D-----------------A------IKN-GLIQLKNELTAKENARNKEHDAELEKLKGENDKLKIQIGRLKSRWDELKESARKRREDE--
#=GC RF x....x.x..xxxx....x..x.xxx....x..x.x.x.x...........x.....x......xxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
# Copyright 2021 AlQuraishi Laboratory
# Dingquan Yu @ EMBL-Hamburg Kosinski group
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import shutil
import pickle
import torch
import torch.nn as nn
import numpy as np
from functools import partial
import unittest
from openfold.utils.tensor_utils import tensor_tree_map
from openfold.config import model_config
from openfold.data.data_modules import OpenFoldMultimerDataModule,OpenFoldDataModule
from openfold.model.model import AlphaFold
from openfold.utils.loss import AlphaFoldMultimerLoss
from tests.config import consts
import logging
logger = logging.getLogger(__name__)
import os
class TestMultimerDataModule(unittest.TestCase):
    """Smoke test for the multimer data module.

    Builds an ``OpenFoldMultimerDataModule`` from the fixtures under
    ``tests/test_data`` (resolved against the current working directory),
    fetches one training sample, and runs it through an ``AlphaFold`` model
    and ``AlphaFoldMultimerLoss``. The test makes no assertion on the loss
    value; it passes as long as the whole pipeline runs without raising.
    """

    # Site-specific default location of the template mmCIF files. Override it
    # with the environment variable named below to run the test elsewhere.
    DEFAULT_TEMPLATE_MMCIF_DIR = (
        "/g/alphafold/AlphaFold_DBs/2.3.0/pdb_mmcif/mmcif_files/"
    )
    TEMPLATE_MMCIF_DIR_ENV_VAR = "OPENFOLD_TEMPLATE_MMCIF_DIR"

    def setUp(self):
        """
        Set up the data module, the model and the loss.

        The data module uses the model_1_multimer_v3 config for now; the
        model and the loss are built from the preset named by ``consts.model``.
        """
        self.config = model_config(
            "model_1_multimer_v3",
            train=True,
            low_prec=True,
        )

        # Resolve the working directory once; every fixture path hangs off it.
        cwd = os.getcwd()
        template_mmcif_dir = os.environ.get(
            self.TEMPLATE_MMCIF_DIR_ENV_VAR,
            self.DEFAULT_TEMPLATE_MMCIF_DIR,
        )
        self.data_module = OpenFoldMultimerDataModule(
            config=self.config.data,
            batch_seed=42,
            train_epoch_len=100,
            template_mmcif_dir=template_mmcif_dir,
            template_release_dates_cache_path=os.path.join(
                cwd, "tests/test_data/mmcif_cache.json"
            ),
            max_template_date="2500-01-01",
            train_data_dir=os.path.join(cwd, "tests/test_data/mmcifs"),
            train_alignment_dir=os.path.join(
                cwd, "tests/test_data/alignments/"
            ),
            # shutil.which returns None when kalign is not on PATH.
            kalign_binary_path=shutil.which('kalign'),
            train_mmcif_data_cache_path=os.path.join(
                cwd, "tests/test_data/train_mmcifs_cache.json"
            ),
            train_chain_data_cache_path=os.path.join(
                cwd, "tests/test_data/train_chain_data_cache.json"
            ),
        )

        # Set up the model.
        self.c = model_config(consts.model, train=True)
        # Somehow this needs to be overwritten in the multimer loss config.
        self.c.loss.masked_msa.num_classes = 22
        # No need to go overboard here.
        self.c.model.evoformer_stack.no_blocks = 4
        # Don't want to set up deepspeed for this test.
        self.c.model.evoformer_stack.blocks_per_ckpt = None
        self.model = AlphaFold(self.c)
        self.multimer_loss = AlphaFoldMultimerLoss(self.c.loss)

    def testPrepareData(self):
        """Run the training sample at index 1 through the model and the loss."""
        self.data_module.prepare_data()
        self.data_module.setup()
        train_dataset = self.data_module.train_dataset
        all_chain_features, ground_truth = train_dataset[1]

        # Prepend a dimension of size 1 to every feature tensor.
        # NOTE(review): presumably the dataset yields unbatched samples and
        # the model expects a leading batch dimension; confirm.
        all_chain_features = tensor_tree_map(
            lambda t: t.unsqueeze(0),
            all_chain_features,
        )

        # Forward pass only; no gradients are needed for this smoke test.
        with torch.no_grad():
            out = self.model(all_chain_features)
            self.multimer_loss(out, (all_chain_features, ground_truth))
\ No newline at end of file
# Copyright 2021 AlQuraishi Laboratory
#
# Dingquan Yu @ EMBL-Hamburg Kosinski group
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
......@@ -27,14 +27,12 @@ from tests.config import consts
import logging
logger = logging.getLogger(__name__)
import os
import io, contextlib
from tests.data_utils import (
random_template_feats,
random_extra_msa_feats,
random_affines_vector, random_affines_4x4
random_affines_vector
)
from openfold.utils.rigid_utils import (
Rotation,
Rigid,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment