CITATION.cff 5.72 KB
Newer Older
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
1
cff-version: 1.2.0
message: "If you use this software, please cite it using the metadata from this file."
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
2
3
4
5
6
7
8
9
10
11
12
13
14

# BibTeX record for the OpenFold bioRxiv preprint (text outside an entry is ignored by BibTeX).
# Proper nouns in the title are brace-protected so that styles applying sentence casing keep their capitals.
@article{Ahdritz2022.11.20.517210,
	author = {Ahdritz, Gustaf and Bouatta, Nazim and Kadyan, Sachin and Xia, Qinghui and Gerecke, William and O{\textquoteright}Donnell, Timothy J and Berenberg, Daniel and Fisk, Ian and Zanichelli, Niccol{\`o} and Zhang, Bo and Nowaczynski, Arkadiusz and Wang, Bei and Stepniewska-Dziubinska, Marta M and Zhang, Shang and Ojewole, Adegoke and Guney, Murat Efe and Biderman, Stella and Watkins, Andrew M and Ra, Stephen and Lorenzo, Pablo Ribalta and Nivon, Lucas and Weitzner, Brian and Ban, Yih-En Andrew and Sorger, Peter K and Mostaque, Emad and Zhang, Zhao and Bonneau, Richard and AlQuraishi, Mohammed},
	title = {{OpenFold}: Retraining {AlphaFold2} yields new insights into its learning mechanisms and capacity for generalization},
	elocation-id = {2022.11.20.517210},
	year = {2022},
	doi = {10.1101/2022.11.20.517210},
	publisher = {Cold Spring Harbor Laboratory},
	abstract = {AlphaFold2 revolutionized structural biology with the ability to predict protein structures with exceptionally high accuracy. Its implementation, however, lacks the code and data required to train new models. These are necessary to (i) tackle new tasks, like protein-ligand complex structure prediction, (ii) investigate the process by which the model learns, which remains poorly understood, and (iii) assess the model{\textquoteright}s generalization capacity to unseen regions of fold space. Here we report OpenFold, a fast, memory-efficient, and trainable implementation of AlphaFold2, and OpenProteinSet, the largest public database of protein multiple sequence alignments. We use OpenProteinSet to train OpenFold from scratch, fully matching the accuracy of AlphaFold2. Having established parity, we assess OpenFold{\textquoteright}s capacity to generalize across fold space by retraining it using carefully designed datasets. We find that OpenFold is remarkably robust at generalizing despite extreme reductions in training set size and diversity, including near-complete elisions of classes of secondary structure elements. By analyzing intermediate structures produced by OpenFold during training, we also gain surprising insights into the manner in which the model learns to fold proteins, discovering that spatial dimensions are learned sequentially. Taken together, our studies demonstrate the power and utility of OpenFold, which we believe will prove to be a crucial new resource for the protein modeling community. Competing Interest Statement: M.A. is a member of the Scientific Advisory Boards of Cyrus Biotechnology, Deep Forest Sciences, Nabla Bio, Oracle Therapeutics, and FL2021-002, a Foresite Labs company. P.K.S. is a member of the Scientific Advisory Board or Board of Directors of Glencoe Software, Applied Biomath, RareCyte, and NanoString and is an advisor to Merck and Montai Health.},
	url = {https://www.biorxiv.org/content/early/2022/11/22/2022.11.20.517210},
	eprint = {https://www.biorxiv.org/content/early/2022/11/22/2022.11.20.517210.full.pdf},
	journal = {bioRxiv},
}
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
15
16
17
authors:
- family-names: "Ahdritz"
  given-names: "Gustaf"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
18
  orcid: https://orcid.org/0000-0001-8283-5324
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
19
20
- family-names: "Bouatta"
  given-names: "Nazim"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
21
  orcid: https://orcid.org/0000-0002-6524-874X
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
22
23
- family-names: "Kadyan"
  given-names: "Sachin"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
24
  orcid: https://orcid.org/0000-0002-6079-7627
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
25
26
27
28
- family-names: "Xia"
  given-names: "Qinghui"
- family-names: "Gerecke"
  given-names: "William"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  orcid: https://orcid.org/0000-0002-9777-6192
- family-names: "O'Donnell"
  given-names: "Timothy J"
  orcid: https://orcid.org/0000-0002-9949-069X
- family-names: "Berenberg"
  given-names: "Daniel"
  orcid: https://orcid.org/0000-0003-4631-0947
- family-names: "Fisk"
  given-names: "Ian"
- family-names: "Zanichelli"
  given-names: "Niccolò"
  orcid: https://orcid.org/0000-0002-3093-3587
- family-names: "Zhang"
  given-names: "Bo"
  orcid: https://orcid.org/0000-0002-9714-2827
- family-names: "Nowaczynski"
  given-names: "Arkadiusz"
  orcid: https://orcid.org/0000-0002-3351-9584
- family-names: "Wang"
  given-names: "Bei"
  orcid: https://orcid.org/0000-0003-4942-9652
- family-names: "Stepniewska-Dziubinska"
  given-names: "Marta M"
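  # NOTE(review): this ORCID iD is identical to the one listed for "Wang, Bei" above; an iD identifies a single person, so confirm which author it belongs to.
  orcid: https://orcid.org/0000-0003-4942-9652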
- family-names: "Zhang"
  given-names: "Shang"
  orcid: https://orcid.org/0000-0003-0759-2080
- family-names: "Ojewole"
  given-names: "Adegoke"
  orcid: https://orcid.org/0000-0003-2661-4388
- family-names: "Guney"
  given-names: "Murat Efe"
- family-names: "Biderman"
  given-names: "Stella"
  orcid: https://orcid.org/0000-0001-8228-1042
- family-names: "Watkins"
  given-names: "Andrew M"
  orcid: https://orcid.org/0000-0003-1617-1720
- family-names: "Ra"
  given-names: "Stephen"
  orcid: https://orcid.org/0000-0002-2820-0050
- family-names: "Lorenzo"
  given-names: "Pablo Ribalta"
  orcid: https://orcid.org/0000-0002-3657-8053
- family-names: "Nivon"
  given-names: "Lucas"
- family-names: "Weitzner"
  given-names: "Brian"
  orcid: https://orcid.org/0000-0002-1909-0961
- family-names: "Ban"
  given-names: "Yih-En Andrew"
  orcid: https://orcid.org/0000-0003-3698-3574
- family-names: "Sorger"
  given-names: "Peter K"
  orcid: https://orcid.org/0000-0002-3364-1838
- family-names: "Mostaque"
  given-names: "Emad"
- family-names: "Zhang"
  given-names: "Zhao"
  orcid: https://orcid.org/0000-0001-5921-0035
- family-names: "Bonneau"
  given-names: "Richard"
  orcid: https://orcid.org/0000-0003-4354-7906
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
98
99
- family-names: "AlQuraishi"
  given-names: "Mohammed"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
100
  orcid: https://orcid.org/0000-0001-6817-1322
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
101
title: "OpenFold"
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
102
doi: 10.1101/2022.11.20.517210
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
103
date-released: 2021-11-12
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
104
url: "https://doi.org/10.1101/2022.11.20.517210"