OpenDAS / OpenFold · Commits

Commit d7f3d622
Authored Aug 04, 2023 by Geoffrey Yu
Parent: e963726b

    fixed TypeError: enabled must be a bool (got Tensor) when training multimer
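The TypeError in the title comes from PyTorch's grad-mode guard, which accepts only a Python bool. Below is a minimal sketch of the failure mode and of the one-line cure applied in model.py further down; the torch.set_grad_enabled call site is assumed from the recycling loop and is not part of this diff:

    import torch

    # Before the fix: the recycling early-stop check produced a 0-d tensor.
    sq_diff, eps = torch.tensor(0.16), 1e-8
    early_stop = torch.sqrt(sq_diff + eps) <= 0.5   # tensor(True), not a bool

    try:
        # Downstream, the flag gates autograd for the next recycling iteration.
        torch.set_grad_enabled(early_stop)
    except TypeError as err:
        print(err)  # enabled must be a bool (got Tensor)

    # After the fix: .item() unwraps the scalar first, so the comparison
    # yields a plain Python bool.
    early_stop = torch.sqrt(sq_diff + eps).item() <= 0.5
    torch.set_grad_enabled(early_stop)
    torch.set_grad_enabled(True)  # restore the default grad mode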
Showing 3 changed files with 250 additions and 128 deletions:

    openfold/config.py          +110  -127
    openfold/model/model.py       +1    -1
    tests/test_permutation.py   +139    -0
openfold/config.py
@@ -155,12 +155,12 @@ def model_config(
         c.loss.tm.weight = 0.1
     elif "multimer" in name:
         c.globals.is_multimer = True
-        c.globals.bfloat16 = True
+        c.globals.bfloat16 = False
         c.globals.bfloat16_output = False
         c.loss.masked_msa.num_classes = 22
         c.data.common.max_recycling_iters = 20
-        for k, v in multimer_model_config_update.items():
+        for k, v in multimer_model_config_update['model'].items():
             c.model[k] = v
         for k, v in multimer_model_config_update['loss'].items():
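Splitting the update into 'model' and 'loss' sections means the first loop no longer copies loss-only keys into c.model. A hypothetical miniature of the same pattern, with plain dicts standing in for the ml_collections ConfigDict:

    # Hypothetical illustration of the update pattern, not OpenFold code.
    multimer_model_config_update = {
        'model': {"evoformer_stack": {"no_blocks": 48}},
        'loss': {"masked_msa": {"num_classes": 22}},
    }

    c = {"model": {}, "loss": {}}
    for k, v in multimer_model_config_update['model'].items():
        c["model"][k] = v   # per-module model overrides
    for k, v in multimer_model_config_update['loss'].items():
        c["loss"][k] = v    # loss overrides stay out of c["model"]
    print(c["model"], c["loss"])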
@@ -593,12 +593,6 @@ config = mlc.ConfigDict(
                 "c_out": 37,
             },
         },
-        # A negative value indicates that no early stopping will occur, i.e.
-        # the model will always run `max_recycling_iters` number of recycling
-        # iterations. A positive value will enable early stopping if the
-        # difference in pairwise distances is less than the tolerance between
-        # recycling steps.
-        "recycle_early_stop_tolerance": -1.
         },
         "relax": {
             "max_iterations": 0,  # no max
@@ -679,11 +673,17 @@ config = mlc.ConfigDict(
...
@@ -679,11 +673,17 @@ config = mlc.ConfigDict(
"eps"
:
eps
,
"eps"
:
eps
,
},
},
"ema"
:
{
"decay"
:
0.999
},
"ema"
:
{
"decay"
:
0.999
},
# A negative value indicates that no early stopping will occur, i.e.
# the model will always run `max_recycling_iters` number of recycling
# iterations. A positive value will enable early stopping if the
# difference in pairwise distances is less than the tolerance between
# recycling steps.
"recycle_early_stop_tolerance"
:
-
1
}
}
)
)
multimer_model_config_update
=
{
multimer_model_config_update
=
{
'model'
:{
"input_embedder"
:
{
"input_embedder"
:
{
"tf_dim"
:
21
,
"tf_dim"
:
21
,
"msa_dim"
:
49
,
"msa_dim"
:
49
,
#"num_msa": 508,
#"num_msa": 508,
@@ -702,146 +702,128 @@ multimer_model_config_update = {

This hunk rewrites the body of multimer_model_config_update so that every per-module override sits under the new 'model' key. Reconstructed below is the section as it stands after this commit, ending where the 'loss' section begins:

            "c_z": c_z,
            "c_m": c_m,
            "relpos_k": 32,
            "max_relative_chain": 2,
            "max_relative_idx": 32,
            "use_chain_relative": True,
        },
        "template": {
            "distogram": {
                "min_bin": 3.25,
                "max_bin": 50.75,
                "no_bins": 39,
            },
            "template_pair_embedder": {
                "c_z": c_z,
                "c_out": 64,
                "c_dgram": 39,
                "c_aatype": 22,
            },
            "template_single_embedder": {
                "c_in": 34,
                "c_m": c_m,
            },
            "template_pair_stack": {
                "c_t": c_t,
                # DISCREPANCY: c_hidden_tri_att here is given in the supplement
                # as 64. In the code, it's 16.
                "c_hidden_tri_att": 16,
                "c_hidden_tri_mul": 64,
                "no_blocks": 2,
                "no_heads": 4,
                "pair_transition_n": 2,
                "dropout_rate": 0.25,
                "tri_mul_first": True,
                "fuse_projection_weights": True,
                "blocks_per_ckpt": blocks_per_ckpt,
                "inf": 1e9,
            },
            "c_t": c_t,
            "c_z": c_z,
            "inf": 1e5,  # 1e9,
            "eps": eps,  # 1e-6,
            "enabled": templates_enabled,
            "embed_angles": embed_template_torsion_angles,
            "use_unit_vector": True
        },
        "extra_msa": {
            "extra_msa_embedder": {
                "c_in": 25,
                "c_out": c_e,
                #"num_extra_msa": 2048
            },
            "extra_msa_stack": {
                "c_m": c_e,
                "c_z": c_z,
                "c_hidden_msa_att": 8,
                "c_hidden_opm": 32,
                "c_hidden_mul": 128,
                "c_hidden_pair_att": 32,
                "no_heads_msa": 8,
                "no_heads_pair": 4,
                "no_blocks": 4,
                "transition_n": 4,
                "msa_dropout": 0.15,
                "pair_dropout": 0.25,
                "opm_first": True,
                "fuse_projection_weights": True,
                "clear_cache_between_blocks": True,
                "inf": 1e9,
                "eps": eps,  # 1e-10,
                "ckpt": blocks_per_ckpt is not None,
            },
            "enabled": True,
        },
        "evoformer_stack": {
            "c_m": c_m,
            "c_z": c_z,
            "c_hidden_msa_att": 32,
            "c_hidden_opm": 32,
            "c_hidden_mul": 128,
            "c_hidden_pair_att": 32,
            "c_s": c_s,
            "no_heads_msa": 8,
            "no_heads_pair": 4,
            "no_blocks": 48,
            "transition_n": 4,
            "msa_dropout": 0.15,
            "pair_dropout": 0.25,
            "opm_first": True,
            "fuse_projection_weights": True,
            "blocks_per_ckpt": blocks_per_ckpt,
            "clear_cache_between_blocks": False,
            "inf": 1e9,
            "eps": eps,  # 1e-10,
        },
        "structure_module": {
            "c_s": c_s,
            "c_z": c_z,
            "c_ipa": 16,
            "c_resnet": 128,
            "no_heads_ipa": 12,
            "no_qk_points": 4,
            "no_v_points": 8,
            "dropout_rate": 0.1,
            "no_blocks": 8,
            "no_transition_layers": 1,
            "no_resnet_blocks": 2,
            "no_angles": 7,
            "trans_scale_factor": 20,
            "epsilon": eps,  # 1e-12,
            "inf": 1e5,
        },
        "heads": {
            "lddt": {
                "no_bins": 50,
                "c_in": c_s,
                "c_hidden": 128,
            },
            "distogram": {
                "c_z": c_z,
                "no_bins": aux_distogram_bins,
            },
            "tm": {
                "c_z": c_z,
                "no_bins": aux_distogram_bins,
                "ptm_weight": 0.2,
                "iptm_weight": 0.8,
                "enabled": True,
            },
            "masked_msa": {
                "c_m": c_m,
                "c_out": 22,
            },
            "experimentally_resolved": {
                "c_s": c_s,
                "c_out": 37,
            },
        },
        "recycle_early_stop_tolerance": 0.5
    },
    "loss": {
        "distogram": {
...
@@ -919,4 +901,5 @@ multimer_model_config_update = {
         },
         "eps": eps,
     },
+    "recycle_early_stop_tolerance": 0.5
 }
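The recycle_early_stop_tolerance settings above feed the early-stop check that model.py fixes next. A small numeric illustration of the semantics described in the comment; the distances helper here is a plausible stand-in for the one used in model.py, and the values are invented:

    import torch

    def distances(pos):
        # All-against-all distances between CA coordinates.
        return torch.norm(pos[..., None, :] - pos[..., None, :, :], dim=-1)

    prev_pos = torch.randn(10, 3)
    next_pos = prev_pos + 0.01 * torch.randn(10, 3)  # barely moved between recycles

    sq_diff = (distances(prev_pos) - distances(next_pos)) ** 2
    diff = torch.sqrt(sq_diff.mean() + 1e-8).item()

    print(diff <= 0.5)   # True -> multimer runs stop recycling early
    print(diff <= -1.)   # always False -> a negative tolerance disables early stopping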
openfold/model/model.py
@@ -190,7 +190,7 @@ class AlphaFold(nn.Module):
         sq_diff = (distances(prev_pos[..., ca_idx, :]) - distances(next_pos[..., ca_idx, :])) ** 2
         mask = mask[..., None] * mask[..., None, :]
         sq_diff = masked_mean(mask=mask, value=sq_diff, dim=list(range(len(mask.shape))))
-        diff = torch.sqrt(sq_diff + eps)
+        diff = torch.sqrt(sq_diff + eps).item()
         return diff <= self.config.recycle_early_stop_tolerance

     def iteration(self, feats, prevs, _recycle=True):
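The comparison on the return line is why this small change matters: with a 0-d tensor on the left, <= returns another 0-d tensor, while .item() moves the value into Python first (on GPU this is also the point where the result is synchronized back to the host):

    import torch

    sq_diff, eps, tol = torch.tensor(0.09), 1e-8, 0.5
    print(type(torch.sqrt(sq_diff + eps) <= tol))         # <class 'torch.Tensor'>
    print(type(torch.sqrt(sq_diff + eps).item() <= tol))  # <class 'bool'>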
tests/test_permutation.py  (new file, mode 100644)
# Copyright 2021 AlQuraishi Laboratory
# Dingquan Yu @ EMBL-Hamburg Kosinski group
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import pickle
import torch
import torch.nn as nn
import numpy as np
import unittest
from openfold.config import model_config
from openfold.data import data_transforms
from openfold.model.model import AlphaFold
from openfold.utils.loss import AlphaFoldMultimerLoss
from openfold.utils.tensor_utils import tensor_tree_map
from tests.config import consts
import logging
logger = logging.getLogger(__name__)
import os
from tests.data_utils import (
    random_template_feats,
    random_extra_msa_feats,
    random_affines_vector
)
from openfold.utils.rigid_utils import (
    Rigid,
)
class TestPermutation(unittest.TestCase):
    def setUp(self):
        """
        First set up the model config and the model as in test_model.py.
        The test case uses PDB ID 1e4k as the label.
        """
        self.test_data_dir = os.path.join(os.getcwd(), "tests/test_data")
        self.label_ids = ['label_1', 'label_1', 'label_2', 'label_2', 'label_2']
        self.asym_id = [0] * 9 + [1] * 9 + [2] * 13 + [3] * 13 + [4] * 13

    def affine_vector_to_4x4(self, affine):
        r = Rigid.from_tensor_7(affine)
        return r.to_tensor_4x4()
    def test_dry_run(self):
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
        n_seq = consts.n_seq
        n_templ = consts.n_templ
        n_res = len(self.asym_id)
        n_extra_seq = consts.n_extra

        c = model_config(consts.model, train=True)
        # somehow we need to overwrite this part of the multimer loss config
        c.loss.masked_msa.num_classes = 22
        c.model.evoformer_stack.no_blocks = 4  # no need to go overboard here
        c.model.evoformer_stack.blocks_per_ckpt = None  # don't want to set up
                                                        # deepspeed for this test
        model = AlphaFold(c)
        multimer_loss = AlphaFoldMultimerLoss(c.loss)
        example_label = [
            pickle.load(open(os.path.join(self.test_data_dir, f"{i}.pkl"), 'rb'))
            for i in self.label_ids
        ]

        batch = {}
        tf = torch.randint(c.model.input_embedder.tf_dim - 1, size=(n_res,))
        batch["target_feat"] = nn.functional.one_hot(
            tf, c.model.input_embedder.tf_dim
        ).float()
        batch["aatype"] = torch.argmax(batch["target_feat"], dim=-1)
        batch["residue_index"] = torch.arange(n_res)
        backbone_dict = {
            "backbone_affine_tensor": torch.tensor(
                random_affines_vector((n_res,))
            ),
            "backbone_affine_mask": torch.from_numpy(
                np.random.randint(0, 2, (n_res,)).astype(np.float32)
            ),
            "use_clamped_fape": torch.from_numpy(np.array(0.0)),
        }
        batch['backbone_rigid_tensor'] = self.affine_vector_to_4x4(
            backbone_dict['backbone_affine_tensor']
        )
        batch['backbone_rigid_mask'] = backbone_dict['backbone_affine_mask']
        true_msa_dict = {
            "true_msa": torch.tensor(np.random.randint(0, 21, (n_seq, n_res))),
            "bert_mask": torch.tensor(
                np.random.randint(0, 2, (n_seq, n_res)).astype(np.float32)
            )
        }
        batch.update(true_msa_dict)
        batch["msa_feat"] = torch.rand((n_seq, n_res, c.model.input_embedder.msa_dim))
        t_feats = random_template_feats(n_templ, n_res)
        batch.update({k: torch.tensor(v) for k, v in t_feats.items()})
        extra_feats = random_extra_msa_feats(n_extra_seq, n_res)
        batch.update({k: torch.tensor(v) for k, v in extra_feats.items()})
        batch["msa_mask"] = torch.randint(
            low=0, high=2, size=(n_seq, n_res)
        ).float()
        batch["seq_mask"] = torch.randint(low=0, high=2, size=(n_res,)).float()
        batch.update(data_transforms.make_atom14_masks(batch))
        batch["no_recycling_iters"] = torch.tensor(2.)
        batch["seq_length"] = torch.from_numpy(np.array([n_res] * n_res))

        if consts.is_multimer:
            # Set asym_id, entity_id and sym_id so that the batch encodes
            # 5 chains belonging to 2 entities.
            asym_id = self.asym_id
            batch["asym_id"] = torch.tensor(asym_id, dtype=torch.float64)
            # batch["entity_id"] = torch.randint(0, 1, size=(n_res,))
            batch['entity_id'] = torch.tensor(
                [0] * 18 + [1] * 39, dtype=torch.float64
            )
            batch["sym_id"] = torch.tensor(asym_id, dtype=torch.float64)
            # batch["num_sym"] = torch.tensor([1]*18+[2]*13,dtype=torch.int64) # currently there are just 2 chains
            batch["extra_deletion_matrix"] = torch.randint(
                0, 2, size=(n_extra_seq, n_res)
            )

        add_recycling_dims = lambda t: (
            t.unsqueeze(-1).expand(*t.shape, c.data.common.max_recycling_iters)
        )
        add_batch_size_dimension = lambda t: (
            t.unsqueeze(0)
        )
        batch = tensor_tree_map(add_recycling_dims, batch)
        batch = tensor_tree_map(add_batch_size_dimension, batch)

        with torch.no_grad():
            out = model(batch)
            print("finished forward on batch with batch_size dim")
            multimer_loss(out, (batch, example_label))
            # print(f"permuated_labels is {type(permutated_labels)} and keys are:\n {permutated_labels.keys()}")
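Assuming the pickled labels referenced in setUp (label_1.pkl, label_2.pkl) exist under tests/test_data and that consts.model selects a multimer preset, the new test runs under the standard unittest runner:

    python -m unittest tests.test_permutation -v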