Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
6f26b0ad
"runtime/rust/python-wheel/examples/error_handling/run.py" did not exist on "ffbc06ccf7c9abb40123f3d6ea047caff4609c6c"
Unverified
Commit
6f26b0ad
authored
Dec 06, 2023
by
Dingquan Yu
Committed by
GitHub
Dec 06, 2023
Browse files
Merge branch 'multimer' into speedup-dataloader
parents
78ecfc64
58d65692
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
16 deletions
+26
-16
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+2
-1
openfold/data/tools/jackhmmer.py
openfold/data/tools/jackhmmer.py
+23
-14
openfold/model/structure_module.py
openfold/model/structure_module.py
+1
-1
No files found.
openfold/data/data_pipeline.py
View file @
6f26b0ad
...
@@ -726,7 +726,8 @@ class DataPipeline:
...
@@ -726,7 +726,8 @@ class DataPipeline:
)
)
# The "hmm_output" exception is a crude way to exclude
# The "hmm_output" exception is a crude way to exclude
# multimer template hits.
# multimer template hits.
elif
(
ext
==
".sto"
and
not
"hmm_output"
==
filename
):
# Multimer "uniprot_hits" processed separately.
elif
(
ext
==
".sto"
and
filename
not
in
[
"uniprot_hits"
,
"hmm_output"
]):
msa
=
parsers
.
parse_stockholm
(
read_msa
(
start
,
size
))
msa
=
parsers
.
parse_stockholm
(
read_msa
(
start
,
size
))
else
:
else
:
continue
continue
...
...
openfold/data/tools/jackhmmer.py
View file @
6f26b0ad
...
@@ -191,12 +191,21 @@ class Jackhmmer:
...
@@ -191,12 +191,21 @@ class Jackhmmer:
input_fasta_path
:
str
,
input_fasta_path
:
str
,
max_sequences
:
Optional
[
int
]
=
None
max_sequences
:
Optional
[
int
]
=
None
)
->
Sequence
[
Mapping
[
str
,
Any
]]:
)
->
Sequence
[
Mapping
[
str
,
Any
]]:
return
self
.
query_multiple
([
input_fasta_path
],
max_sequences
)[
0
]
def
query_multiple
(
self
,
input_fasta_paths
:
str
,
max_sequences
:
Optional
[
int
]
=
None
)
->
Sequence
[
Sequence
[
Mapping
[
str
,
Any
]]]:
"""Queries the database using Jackhmmer."""
"""Queries the database using Jackhmmer."""
if
self
.
num_streamed_chunks
is
None
:
if
self
.
num_streamed_chunks
is
None
:
single_chunk_results
=
[]
for
input_fasta_path
in
input_fasta_paths
:
single_chunk_result
=
self
.
_query_chunk
(
single_chunk_result
=
self
.
_query_chunk
(
input_fasta_path
,
self
.
database_path
,
max_sequences
,
input_fasta_path
,
self
.
database_path
,
max_sequences
,
)
)
return
[
single_chunk_result
]
single_chunk_results
.
append
(
single_chunk_result
)
return
single_chunk_results
db_basename
=
os
.
path
.
basename
(
self
.
database_path
)
db_basename
=
os
.
path
.
basename
(
self
.
database_path
)
db_remote_chunk
=
lambda
db_idx
:
f
"
{
self
.
database_path
}
.
{
db_idx
}
"
db_remote_chunk
=
lambda
db_idx
:
f
"
{
self
.
database_path
}
.
{
db_idx
}
"
...
@@ -211,7 +220,7 @@ class Jackhmmer:
...
@@ -211,7 +220,7 @@ class Jackhmmer:
# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
with
futures
.
ThreadPoolExecutor
(
max_workers
=
2
)
as
executor
:
with
futures
.
ThreadPoolExecutor
(
max_workers
=
2
)
as
executor
:
chunked_output
=
[]
chunked_output
s
=
[
[]
for
_
in
range
(
len
(
input_fasta_paths
))
]
for
i
in
range
(
1
,
self
.
num_streamed_chunks
+
1
):
for
i
in
range
(
1
,
self
.
num_streamed_chunks
+
1
):
# Copy the chunk locally
# Copy the chunk locally
if
i
==
1
:
if
i
==
1
:
...
@@ -229,7 +238,8 @@ class Jackhmmer:
...
@@ -229,7 +238,8 @@ class Jackhmmer:
# Run Jackhmmer with the chunk
# Run Jackhmmer with the chunk
future
.
result
()
future
.
result
()
chunked_output
.
append
(
for
fasta_idx
,
input_fasta_path
in
enumerate
(
input_fasta_paths
):
chunked_outputs
[
fasta_idx
].
append
(
self
.
_query_chunk
(
self
.
_query_chunk
(
input_fasta_path
,
input_fasta_path
,
db_local_chunk
(
i
),
db_local_chunk
(
i
),
...
@@ -239,11 +249,10 @@ class Jackhmmer:
...
@@ -239,11 +249,10 @@ class Jackhmmer:
# Remove the local copy of the chunk
# Remove the local copy of the chunk
os
.
remove
(
db_local_chunk
(
i
))
os
.
remove
(
db_local_chunk
(
i
))
future
=
next_future
# Do not set next_future for the last chunk so that this works
# Do not set next_future for the last chunk so that this works
# even for databases with only 1 chunk
# even for databases with only 1 chunk
if
(
i
<
self
.
num_streamed_chunks
):
if
(
i
<
self
.
num_streamed_chunks
):
future
=
next_future
future
=
next_future
if
self
.
streaming_callback
:
if
self
.
streaming_callback
:
self
.
streaming_callback
(
i
)
self
.
streaming_callback
(
i
)
return
chunked_output
return
chunked_output
s
openfold/model/structure_module.py
View file @
6f26b0ad
...
@@ -716,7 +716,7 @@ class InvariantPointAttentionMultimer(nn.Module):
...
@@ -716,7 +716,7 @@ class InvariantPointAttentionMultimer(nn.Module):
o_pt_norm
=
o_pt
.
norm
(
epsilon
=
1e-8
)
o_pt_norm
=
o_pt
.
norm
(
epsilon
=
1e-8
)
if
(
_offload_inference
):
if
(
_offload_inference
):
z
[
0
]
=
z
[
0
].
to
(
o_pt
.
device
)
z
[
0
]
=
z
[
0
].
to
(
o_pt
.
x
.
device
)
o_pair
=
torch
.
einsum
(
'...ijh, ...ijc->...ihc'
,
a
,
z
[
0
].
to
(
dtype
=
a
.
dtype
))
o_pair
=
torch
.
einsum
(
'...ijh, ...ijc->...ihc'
,
a
,
z
[
0
].
to
(
dtype
=
a
.
dtype
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment