Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
96f46b08
Unverified
Commit
96f46b08
authored
Dec 05, 2023
by
Jennifer Wei
Committed by
GitHub
Dec 05, 2023
Browse files
Merge pull request #375 from aqlaboratory/jackhmmer_query_update
Adds query_multiple to jackhmmer.py
parents
2c36cb8b
4958683b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
14 deletions
+23
-14
openfold/data/tools/jackhmmer.py
openfold/data/tools/jackhmmer.py
+23
-14
No files found.
openfold/data/tools/jackhmmer.py
View file @
96f46b08
...
...
@@ -186,17 +186,26 @@ class Jackhmmer:
)
return
raw_output
def
query
(
self
,
input_fasta_path
:
str
,
max_sequences
:
Optional
[
int
]
=
None
)
->
Sequence
[
Mapping
[
str
,
Any
]]:
return
self
.
query_multiple
([
input_fasta_path
],
max_sequences
)[
0
]
def
query_multiple
(
self
,
input_fasta_paths
:
str
,
max_sequences
:
Optional
[
int
]
=
None
)
->
Sequence
[
Sequence
[
Mapping
[
str
,
Any
]]]:
"""Queries the database using Jackhmmer."""
if
self
.
num_streamed_chunks
is
None
:
single_chunk_result
=
self
.
_query_chunk
(
input_fasta_path
,
self
.
database_path
,
max_sequences
,
)
return
[
single_chunk_result
]
single_chunk_results
=
[]
for
input_fasta_path
in
input_fasta_paths
:
single_chunk_result
=
self
.
_query_chunk
(
input_fasta_path
,
self
.
database_path
,
max_sequences
,
)
single_chunk_results
.
append
(
single_chunk_result
)
return
single_chunk_results
db_basename
=
os
.
path
.
basename
(
self
.
database_path
)
db_remote_chunk
=
lambda
db_idx
:
f
"
{
self
.
database_path
}
.
{
db_idx
}
"
...
...
@@ -211,7 +220,7 @@ class Jackhmmer:
# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
with
futures
.
ThreadPoolExecutor
(
max_workers
=
2
)
as
executor
:
chunked_output
=
[]
chunked_output
s
=
[
[]
for
_
in
range
(
len
(
input_fasta_paths
))
]
for
i
in
range
(
1
,
self
.
num_streamed_chunks
+
1
):
# Copy the chunk locally
if
i
==
1
:
...
...
@@ -229,21 +238,21 @@ class Jackhmmer:
# Run Jackhmmer with the chunk
future
.
result
()
chunked_output
.
append
(
self
.
_query_chunk
(
input_fasta_path
,
db_local_chunk
(
i
),
max_sequences
for
fasta_idx
,
input_fasta_path
in
enumerate
(
input_fasta_paths
):
chunked_outputs
[
fasta_idx
].
append
(
self
.
_query_chunk
(
input_fasta_path
,
db_local_chunk
(
i
),
max_sequences
)
)
)
# Remove the local copy of the chunk
os
.
remove
(
db_local_chunk
(
i
))
future
=
next_future
# Do not set next_future for the last chunk so that this works
# even for databases with only 1 chunk
if
(
i
<
self
.
num_streamed_chunks
):
future
=
next_future
if
self
.
streaming_callback
:
self
.
streaming_callback
(
i
)
return
chunked_output
return
chunked_output
s
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment