Commit ed3ecabf authored by Augustin Zidek's avatar Augustin Zidek Committed by Copybara-Service
Browse files

Add query_multiple to Jackhmmer.

This enables searching with multiple queries against a db chunk without having to re-download it each time.

PiperOrigin-RevId: 495553498
Change-Id: I5e3df1cc31cdcef591a1516797f0372f171e413c
parent dd643b1e
......@@ -167,10 +167,20 @@ class Jackhmmer:
input_fasta_path: str,
max_sequences: Optional[int] = None) -> Sequence[Mapping[str, Any]]:
"""Queries the database using Jackhmmer."""
return self.query_multiple([input_fasta_path], max_sequences)[0]
def query_multiple(
self,
input_fasta_paths: Sequence[str],
max_sequences: Optional[int] = None,
) -> Sequence[Sequence[Mapping[str, Any]]]:
"""Queries the database for multiple queries using Jackhmmer."""
if self.num_streamed_chunks is None:
single_chunk_result = self._query_chunk(
input_fasta_path, self.database_path, max_sequences)
return [single_chunk_result]
single_chunk_results = []
for input_fasta_path in input_fasta_paths:
single_chunk_results.append([self._query_chunk(
input_fasta_path, self.database_path, max_sequences)])
return single_chunk_results
db_basename = os.path.basename(self.database_path)
db_remote_chunk = lambda db_idx: f'{self.database_path}.{db_idx}'
......@@ -185,7 +195,7 @@ class Jackhmmer:
# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
with futures.ThreadPoolExecutor(max_workers=2) as executor:
chunked_output = []
chunked_outputs = [[] for _ in range(len(input_fasta_paths))]
for i in range(1, self.num_streamed_chunks + 1):
# Copy the chunk locally
if i == 1:
......@@ -197,9 +207,9 @@ class Jackhmmer:
# Run Jackhmmer with the chunk
future.result()
chunked_output.append(self._query_chunk(
input_fasta_path, db_local_chunk(i), max_sequences))
for fasta_index, input_fasta_path in enumerate(input_fasta_paths):
chunked_outputs[fasta_index].append(self._query_chunk(
input_fasta_path, db_local_chunk(i), max_sequences))
# Remove the local copy of the chunk
os.remove(db_local_chunk(i))
# Do not set next_future for the last chunk so that this works even for
......@@ -208,4 +218,4 @@ class Jackhmmer:
future = next_future
if self.streaming_callback:
self.streaming_callback(i)
return chunked_output
return chunked_outputs
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment