Unverified commit 96f46b08, authored by Jennifer Wei, committed by GitHub
Browse files

Merge pull request #375 from aqlaboratory/jackhmmer_query_update

Adds query_multiple to jackhmmer.py
parents 2c36cb8b 4958683b
...@@ -186,17 +186,26 @@ class Jackhmmer: ...@@ -186,17 +186,26 @@ class Jackhmmer:
) )
return raw_output return raw_output
def query(self, def query(self,
input_fasta_path: str, input_fasta_path: str,
max_sequences: Optional[int] = None max_sequences: Optional[int] = None
) -> Sequence[Mapping[str, Any]]: ) -> Sequence[Mapping[str, Any]]:
return self.query_multiple([input_fasta_path], max_sequences)[0]
def query_multiple(self,
input_fasta_paths: str,
max_sequences: Optional[int] = None
) -> Sequence[Sequence[Mapping[str, Any]]]:
"""Queries the database using Jackhmmer.""" """Queries the database using Jackhmmer."""
if self.num_streamed_chunks is None: if self.num_streamed_chunks is None:
single_chunk_result = self._query_chunk( single_chunk_results = []
input_fasta_path, self.database_path, max_sequences, for input_fasta_path in input_fasta_paths:
) single_chunk_result = self._query_chunk(
return [single_chunk_result] input_fasta_path, self.database_path, max_sequences,
)
single_chunk_results.append(single_chunk_result)
return single_chunk_results
db_basename = os.path.basename(self.database_path) db_basename = os.path.basename(self.database_path)
db_remote_chunk = lambda db_idx: f"{self.database_path}.{db_idx}" db_remote_chunk = lambda db_idx: f"{self.database_path}.{db_idx}"
...@@ -211,7 +220,7 @@ class Jackhmmer: ...@@ -211,7 +220,7 @@ class Jackhmmer:
# Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk # Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
with futures.ThreadPoolExecutor(max_workers=2) as executor: with futures.ThreadPoolExecutor(max_workers=2) as executor:
chunked_output = [] chunked_outputs = [[] for _ in range(len(input_fasta_paths))]
for i in range(1, self.num_streamed_chunks + 1): for i in range(1, self.num_streamed_chunks + 1):
# Copy the chunk locally # Copy the chunk locally
if i == 1: if i == 1:
...@@ -229,21 +238,21 @@ class Jackhmmer: ...@@ -229,21 +238,21 @@ class Jackhmmer:
# Run Jackhmmer with the chunk # Run Jackhmmer with the chunk
future.result() future.result()
chunked_output.append( for fasta_idx, input_fasta_path in enumerate(input_fasta_paths):
self._query_chunk( chunked_outputs[fasta_idx].append(
input_fasta_path, self._query_chunk(
db_local_chunk(i), input_fasta_path,
max_sequences db_local_chunk(i),
max_sequences
)
) )
)
# Remove the local copy of the chunk # Remove the local copy of the chunk
os.remove(db_local_chunk(i)) os.remove(db_local_chunk(i))
future = next_future
# Do not set next_future for the last chunk so that this works # Do not set next_future for the last chunk so that this works
# even for databases with only 1 chunk # even for databases with only 1 chunk
if(i < self.num_streamed_chunks): if(i < self.num_streamed_chunks):
future = next_future future = next_future
if self.streaming_callback: if self.streaming_callback:
self.streaming_callback(i) self.streaming_callback(i)
return chunked_output return chunked_outputs
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment