"tests/vscode:/vscode.git/clone" did not exist on "7a47df22a5db7366f83a943ea5f56f509a9a0330"
Unverified Commit 56a32c3b authored by Yuanqi Wang, committed by GitHub
Browse files

Add more parameter functionality to HHBlits and HHSearch runners (#51)



* [ray] use ray for DB queries

* patched openmm

* add test exec script

* remove output before exec

* [tools] add coverage param to HHBlits

* [tools] add more param options to HHSearch

* [tools] remove ray from JackHmmer runner

* [tools] add comment for cov param in hhblits

* [tools] cleanup all debug code

* [tools] restore original files
Co-authored-by: shenggan <csg19971016@gmail.com>
parent ae44a3b7
......@@ -46,6 +46,7 @@ class HHBlits:
alt: Optional[int] = None,
p: int = _HHBLITS_DEFAULT_P,
z: int = _HHBLITS_DEFAULT_Z,
cov: int = 0,
):
"""Initializes the Python HHblits wrapper.
......@@ -71,6 +72,8 @@ class HHBlits:
HHblits default: 20.
z: Hard cap on number of hits reported in the hhr file.
HHblits default: 500. NB: The relevant HHblits flag is -Z not -z.
cov: Minimum coverage with master sequence (%).
HHBlits default: 0
Raises:
RuntimeError: If HHblits binary not found within the path.
......@@ -98,6 +101,7 @@ class HHBlits:
self.alt = alt
self.p = p
self.z = z
self.cov = cov
def query(self, input_fasta_path: str) -> Mapping[str, Any]:
"""Queries the database using HHblits."""
......@@ -139,6 +143,8 @@ class HHBlits:
cmd += ["-p", str(self.p)]
if self.z != _HHBLITS_DEFAULT_Z:
cmd += ["-Z", str(self.z)]
if self.cov:
cmd += ["-cov", str(self.cov)]
cmd += db_cmd
logging.info('Launching subprocess "%s"', " ".join(cmd))
......
......@@ -18,7 +18,7 @@ import glob
import logging
import os
import subprocess
from typing import Sequence
from typing import Sequence, Union
from fastfold.data.tools import utils
......@@ -33,6 +33,14 @@ class HHSearch:
databases: Sequence[str],
n_cpu: int = 2,
maxseq: int = 1_000_000,
mact: float = 0.35,
min_align: int = 10,
max_align: int = 500,
min_lines: int = 10,
max_lines: int = 500,
aliw: int = 100000,
e_value: float = 0.001,
min_prob: float = 20.0,
):
"""Initializes the Python HHsearch wrapper.
......@@ -44,6 +52,15 @@ class HHSearch:
n_cpu: The number of CPUs to use
maxseq: The maximum number of rows in an input alignment. Note that this
parameter is only supported in HHBlits version 3.1 and higher.
mact: Posterior probability threshold for MAC realignment controlling greediness at alignment
ends.
min_align: Minimum number of alignments in alignment list. (-b)
max_align: Maximum number of alignments in alignment list. (-B)
min_lines: Minimum number of lines in summary hit list. (-z)
max_lines: Maximum number of lines in summary hit list. (-Z)
aliw: Number of columns per line in alignment list.
e_value: E-value cutoff for inclusion in result alignment. (-e)
min_prob: Minimum probability in summary and alignment list. (-p)
Raises:
RuntimeError: If HHsearch binary not found within the path.
......@@ -52,6 +69,14 @@ class HHSearch:
self.databases = databases
self.n_cpu = n_cpu
self.maxseq = maxseq
self.mact = mact
self.min_align = min_align
self.max_align = max_align
self.min_lines = min_lines
self.max_lines = max_lines
self.aliw = aliw
self.e_value = e_value
self.min_prob = min_prob
for database_path in self.databases:
if not glob.glob(database_path + "_*"):
......@@ -62,11 +87,12 @@ class HHSearch:
f"Could not find HHsearch database {database_path}"
)
def query(self, a3m: str) -> str:
def query(self, a3m: str, gen_atab: bool = False) -> Union[str, tuple]:
"""Queries the database using HHsearch using a given a3m."""
with utils.tmpdir_manager(base_dir="/tmp") as query_tmp_dir:
input_path = os.path.join(query_tmp_dir, "query.a3m")
hhr_path = os.path.join(query_tmp_dir, "output.hhr")
atab_path = os.path.join(query_tmp_dir, "output.atab")
with open(input_path, "w") as f:
f.write(a3m)
......@@ -84,7 +110,25 @@ class HHSearch:
str(self.maxseq),
"-cpu",
str(self.n_cpu),
"-b",
str(self.min_align),
"-B",
str(self.max_align),
"-z",
str(self.min_lines),
"-Z",
str(self.max_lines),
"-mact",
str(self.mact),
"-aliw",
str(self.aliw),
"-e",
str(self.e_value),
"-p",
str(self.min_prob),
] + db_cmd
if gen_atab:
cmd += ["-atab", atab_path]
logging.info('Launching subprocess "%s"', " ".join(cmd))
process = subprocess.Popen(
......@@ -103,4 +147,10 @@ class HHSearch:
with open(hhr_path) as f:
hhr = f.read()
return hhr
if gen_atab:
with open(atab_path) as f:
atab = f.read()
if gen_atab:
return hhr, atab
else:
return hhr
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment