Commit 61df432b authored by Gustaf Ahdritz
Browse files

Add option to run with no MSAs

parent 72b89b59
...@@ -310,7 +310,6 @@ class AlignmentRunner: ...@@ -310,7 +310,6 @@ class AlignmentRunner:
class DataPipeline: class DataPipeline:
"""Assembles input features.""" """Assembles input features."""
def __init__( def __init__(
self, self,
template_featurizer: templates.TemplateHitFeaturizer, template_featurizer: templates.TemplateHitFeaturizer,
...@@ -362,11 +361,27 @@ class DataPipeline: ...@@ -362,11 +361,27 @@ class DataPipeline:
def _process_msa_feats( def _process_msa_feats(
self, self,
alignment_dir: str, alignment_dir: str,
input_sequence: Optional[str] = None,
) -> Mapping[str, Any]: ) -> Mapping[str, Any]:
msa_data = self._parse_msa_data(alignment_dir) msa_data = self._parse_msa_data(alignment_dir)
if(len(msa_data) == 0):
if(input_sequence is None):
raise ValueError(
"""
If the alignment dir contains no MSAs, an input sequence
must be provided.
"""
)
msa_data["dummy"] = {
"msa": [input_sequence],
"deletion_matrix": [[0 for _ in input_sequence]],
}
msas, deletion_matrices = zip(*[ msas, deletion_matrices = zip(*[
(v["msa"], v["deletion_matrix"]) for v in msa_data.values() (v["msa"], v["deletion_matrix"]) for v in msa_data.values()
]) ])
msa_features = make_msa_features( msa_features = make_msa_features(
msas=msas, msas=msas,
deletion_matrices=deletion_matrices, deletion_matrices=deletion_matrices,
...@@ -404,7 +419,7 @@ class DataPipeline: ...@@ -404,7 +419,7 @@ class DataPipeline:
num_res=num_res, num_res=num_res,
) )
msa_features = self._process_msa_feats(alignment_dir) msa_features = self._process_msa_feats(alignment_dir, input_sequence)
return { return {
**sequence_features, **sequence_features,
...@@ -442,7 +457,7 @@ class DataPipeline: ...@@ -442,7 +457,7 @@ class DataPipeline:
query_release_date=to_date(mmcif.header["release_date"]) query_release_date=to_date(mmcif.header["release_date"])
) )
msa_features = self._process_msa_feats(alignment_dir) msa_features = self._process_msa_feats(alignment_dir, input_sequence)
return {**mmcif_feats, **template_features, **msa_features} return {**mmcif_feats, **template_features, **msa_features}
...@@ -469,7 +484,7 @@ class DataPipeline: ...@@ -469,7 +484,7 @@ class DataPipeline:
self.template_featurizer, self.template_featurizer,
) )
msa_features = self._process_msa_feats(alignment_dir) msa_features = self._process_msa_feats(alignment_dir, input_sequence)
return {**pdb_feats, **template_features, **msa_features} return {**pdb_feats, **template_features, **msa_features}
...@@ -496,7 +511,7 @@ class DataPipeline: ...@@ -496,7 +511,7 @@ class DataPipeline:
self.template_featurizer, self.template_featurizer,
) )
msa_features = self._process_msa_feats(alignment_dir) msa_features = self._process_msa_feats(alignment_dir, input_sequence)
return {**core_feats, **template_features, **msa_features} return {**core_feats, **template_features, **msa_features}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment