Commit 08ef6e9f authored by Sachin Kadyan
Browse files

Add sequence embedding mode option to .core file parser

parent 395a9f1b
......@@ -240,6 +240,7 @@ class OpenFoldSingleDataset(torch.utils.data.Dataset):
elif(ext == ".core"):
data = self.data_pipeline.process_core(
path, alignment_dir, alignment_index,
seqemb_mode=self.config.seqemb_mode.enabled,
)
elif(ext == ".pdb"):
structure_index = None
......
......@@ -802,6 +802,7 @@ class DataPipeline:
core_path: str,
alignment_dir: str,
alignment_index: Optional[str] = None,
seqemb_mode: bool = False,
) -> FeatureDict:
"""
Assembles features for a protein in a ProteinNet .core file.
......@@ -821,9 +822,15 @@ class DataPipeline:
self.template_featurizer,
)
msa_features = self._process_msa_feats(alignment_dir, input_sequence)
sequence_embedding_features = {}
# If in sequence embedding mode, generate dummy MSA features using just the input sequence
if seqemb_mode:
msa_features = make_dummy_msa_feats(input_sequence)
sequence_embedding_features = self._process_seqemb_features(alignment_dir)
else:
msa_features = self._process_msa_feats(alignment_dir, input_sequence)
return {**core_feats, **template_features, **msa_features}
return {**core_feats, **template_features, **msa_features, **sequence_embedding_features}
def process_multiseq_fasta(self,
fasta_path: str,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment