Commit e9287b49 authored by Dingquan Yu's avatar Dingquan Yu
Browse files

added steps to read zipped msa files

parent 9d6127cb
...@@ -18,7 +18,6 @@ import copy ...@@ -18,7 +18,6 @@ import copy
import collections import collections
import contextlib import contextlib
import dataclasses import dataclasses
from multiprocessing import cpu_count
import tempfile import tempfile
from typing import Mapping, Optional, Sequence, Any, MutableMapping, Union from typing import Mapping, Optional, Sequence, Any, MutableMapping, Union
import subprocess import subprocess
...@@ -30,8 +29,7 @@ from openfold.data.templates import get_custom_template_features, empty_template ...@@ -30,8 +29,7 @@ from openfold.data.templates import get_custom_template_features, empty_template
from openfold.data.tools import jackhmmer, hhblits, hhsearch, hmmsearch from openfold.data.tools import jackhmmer, hhblits, hhsearch, hmmsearch
from openfold.data.tools.utils import to_date from openfold.data.tools.utils import to_date
from openfold.np import residue_constants, protein from openfold.np import residue_constants, protein
import concurrent import tarfile
from concurrent.futures import ThreadPoolExecutor
FeatureDict = MutableMapping[str, np.ndarray] FeatureDict = MutableMapping[str, np.ndarray]
TemplateSearcher = Union[hhsearch.HHSearch, hmmsearch.Hmmsearch] TemplateSearcher = Union[hhsearch.HHSearch, hmmsearch.Hmmsearch]
...@@ -1160,10 +1158,28 @@ class DataPipelineMultimer: ...@@ -1160,10 +1158,28 @@ class DataPipelineMultimer:
is_homomer_or_monomer: bool is_homomer_or_monomer: bool
) -> FeatureDict: ) -> FeatureDict:
"""Runs the monomer pipeline on a single chain.""" """Runs the monomer pipeline on a single chain."""
@contextlib.contextmanager
def open_tar_bz2(file_path):
    """Open a bzip2-compressed tar archive and always close it on exit.

    Args:
        file_path: Path to the ``.tar.bz2`` archive to open for reading.

    Yields:
        The opened ``tarfile.TarFile`` object.

    Raises:
        tarfile.TarError, OSError: If the archive cannot be opened, or if
            reading/extracting inside the ``with`` body fails. The error is
            reported and then re-raised, so the caller never continues with
            a partially extracted archive.
    """
    tar = tarfile.open(file_path, 'r:bz2')
    try:
        yield tar
    except (tarfile.TarError, OSError):
        # Only archive/I/O problems are reported as unzip failures; any other
        # exception raised inside the ``with`` body propagates untouched
        # instead of being silently swallowed.
        print(f"Failed to unzip the file at: {file_path}")
        raise
    finally:
        tar.close()
chain_fasta_str = f'>{chain_id}\n{sequence}\n' chain_fasta_str = f'>{chain_id}\n{sequence}\n'
if chain_alignment_index is None and not os.path.exists(chain_alignment_dir): if chain_alignment_index is not None and os.path.exists(chain_alignment_dir):
raise ValueError(f"Alignments for {chain_id} not found...") pass
elif chain_alignment_index is None and not os.path.exists(chain_alignment_dir):
raise ValueError(f"Alignments for {chain_id} not found...")
elif chain_alignment_index is not None and os.path.exists(os.path.join(chain_alignment_dir,".tar.bz2")):
with tempfile.TemporaryDirectory(delete=False) as tmpdir:
with open_tar_bz2(os.path.join(chain_alignment_dir,".tar.bz2")) as tar:
tar.extractcall(path = tmpdir.name)
chain_alignment_dir = tmpdir.name
with temp_fasta_file(chain_fasta_str) as chain_fasta_path: with temp_fasta_file(chain_fasta_str) as chain_fasta_path:
chain_features = self._monomer_data_pipeline.process_fasta( chain_features = self._monomer_data_pipeline.process_fasta(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment