Commit de28d842 authored by Dingquan Yu
Browse files

Added steps for reading in compressed MSA files

parent e9287b49
...@@ -737,8 +737,10 @@ class DataPipeline: ...@@ -737,8 +737,10 @@ class DataPipeline:
# Now will split the following steps into multiple processes # Now will split the following steps into multiple processes
current_directory = os.path.dirname(os.path.abspath(__file__)) current_directory = os.path.dirname(os.path.abspath(__file__))
cmd = f"{current_directory}/tools/parse_msa_files.py" cmd = f"{current_directory}/tools/parse_msa_files.py"
msa_data = subprocess.run(['python',cmd, f"--alignment_dir={alignment_dir}"],capture_output=True, text=True) msa_data_path = subprocess.run(['python',cmd, f"--alignment_dir={alignment_dir}"],capture_output=True, text=True)
msa_data = pickle.load((open(msa_data.stdout.lstrip().rstrip(),'rb'))) msa_data_path = msa_data_path.stdout.lstrip().rstrip()
msa_data = pickle.load((open(msa_data_path,'rb')))
os.remove(msa_data_path)
return msa_data return msa_data
...@@ -1159,27 +1161,17 @@ class DataPipelineMultimer: ...@@ -1159,27 +1161,17 @@ class DataPipelineMultimer:
) -> FeatureDict: ) -> FeatureDict:
"""Runs the monomer pipeline on a single chain.""" """Runs the monomer pipeline on a single chain."""
@contextlib.contextmanager
def open_tar_bz2(file_path):
tar = tarfile.open(file_path, 'r:bz2')
try:
yield tar
except:
print(f"Filed to unzip the file at: {file_path}")
finally:
tar.close()
chain_fasta_str = f'>{chain_id}\n{sequence}\n' chain_fasta_str = f'>{chain_id}\n{sequence}\n'
if chain_alignment_index is not None and os.path.exists(chain_alignment_dir): if chain_alignment_index is not None and os.path.exists(chain_alignment_dir):
pass pass
elif chain_alignment_index is None and not os.path.exists(chain_alignment_dir): elif chain_alignment_index is None and not (os.path.exists(chain_alignment_dir) or os.path.exists(chain_alignment_dir + ".tar.bz2")):
raise ValueError(f"Alignments for {chain_id} not found...") raise ValueError(f"Alignments for {chain_id} not found...")
elif chain_alignment_index is not None and os.path.exists(os.path.join(chain_alignment_dir,".tar.bz2")): elif chain_alignment_index is not None or os.path.exists(chain_alignment_dir + ".tar.bz2"):
with tempfile.TemporaryDirectory(delete=False) as tmpdir: tmpdir = tempfile.mkdtemp()
with open_tar_bz2(os.path.join(chain_alignment_dir,".tar.bz2")) as tar: cmd = f"tar -xvf {chain_alignment_dir + '.tar.bz2'} -C {tmpdir}"
tar.extractcall(path = tmpdir.name) result = subprocess.run(cmd, capture_output=True, text=True, shell=True)
chain_alignment_dir = tmpdir.name chain_alignment_dir = os.path.join(tmpdir, os.listdir(tmpdir)[0])
with temp_fasta_file(chain_fasta_str) as chain_fasta_path: with temp_fasta_file(chain_fasta_str) as chain_fasta_path:
chain_features = self._monomer_data_pipeline.process_fasta( chain_features = self._monomer_data_pipeline.process_fasta(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment