Unverified commit a475ad11, authored by Christina Floristean, committed by GitHub
Browse files

Merge pull request #385 from dingquanyu/update-data-pipeline

Update data pipeline
parents 9d6127cb 989f2d93
...@@ -30,8 +30,6 @@ from openfold.data.templates import get_custom_template_features, empty_template ...@@ -30,8 +30,6 @@ from openfold.data.templates import get_custom_template_features, empty_template
from openfold.data.tools import jackhmmer, hhblits, hhsearch, hmmsearch from openfold.data.tools import jackhmmer, hhblits, hhsearch, hmmsearch
from openfold.data.tools.utils import to_date from openfold.data.tools.utils import to_date
from openfold.np import residue_constants, protein from openfold.np import residue_constants, protein
import concurrent
from concurrent.futures import ThreadPoolExecutor
FeatureDict = MutableMapping[str, np.ndarray] FeatureDict = MutableMapping[str, np.ndarray]
TemplateSearcher = Union[hhsearch.HHSearch, hmmsearch.Hmmsearch] TemplateSearcher = Union[hhsearch.HHSearch, hmmsearch.Hmmsearch]
...@@ -739,8 +737,10 @@ class DataPipeline: ...@@ -739,8 +737,10 @@ class DataPipeline:
# Now will split the following steps into multiple processes # Now will split the following steps into multiple processes
current_directory = os.path.dirname(os.path.abspath(__file__)) current_directory = os.path.dirname(os.path.abspath(__file__))
cmd = f"{current_directory}/tools/parse_msa_files.py" cmd = f"{current_directory}/tools/parse_msa_files.py"
msa_data = subprocess.run(['python',cmd, f"--alignment_dir={alignment_dir}"],capture_output=True, text=True) msa_data_path = subprocess.run(['python',cmd, f"--alignment_dir={alignment_dir}"],capture_output=True, text=True)
msa_data = pickle.load((open(msa_data.stdout.lstrip().rstrip(),'rb'))) msa_data_path = msa_data_path.stdout.lstrip().rstrip()
msa_data = pickle.load((open(msa_data_path,'rb')))
os.remove(msa_data_path)
return msa_data return msa_data
...@@ -1351,4 +1351,4 @@ class DataPipelineMultimer: ...@@ -1351,4 +1351,4 @@ class DataPipelineMultimer:
# Pad MSA to avoid zero-sized extra_msa. # Pad MSA to avoid zero-sized extra_msa.
np_example = pad_msa(np_example, 512) np_example = pad_msa(np_example, 512)
return np_example return np_example
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment