Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
e9287b49
Commit
e9287b49
authored
Dec 13, 2023
by
Dingquan Yu
Browse files
added steps to read zipped msa files
parent
9d6127cb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
5 deletions
+21
-5
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+21
-5
No files found.
openfold/data/data_pipeline.py
View file @
e9287b49
...
@@ -18,7 +18,6 @@ import copy
...
@@ -18,7 +18,6 @@ import copy
import
collections
import
collections
import
contextlib
import
contextlib
import
dataclasses
import
dataclasses
from
multiprocessing
import
cpu_count
import
tempfile
import
tempfile
from
typing
import
Mapping
,
Optional
,
Sequence
,
Any
,
MutableMapping
,
Union
from
typing
import
Mapping
,
Optional
,
Sequence
,
Any
,
MutableMapping
,
Union
import
subprocess
import
subprocess
...
@@ -30,8 +29,7 @@ from openfold.data.templates import get_custom_template_features, empty_template
...
@@ -30,8 +29,7 @@ from openfold.data.templates import get_custom_template_features, empty_template
from
openfold.data.tools
import
jackhmmer
,
hhblits
,
hhsearch
,
hmmsearch
from
openfold.data.tools
import
jackhmmer
,
hhblits
,
hhsearch
,
hmmsearch
from
openfold.data.tools.utils
import
to_date
from
openfold.data.tools.utils
import
to_date
from
openfold.np
import
residue_constants
,
protein
from
openfold.np
import
residue_constants
,
protein
import
concurrent
import
tarfile
from
concurrent.futures
import
ThreadPoolExecutor
FeatureDict
=
MutableMapping
[
str
,
np
.
ndarray
]
FeatureDict
=
MutableMapping
[
str
,
np
.
ndarray
]
TemplateSearcher
=
Union
[
hhsearch
.
HHSearch
,
hmmsearch
.
Hmmsearch
]
TemplateSearcher
=
Union
[
hhsearch
.
HHSearch
,
hmmsearch
.
Hmmsearch
]
...
@@ -1160,10 +1158,28 @@ class DataPipelineMultimer:
...
@@ -1160,10 +1158,28 @@ class DataPipelineMultimer:
is_homomer_or_monomer
:
bool
is_homomer_or_monomer
:
bool
)
->
FeatureDict
:
)
->
FeatureDict
:
"""Runs the monomer pipeline on a single chain."""
"""Runs the monomer pipeline on a single chain."""
@contextlib.contextmanager
def open_tar_bz2(file_path):
    """Open a bzip2-compressed tar archive and always close it on exit.

    Args:
        file_path: Path to the ``.tar.bz2`` archive to open for reading.

    Yields:
        The open ``tarfile.TarFile`` object.

    Raises:
        Any exception raised by ``tarfile.open`` or by the body of the
        ``with`` statement. Errors from the body are reported and then
        re-raised rather than swallowed, so a failed extraction cannot
        silently continue.
    """
    tar = tarfile.open(file_path, 'r:bz2')
    try:
        yield tar
    except Exception:
        # Say which archive failed, then propagate: suppressing the error
        # here would let the caller carry on as if extraction had succeeded.
        print(f"Failed to unzip the file at: {file_path}")
        raise
    finally:
        tar.close()
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
if
chain_alignment_index
is
None
and
not
os
.
path
.
exists
(
chain_alignment_dir
):
if
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
chain_alignment_dir
):
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
pass
elif
chain_alignment_index
is
None
and
not
os
.
path
.
exists
(
chain_alignment_dir
):
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
elif
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
os
.
path
.
join
(
chain_alignment_dir
,
".tar.bz2"
)):
with
tempfile
.
TemporaryDirectory
(
delete
=
False
)
as
tmpdir
:
with
open_tar_bz2
(
os
.
path
.
join
(
chain_alignment_dir
,
".tar.bz2"
))
as
tar
:
tar
.
extractcall
(
path
=
tmpdir
.
name
)
chain_alignment_dir
=
tmpdir
.
name
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment