Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
de28d842
Commit
de28d842
authored
Dec 14, 2023
by
Dingquan Yu
Browse files
added steps of reading in compressed msa files
parent
e9287b49
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
18 deletions
+10
-18
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+10
-18
No files found.
openfold/data/data_pipeline.py
View file @
de28d842
...
@@ -737,8 +737,10 @@ class DataPipeline:
...
@@ -737,8 +737,10 @@ class DataPipeline:
# Now will split the following steps into multiple processes
# Now will split the following steps into multiple processes
current_directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
current_directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cmd
=
f
"
{
current_directory
}
/tools/parse_msa_files.py"
cmd
=
f
"
{
current_directory
}
/tools/parse_msa_files.py"
msa_data
=
subprocess
.
run
([
'python'
,
cmd
,
f
"--alignment_dir=
{
alignment_dir
}
"
],
capture_output
=
True
,
text
=
True
)
msa_data_path
=
subprocess
.
run
([
'python'
,
cmd
,
f
"--alignment_dir=
{
alignment_dir
}
"
],
capture_output
=
True
,
text
=
True
)
msa_data
=
pickle
.
load
((
open
(
msa_data
.
stdout
.
lstrip
().
rstrip
(),
'rb'
)))
msa_data_path
=
msa_data_path
.
stdout
.
lstrip
().
rstrip
()
msa_data
=
pickle
.
load
((
open
(
msa_data_path
,
'rb'
)))
os
.
remove
(
msa_data_path
)
return
msa_data
return
msa_data
...
@@ -1159,27 +1161,17 @@ class DataPipelineMultimer:
...
@@ -1159,27 +1161,17 @@ class DataPipelineMultimer:
)
->
FeatureDict
:
)
->
FeatureDict
:
"""Runs the monomer pipeline on a single chain."""
"""Runs the monomer pipeline on a single chain."""
@
contextlib
.
contextmanager
def
open_tar_bz2
(
file_path
):
tar
=
tarfile
.
open
(
file_path
,
'r:bz2'
)
try
:
yield
tar
except
:
print
(
f
"Filed to unzip the file at:
{
file_path
}
"
)
finally
:
tar
.
close
()
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
if
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
chain_alignment_dir
):
if
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
chain_alignment_dir
):
pass
pass
elif
chain_alignment_index
is
None
and
not
os
.
path
.
exists
(
chain_alignment_dir
):
elif
chain_alignment_index
is
None
and
not
(
os
.
path
.
exists
(
chain_alignment_dir
)
or
os
.
path
.
exists
(
chain_alignment_dir
+
".tar.bz2"
))
:
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
elif
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
os
.
path
.
join
(
chain_alignment_dir
,
".tar.bz2"
)
)
:
elif
chain_alignment_index
is
not
None
or
os
.
path
.
exists
(
chain_alignment_dir
+
".tar.bz2"
):
with
tempfile
.
TemporaryDirectory
(
delete
=
False
)
as
tmpdir
:
tmpdir
=
tempfile
.
mkdtemp
()
with
open_tar_bz2
(
os
.
path
.
join
(
chain_alignment_dir
,
"
.tar.bz2
"
))
as
tar
:
cmd
=
f
"tar -xvf
{
chain_alignment_dir
+
'
.tar.bz2
'
}
-C
{
tmpdir
}
"
tar
.
extractcall
(
path
=
tmpdir
.
name
)
result
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
text
=
True
,
shell
=
True
)
chain_alignment_dir
=
tmpdir
.
name
chain_alignment_dir
=
os
.
path
.
join
(
tmpdir
,
os
.
listdir
(
tmpdir
)[
0
])
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment