Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
de28d842
"vscode:/vscode.git/clone" did not exist on "a0a49fea4d6eb27af15c956c1f25cc14f34420c2"
Commit
de28d842
authored
Dec 14, 2023
by
Dingquan Yu
Browse files
added steps of reading in compressed msa files
parent
e9287b49
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
18 deletions
+10
-18
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+10
-18
No files found.
openfold/data/data_pipeline.py
View file @
de28d842
...
@@ -737,8 +737,10 @@ class DataPipeline:
...
@@ -737,8 +737,10 @@ class DataPipeline:
# Now will split the following steps into multiple processes
# Now will split the following steps into multiple processes
current_directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
current_directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cmd
=
f
"
{
current_directory
}
/tools/parse_msa_files.py"
cmd
=
f
"
{
current_directory
}
/tools/parse_msa_files.py"
msa_data
=
subprocess
.
run
([
'python'
,
cmd
,
f
"--alignment_dir=
{
alignment_dir
}
"
],
capture_output
=
True
,
text
=
True
)
msa_data_path
=
subprocess
.
run
([
'python'
,
cmd
,
f
"--alignment_dir=
{
alignment_dir
}
"
],
capture_output
=
True
,
text
=
True
)
msa_data
=
pickle
.
load
((
open
(
msa_data
.
stdout
.
lstrip
().
rstrip
(),
'rb'
)))
msa_data_path
=
msa_data_path
.
stdout
.
lstrip
().
rstrip
()
msa_data
=
pickle
.
load
((
open
(
msa_data_path
,
'rb'
)))
os
.
remove
(
msa_data_path
)
return
msa_data
return
msa_data
...
@@ -1159,27 +1161,17 @@ class DataPipelineMultimer:
...
@@ -1159,27 +1161,17 @@ class DataPipelineMultimer:
)
->
FeatureDict
:
)
->
FeatureDict
:
"""Runs the monomer pipeline on a single chain."""
"""Runs the monomer pipeline on a single chain."""
@contextlib.contextmanager
def open_tar_bz2(file_path):
    """Open a bzip2-compressed tar archive for reading and always close it.

    Args:
        file_path: Path of the ``.tar.bz2`` archive to open.

    Yields:
        The ``tarfile.TarFile`` object opened in ``'r:bz2'`` mode.

    Raises:
        tarfile.TarError, OSError: If the archive cannot be opened, or if
            reading/extracting it fails inside the ``with`` body. The
            failure is reported on stdout and then re-raised.
        Exception: Any other exception raised inside the ``with`` body
            propagates unchanged.
    """
    tar = tarfile.open(file_path, 'r:bz2')
    try:
        yield tar
    except (tarfile.TarError, OSError):
        # Report archive problems but re-raise: swallowing the exception here
        # would make the context manager suppress it, and the caller would
        # carry on with a missing or partially extracted directory.
        print(f"Failed to unzip the file at: {file_path}")
        raise
    finally:
        # Runs on success and on every error path, so the handle never leaks.
        tar.close()
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
chain_fasta_str
=
f
'>
{
chain_id
}
\n
{
sequence
}
\n
'
if
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
chain_alignment_dir
):
if
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
chain_alignment_dir
):
pass
pass
elif
chain_alignment_index
is
None
and
not
os
.
path
.
exists
(
chain_alignment_dir
):
elif
chain_alignment_index
is
None
and
not
(
os
.
path
.
exists
(
chain_alignment_dir
)
or
os
.
path
.
exists
(
chain_alignment_dir
+
".tar.bz2"
))
:
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
raise
ValueError
(
f
"Alignments for
{
chain_id
}
not found..."
)
elif
chain_alignment_index
is
not
None
and
os
.
path
.
exists
(
os
.
path
.
join
(
chain_alignment_dir
,
".tar.bz2"
)
)
:
elif
chain_alignment_index
is
not
None
or
os
.
path
.
exists
(
chain_alignment_dir
+
".tar.bz2"
):
with
tempfile
.
TemporaryDirectory
(
delete
=
False
)
as
tmpdir
:
tmpdir
=
tempfile
.
mkdtemp
()
with
open_tar_bz2
(
os
.
path
.
join
(
chain_alignment_dir
,
"
.tar.bz2
"
))
as
tar
:
cmd
=
f
"tar -xvf
{
chain_alignment_dir
+
'
.tar.bz2
'
}
-C
{
tmpdir
}
"
tar
.
extractcall
(
path
=
tmpdir
.
nam
e
)
result
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
text
=
True
,
shell
=
Tru
e
)
chain_alignment_dir
=
tmpdir
.
name
chain_alignment_dir
=
os
.
path
.
join
(
tmpdir
,
os
.
listdir
(
tmpdir
)[
0
])
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
with
temp_fasta_file
(
chain_fasta_str
)
as
chain_fasta_path
:
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
chain_features
=
self
.
_monomer_data_pipeline
.
process_fasta
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment