Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
6f3e0c0c
Commit
6f3e0c0c
authored
Nov 30, 2023
by
Dingquan Yu
Browse files
now used asynchronised version in parse_msa_data
parent
aec12764
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
25 deletions
+32
-25
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+32
-25
No files found.
openfold/data/data_pipeline.py
View file @
6f3e0c0c
...
@@ -21,7 +21,7 @@ import dataclasses
...
@@ -21,7 +21,7 @@ import dataclasses
from
multiprocessing
import
cpu_count
from
multiprocessing
import
cpu_count
import
tempfile
import
tempfile
from
typing
import
Mapping
,
Optional
,
Sequence
,
Any
,
MutableMapping
,
Union
from
typing
import
Mapping
,
Optional
,
Sequence
,
Any
,
MutableMapping
,
Union
import
asyncio
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
...
@@ -737,30 +737,37 @@ class DataPipeline:
...
@@ -737,30 +737,37 @@ class DataPipeline:
fp
.
close
()
fp
.
close
()
else
:
else
:
for
f
in
os
.
listdir
(
alignment_dir
):
# Now will split the following steps into multiple processes
path
=
os
.
path
.
join
(
alignment_dir
,
f
)
async
def
parse_stockholm_file
(
alignment_dir
:
str
,
stockholm_file
:
str
):
filename
,
ext
=
os
.
path
.
splitext
(
f
)
path
=
os
.
path
.
join
(
alignment_dir
,
stockholm_file
)
file_name
,
_
=
os
.
path
.
splitext
(
stockholm_file
)
if
(
ext
==
".a3m"
):
with
open
(
path
,
"r"
)
as
infile
:
import
time
msa
=
parsers
.
parse_stockholm
(
infile
.
read
())
start
=
time
.
time
()
infile
.
close
()
with
open
(
path
,
"r"
)
as
fp
:
return
{
file_name
:
msa
}
msa
=
parsers
.
parse_a3m
(
fp
.
read
())
end
=
time
.
time
()
async
def
parse_a3m_file
(
alignment_dir
:
str
,
a3m_file
:
str
):
calculate_elapse
(
start
,
end
,
"parser.parse_a3m"
)
path
=
os
.
path
.
join
(
alignment_dir
,
a3m_file
)
elif
(
ext
==
".sto"
and
not
"hmm_output"
==
filename
):
file_name
,
_
=
os
.
path
.
splitext
(
a3m_file
)
import
time
with
open
(
path
,
"r"
)
as
infile
:
start
=
time
.
time
()
msa
=
parsers
.
parse_a3m
(
infile
.
read
())
with
open
(
path
,
"r"
)
as
fp
:
infile
.
close
()
msa
=
parsers
.
parse_stockholm
(
return
{
file_name
:
msa
}
fp
.
read
()
)
async
def
run_parse_all_msa_files
(
stockholm_files
:
list
,
a3m_files
:
list
,
alignment_dir
:
str
):
end
=
time
.
time
()
all_tasks
=
[
asyncio
.
create_task
(
parse_stockholm_file
(
alignment_dir
,
sto
))
for
sto
in
stockholm_files
]
calculate_elapse
(
start
,
end
,
"parsers.parse_stockholm"
)
all_tasks
+=
[
asyncio
.
create_task
(
parse_a3m_file
(
alignment_dir
,
a3m
))
for
a3m
in
a3m_files
]
else
:
results
=
await
asyncio
.
gather
(
*
all_tasks
)
continue
return
results
stockholm_files
=
[
i
for
i
in
os
.
listdir
(
alignment_dir
)
if
(
i
.
endswith
(
'.sto'
)
and
(
"hmm_output"
not
in
i
))]
msa_data
[
f
]
=
msa
a3m_files
=
[
i
for
i
in
os
.
listdir
(
alignment_dir
)
if
i
.
endswith
(
'.a3m'
)]
import
time
start
=
time
.
time
()
msa_results
=
asyncio
.
run
(
run_parse_all_msa_files
(
stockholm_files
,
a3m_files
,
alignment_dir
))
end
=
time
.
time
()
calculate_elapse
(
start
,
end
,
"asynchronised version"
)
for
i
in
msa_results
:
msa_data
.
update
({
k
:
v
for
k
,
v
in
i
.
items
()})
return
msa_data
return
msa_data
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment