Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
728f99fc
Commit
728f99fc
authored
Jun 27, 2022
by
Gustaf Ahdritz
Browse files
Add alignment DB scripts
parent
4e730269
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
100 additions
and
2 deletions
+100
-2
README.md
README.md
+12
-0
scripts/alignment_db_scripts/create_alignment_db.py
scripts/alignment_db_scripts/create_alignment_db.py
+47
-0
scripts/alignment_db_scripts/unify_alignment_db_indices.py
scripts/alignment_db_scripts/unify_alignment_db_indices.py
+37
-0
train_openfold.py
train_openfold.py
+4
-2
No files found.
README.md
View file @
728f99fc
...
...
@@ -315,6 +315,18 @@ or even ProteinNet .core files. To emulate the AlphaFold training procedure,
which uses a self-distillation set subject to special preprocessing steps, use
the family of
`--distillation`
flags.
In cases where it may be burdensome to create separate files for each chain's
alignments, alignment directories can be consolidated using the scripts in
`scripts/alignment_db_scripts/`
. First, run
`create_alignment_db.py`
to
consolidate an alignment directory into a pair of database and index files.
Once all alignment directories (or shards of a single alignment directory)
have been compiled, unify the indices with
`unify_alignment_db_indices.py`
. The
resulting index,
`super.index`
, can be passed to the training script flags
containing the phrase
`alignment_index`
. In this scenario, the
`alignment_dir`
flags instead represent the directory containing the compiled alignment
databases. Both the training and distillation datasets can be compiled in this
way.
## Testing
To run unit tests, use
...
...
scripts/alignment_db_scripts/create_alignment_db.py
0 → 100644
View file @
728f99fc
import
argparse
import
json
import
os
def main(args):
    """Pack a directory of per-chain alignment files into one database file.

    Every file found in each chain subdirectory of ``args.alignment_dir`` is
    read as raw bytes and appended to ``<output_db_name>.db`` inside
    ``args.output_db_path``. A companion ``<output_db_name>.index`` file is
    written as JSON, mapping each chain subdirectory name to a list of
    ``[file_name, byte_offset, byte_length]`` entries locating that file's
    bytes inside the database. Chain subdirectories that contain no files get
    no entry in the index.

    Args:
        args: Namespace with ``alignment_dir``, ``output_db_path`` and
            ``output_db_name`` attributes (see the argument parser).
    """
    db_path = os.path.join(args.output_db_path, f"{args.output_db_name}.db")
    index_path = os.path.join(
        args.output_db_path, f"{args.output_db_name}.index"
    )

    index = {}
    db_offset = 0
    # The context manager closes the database handle even when reading one of
    # the alignment files raises part-way through.
    with open(db_path, "wb") as db_fp:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # database layout and index reproducible across runs and machines.
        for chain_alignment_dir in sorted(os.listdir(args.alignment_dir)):
            cad_path = os.path.join(args.alignment_dir, chain_alignment_dir)
            if not os.path.isdir(cad_path):
                # Only subdirectories are chains. Stray files (including the
                # output database itself when it is written into
                # alignment_dir) would otherwise crash the inner listdir.
                continue
            for file_name in sorted(os.listdir(cad_path)):
                with open(os.path.join(cad_path, file_name), "rb") as fp:
                    file_bytes = fp.read()
                n_bytes = len(file_bytes)
                index.setdefault(chain_alignment_dir, []).append(
                    (file_name, db_offset, n_bytes)
                )
                db_fp.write(file_bytes)
                db_offset += n_bytes

    with open(index_path, "w") as fp:
        json.dump(index, fp)
if __name__ == "__main__":
    # Command-line entry point: three positional string arguments, parsed and
    # handed straight to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "alignment_dir",
        type=str,
        help="""Path to precomputed alignment directory, with one subdirectory
per chain.""",
    )
    # The two output arguments take no help text.
    for positional in ("output_db_path", "output_db_name"):
        cli.add_argument(positional, type=str)

    main(cli.parse_args())
scripts/alignment_db_scripts/unify_alignment_db_indices.py
0 → 100644
View file @
728f99fc
import
argparse
import
json
import
os
""" Unifies databases created with create_alignment_db.py """
def main(args):
    """Merge per-database ``.index`` files into a single ``super.index``.

    Each ``*.index`` file in ``args.alignment_db_dir`` is loaded as JSON. For
    every key in it, the super index stores
    ``{"db": "<index stem>.db", "files": <that key's value>}``. The result is
    written as JSON to ``super.index`` inside ``args.output_dir``.

    Index files are processed in sorted name order; if the same key appears
    in more than one index, the entry from the last file in that order wins.

    Args:
        args: Namespace with ``alignment_db_dir`` and ``output_dir``
            attributes (see the argument parser).
    """
    output_path = os.path.join(args.output_dir, "super.index")
    output_real = os.path.realpath(output_path)

    super_index = {}
    # Sorted so that the result does not depend on directory listing order.
    for index_name in sorted(os.listdir(args.alignment_db_dir)):
        stem, ext = os.path.splitext(index_name)
        if ext != ".index":
            continue

        index_path = os.path.join(args.alignment_db_dir, index_name)
        if os.path.realpath(index_path) == output_real:
            # A super.index left by a previous run in the same directory is
            # this script's own output, not a database index. Ingesting it
            # would create bogus entries pointing at a "super.db".
            continue

        with open(index_path, "r") as fp:
            index = json.load(fp)

        db_name = f"{stem}.db"
        for k in index:
            super_index[k] = {
                "db": db_name,
                "files": index[k],
            }

    with open(output_path, "w") as fp:
        json.dump(super_index, fp)
if __name__ == "__main__":
    # Command-line entry point: two positional string arguments, parsed and
    # handed straight to main().
    cli = argparse.ArgumentParser()
    positionals = (
        ("alignment_db_dir", "Path to directory containing alignment_dbs"),
        ("output_dir", "Path in which to output super index"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, type=str, help=arg_help)

    main(cli.parse_args())
train_openfold.py
View file @
728f99fc
...
...
@@ -512,10 +512,12 @@ if __name__ == "__main__":
"--_distillation_structure_index_path"
,
type
=
str
,
default
=
None
,
)
parser
.
add_argument
(
"--_alignment_index_path"
,
type
=
str
,
default
=
None
,
"--alignment_index_path"
,
type
=
str
,
default
=
None
,
help
=
"Training alignment index. See the README for instructions."
)
parser
.
add_argument
(
"--_distillation_alignment_index_path"
,
type
=
str
,
default
=
None
,
"--distillation_alignment_index_path"
,
type
=
str
,
default
=
None
,
help
=
"Distillation alignment index. See the README for instructions."
)
parser
=
pl
.
Trainer
.
add_argparse_args
(
parser
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment