Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
425bdb5e
"lib/bindings/git@developer.sourcefind.cn:OpenDAS/dynamo.git" did not exist on "110f3f8caeff051b32f168c44fda9faa0d71ed18"
Commit
425bdb5e
authored
Apr 18, 2023
by
Christina Floristean
Browse files
Added UniRef30 to data pipeline
parent
68828c49
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
11 deletions
+48
-11
openfold/data/data_pipeline.py
openfold/data/data_pipeline.py
+21
-8
run_pretrained_openfold.py
run_pretrained_openfold.py
+2
-0
scripts/download_uniref30.sh
scripts/download_uniref30.sh
+4
-1
scripts/precompute_alignments.py
scripts/precompute_alignments.py
+18
-2
scripts/utils.py
scripts/utils.py
+3
-0
No files found.
openfold/data/data_pipeline.py
View file @
425bdb5e
...
@@ -330,6 +330,7 @@ class AlignmentRunner:
...
@@ -330,6 +330,7 @@ class AlignmentRunner:
uniref90_database_path
:
Optional
[
str
]
=
None
,
uniref90_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
mgnify_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
bfd_database_path
:
Optional
[
str
]
=
None
,
uniref30_database_path
:
Optional
[
str
]
=
None
,
uniclust30_database_path
:
Optional
[
str
]
=
None
,
uniclust30_database_path
:
Optional
[
str
]
=
None
,
uniprot_database_path
:
Optional
[
str
]
=
None
,
uniprot_database_path
:
Optional
[
str
]
=
None
,
template_searcher
:
Optional
[
TemplateSearcher
]
=
None
,
template_searcher
:
Optional
[
TemplateSearcher
]
=
None
,
...
@@ -355,12 +356,15 @@ class AlignmentRunner:
...
@@ -355,12 +356,15 @@ class AlignmentRunner:
Path to BFD database. Depending on the value of use_small_bfd,
Path to BFD database. Depending on the value of use_small_bfd,
one of hhblits_binary_path or jackhmmer_binary_path must be
one of hhblits_binary_path or jackhmmer_binary_path must be
provided.
provided.
uniref30_database_path:
Path to uniref30. Searched alongside BFD if use_small_bfd is
false.
uniclust30_database_path:
uniclust30_database_path:
Path to uniclust30. Searched alongside BFD if use_small_bfd is
Path to uniclust30. Searched alongside BFD if use_small_bfd is
false.
false.
use_small_bfd:
use_small_bfd:
Whether to search the BFD database alone with jackhmmer or
Whether to search the BFD database alone with jackhmmer or
in conjunction with uniclust30 with hhblits.
in conjunction with
uniref30/
uniclust30 with hhblits.
no_cpus:
no_cpus:
The number of CPUs available for alignment. By default, all
The number of CPUs available for alignment. By default, all
CPUs are used.
CPUs are used.
...
@@ -413,7 +417,7 @@ class AlignmentRunner:
...
@@ -413,7 +417,7 @@ class AlignmentRunner:
)
)
self
.
jackhmmer_small_bfd_runner
=
None
self
.
jackhmmer_small_bfd_runner
=
None
self
.
hhblits_bfd_uniclust_runner
=
None
self
.
hhblits_bfd_uni
ref
clust_runner
=
None
if
(
bfd_database_path
is
not
None
):
if
(
bfd_database_path
is
not
None
):
if
use_small_bfd
:
if
use_small_bfd
:
self
.
jackhmmer_small_bfd_runner
=
jackhmmer
.
Jackhmmer
(
self
.
jackhmmer_small_bfd_runner
=
jackhmmer
.
Jackhmmer
(
...
@@ -423,9 +427,11 @@ class AlignmentRunner:
...
@@ -423,9 +427,11 @@ class AlignmentRunner:
)
)
else
:
else
:
dbs
=
[
bfd_database_path
]
dbs
=
[
bfd_database_path
]
if
(
uniclust30_database_path
is
not
None
):
if
(
uniref30_database_path
is
not
None
):
dbs
.
append
(
uniref30_database_path
)
if
(
uniclust30_database_path
is
not
None
):
dbs
.
append
(
uniclust30_database_path
)
dbs
.
append
(
uniclust30_database_path
)
self
.
hhblits_bfd_uniclust_runner
=
hhblits
.
HHBlits
(
self
.
hhblits_bfd_uni
ref
clust_runner
=
hhblits
.
HHBlits
(
binary_path
=
hhblits_binary_path
,
binary_path
=
hhblits_binary_path
,
databases
=
dbs
,
databases
=
dbs
,
n_cpu
=
no_cpus
,
n_cpu
=
no_cpus
,
...
@@ -516,10 +522,17 @@ class AlignmentRunner:
...
@@ -516,10 +522,17 @@ class AlignmentRunner:
msa_out_path
=
bfd_out_path
,
msa_out_path
=
bfd_out_path
,
msa_format
=
"sto"
,
msa_format
=
"sto"
,
)
)
elif
(
self
.
hhblits_bfd_uniclust_runner
is
not
None
):
elif
(
self
.
hhblits_bfd_unirefclust_runner
is
not
None
):
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
"bfd_uniclust_hits.a3m"
)
uni_name
=
"uni"
hhblits_bfd_uniclust_result
=
run_msa_tool
(
for
db_name
in
self
.
hhblits_bfd_unirefclust_runner
.
databases
:
msa_runner
=
self
.
hhblits_bfd_uniclust_runner
,
if
"uniref"
in
db_name
.
lower
():
uni_name
=
f
"
{
uni_name
}
ref"
elif
"uniclust"
in
db_name
.
lower
():
uni_name
=
f
"
{
uni_name
}
clust"
bfd_out_path
=
os
.
path
.
join
(
output_dir
,
f
"bfd_
{
uni_name
}
_hits.a3m"
)
hhblits_bfd_unirefclust_result
=
run_msa_tool
(
msa_runner
=
self
.
hhblits_bfd_unirefclust_runner
,
fasta_path
=
fasta_path
,
fasta_path
=
fasta_path
,
msa_out_path
=
bfd_out_path
,
msa_out_path
=
bfd_out_path
,
msa_format
=
"a3m"
,
msa_format
=
"a3m"
,
...
...
run_pretrained_openfold.py
View file @
425bdb5e
...
@@ -88,6 +88,7 @@ def precompute_alignments(tags, seqs, alignment_dir, args, is_multimer):
...
@@ -88,6 +88,7 @@ def precompute_alignments(tags, seqs, alignment_dir, args, is_multimer):
uniref90_database_path
=
args
.
uniref90_database_path
,
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uniref30_database_path
=
args
.
uniref30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
no_cpus
=
args
.
cpus
,
no_cpus
=
args
.
cpus
,
)
)
...
@@ -208,6 +209,7 @@ def main(args):
...
@@ -208,6 +209,7 @@ def main(args):
uniref90_database_path
=
args
.
uniref90_database_path
,
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uniref30_database_path
=
args
.
uniref30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
uniprot_database_path
=
args
.
uniprot_database_path
,
uniprot_database_path
=
args
.
uniprot_database_path
,
template_searcher
=
template_searcher
,
template_searcher
=
template_searcher
,
...
...
scripts/download_uniref30.sh
View file @
425bdb5e
...
@@ -38,4 +38,7 @@ BASENAME=$(basename "${SOURCE_URL}")
...
@@ -38,4 +38,7 @@ BASENAME=$(basename "${SOURCE_URL}")
mkdir
--parents
"
${
ROOT_DIR
}
"
mkdir
--parents
"
${
ROOT_DIR
}
"
aria2c
"
${
SOURCE_URL
}
"
--dir
=
"
${
ROOT_DIR
}
"
-x
4
--check-certificate
=
false
aria2c
"
${
SOURCE_URL
}
"
--dir
=
"
${
ROOT_DIR
}
"
-x
4
--check-certificate
=
false
gunzip
"
${
ROOT_DIR
}
/
${
BASENAME
}
"
tar
--extract
--verbose
--file
=
"
${
ROOT_DIR
}
/
${
BASENAME
}
"
\
--directory
=
"
${
ROOT_DIR
}
"
rm
"
${
ROOT_DIR
}
/
${
BASENAME
}
"
scripts/precompute_alignments.py
View file @
425bdb5e
...
@@ -11,6 +11,7 @@ import tempfile
...
@@ -11,6 +11,7 @@ import tempfile
import
openfold.data.mmcif_parsing
as
mmcif_parsing
import
openfold.data.mmcif_parsing
as
mmcif_parsing
from
openfold.data.data_pipeline
import
AlignmentRunner
from
openfold.data.data_pipeline
import
AlignmentRunner
from
openfold.data.parsers
import
parse_fasta
from
openfold.data.parsers
import
parse_fasta
from
openfold.data.tools
import
hhsearch
,
hmmsearch
from
openfold.np
import
protein
,
residue_constants
from
openfold.np
import
protein
,
residue_constants
from
utils
import
add_data_args
from
utils
import
add_data_args
...
@@ -114,15 +115,30 @@ def parse_and_align(files, alignment_runner, args):
...
@@ -114,15 +115,30 @@ def parse_and_align(files, alignment_runner, args):
def
main
(
args
):
def
main
(
args
):
# Build the alignment tool runner
# Build the alignment tool runner
if
(
args
.
hmmsearch_binary_path
is
not
None
):
template_searcher
=
hmmsearch
.
Hmmsearch
(
binary_path
=
args
.
hmmsearch_binary_path
,
hmmbuild_binary_path
=
args
.
hmmbuild_binary_path
,
database_path
=
args
.
pdb_seqres_database_path
,
)
elif
(
args
.
hhsearch_binary_path
is
not
None
):
template_searcher
=
hhsearch
.
HHSearch
(
binary_path
=
args
.
hhsearch_binary_path
,
databases
=
[
args
.
pdb70_database_path
],
)
else
:
template_searcher
=
None
alignment_runner
=
AlignmentRunner
(
alignment_runner
=
AlignmentRunner
(
jackhmmer_binary_path
=
args
.
jackhmmer_binary_path
,
jackhmmer_binary_path
=
args
.
jackhmmer_binary_path
,
hhblits_binary_path
=
args
.
hhblits_binary_path
,
hhblits_binary_path
=
args
.
hhblits_binary_path
,
hhsearch_binary_path
=
args
.
hhsearch_binary_path
,
uniref90_database_path
=
args
.
uniref90_database_path
,
uniref90_database_path
=
args
.
uniref90_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
mgnify_database_path
=
args
.
mgnify_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
bfd_database_path
=
args
.
bfd_database_path
,
uniref30_database_path
=
args
.
uniref30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
uniclust30_database_path
=
args
.
uniclust30_database_path
,
pdb70_database_path
=
args
.
pdb70_database_path
,
uniprot_database_path
=
args
.
uniprot_database_path
,
template_searcher
=
template_searcher
,
use_small_bfd
=
args
.
bfd_database_path
is
None
,
use_small_bfd
=
args
.
bfd_database_path
is
None
,
no_cpus
=
args
.
cpus_per_task
,
no_cpus
=
args
.
cpus_per_task
,
)
)
...
...
scripts/utils.py
View file @
425bdb5e
...
@@ -17,6 +17,9 @@ def add_data_args(parser: argparse.ArgumentParser):
...
@@ -17,6 +17,9 @@ def add_data_args(parser: argparse.ArgumentParser):
parser
.
add_argument
(
parser
.
add_argument
(
'--pdb_seqres_database_path'
,
type
=
str
,
default
=
None
,
'--pdb_seqres_database_path'
,
type
=
str
,
default
=
None
,
)
)
parser
.
add_argument
(
'--uniref30_database_path'
,
type
=
str
,
default
=
None
,
)
parser
.
add_argument
(
parser
.
add_argument
(
'--uniclust30_database_path'
,
type
=
str
,
default
=
None
,
'--uniclust30_database_path'
,
type
=
str
,
default
=
None
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment