Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
alphafold2_jax
Commits
0bab1bf8
Commit
0bab1bf8
authored
Jul 22, 2021
by
Saran Tunyasuvunakool
Browse files
Add a Colab notebook, add reduced BFD, and various other fixes and improvements.
PiperOrigin-RevId: 386228948
parent
d26287ea
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
99 additions
and
25 deletions
+99
-25
run_alphafold.py
run_alphafold.py
+27
-7
scripts/download_all_data.sh
scripts/download_all_data.sh
+15
-8
scripts/download_alphafold_params.sh
scripts/download_alphafold_params.sh
+1
-1
scripts/download_bfd.sh
scripts/download_bfd.sh
+3
-1
scripts/download_mgnify.sh
scripts/download_mgnify.sh
+4
-2
scripts/download_pdb70.sh
scripts/download_pdb70.sh
+1
-1
scripts/download_pdb_mmcif.sh
scripts/download_pdb_mmcif.sh
+2
-2
scripts/download_small_bfd.sh
scripts/download_small_bfd.sh
+41
-0
scripts/download_uniclust30.sh
scripts/download_uniclust30.sh
+4
-2
scripts/download_uniref90.sh
scripts/download_uniref90.sh
+1
-1
No files found.
run_alphafold.py
View file @
0bab1bf8
...
...
@@ -59,6 +59,8 @@ flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify '
'database for use by JackHMMER.'
)
flags
.
DEFINE_string
(
'bfd_database_path'
,
None
,
'Path to the BFD '
'database for use by HHblits.'
)
flags
.
DEFINE_string
(
'small_bfd_database_path'
,
None
,
'Path to the small '
'version of BFD used with the "reduced_dbs" preset.'
)
flags
.
DEFINE_string
(
'uniclust30_database_path'
,
None
,
'Path to the Uniclust30 '
'database for use by HHblits.'
)
flags
.
DEFINE_string
(
'pdb70_database_path'
,
None
,
'Path to the PDB70 '
...
...
@@ -70,9 +72,13 @@ flags.DEFINE_string('max_template_date', None, 'Maximum template release date '
flags
.
DEFINE_string
(
'obsolete_pdbs_path'
,
None
,
'Path to file containing a '
'mapping from obsolete PDB IDs to the PDB IDs of their '
'replacements.'
)
flags
.
DEFINE_enum
(
'preset'
,
'full_dbs'
,
[
'full_dbs'
,
'casp14'
],
'Choose preset model configuration - no ensembling '
'(full_dbs) or 8 model ensemblings (casp14).'
)
flags
.
DEFINE_enum
(
'preset'
,
'full_dbs'
,
[
'reduced_dbs'
,
'full_dbs'
,
'casp14'
],
'Choose preset model configuration - no ensembling and '
'smaller genetic database config (reduced_dbs), no '
'ensembling and full genetic database config (full_dbs) or '
'full genetic database config and 8 model ensemblings '
'(casp14).'
)
flags
.
DEFINE_boolean
(
'benchmark'
,
False
,
'Run multiple JAX model evaluations '
'to obtain a timing that excludes the compilation time, '
'which should be more indicative of the time required for '
...
...
@@ -92,6 +98,12 @@ RELAX_EXCLUDE_RESIDUES = []
RELAX_MAX_OUTER_ITERATIONS
=
20
def
_check_flag
(
flag_name
:
str
,
preset
:
str
,
should_be_set
:
bool
):
if
should_be_set
!=
bool
(
FLAGS
[
flag_name
].
value
):
verb
=
'be'
if
should_be_set
else
'not be'
raise
ValueError
(
f
'
{
flag_name
}
must
{
verb
}
set for preset "
{
preset
}
"'
)
def
predict_structure
(
fasta_path
:
str
,
fasta_name
:
str
,
...
...
@@ -197,7 +209,15 @@ def main(argv):
if
len
(
argv
)
>
1
:
raise
app
.
UsageError
(
'Too many command-line arguments.'
)
if
FLAGS
.
preset
==
'full_dbs'
:
use_small_bfd
=
FLAGS
.
preset
==
'reduced_dbs'
_check_flag
(
'small_bfd_database_path'
,
FLAGS
.
preset
,
should_be_set
=
use_small_bfd
)
_check_flag
(
'bfd_database_path'
,
FLAGS
.
preset
,
should_be_set
=
not
use_small_bfd
)
_check_flag
(
'uniclust30_database_path'
,
FLAGS
.
preset
,
should_be_set
=
not
use_small_bfd
)
if
FLAGS
.
preset
in
(
'reduced_dbs'
,
'full_dbs'
):
num_ensemble
=
1
elif
FLAGS
.
preset
==
'casp14'
:
num_ensemble
=
8
...
...
@@ -223,8 +243,10 @@ def main(argv):
mgnify_database_path
=
FLAGS
.
mgnify_database_path
,
bfd_database_path
=
FLAGS
.
bfd_database_path
,
uniclust30_database_path
=
FLAGS
.
uniclust30_database_path
,
small_bfd_database_path
=
FLAGS
.
small_bfd_database_path
,
pdb70_database_path
=
FLAGS
.
pdb70_database_path
,
template_featurizer
=
template_featurizer
)
template_featurizer
=
template_featurizer
,
use_small_bfd
=
use_small_bfd
)
model_runners
=
{}
for
model_name
in
FLAGS
.
model_names
:
...
...
@@ -272,8 +294,6 @@ if __name__ == '__main__':
'preset'
,
'uniref90_database_path'
,
'mgnify_database_path'
,
'uniclust30_database_path'
,
'bfd_database_path'
,
'pdb70_database_path'
,
'template_mmcif_dir'
,
'max_template_date'
,
...
...
scripts/download_all_data.sh
View file @
0bab1bf8
...
...
@@ -24,24 +24,31 @@ if [[ $# -eq 0 ]]; then
exit
1
fi
if
!
command
-v
rsync &> /dev/null
;
then
echo
"Error: rsync could not be found. Please install rsync."
exit
fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
DOWNLOAD_MODE
=
"
${
2
:-
full_dbs
}
"
# Default mode to full_dbs.
if
[[
"
${
DOWNLOAD_MODE
}
"
!=
full_dbs
&&
"
${
DOWNLOAD_MODE
}
"
!=
reduced_dbs
]]
then
echo
"DOWNLOAD_MODE
${
DOWNLOAD_MODE
}
not recognized."
exit
1
fi
SCRIPT_DIR
=
"
$(
dirname
"
$(
realpath
"
$0
"
)
"
)
"
echo
"Downloading AlphaFold parameters..."
bash
"
${
SCRIPT_DIR
}
/download_alphafold_params.sh"
"
${
DOWNLOAD_DIR
}
"
echo
"Downloading BFD..."
bash
"
${
SCRIPT_DIR
}
/download_bfd.sh"
"
${
DOWNLOAD_DIR
}
"
if
[[
"
${
DOWNLOAD_MODE
}
"
=
full_dbs
]]
;
then
echo
"Downloading BFD..."
bash
"
${
SCRIPT_DIR
}
/download_bfd.sh"
"
${
DOWNLOAD_DIR
}
"
else
echo
"Downloading Small BFD..."
bash
"
${
SCRIPT_DIR
}
/download_small_bfd.sh"
"
${
DOWNLOAD_DIR
}
"
fi
echo
"Downloading MGnify..."
bash
"
${
SCRIPT_DIR
}
/download_mgnify.sh"
"
${
DOWNLOAD_DIR
}
"
...
...
scripts/download_alphafold_params.sh
View file @
0bab1bf8
...
...
@@ -26,7 +26,7 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
...
...
scripts/download_bfd.sh
View file @
0bab1bf8
...
...
@@ -26,11 +26,13 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/bfd"
# Mirror of:
# https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz.
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
...
...
scripts/download_mgnify.sh
View file @
0bab1bf8
...
...
@@ -26,12 +26,14 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/mgnify"
SOURCE_URL
=
"ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz"
# Mirror of:
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
...
...
scripts/download_pdb70.sh
View file @
0bab1bf8
...
...
@@ -26,7 +26,7 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
...
...
scripts/download_pdb_mmcif.sh
View file @
0bab1bf8
...
...
@@ -26,12 +26,12 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
if
!
command
-v
rsync &> /dev/null
;
then
echo
"Error: rsync could not be found. Please install rsync."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
...
...
scripts/download_small_bfd.sh
0 → 100755
View file @
0bab1bf8
#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and unzips the Small BFD database for AlphaFold.
#
# Usage: bash download_small_bfd.sh /path/to/download/directory
set
-e
if
[[
$#
-eq
0
]]
;
then
echo
"Error: download directory must be provided as an input argument."
exit
1
fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/small_bfd"
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
aria2c
"
${
SOURCE_URL
}
"
--dir
=
"
${
ROOT_DIR
}
"
pushd
"
${
ROOT_DIR
}
"
gunzip
"
${
ROOT_DIR
}
/
${
BASENAME
}
"
popd
scripts/download_uniclust30.sh
View file @
0bab1bf8
...
...
@@ -26,12 +26,14 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
ROOT_DIR
=
"
${
DOWNLOAD_DIR
}
/uniclust30"
SOURCE_URL
=
"http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/uniclust30_2018_08_hhsuite.tar.gz"
# Mirror of:
# http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/uniclust30_2018_08_hhsuite.tar.gz
SOURCE_URL
=
"https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz"
BASENAME
=
$(
basename
"
${
SOURCE_URL
}
"
)
mkdir
--parents
"
${
ROOT_DIR
}
"
...
...
scripts/download_uniref90.sh
View file @
0bab1bf8
...
...
@@ -26,7 +26,7 @@ fi
if
!
command
-v
aria2c &> /dev/null
;
then
echo
"Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit
1
fi
DOWNLOAD_DIR
=
"
$1
"
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment