Commit 0bab1bf8 authored by Saran Tunyasuvunakool
Browse files

Add a Colab notebook, add reduced BFD, and various other fixes and improvements.

PiperOrigin-RevId: 386228948
parent d26287ea
......@@ -59,6 +59,8 @@ flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify '
'database for use by JackHMMER.')
flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD '
'database for use by HHblits.')
flags.DEFINE_string('small_bfd_database_path', None, 'Path to the small '
'version of BFD used with the "reduced_dbs" preset.')
flags.DEFINE_string('uniclust30_database_path', None, 'Path to the Uniclust30 '
'database for use by HHblits.')
flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 '
......@@ -70,9 +72,13 @@ flags.DEFINE_string('max_template_date', None, 'Maximum template release date '
flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a '
'mapping from obsolete PDB IDs to the PDB IDs of their '
'replacements.')
flags.DEFINE_enum('preset', 'full_dbs', ['full_dbs', 'casp14'],
'Choose preset model configuration - no ensembling '
'(full_dbs) or 8 model ensemblings (casp14).')
flags.DEFINE_enum('preset', 'full_dbs',
['reduced_dbs', 'full_dbs', 'casp14'],
'Choose preset model configuration - no ensembling and '
'smaller genetic database config (reduced_dbs), no '
'ensembling and full genetic database config (full_dbs) or '
'full genetic database config and 8 model ensemblings '
'(casp14).')
flags.DEFINE_boolean('benchmark', False, 'Run multiple JAX model evaluations '
'to obtain a timing that excludes the compilation time, '
'which should be more indicative of the time required for '
......@@ -92,6 +98,12 @@ RELAX_EXCLUDE_RESIDUES = []
RELAX_MAX_OUTER_ITERATIONS = 20
def _check_flag(flag_name: str, preset: str, should_be_set: bool):
  """Checks that a flag's set/unset state matches what a preset expects.

  A flag counts as set when its value in FLAGS is truthy.

  Args:
    flag_name: Name of the flag to look up in FLAGS.
    preset: Name of the active preset; used only in the error message.
    should_be_set: True if the flag is expected to be set, False if it is
      expected to be unset.

  Raises:
    ValueError: If the flag's actual state differs from `should_be_set`.
  """
  is_set = bool(FLAGS[flag_name].value)
  if is_set == should_be_set:
    # State matches the expectation; nothing to report.
    return
  if should_be_set:
    verb = 'be'
  else:
    verb = 'not be'
  raise ValueError(f'{flag_name} must {verb} set for preset "{preset}"')
def predict_structure(
fasta_path: str,
fasta_name: str,
......@@ -197,7 +209,15 @@ def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
if FLAGS.preset == 'full_dbs':
use_small_bfd = FLAGS.preset == 'reduced_dbs'
_check_flag('small_bfd_database_path', FLAGS.preset,
should_be_set=use_small_bfd)
_check_flag('bfd_database_path', FLAGS.preset,
should_be_set=not use_small_bfd)
_check_flag('uniclust30_database_path', FLAGS.preset,
should_be_set=not use_small_bfd)
if FLAGS.preset in ('reduced_dbs', 'full_dbs'):
num_ensemble = 1
elif FLAGS.preset == 'casp14':
num_ensemble = 8
......@@ -223,8 +243,10 @@ def main(argv):
mgnify_database_path=FLAGS.mgnify_database_path,
bfd_database_path=FLAGS.bfd_database_path,
uniclust30_database_path=FLAGS.uniclust30_database_path,
small_bfd_database_path=FLAGS.small_bfd_database_path,
pdb70_database_path=FLAGS.pdb70_database_path,
template_featurizer=template_featurizer)
template_featurizer=template_featurizer,
use_small_bfd=use_small_bfd)
model_runners = {}
for model_name in FLAGS.model_names:
......@@ -272,8 +294,6 @@ if __name__ == '__main__':
'preset',
'uniref90_database_path',
'mgnify_database_path',
'uniclust30_database_path',
'bfd_database_path',
'pdb70_database_path',
'template_mmcif_dir',
'max_template_date',
......
......@@ -24,24 +24,31 @@ if [[ $# -eq 0 ]]; then
exit 1
fi
if ! command -v rsync &> /dev/null ; then
echo "Error: rsync could not be found. Please install rsync."
exit
fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
DOWNLOAD_MODE="${2:-full_dbs}" # Default mode to full_dbs.
if [[ "${DOWNLOAD_MODE}" != full_dbs && "${DOWNLOAD_MODE}" != reduced_dbs ]]
then
echo "DOWNLOAD_MODE ${DOWNLOAD_MODE} not recognized."
exit 1
fi
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
echo "Downloading AlphaFold parameters..."
bash "${SCRIPT_DIR}/download_alphafold_params.sh" "${DOWNLOAD_DIR}"
echo "Downloading BFD..."
bash "${SCRIPT_DIR}/download_bfd.sh" "${DOWNLOAD_DIR}"
if [[ "${DOWNLOAD_MODE}" = full_dbs ]] ; then
echo "Downloading BFD..."
bash "${SCRIPT_DIR}/download_bfd.sh" "${DOWNLOAD_DIR}"
else
echo "Downloading Small BFD..."
bash "${SCRIPT_DIR}/download_small_bfd.sh" "${DOWNLOAD_DIR}"
fi
echo "Downloading MGnify..."
bash "${SCRIPT_DIR}/download_mgnify.sh" "${DOWNLOAD_DIR}"
......
......@@ -26,7 +26,7 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
......
......@@ -26,11 +26,13 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/bfd"
# Mirror of:
# https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz.
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz"
BASENAME=$(basename "${SOURCE_URL}")
......
......@@ -26,12 +26,14 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/mgnify"
SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz"
# Mirror of:
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
......
......@@ -26,7 +26,7 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
......
......@@ -26,12 +26,12 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
if ! command -v rsync &> /dev/null ; then
echo "Error: rsync could not be found. Please install rsync."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
......
#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and unzips the Small BFD database for AlphaFold.
#
# Usage: bash download_small_bfd.sh /path/to/download/directory
# Abort as soon as any command below exits with a non-zero status.
set -e

# The download directory is the single required positional argument.
if [[ $# -eq 0 ]]; then
  echo "Error: download directory must be provided as an input argument."
  exit 1
fi

# aria2c performs the download, so refuse to start without it.
if ! command -v aria2c &> /dev/null ; then
  echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
  exit 1
fi

DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/small_bfd"
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz"
BASENAME=$(basename "${SOURCE_URL}")

mkdir --parents "${ROOT_DIR}"
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"

# Decompress in place without changing directory: the archive is addressed by
# the same path that was handed to mkdir and aria2c, so a relative download
# directory (e.g. "./data") resolves against the same working directory for
# every step. Entering ROOT_DIR first would make a relative ROOT_DIR-prefixed
# path resolve from inside ROOT_DIR and fail to find the file.
gunzip "${ROOT_DIR}/${BASENAME}"
......@@ -26,12 +26,14 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/uniclust30"
SOURCE_URL="http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/uniclust30_2018_08_hhsuite.tar.gz"
# Mirror of:
# http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/uniclust30_2018_08_hhsuite.tar.gz
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
......
......@@ -26,7 +26,7 @@ fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit
exit 1
fi
DOWNLOAD_DIR="$1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment