Commit fb341b17 authored by Gustaf Ahdritz

Merge

parents f30d77b7 0067da9f
@@ -61,7 +61,7 @@ To install the HH-suite to `/usr/bin`, run
 To download DeepMind's pretrained parameters and common ground truth data, run:
 ```bash
-scripts/download_data.sh data/
+bash scripts/download_data.sh data/
 ```
 You have two choices for downloading protein databases, depending on whether
@@ -70,14 +70,14 @@ you want to use DeepMind's MSA generation pipeline (w/ HMMR & HHblits) or
 MMseqs2 instead. For the former, run:
 ```bash
-scripts/download_alphafold_dbs.sh data/
+bash scripts/download_alphafold_dbs.sh data/
 ```
 For the latter, run:
 ```bash
-scripts/download_mmseqs_databases.sh data/  # downloads .tar files
-scripts/prep_mmseqs_databases.sh data/  # unpacks and preps the databases
+bash scripts/download_mmseqs_dbs.sh data/  # downloads .tar files
+bash scripts/prep_mmseqs_dbs.sh data/  # unpacks and preps the databases
 ```
 Make sure to run the latter command on the machine that will be used for MSA
...
name: openfold_venv
channels:
  - conda-forge
  - bioconda
  - pytorch
dependencies:
  - pip:
      - biopython==1.79
      - deepspeed==0.5.3
      - dm-tree==0.1.6
      - ml-collections==0.1.0
      - numpy==1.21.2
      - PyYAML==5.4.1
      - requests==2.26.0
      - scipy==1.7.1
      - tqdm==4.62.2
      - typing-extensions==3.10.0.2
      - pytorch_lightning==1.5.0
      - nvidia-pyindex
      - nvidia-dllogger
  - pytorch::pytorch=1.10.*
  - conda-forge::python=3.7
  - conda-forge::setuptools=59.5.0
  - conda-forge::pip
  - conda-forge::openmm=7.5.1
  - conda-forge::pdbfixer
  - bioconda::aria2
  - bioconda::hmmer==3.3.2
  - bioconda::hhsuite==3.3.0
  - bioconda::kalign2==2.04
@@ -370,7 +370,7 @@ class OpenFoldDataLoader(torch.utils.data.DataLoader):
         keyed_probs.append(
             ("use_clamped_fape", [1 - clamp_prob, clamp_prob])
         )
         if(stage_cfg.uniform_recycling):
             recycling_probs = [
                 1. / (max_iters + 1) for _ in range(max_iters + 1)
@@ -380,7 +380,7 @@ class OpenFoldDataLoader(torch.utils.data.DataLoader):
                 0. for _ in range(max_iters + 1)
             ]
             recycling_probs[-1] = 1.
         keyed_probs.append(
             ("no_recycling_iters", recycling_probs)
         )
...
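For orientation: the hunks above choose how many recycling iterations each training batch is assigned. With `uniform_recycling` enabled, every count from 0 to `max_iters` is equally likely; otherwise all of the probability mass sits on `max_iters`. Below is a minimal standalone sketch of that logic plus an illustrative sampling step (the helper name and the use of `random.choices` are assumptions for this example, not OpenFold's API):

```python
import random

def build_recycling_probs(max_iters: int, uniform_recycling: bool) -> list:
    # Mirrors the diff above: uniform over 0..max_iters, or all mass on max_iters.
    if uniform_recycling:
        return [1. / (max_iters + 1) for _ in range(max_iters + 1)]
    probs = [0. for _ in range(max_iters + 1)]
    probs[-1] = 1.
    return probs

print(build_recycling_probs(3, True))   # [0.25, 0.25, 0.25, 0.25]
print(build_recycling_probs(3, False))  # [0.0, 0.0, 0.0, 1.0]

# One way to draw a per-batch value from such a keyed distribution:
no_recycling_iters = random.choices(
    population=range(3 + 1),
    weights=build_recycling_probs(3, True),
)[0]
print(no_recycling_iters)
```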
@@ -1574,4 +1574,10 @@ class AlphaFoldLoss(nn.Module):
         crop_len = batch["aatype"].shape[-1]
         cum_loss = cum_loss * torch.sqrt(min(seq_len, crop_len))
+        # Scale the loss by the square root of the minimum of the crop size and
+        # the (average) sequence length. See subsection 1.9.
+        seq_len = torch.mean(batch["seq_length"].float())
+        crop_len = batch["aatype"].shape[-1]
+        cum_loss = cum_loss * torch.sqrt(min(seq_len, crop_len))
         return cum_loss
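The added lines apply the loss scaling described in subsection 1.9 of the AlphaFold supplement: the accumulated loss is multiplied by the square root of the smaller of the crop size and the batch-averaged sequence length. A self-contained illustration with made-up numbers follows; it uses `torch.minimum` so both operands stay tensors, whereas the code above relies on Python's built-in `min`:

```python
import torch

# Hypothetical batch: three sequences of true length 384, 212 and 97 residues,
# each cropped to 256 positions.
batch = {
    "seq_length": torch.tensor([384., 212., 97.]),
    "aatype": torch.zeros(3, 256, dtype=torch.long),
}
cum_loss = torch.tensor(7.5)

seq_len = torch.mean(batch["seq_length"].float())              # 231.0
crop_len = batch["aatype"].shape[-1]                           # 256
scale = torch.sqrt(torch.minimum(seq_len, torch.tensor(float(crop_len))))
cum_loss = cum_loss * scale                                    # 7.5 * sqrt(231.0)
print(scale.item(), cum_loss.item())
```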
biopython==1.79
deepspeed==0.5.3
dm-tree==0.1.6
ml-collections==0.1.0
numpy==1.21.2
PyYAML==5.4.1
requests==2.26.0
scipy==1.7.1
torch==1.10.0
tqdm==4.62.2
typing-extensions==3.10.0.2
pytorch_lightning==1.5.0
git+git://github.com/NVIDIA/dllogger.git
 #!/bin/bash
+CONDA_INSTALL_URL=${CONDA_INSTALL_URL:-"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"}
 source scripts/vars.sh
 # Install Miniconda locally
 rm -rf lib/conda
 rm -f /tmp/Miniconda3-latest-Linux-x86_64.sh
-wget -q -P /tmp \
-    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+wget -P /tmp \
+    "${CONDA_INSTALL_URL}" \
     && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p lib/conda \
     && rm /tmp/Miniconda3-latest-Linux-x86_64.sh
 # Grab conda-only packages
-PATH=lib/conda/bin:$PATH
-conda update -qy conda \
-    && conda create --name $ENV_NAME -y python==3.7 \
-    && source lib/conda/etc/profile.d/conda.sh \
-    && conda activate $ENV_NAME \
-    && pip install -r requirements.txt \
-    && conda install -qy -c conda-forge \
-    openmm=7.5.1 \
-    pdbfixer
-# Comment out if you have these already installed on your system, for example in /usr/bin/
-conda install -c bioconda aria2
-conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
-pip install nvidia-pyindex
-pip install nvidia-dllogger
+export PATH=lib/conda/bin:$PATH
+conda env create --name=${ENV_NAME} -f environment.yml
+source activate ${ENV_NAME}
 # Install DeepMind's OpenMM patch
 OPENFOLD_DIR=$PWD
...
@@ -97,7 +97,7 @@ class OpenFoldWrapper(pl.LightningModule):
     def configure_optimizers(self,
         learning_rate: float = 1e-3,
-        eps: float = 1e-5
+        eps: float = 1e-5,
     ) -> torch.optim.Adam:
         # Ignored as long as a DeepSpeed optimizer is configured
         return torch.optim.Adam(
@@ -293,6 +293,11 @@ if __name__ == "__main__":
         "--distillation_mapping_path", type=str, default=None,
         help="""See --train_mapping_path"""
     )
+    parser.add_argument(
+        "--obsolete_pdbs_file_path", type=str, default=None,
+        help="""Path to obsolete.dat file containing list of obsolete PDBs and
+             their replacements."""
+    )
     parser.add_argument(
         "--template_release_dates_cache_path", type=str, default=None,
         help="""Output of scripts/generate_mmcif_cache.py run on template mmCIF
...
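The new `--obsolete_pdbs_file_path` argument points at a PDB `obsolete.dat` listing. A hedged sketch of turning such a file into an old-ID-to-replacement mapping, assuming the standard PDB format in which each `OBSLTE` record names the superseded entry followed by its replacement (this parser is illustrative; OpenFold's actual handling may differ):

```python
def parse_obsolete_pdbs(path: str) -> dict:
    """Map obsolete PDB IDs to their replacements, e.g. {"116l": "216l"}.

    Assumes lines of the form:
        OBSLTE    31-JUL-94 116L     216L
    """
    mapping = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 4 and fields[0] == "OBSLTE":
                old_id, new_id = fields[2].lower(), fields[3].lower()
                mapping[old_id] = new_id
    return mapping
```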