Unverified Commit 3bec3e9b authored by Jennifer Wei's avatar Jennifer Wei Committed by GitHub
Browse files

Merge pull request #438 from jnwei/pl_upgrades

Updates the pl_upgrades branch to match the changes on the main branch.
parents 49ab0539 c07075cf
This diff is collapsed.
name: openfold-venv
name: openfold-env
channels:
- conda-forge
- bioconda
- pytorch
- nvidia
dependencies:
- python=3.9
- python=3.10
- libgcc=7.2
- setuptools=59.5.0
- pip
- openmm=7.7
- pdbfixer
- pytorch-lightning
- biopython==1.79
- biopython
- numpy
- pandas
- PyYAML==5.4.1
......@@ -24,11 +24,12 @@ dependencies:
- modelcif==0.7
- awscli
- ml-collections
- mkl=2022.1
- aria2
- git
- bioconda::hmmer==3.3.2
- bioconda::hhsuite==3.3.0
- bioconda::kalign2==2.04
- bioconda::hmmer
- bioconda::hhsuite
- bioconda::kalign2
- pytorch::pytorch=2.1
- pytorch::pytorch-cuda=12.1
- pip:
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
>6KWC_1
GSTIQPGTGYNNGYFYSYWNDGHGGVTYTNGPGGQFSVNWSNSGEFVGGKGWQPGTKNKVINFSGSYNPNGNSYLSVYGWSRNPLIEYYIVENFGTYNPSTGATKLGEVTSDGSVYDIYRTQRVNQPSIIGTATFYQYWSVRRNHRSSGSVNTANHFNAWAQQGLTLGTMDYQIVAVQGYFSSGSASITVS
#!/bin/bash
# Launch OpenFold inference (model_1_ptm preset) on every FASTA file in
# FASTA_DIR, reusing precomputed alignments from PRECOMPUTED_ALIGNMENT_DIR.

# Put the conda environment's libraries first on the linker search paths.
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$CONDA_PREFIX/lib:$LIBRARY_PATH

# Input / output locations.
export FASTA_DIR=./fasta_dir
export OUTPUT_DIR=./
export PRECOMPUTED_ALIGNMENT_DIR=./alignments
export MMCIF_DIR=/mmcifs # UPDATE with path to your mmcifs directory

python3 run_pretrained_openfold.py "${FASTA_DIR}" \
    "${MMCIF_DIR}" \
    --output_dir "${OUTPUT_DIR}" \
    --config_preset model_1_ptm \
    --model_device "cuda:0" \
    --data_random_seed 42 \
    --use_precomputed_alignments "${PRECOMPUTED_ALIGNMENT_DIR}"
This diff is collapsed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/aqlaboratory/openfold/blob/main/notebooks/OpenFold.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
......@@ -136,7 +127,7 @@
"\n",
" %shell mkdir -p /content/openfold/openfold/resources\n",
"\n",
" commit = \"e2e19f16676b1a409f9ba3a6f69b11ee7f5887c2\"\n",
" commit = \"a96ffd67f8c96f8c4decc3abdd2cffbb57fc5764\"\n",
" os.system(f\"pip install -q git+https://github.com/aqlaboratory/openfold.git@{commit}\")\n",
"\n",
" os.system(f\"cp -f -p /content/stereo_chemical_props.txt /usr/local/lib/python{python_version}/site-packages/openfold/resources/\")\n",
......@@ -259,7 +250,7 @@
"from openfold.np import protein\n",
"from openfold.np.relax import relax\n",
"from openfold.np.relax.utils import overwrite_b_factors\n",
"from openfold.utils.import_weights import import_jax_weights_\n",
"from openfold.utils.import_weights import import_jax_weights_, import_openfold_weights_\n",
"from openfold.utils.tensor_utils import tensor_tree_map\n",
"\n",
"from IPython import display\n",
......@@ -582,7 +573,7 @@
" model_name,\n",
" )\n",
" d = torch.load(params_name)\n",
" openfold_model.load_state_dict(d)\n",
" import_openfold_weights_(model=openfold_model, state_dict=d)\n",
" else:\n",
" raise ValueError(f\"Invalid weight set: {weight_set}\")\n",
"\n",
......
......@@ -62,7 +62,8 @@ def model_config(
name,
train=False,
low_prec=False,
long_sequence_inference=False
long_sequence_inference=False,
use_deepspeed_evoformer_attention=False,
):
c = copy.deepcopy(config)
# TRAINING PRESETS
......@@ -237,6 +238,9 @@ def model_config(
c.model.extra_msa.extra_msa_stack.tune_chunk_size = False
c.model.evoformer_stack.tune_chunk_size = False
if use_deepspeed_evoformer_attention:
c.globals.use_deepspeed_evo_attention = True
if train:
c.globals.blocks_per_ckpt = 1
c.globals.chunk_size = None
......
......@@ -1053,7 +1053,6 @@ class OpenFoldDataModule(pl.LightningDataModule):
def val_dataloader(self):
    """Return the validation dataloader.

    Falls back to an empty list when no eval dataset is configured, which
    tells Lightning there is nothing to validate.
    """
    if self.eval_dataset is None:
        return []
    return self._gen_dataloader("eval")
def predict_dataloader(self):
......
......@@ -24,7 +24,7 @@ import os
from typing import Any, Mapping, Optional, Sequence, Tuple
from Bio import PDB
from Bio.Data import SCOPData
from Bio.Data import PDBData
import numpy as np
from openfold.data.errors import MultipleChainsError
......@@ -283,7 +283,7 @@ def parse(
author_chain = mmcif_to_author_chain_id[chain_id]
seq = []
for monomer in seq_info:
code = SCOPData.protein_letters_3to1.get(monomer.id, "X")
code = PDBData.protein_letters_3to1.get(monomer.id, "X")
seq.append(code if len(code) == 1 else "X")
seq = "".join(seq)
author_chain_to_sequence[author_chain] = seq
......@@ -347,6 +347,7 @@ def _get_header(parsed_info: MmCIFDict) -> PdbHeader:
try:
raw_resolution = parsed_info[res_key][0]
header["resolution"] = float(raw_resolution)
break
except ValueError:
logging.debug(
"Invalid resolution format: %s", parsed_info[res_key]
......
......@@ -14,6 +14,7 @@
# limitations under the License.
import re
import logging
from enum import Enum
from dataclasses import dataclass
from functools import partial
......@@ -681,15 +682,18 @@ def convert_deprecated_v1_keys(state_dict):
}
convert_key_re = re.compile("(%s)" % "|".join(map(re.escape, replacements.keys())))
template_emb_re = re.compile(r"^((module\.)?(model\.)?)(template(?!_embedder).*)")
converted_state_dict = {}
for key, value in state_dict.items():
# For each match, look-up replacement value in the dictionary
new_key = convert_key_re.sub(lambda m: replacements[m.group()], key)
new_key = convert_key_re.sub(lambda m: replacements[m.group(1)], key)
# Add prefix for template modules
if new_key.startswith('template'):
new_key = f'template_embedder.{new_key}'
# Add prefix for template layers
template_match = re.match(template_emb_re, new_key)
if template_match:
prefix = template_match.group(1)
new_key = f'{prefix if prefix else ""}template_embedder.{template_match.group(4)}'
converted_state_dict[new_key] = value
......
This diff is collapsed.
import os
import logging
import random
import numpy as np
from pytorch_lightning import seed_everything
from openfold.utils.suppress_output import SuppressLogging
def seed_globally(seed=None):
    """Seed the process-wide RNGs exactly once per process.

    On the first call (when PL_GLOBAL_SEED is absent from the environment),
    a seed is chosen — either the caller-supplied one or a random 32-bit
    value — and published via PL_GLOBAL_SEED so Lightning and any spawned
    workers pick it up. Subsequent calls reuse the published seed.
    """
    if "PL_GLOBAL_SEED" not in os.environ:
        if seed is None:
            seed = random.randint(0, np.iinfo(np.uint32).max)
        os.environ["PL_GLOBAL_SEED"] = str(seed)
        logging.info(f'os.environ["PL_GLOBAL_SEED"] set to {seed}')

    # seed_everything(None) reads the seed back from PL_GLOBAL_SEED.
    # seed_everything is a bit log-happy, so silence its INFO output.
    with SuppressLogging(logging.INFO):
        seed_everything(seed=None)
......@@ -35,10 +35,10 @@ def _superimpose_np(reference, coords):
def _superimpose_single(reference, coords):
reference_np = reference.detach().cpu().numpy()
coords_np = coords.detach().cpu().numpy()
superimposed, rmsd = _superimpose_np(reference_np, coords_np)
return coords.new_tensor(superimposed), coords.new_tensor(rmsd)
reference_np = reference.detach().to(torch.float).cpu().numpy()
coords_np = coords.detach().to(torch.float).cpu().numpy()
superimposed, rmsd = _superimpose_np(reference_np, coords_np)
return coords.new_tensor(superimposed), coords.new_tensor(rmsd)
def superimpose(reference, coords, mask):
......
import logging
import os
import sys
class SuppressStdout:
    """Context manager that silences everything written to ``sys.stdout``.

    On entry, ``sys.stdout`` is redirected to the platform's null device;
    on exit, the original stream is restored and the null-device handle is
    closed. Not reentrant-safe across interleaved instances, since each
    instance restores whatever stream was active when it entered.
    """

    def __enter__(self):
        # Remember the real stdout so it can be restored on exit.
        self.stdout = sys.stdout
        # os.devnull is portable ("/dev/null" on POSIX, "nul" on Windows),
        # unlike the previous hard-coded "/dev/null" path.
        dev_null = open(os.devnull, "w")
        sys.stdout = dev_null

    def __exit__(self, typ, value, traceback):
        # Close the null-device handle and put the original stream back.
        fp = sys.stdout
        sys.stdout = self.stdout
        fp.close()
class SuppressLogging:
    """Context manager that suppresses log records at or below ``level``.

    Uses :func:`logging.disable`, so the suppression is process-global for
    the duration of the ``with`` block; on exit, logging is re-enabled
    unconditionally via ``logging.disable(logging.NOTSET)``.
    """

    def __init__(self, level):
        # Threshold handed to logging.disable() on entry.
        self.level = level

    def __enter__(self):
        # Globally mute all records whose level is <= self.level.
        logging.disable(self.level)

    def __exit__(self, typ, value, traceback):
        # Lift the suppression regardless of how the block exited.
        logging.disable(logging.NOTSET)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment