Merge branch 'dtk24.04.1'

15cd3506 · mashun1 · 24e633dc · 19085464 · 15cd3506 · 15cd3506
Commit 15cd3506 authored Aug 29, 2024 by mashun1
20 changed files
--- a/alphafold/notebooks/notebook_utils_test.py
+++ b/alphafold/notebooks/notebook_utils_test.py
@@ -184,13 +184,6 @@ class NotebookUtilsTest(parameterized.TestCase):
        [np.array([], dtype=templates.TEMPLATE_FEATURES[feat_name]).dtype
         for feat_name in template_features])

-  def test_get_pae_json(self):
-    pae = np.array([[0.01, 13.12345], [20.0987, 0.0]])
-    pae_json = notebook_utils.get_pae_json(pae=pae, max_pae=31.75)
-    self.assertEqual(
-        pae_json, '[{"predicted_aligned_error":[[0.0,13.1],[20.1,0.0]],'
-        '"max_predicted_aligned_error":31.75}]')
-
  def test_check_cell_execution_order_correct(self):
    notebook_utils.check_cell_execution_order({1, 2}, 3)


--- a/alphafold/relax/amber_minimize.py
+++ b/alphafold/relax/amber_minimize.py
@@ -27,19 +27,10 @@ from alphafold.relax import utils
 import ml_collections
 import numpy as np
 import jax
-try:
-  # openmm >= 7.6
-  import openmm
-  from openmm import unit
-  from openmm import app as openmm_app
-  from openmm.app.internal.pdbstructure import PdbStructure
-except ImportError:
-  # openmm < 7.6
-  from simtk import openmm
-  from simtk import unit
-  from simtk.openmm import app as openmm_app
-  from simtk.openmm.app.internal.pdbstructure import PdbStructure
-
+import openmm
+from openmm import unit
+from openmm import app as openmm_app
+from openmm.app.internal.pdbstructure import PdbStructure


 ENERGY = unit.kilocalories_per_mole
@@ -101,7 +92,7 @@ def _openmm_minimize(
    _add_restraints(system, pdb, stiffness, restraint_set, exclude_residues)

  integrator = openmm.LangevinIntegrator(0, 0.01, 0.0)
-  platform = openmm.Platform.getPlatformByName("HIP" if use_gpu else "CPU")
+  platform = openmm.Platform.getPlatformByName("CUDA" if use_gpu else "CPU")
  simulation = openmm_app.Simulation(
      pdb.topology, system, integrator, platform)
  simulation.context.setPositions(pdb.positions)
@@ -497,7 +488,7 @@ def run_pipeline(
    else:
      pdb_string = ret["min_pdb"]
    # Calculation of violations can cause CUDA errors for some JAX versions.
-    with jax.default_device(jax.devices("cpu")[0]):
+    with jax.default_device(jax.local_devices(backend="cpu")[0]):
      ret.update(get_violation_metrics(prot))
    ret.update({
        "num_exclusions": len(exclude_residues),

--- a/alphafold/relax/cleanup.py
+++ b/alphafold/relax/cleanup.py
@@ -20,8 +20,8 @@ cases like removing chains of length one (see clean_structure).
 import io

 import pdbfixer
-from simtk.openmm import app
-from simtk.openmm.app import element
+from openmm import app
+from openmm.app import element


 def fix_pdb(pdbfile, alterations_info):

--- a/alphafold/relax/cleanup_test.py
+++ b/alphafold/relax/cleanup_test.py
@@ -17,7 +17,7 @@ import io

 from absl.testing import absltest
 from alphafold.relax import cleanup
-from simtk.openmm.app.internal import pdbstructure
+from openmm.app.internal import pdbstructure


 def _pdb_to_structure(pdb_str):

--- a/alphafold/version.py
+++ b/alphafold/version.py
+# Copyright 2021 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Single source of truth for the AlphaFold version."""
+
+__version__ = '2.3.2'
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-ARG CUDA=11.1.1
-FROM nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04
+ARG CUDA=12.2.2
+FROM nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu20.04
 # FROM directive resets ARGS, so we specify again (the value is retained if
 # previously set).
 ARG CUDA
@@ -53,14 +53,11 @@ RUN wget -q -P /tmp \

 # Install conda packages.
 ENV PATH="/opt/conda/bin:$PATH"
-RUN conda install -qy conda==4.13.0 \
-    && conda install -y -c conda-forge \
-      openmm=7.5.1 \
-      cudatoolkit==${CUDA_VERSION} \
-      pdbfixer \
-      pip \
-      python=3.8 \
-      && conda clean --all --force-pkgs-dirs --yes
+ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
+RUN conda install -qy conda==24.1.2 pip python=3.11 \
+    && conda install -y -c nvidia cuda=${CUDA_VERSION} \
+    && conda install -y -c conda-forge openmm=8.0.0 pdbfixer \
+    && conda clean --all --force-pkgs-dirs --yes

 COPY . /app/alphafold
 RUN wget -q -P /app/alphafold/alphafold/common/ \
@@ -70,17 +67,16 @@ RUN wget -q -P /app/alphafold/alphafold/common/ \
 RUN pip3 install --upgrade pip --no-cache-dir \
    && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \
    && pip3 install --upgrade --no-cache-dir \
-      jax==0.3.25 \
-      jaxlib==0.3.25+cuda11.cudnn805 \
+      jax==0.4.26 \
+      jaxlib==0.4.26+cuda12.cudnn89 \
      -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html

-# Apply OpenMM patch.
-WORKDIR /opt/conda/lib/python3.8/site-packages
-RUN patch -p0 < /app/alphafold/docker/openmm.patch
-
 # Add SETUID bit to the ldconfig binary so that non-root users can run it.
 RUN chmod u+s /sbin/ldconfig.real

+# Currently needed to avoid an "undefined symbol" error.
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libffi.so.7 /opt/conda/lib/libffi.so.7
+
 # We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk
 # with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for
 # details.

--- a/image.png
+++ b/image.png
--- a/notebooks/AlphaFold.ipynb
+++ b/notebooks/AlphaFold.ipynb
@@ -8,11 +8,11 @@
      "source": [
        "# AlphaFold Colab\n",
        "\n",
-        "This Colab notebook allows you to easily predict the structure of a protein using a slightly simplified version of [AlphaFold v2.3.1](https://doi.org/10.1038/s41586-021-03819-2). \n",
+        "This Colab notebook allows you to easily predict the structure of a protein using a slightly simplified version of [AlphaFold v2.3.2](https://doi.org/10.1038/s41586-021-03819-2). \n",
        "\n",
-        "**Differences to AlphaFold v2.3.1**\n",
+        "**Differences to AlphaFold v2.3.2**\n",
        "\n",
-        "In comparison to AlphaFold v2.3.1, this Colab notebook uses **no templates (homologous structures)** and a selected portion of the [BFD database](https://bfd.mmseqs.com/). We have validated these changes on several thousand recent PDB structures. While accuracy will be near-identical to the full AlphaFold system on many targets, a small fraction have a large drop in accuracy due to the smaller MSA and lack of templates. For best reliability, we recommend instead using the [full open source AlphaFold](https://github.com/deepmind/alphafold/), or the [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).\n",
+        "In comparison to AlphaFold v2.3.2, this Colab notebook uses **no templates (homologous structures)** and a selected portion of the [BFD database](https://bfd.mmseqs.com/). We have validated these changes on several thousand recent PDB structures. While accuracy will be near-identical to the full AlphaFold system on many targets, a small fraction have a large drop in accuracy due to the smaller MSA and lack of templates. For best reliability, we recommend instead using the [full open source AlphaFold](https://github.com/deepmind/alphafold/), or the [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).\n",
        "\n",
        "**This Colab has a small drop in average accuracy for multimers compared to local AlphaFold installation, for full multimer accuracy it is highly recommended to run [AlphaFold locally](https://github.com/deepmind/alphafold#running-alphafold).** Moreover, the AlphaFold-Multimer requires searching for MSA for every unique sequence in the complex, hence it is substantially slower. If your notebook times-out due to slow multimer MSA search, we recommend either using Colab Pro or running AlphaFold locally.\n",
        "\n",
@@ -90,7 +90,7 @@
        "  with tqdm.notebook.tqdm(total=100, bar_format=TQDM_BAR_FORMAT) as pbar:\n",
        "    with io.capture_output() as captured:\n",
        "      # Uninstall default Colab version of TF.\n",
-        "      %shell pip uninstall -y tensorflow\n",
+        "      %shell pip uninstall -y tensorflow keras\n",
        "\n",
        "      %shell sudo apt install --quiet --yes hmmer\n",
        "      pbar.update(6)\n",
@@ -109,10 +109,10 @@
        "\n",
        "      PATH=%env PATH\n",
        "      %env PATH=/opt/conda/bin:{PATH}\n",
-        "      %shell conda install -qy conda==4.13.0 \\\n",
+        "      %shell conda install -qy conda==24.1.2 \\\n",
        "          \u0026\u0026 conda install -qy -c conda-forge \\\n",
-        "            python=3.9 \\\n",
-        "            openmm=7.5.1 \\\n",
+        "            python=3.10 \\\n",
+        "            openmm=8.0.0 \\\n",
        "            pdbfixer\n",
        "      pbar.update(80)\n",
        "\n",
@@ -160,19 +160,14 @@
        "      %shell pip3 install -r ./alphafold/requirements.txt\n",
        "      # Run setup.py to install only AlphaFold.\n",
        "      %shell pip3 install --no-dependencies ./alphafold\n",
-        "      %shell pip3 install --upgrade pyopenssl\n",
+        "      %shell pip3 install pyopenssl==22.0.0\n",
        "      pbar.update(10)\n",
        "\n",
-        "      # Apply OpenMM patch.\n",
-        "      %shell pushd /opt/conda/lib/python3.9/site-packages/ \u0026\u0026 \\\n",
-        "          patch -p0 \u003c /content/alphafold/docker/openmm.patch \u0026\u0026 \\\n",
-        "          popd\n",
-        "\n",
        "      # Make sure stereo_chemical_props.txt is in all locations where it could be searched for.\n",
        "      %shell mkdir -p /content/alphafold/alphafold/common\n",
        "      %shell cp -f /content/stereo_chemical_props.txt /content/alphafold/alphafold/common\n",
-        "      %shell mkdir -p /opt/conda/lib/python3.9/site-packages/alphafold/common/\n",
-        "      %shell cp -f /content/stereo_chemical_props.txt /opt/conda/lib/python3.9/site-packages/alphafold/common/\n",
+        "      %shell mkdir -p /opt/conda/lib/python3.10/site-packages/alphafold/common/\n",
+        "      %shell cp -f /content/stereo_chemical_props.txt /opt/conda/lib/python3.10/site-packages/alphafold/common/\n",
        "\n",
        "      # Load parameters\n",
        "      %shell mkdir --parents \"{PARAMS_DIR}\"\n",
@@ -197,7 +192,7 @@
        "\n",
        "# Make sure everything we need is on the path.\n",
        "import sys\n",
-        "sys.path.append('/opt/conda/lib/python3.9/site-packages')\n",
+        "sys.path.append('/opt/conda/lib/python3.10/site-packages')\n",
        "sys.path.append('/content/alphafold')\n",
        "\n",
        "executed_cells.add(2)"
@@ -374,6 +369,7 @@
        "from alphafold.data import pipeline_multimer\n",
        "from alphafold.data.tools import jackhmmer\n",
        "\n",
+        "from alphafold.common import confidence\n",
        "from alphafold.common import protein\n",
        "\n",
        "from alphafold.relax import relax\n",
@@ -786,7 +782,7 @@
        "pae_output_path = os.path.join(output_dir, 'predicted_aligned_error.json')\n",
        "if pae_outputs:\n",
        "  # Save predicted aligned error in the same format as the AF EMBL DB.\n",
-        "  pae_data = notebook_utils.get_pae_json(pae=pae, max_pae=max_pae.item())\n",
+        "  pae_data = confidence.pae_json(pae=pae, max_pae=max_pae.item())\n",
        "  with open(pae_output_path, 'w') as f:\n",
        "    f.write(pae_data)\n",
        "\n",

--- a/readme_imgs/alphafold2.png
+++ b/readme_imgs/alphafold2.png
--- a/readme_imgs/alphafold2_1.png
+++ b/readme_imgs/alphafold2_1.png
--- a/requirements.txt
+++ b/requirements.txt
 absl-py==1.0.0
 biopython==1.79
-chex==0.0.7
-dm-haiku==0.0.9
-dm-tree==0.1.6
-# docker==5.0.0
+chex==0.1.86
+dm-haiku==0.0.12
+dm-tree==0.1.8
+docker==5.0.0
 immutabledict==2.0.0
-# jax==0.3.25
+jax==0.4.26
 ml-collections==0.1.0
-numpy==1.21.6
-pandas==1.3.4
-scipy==1.7.0
-# tensorflow-cpu==2.11.0
+numpy==1.24.3
+pandas==2.0.3
+scipy==1.11.1
+tensorflow-cpu==2.16.1
--- a/requirements_dcu.txt
+++ b/requirements_dcu.txt
+absl-py==1.0.0
+biopython==1.79
+chex==0.1.86
+dm-tree==0.1.8
+docker==5.0.0
+immutabledict==2.0.0
+ml-collections==0.1.0
+numpy==1.24.3
+pandas==2.0.3
+scipy==1.11.1
+tensorflow-cpu==2.16.1
+matplotlib
+cython
\ No newline at end of file
--- a/run_alphafold.py
+++ b/run_alphafold.py
@@ -22,11 +22,12 @@ import random
 import shutil
 import sys
 import time
-from typing import Any, Dict, Mapping, Union
+from typing import Any, Dict, Union

 from absl import app
 from absl import flags
 from absl import logging
+from alphafold.common import confidence
 from alphafold.common import protein
 from alphafold.common import residue_constants
 from alphafold.data import pipeline
@@ -60,7 +61,6 @@ flags.DEFINE_list(
    'basename is used to name the output directories for each prediction.')

 flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.')
-flags.DEFINE_list('model_names', None, 'Names of models to use.')
 flags.DEFINE_string('output_dir', None, 'Path to a directory that will '
                    'store the results.')
 flags.DEFINE_string('jackhmmer_binary_path', shutil.which('jackhmmer'),
@@ -172,6 +172,63 @@ def _jnp_to_np(output: Dict[str, Any]) -> Dict[str, Any]:
  return output


+def _save_confidence_json_file(
+    plddt: np.ndarray, output_dir: str, model_name: str
+) -> None:
+  confidence_json = confidence.confidence_json(plddt)
+
+  # Save the confidence json.
+  confidence_json_output_path = os.path.join(
+      output_dir, f'confidence_{model_name}.json'
+  )
+  with open(confidence_json_output_path, 'w') as f:
+    f.write(confidence_json)
+
+
+def _save_mmcif_file(
+    prot: protein.Protein,
+    output_dir: str,
+    model_name: str,
+    file_id: str,
+    model_type: str,
+) -> None:
+  """Create mmCIF string and save to a file.
+
+  Args:
+    prot: Protein object.
+    output_dir: Directory to which files are saved.
+    model_name: Name of a model.
+    file_id: The file ID (usually the PDB ID) to be used in the mmCIF.
+    model_type: Monomer or multimer.
+  """
+
+  mmcif_string = protein.to_mmcif(prot, file_id, model_type)
+
+  # Save the mmCIF.
+  mmcif_output_path = os.path.join(output_dir, f'{model_name}.cif')
+  with open(mmcif_output_path, 'w') as f:
+    f.write(mmcif_string)
+
+
+def _save_pae_json_file(
+    pae: np.ndarray, max_pae: float, output_dir: str, model_name: str
+) -> None:
+  """Serialize the given PAE data to JSON and save it to a file.
+
+  Args:
+    pae: The n_res x n_res PAE array.
+    max_pae: The maximum possible PAE value.
+    output_dir: Directory to which files are saved.
+    model_name: Name of a model.
+  """
+  pae_json = confidence.pae_json(pae, max_pae)
+
+  # Save the PAE json.
+  pae_json_output_path = os.path.join(output_dir, f'pae_{model_name}.json')
+  with open(pae_json_output_path, 'w') as f:
+    f.write(pae_json)
+
+
 def predict_structure(
    fasta_path: str,
    fasta_name: str,
@@ -181,7 +238,10 @@ def predict_structure(
    amber_relaxer: relax.AmberRelaxation,
    benchmark: bool,
    random_seed: int,
-    models_to_relax: ModelsToRelax):
+    models_to_relax: ModelsToRelax,
+    model_type: str,
+):
+  
  """Predicts structure using AlphaFold for the given sequence."""
  logging.info('Predicting %s', fasta_name)
  timings = {}
@@ -194,11 +254,6 @@ def predict_structure(

  # Get features.
  t_0 = time.time()
-  # features_output_path = os.path.join(output_dir, 'features.pkl')
-  # if os.path.exists(features_output_path):
-  #   feature_dict = pickle.load(open(features_output_path, 'rb'))
-  
-  # else:
  feature_dict = data_pipeline.process(
      input_fasta_path=fasta_path,
      msa_output_dir=msa_output_dir)
@@ -219,6 +274,7 @@ def predict_structure(
  num_models = len(model_runners)
  for model_index, (model_name, model_runner) in enumerate(
      model_runners.items()):
+    
    logging.info('Running model %s on %s', model_name, fasta_name)
    t_0 = time.time()
    model_random_seed = model_index + random_seed * num_models
@@ -246,8 +302,17 @@ def predict_structure(
          model_name, fasta_name, t_diff)

    plddt = prediction_result['plddt']
+    _save_confidence_json_file(plddt, output_dir, model_name)
    ranking_confidences[model_name] = prediction_result['ranking_confidence']

+    if (
+        'predicted_aligned_error' in prediction_result
+        and 'max_predicted_aligned_error' in prediction_result
+    ):
+      pae = prediction_result['predicted_aligned_error']
+      max_pae = prediction_result['max_predicted_aligned_error']
+      _save_pae_json_file(pae, float(max_pae), output_dir, model_name)
+
    # Remove jax dependency from results.
    np_prediction_result = _jnp_to_np(dict(prediction_result))

@@ -272,6 +337,14 @@ def predict_structure(
    with open(unrelaxed_pdb_path, 'w') as f:
      f.write(unrelaxed_pdbs[model_name])

+    _save_mmcif_file(
+        prot=unrelaxed_protein,
+        output_dir=output_dir,
+        model_name=f'unrelaxed_{model_name}',
+        file_id=str(model_index),
+        model_type=model_type,
+    )
+
  # Rank by model confidence.
  ranked_order = [
      model_name for model_name, confidence in
@@ -303,6 +376,15 @@ def predict_structure(
    with open(relaxed_output_path, 'w') as f:
      f.write(relaxed_pdb_str)

+    relaxed_protein = protein.from_pdb_string(relaxed_pdb_str)
+    _save_mmcif_file(
+        prot=relaxed_protein,
+        output_dir=output_dir,
+        model_name=f'relaxed_{model_name}',
+        file_id='0',
+        model_type=model_type,
+    )
+
  # Write out relaxed PDBs in rank order.
  for idx, model_name in enumerate(ranked_order):
    ranked_output_path = os.path.join(output_dir, f'ranked_{idx}.pdb')
@@ -312,6 +394,19 @@ def predict_structure(
      else:
        f.write(unrelaxed_pdbs[model_name])

+    if model_name in relaxed_pdbs:
+      protein_instance = protein.from_pdb_string(relaxed_pdbs[model_name])
+    else:
+      protein_instance = protein.from_pdb_string(unrelaxed_pdbs[model_name])
+
+    _save_mmcif_file(
+        prot=protein_instance,
+        output_dir=output_dir,
+        model_name=f'ranked_{idx}',
+        file_id=str(idx),
+        model_type=model_type,
+    )
+
  ranking_output_path = os.path.join(output_dir, 'ranking_debug.json')
  with open(ranking_output_path, 'w') as f:
    label = 'iptm+ptm' if 'iptm' in prediction_result else 'plddts'
@@ -348,6 +443,7 @@ def main(argv):
              should_be_set=not use_small_bfd)

  run_multimer_system = 'multimer' in FLAGS.model_preset
+  model_type = 'Multimer' if run_multimer_system else 'Monomer'
  _check_flag('pdb70_database_path', 'model_preset',
              should_be_set=not run_multimer_system)
  _check_flag('pdb_seqres_database_path', 'model_preset',
@@ -362,6 +458,7 @@ def main(argv):

  # Check for duplicate FASTA file names.
  fasta_names = [pathlib.Path(p).stem for p in FLAGS.fasta_paths]
+  
  if len(fasta_names) != len(set(fasta_names)):
    raise ValueError('All FASTA paths must have a unique basename.')

@@ -414,8 +511,7 @@ def main(argv):
    data_pipeline = monomer_data_pipeline

  model_runners = {}
-  # model_names = config.MODEL_PRESETS[FLAGS.model_preset]
-  model_names = FLAGS.model_names
+  model_names = config.MODEL_PRESETS[FLAGS.model_preset]
  for model_name in model_names:
    model_config = config.model_config(model_name)
    if run_multimer_system:
@@ -456,7 +552,9 @@ def main(argv):
        amber_relaxer=amber_relaxer,
        benchmark=FLAGS.benchmark,
        random_seed=random_seed,
-        models_to_relax=FLAGS.models_to_relax)
+        models_to_relax=FLAGS.models_to_relax,
+        model_type=model_type,
+    )


 if __name__ == '__main__':
@@ -464,7 +562,6 @@ if __name__ == '__main__':
      'fasta_paths',
      'output_dir',
      'data_dir',
-      'model_names',
      'uniref90_database_path',
      'mgnify_database_path',
      'template_mmcif_dir',

--- a/run_alphafold_test.py
+++ b/run_alphafold_test.py
@@ -24,6 +24,8 @@ import mock
 import numpy as np
 # Internal import (7716).

+TEST_DATA_DIR = 'alphafold/common/testdata/'
+

 class RunAlphafoldTest(parameterized.TestCase):

@@ -58,7 +60,18 @@ class RunAlphafoldTest(parameterized.TestCase):
        'max_predicted_aligned_error': np.array(0.),
    }
    model_runner_mock.multimer_mode = False
-    amber_relaxer_mock.process.return_value = ('RELAXED', None, [1., 0., 0.])
+
+    with open(
+        os.path.join(
+            absltest.get_default_test_srcdir(), TEST_DATA_DIR, 'glucagon.pdb'
+        )
+    ) as f:
+      pdb_string = f.read()
+    amber_relaxer_mock.process.return_value = (
+        pdb_string,
+        None,
+        [1.0, 0.0, 0.0],
+    )

    out_dir = self.create_tempdir().full_path
    fasta_path = os.path.join(out_dir, 'target.fasta')
@@ -76,7 +89,8 @@ class RunAlphafoldTest(parameterized.TestCase):
        benchmark=False,
        random_seed=0,
        models_to_relax=models_to_relax,
-        )
+        model_type='Monomer',
+    )

    base_output_files = os.listdir(out_dir)
    self.assertIn('target.fasta', base_output_files)
@@ -84,11 +98,22 @@ class RunAlphafoldTest(parameterized.TestCase):

    target_output_files = os.listdir(os.path.join(out_dir, 'test'))
    expected_files = [
-        'features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json',
-        'result_model1.pkl', 'timings.json', 'unrelaxed_model1.pdb',
+        'confidence_model1.json',
+        'features.pkl',
+        'msas',
+        'pae_model1.json',
+        'ranked_0.cif',
+        'ranked_0.pdb',
+        'ranking_debug.json',
+        'result_model1.pkl',
+        'timings.json',
+        'unrelaxed_model1.cif',
+        'unrelaxed_model1.pdb',
    ]
    if models_to_relax == run_alphafold.ModelsToRelax.ALL:
-      expected_files.extend(['relaxed_model1.pdb', 'relax_metrics.json'])
+      expected_files.extend(
+          ['relaxed_model1.cif', 'relaxed_model1.pdb', 'relax_metrics.json']
+      )
      with open(os.path.join(out_dir, 'test', 'relax_metrics.json')) as f:
        relax_metrics = json.loads(f.read())
      self.assertDictEqual({'model1': {'remaining_violations': [1.0, 0.0, 0.0],

--- a/run_monomer.sh
+++ b/run_monomer.sh
- download_dir=/data/alphafold2 
+ #!/bin/bash
+ 
+ download_dir=/home/chuangkj/alphafold2_jax/downloads
 python3 run_alphafold.py \
- --fasta_paths=monomer.fasta \
+ --fasta_paths=rcsb_pdb_8U23.fasta \
 --output_dir=./ \
 --use_precomputed_msas=false \
 --data_dir=$download_dir  \
- --model_names="model_1" \
 --uniref90_database_path=$download_dir/uniref90/uniref90.fasta \
- --mgnify_database_path=$download_dir/mgnify/mgy_clusters_2022_05.fa \
+ --mgnify_database_path=$download_dir/mgnify/mgy_clusters_2018_12.fa \
 --bfd_database_path=$download_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
- --uniref30_database_path=$download_dir/uniref30/UniRef30_2021_03 \
+ --uniref30_database_path=$download_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
 --pdb70_database_path=$download_dir/pdb70/pdb70 \
 --template_mmcif_dir=$download_dir/pdb_mmcif/mmcif_files \
 --obsolete_pdbs_path=$download_dir/pdb_mmcif/obsolete.dat \
- --max_template_date=2020-05-14 \
+ --max_template_date=2024-05-14 \
 --model_preset=monomer \
 --db_preset=full_dbs \
 --models_to_relax=best \

--- a/run_multimer.sh
+++ b/run_multimer.sh
-download_dir=/data/alphafold2 
-python3 run_alphafold.py \
+ #!/bin/bash
+ 
+ download_dir=/home/chuangkj/alphafold2_jax/downloads
+ python3 run_alphafold.py \
 --fasta_paths=multimer.fasta \
 --output_dir=./ \
 --use_precomputed_msas=false \
- --num_multimer_predictions_per_model=1 \
 --data_dir=$download_dir  \
- --model_names="model_1_multimer_v3" \
 --uniref90_database_path=$download_dir/uniref90/uniref90.fasta \
- --mgnify_database_path=$download_dir/mgnify/mgy_clusters_2022_05.fa \
+ --mgnify_database_path=$download_dir/mgnify/mgy_clusters_2018_12.fa \
 --bfd_database_path=$download_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
- --uniref30_database_path=$download_dir/uniref30/UniRef30_2021_03 \
- --uniprot_database_path=$download_dir/uniprot/uniprot.fasta \
+ --uniref30_database_path=$download_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+ --uniprot_database_path=$download_dir/uniprot/uniprot_trembl.fasta \
 --pdb_seqres_database_path=$download_dir/pdb_seqres/pdb_seqres.txt \
 --template_mmcif_dir=$download_dir/pdb_mmcif/mmcif_files \
 --obsolete_pdbs_path=$download_dir/pdb_mmcif/obsolete.dat \
- --max_template_date=2020-05-14 \
+ --max_template_date=2024-05-14 \
 --model_preset=multimer \
 --db_preset=full_dbs \
 --models_to_relax=best \
 --use_gpu_relax=false \
 --benchmark=true

+
+
+
+
+
+ 
+ 
+ 
+
--- a/scripts/download_mgnify.sh
+++ b/scripts/download_mgnify.sh
@@ -32,12 +32,12 @@ fi
 DOWNLOAD_DIR="$1"
 ROOT_DIR="${DOWNLOAD_DIR}/mgnify"
 # Mirror of:
-# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2022_05/mgy_clusters.fa.gz
+# https://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2022_05/mgy_clusters.fa.gz
 SOURCE_URL="https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz"
 BASENAME=$(basename "${SOURCE_URL}")

 mkdir --parents "${ROOT_DIR}"
 aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
-# pushd "${ROOT_DIR}"
+pushd "${ROOT_DIR}"
 gunzip "${ROOT_DIR}/${BASENAME}"
-# popd
+popd
--- a/scripts/download_pdb_mmcif.sh
+++ b/scripts/download_pdb_mmcif.sh
@@ -45,24 +45,9 @@ echo "  * rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ (Eur
 echo "  * ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ (Asia)"
 echo "or see https://www.wwpdb.org/ftp/pdb-ftp-sites for more download options."
 mkdir --parents "${RAW_DIR}"
-# rsync --recursive --links --perms --times --compress --info=progress2 --delete --port=33444 \
-#   rsync.rcsb.org::ftp_data/structures/divided/mmCIF/ \
-#   "${RAW_DIR}"
-
-# (Asia)
-rsync -rlpt -v -z --info=progress2 --delete \
-    ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ \
-    "${RAW_DIR}"
-
-# (Europe)
-# rsync -rlpt -v -z --info=progress2 --delete \
-#   rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ \
-#   "${RAW_DIR}"
-
-# fast
-# rsync --recursive --links --perms --times --compress --info=progress2 --delete \
-# data.pdbj.org::ftp_data/structures/divided/mmCIF/ "${RAW_DIR}"
-
+rsync --recursive --links --perms --times --compress --info=progress2 --delete --port=33444 \
+  rsync.rcsb.org::ftp_data/structures/divided/mmCIF/ \
+  "${RAW_DIR}"

 echo "Unzipping all mmCIF files..."
 find "${RAW_DIR}/" -type f -iname "*.gz" -exec gunzip {} +
@@ -77,4 +62,4 @@ done
 # Delete empty download directory structure.
 find "${RAW_DIR}" -type d -empty -delete

-aria2c "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" --dir="${ROOT_DIR}"
+aria2c "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat" --dir="${ROOT_DIR}"
--- a/scripts/download_pdb_seqres.sh
+++ b/scripts/download_pdb_seqres.sh
@@ -31,7 +31,7 @@ fi

 DOWNLOAD_DIR="$1"
 ROOT_DIR="${DOWNLOAD_DIR}/pdb_seqres"
-SOURCE_URL="ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt"
+SOURCE_URL="https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt"
 BASENAME=$(basename "${SOURCE_URL}")

 mkdir --parents "${ROOT_DIR}"

--- a/scripts/download_small_bfd.sh
+++ b/scripts/download_small_bfd.sh
@@ -36,6 +36,6 @@ BASENAME=$(basename "${SOURCE_URL}")

 mkdir --parents "${ROOT_DIR}"
 aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
-# pushd "${ROOT_DIR}"
+pushd "${ROOT_DIR}"
 gunzip "${ROOT_DIR}/${BASENAME}"
-# popd
+popd