"examples/vscode:/vscode.git/clone" did not exist on "5d5235bc51215dc725394a8e6e93bc5417ff08e2"
Commit 92f1932e authored by Gustaf Ahdritz's avatar Gustaf Ahdritz

Initial commit

parent 3d9c2de3
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cleans up a PDB file using pdbfixer in preparation for OpenMM simulations.
fix_pdb uses a third-party tool. We also support fixing some additional edge
cases like removing chains of length one (see clean_structure).
"""
import io
import pdbfixer
from simtk.openmm import app
from simtk.openmm.app import element
def fix_pdb(pdbfile, alterations_info):
"""Apply pdbfixer to the contents of a PDB file; return a PDB string result.
1) Replaces nonstandard residues.
2) Removes heterogens (non protein residues) including water.
3) Adds missing residues and missing atoms within existing residues.
4) Adds hydrogens assuming pH=7.0.
5) KeepIds is currently true, so the fixer must keep the existing chain and
     residue identifiers. This will fail for some files in the wider PDB that
     have invalid IDs.
Args:
pdbfile: Input PDB file handle.
alterations_info: A dict that will store details of changes made.
Returns:
A PDB string representing the fixed structure.
"""
fixer = pdbfixer.PDBFixer(pdbfile=pdbfile)
fixer.findNonstandardResidues()
alterations_info['nonstandard_residues'] = fixer.nonstandardResidues
fixer.replaceNonstandardResidues()
_remove_heterogens(fixer, alterations_info, keep_water=False)
fixer.findMissingResidues()
alterations_info['missing_residues'] = fixer.missingResidues
fixer.findMissingAtoms()
alterations_info['missing_heavy_atoms'] = fixer.missingAtoms
alterations_info['missing_terminals'] = fixer.missingTerminals
fixer.addMissingAtoms(seed=0)
fixer.addMissingHydrogens()
out_handle = io.StringIO()
app.PDBFile.writeFile(fixer.topology, fixer.positions, out_handle,
keepIds=True)
return out_handle.getvalue()
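# Illustrative usage sketch (an addition, not part of the original module):
# fix_pdb takes an open text handle plus a dict that it fills with details of
# the alterations made. The file path below is hypothetical.
def _example_fix_pdb_usage(pdb_path='input.pdb'):
  alterations = {}
  with open(pdb_path) as f:
    fixed_pdb_str = fix_pdb(f, alterations)
  # alterations now holds keys like 'nonstandard_residues' and
  # 'missing_residues' recorded by fix_pdb.
  return fixed_pdb_str, alterations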
def clean_structure(pdb_structure, alterations_info):
"""Applies additional fixes to an OpenMM structure, to handle edge cases.
Args:
pdb_structure: An OpenMM structure to modify and fix.
alterations_info: A dict that will store details of changes made.
"""
_replace_met_se(pdb_structure, alterations_info)
_remove_chains_of_length_one(pdb_structure, alterations_info)
def _remove_heterogens(fixer, alterations_info, keep_water):
"""Removes the residues that Pdbfixer considers to be heterogens.
Args:
fixer: A Pdbfixer instance.
alterations_info: A dict that will store details of changes made.
keep_water: If True, water (HOH) is not considered to be a heterogen.
"""
initial_resnames = set()
for chain in fixer.topology.chains():
for residue in chain.residues():
initial_resnames.add(residue.name)
fixer.removeHeterogens(keepWater=keep_water)
final_resnames = set()
for chain in fixer.topology.chains():
for residue in chain.residues():
final_resnames.add(residue.name)
alterations_info['removed_heterogens'] = (
initial_resnames.difference(final_resnames))
def _replace_met_se(pdb_structure, alterations_info):
"""Replace the Se in any MET residues that were not marked as modified."""
modified_met_residues = []
for res in pdb_structure.iter_residues():
name = res.get_name_with_spaces().strip()
if name == 'MET':
s_atom = res.get_atom('SD')
if s_atom.element_symbol == 'Se':
s_atom.element_symbol = 'S'
s_atom.element = element.get_by_symbol('S')
modified_met_residues.append(s_atom.residue_number)
alterations_info['Se_in_MET'] = modified_met_residues
def _remove_chains_of_length_one(pdb_structure, alterations_info):
"""Removes chains that correspond to a single amino acid.
A single amino acid in a chain is both N and C terminus. There is no force
template for this case.
Args:
pdb_structure: An OpenMM pdb_structure to modify and fix.
alterations_info: A dict that will store details of changes made.
"""
removed_chains = {}
for model in pdb_structure.iter_models():
valid_chains = [c for c in model.iter_chains() if len(c) > 1]
invalid_chain_ids = [c.chain_id for c in model.iter_chains() if len(c) <= 1]
model.chains = valid_chains
for chain_id in invalid_chain_ids:
model.chains_by_id.pop(chain_id)
removed_chains[model.number] = invalid_chain_ids
alterations_info['removed_chains'] = removed_chains
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Amber relaxation."""
from typing import Any, Dict, Sequence, Tuple
from alphafold.np import protein
from alphafold.np.relax import amber_minimize, utils
import numpy as np
class AmberRelaxation(object):
"""Amber relaxation."""
def __init__(self,
*,
max_iterations: int,
tolerance: float,
stiffness: float,
exclude_residues: Sequence[int],
max_outer_iterations: int):
"""Initialize Amber Relaxer.
Args:
max_iterations: Maximum number of L-BFGS iterations. 0 means no max.
tolerance: kcal/mol, the energy tolerance of L-BFGS.
stiffness: kcal/mol A**2, spring constant of heavy atom restraining
potential.
exclude_residues: Residues to exclude from per-atom restraining.
Zero-indexed.
max_outer_iterations: Maximum number of violation-informed relax
iterations. A value of 1 will run the non-iterative procedure used in
CASP14. Use 20 so that >95% of the bad cases are relaxed. Relax finishes
as soon as there are no violations, hence in most cases this causes no
slowdown. In the worst case we do 20 outer iterations.
"""
self._max_iterations = max_iterations
self._tolerance = tolerance
self._stiffness = stiffness
self._exclude_residues = exclude_residues
self._max_outer_iterations = max_outer_iterations
def process(self, *,
prot: protein.Protein) -> Tuple[str, Dict[str, Any], np.ndarray]:
"""Runs Amber relax on a prediction, adds hydrogens, returns PDB string."""
out = amber_minimize.run_pipeline(
prot=prot, max_iterations=self._max_iterations,
tolerance=self._tolerance, stiffness=self._stiffness,
exclude_residues=self._exclude_residues,
max_outer_iterations=self._max_outer_iterations)
min_pos = out['pos']
start_pos = out['posinit']
rmsd = np.sqrt(np.sum((start_pos - min_pos)**2) / start_pos.shape[0])
debug_data = {
'initial_energy': out['einit'],
'final_energy': out['efinal'],
'attempts': out['min_attempts'],
'rmsd': rmsd
}
pdb_str = amber_minimize.clean_protein(prot)
min_pdb = utils.overwrite_pdb_coordinates(pdb_str, min_pos)
min_pdb = utils.overwrite_b_factors(min_pdb, prot.b_factors)
utils.assert_equal_nonterminal_atom_types(
protein.from_pdb_string(min_pdb).atom_mask,
prot.atom_mask)
violations = out['structural_violations'][
'total_per_residue_violations_mask']
return min_pdb, debug_data, violations
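# Minimal sketch of driving AmberRelaxation. The hyperparameter values mirror
# the guidance in the docstring above; they are illustrative, not a tested
# configuration from this repository.
def _example_relax(prot: protein.Protein):
  relaxer = AmberRelaxation(
      max_iterations=0,  # 0 means no L-BFGS iteration cap.
      tolerance=2.39,  # kcal/mol.
      stiffness=10.0,  # kcal/mol A**2.
      exclude_residues=[],
      max_outer_iterations=20,  # Iterative relax, per the docstring.
  )
  min_pdb, debug_data, violations = relaxer.process(prot=prot)
  return min_pdb, debug_data['rmsd'], violations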
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utils for minimization."""
import io
from alphafold.np import residue_constants
from Bio import PDB
import numpy as np
from simtk.openmm import app as openmm_app
from simtk.openmm.app.internal.pdbstructure import PdbStructure
def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
  """Rewrites the coordinates in pdb_str with pos, keeping the topology."""
pdb_file = io.StringIO(pdb_str)
structure = PdbStructure(pdb_file)
topology = openmm_app.PDBFile(structure).getTopology()
with io.StringIO() as f:
openmm_app.PDBFile.writeFile(topology, pos, f)
return f.getvalue()
def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str:
"""Overwrites the B-factors in pdb_str with contents of bfactors array.
Args:
pdb_str: An input PDB string.
    bfactors: A numpy array with shape [n_residues, 37]. We assume that the
      B-factors are per residue, i.e. that the nonzero entries are identical
      within [i, :].
Returns:
A new PDB string with the B-factors replaced.
"""
if bfactors.shape[-1] != residue_constants.atom_type_num:
raise ValueError(
f'Invalid final dimension size for bfactors: {bfactors.shape[-1]}.')
parser = PDB.PDBParser(QUIET=True)
handle = io.StringIO(pdb_str)
structure = parser.get_structure('', handle)
curr_resid = ('', '', '')
idx = -1
for atom in structure.get_atoms():
atom_resid = atom.parent.get_id()
if atom_resid != curr_resid:
idx += 1
if idx >= bfactors.shape[0]:
        raise ValueError(
            'Index into bfactors exceeds number of residues. '
            f'B-factors shape: {bfactors.shape}, idx: {idx}.')
curr_resid = atom_resid
atom.bfactor = bfactors[idx, residue_constants.atom_order['CA']]
new_pdb = io.StringIO()
pdb_io = PDB.PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(new_pdb)
return new_pdb.getvalue()
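# Toy sketch of the bfactors layout overwrite_b_factors expects (consistent
# with the indexing above): one row per residue, one column per atom37 slot,
# with the per-residue value read from the CA column.
def _example_bfactors(num_res: int) -> np.ndarray:
  bfactors = np.zeros((num_res, residue_constants.atom_type_num))
  bfactors[:, residue_constants.atom_order['CA']] = 50.0  # Arbitrary value.
  return bfactors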
def assert_equal_nonterminal_atom_types(
atom_mask: np.ndarray, ref_atom_mask: np.ndarray):
"""Checks that pre- and post-minimized proteins have same atom set."""
# Ignore any terminal OXT atoms which may have been added by minimization.
oxt = residue_constants.atom_order['OXT']
  no_oxt_mask = np.ones(shape=atom_mask.shape, dtype=bool)
no_oxt_mask[..., oxt] = False
np.testing.assert_almost_equal(ref_atom_mask[no_oxt_mask],
atom_mask[no_oxt_mask])
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Constants used in AlphaFold."""
import collections
import functools
from typing import Mapping, List, Tuple
import numpy as np
import tree
# Internal import (35fd).
# Distance from one CA to next CA [trans configuration: omega = 180].
ca_ca = 3.80209737096
# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
# chi angles so their chi angle lists are empty.
chi_angles_atoms = {
'ALA': [],
# Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']],
'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
'CYS': [['N', 'CA', 'CB', 'SG']],
'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
['CB', 'CG', 'CD', 'OE1']],
'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
['CB', 'CG', 'CD', 'OE1']],
'GLY': [],
'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']],
'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']],
'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']],
'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'],
['CB', 'CG', 'SD', 'CE']],
'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']],
'SER': [['N', 'CA', 'CB', 'OG']],
'THR': [['N', 'CA', 'CB', 'OG1']],
'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
'VAL': [['N', 'CA', 'CB', 'CG1']],
}
# If chi angles given in fixed-length array, this matrix determines how to mask
# them for each AA type. The order is as per restype_order (see below).
chi_angles_mask = [
[0.0, 0.0, 0.0, 0.0], # ALA
[1.0, 1.0, 1.0, 1.0], # ARG
[1.0, 1.0, 0.0, 0.0], # ASN
[1.0, 1.0, 0.0, 0.0], # ASP
[1.0, 0.0, 0.0, 0.0], # CYS
[1.0, 1.0, 1.0, 0.0], # GLN
[1.0, 1.0, 1.0, 0.0], # GLU
[0.0, 0.0, 0.0, 0.0], # GLY
[1.0, 1.0, 0.0, 0.0], # HIS
[1.0, 1.0, 0.0, 0.0], # ILE
[1.0, 1.0, 0.0, 0.0], # LEU
[1.0, 1.0, 1.0, 1.0], # LYS
[1.0, 1.0, 1.0, 0.0], # MET
[1.0, 1.0, 0.0, 0.0], # PHE
[1.0, 1.0, 0.0, 0.0], # PRO
[1.0, 0.0, 0.0, 0.0], # SER
[1.0, 0.0, 0.0, 0.0], # THR
[1.0, 1.0, 0.0, 0.0], # TRP
[1.0, 1.0, 0.0, 0.0], # TYR
[1.0, 0.0, 0.0, 0.0], # VAL
]
# The following chi angles are pi periodic: they can be rotated by a multiple
# of pi without affecting the structure.
chi_pi_periodic = [
[0.0, 0.0, 0.0, 0.0], # ALA
[0.0, 0.0, 0.0, 0.0], # ARG
[0.0, 0.0, 0.0, 0.0], # ASN
[0.0, 1.0, 0.0, 0.0], # ASP
[0.0, 0.0, 0.0, 0.0], # CYS
[0.0, 0.0, 0.0, 0.0], # GLN
[0.0, 0.0, 1.0, 0.0], # GLU
[0.0, 0.0, 0.0, 0.0], # GLY
[0.0, 0.0, 0.0, 0.0], # HIS
[0.0, 0.0, 0.0, 0.0], # ILE
[0.0, 0.0, 0.0, 0.0], # LEU
[0.0, 0.0, 0.0, 0.0], # LYS
[0.0, 0.0, 0.0, 0.0], # MET
[0.0, 1.0, 0.0, 0.0], # PHE
[0.0, 0.0, 0.0, 0.0], # PRO
[0.0, 0.0, 0.0, 0.0], # SER
[0.0, 0.0, 0.0, 0.0], # THR
[0.0, 0.0, 0.0, 0.0], # TRP
[0.0, 1.0, 0.0, 0.0], # TYR
[0.0, 0.0, 0.0, 0.0], # VAL
[0.0, 0.0, 0.0, 0.0], # UNK
]
# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi,
# psi and chi angles:
# 0: 'backbone group',
# 1: 'pre-omega-group', (empty)
# 2: 'phi-group', (currently empty, because it defines only hydrogens)
# 3: 'psi-group',
# 4,5,6,7: 'chi1,2,3,4-group'
# The atom positions are relative to the axis-end-atom of the corresponding
# rotation axis. The x-axis is in direction of the rotation axis, and the y-axis
# is defined such that the dihedral-angle-defining atom (the last entry in
# chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate).
# format: [atomname, group_idx, rel_position]
rigid_group_atom_positions = {
'ALA': [
['N', 0, (-0.525, 1.363, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, -0.000, -0.000)],
['CB', 0, (-0.529, -0.774, -1.205)],
['O', 3, (0.627, 1.062, 0.000)],
],
'ARG': [
['N', 0, (-0.524, 1.362, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, -0.000, -0.000)],
['CB', 0, (-0.524, -0.778, -1.209)],
['O', 3, (0.626, 1.062, 0.000)],
['CG', 4, (0.616, 1.390, -0.000)],
['CD', 5, (0.564, 1.414, 0.000)],
['NE', 6, (0.539, 1.357, -0.000)],
['NH1', 7, (0.206, 2.301, 0.000)],
['NH2', 7, (2.078, 0.978, -0.000)],
['CZ', 7, (0.758, 1.093, -0.000)],
],
'ASN': [
['N', 0, (-0.536, 1.357, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, -0.000, -0.000)],
['CB', 0, (-0.531, -0.787, -1.200)],
['O', 3, (0.625, 1.062, 0.000)],
['CG', 4, (0.584, 1.399, 0.000)],
['ND2', 5, (0.593, -1.188, 0.001)],
['OD1', 5, (0.633, 1.059, 0.000)],
],
'ASP': [
['N', 0, (-0.525, 1.362, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.527, 0.000, -0.000)],
['CB', 0, (-0.526, -0.778, -1.208)],
['O', 3, (0.626, 1.062, -0.000)],
['CG', 4, (0.593, 1.398, -0.000)],
['OD1', 5, (0.610, 1.091, 0.000)],
['OD2', 5, (0.592, -1.101, -0.003)],
],
'CYS': [
['N', 0, (-0.522, 1.362, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.524, 0.000, 0.000)],
['CB', 0, (-0.519, -0.773, -1.212)],
['O', 3, (0.625, 1.062, -0.000)],
['SG', 4, (0.728, 1.653, 0.000)],
],
'GLN': [
['N', 0, (-0.526, 1.361, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, 0.000, 0.000)],
['CB', 0, (-0.525, -0.779, -1.207)],
['O', 3, (0.626, 1.062, -0.000)],
['CG', 4, (0.615, 1.393, 0.000)],
['CD', 5, (0.587, 1.399, -0.000)],
['NE2', 6, (0.593, -1.189, -0.001)],
['OE1', 6, (0.634, 1.060, 0.000)],
],
'GLU': [
['N', 0, (-0.528, 1.361, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, -0.000, -0.000)],
['CB', 0, (-0.526, -0.781, -1.207)],
['O', 3, (0.626, 1.062, 0.000)],
['CG', 4, (0.615, 1.392, 0.000)],
['CD', 5, (0.600, 1.397, 0.000)],
['OE1', 6, (0.607, 1.095, -0.000)],
['OE2', 6, (0.589, -1.104, -0.001)],
],
'GLY': [
['N', 0, (-0.572, 1.337, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.517, -0.000, -0.000)],
['O', 3, (0.626, 1.062, -0.000)],
],
'HIS': [
['N', 0, (-0.527, 1.360, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, 0.000, 0.000)],
['CB', 0, (-0.525, -0.778, -1.208)],
['O', 3, (0.625, 1.063, 0.000)],
['CG', 4, (0.600, 1.370, -0.000)],
['CD2', 5, (0.889, -1.021, 0.003)],
['ND1', 5, (0.744, 1.160, -0.000)],
['CE1', 5, (2.030, 0.851, 0.002)],
['NE2', 5, (2.145, -0.466, 0.004)],
],
'ILE': [
['N', 0, (-0.493, 1.373, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.527, -0.000, -0.000)],
['CB', 0, (-0.536, -0.793, -1.213)],
['O', 3, (0.627, 1.062, -0.000)],
['CG1', 4, (0.534, 1.437, -0.000)],
['CG2', 4, (0.540, -0.785, -1.199)],
['CD1', 5, (0.619, 1.391, 0.000)],
],
'LEU': [
['N', 0, (-0.520, 1.363, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, -0.000, -0.000)],
['CB', 0, (-0.522, -0.773, -1.214)],
['O', 3, (0.625, 1.063, -0.000)],
['CG', 4, (0.678, 1.371, 0.000)],
['CD1', 5, (0.530, 1.430, -0.000)],
['CD2', 5, (0.535, -0.774, 1.200)],
],
'LYS': [
['N', 0, (-0.526, 1.362, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, 0.000, 0.000)],
['CB', 0, (-0.524, -0.778, -1.208)],
['O', 3, (0.626, 1.062, -0.000)],
['CG', 4, (0.619, 1.390, 0.000)],
['CD', 5, (0.559, 1.417, 0.000)],
['CE', 6, (0.560, 1.416, 0.000)],
['NZ', 7, (0.554, 1.387, 0.000)],
],
'MET': [
['N', 0, (-0.521, 1.364, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, 0.000, 0.000)],
['CB', 0, (-0.523, -0.776, -1.210)],
['O', 3, (0.625, 1.062, -0.000)],
['CG', 4, (0.613, 1.391, -0.000)],
['SD', 5, (0.703, 1.695, 0.000)],
['CE', 6, (0.320, 1.786, -0.000)],
],
'PHE': [
['N', 0, (-0.518, 1.363, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.524, 0.000, -0.000)],
['CB', 0, (-0.525, -0.776, -1.212)],
['O', 3, (0.626, 1.062, -0.000)],
['CG', 4, (0.607, 1.377, 0.000)],
['CD1', 5, (0.709, 1.195, -0.000)],
['CD2', 5, (0.706, -1.196, 0.000)],
['CE1', 5, (2.102, 1.198, -0.000)],
['CE2', 5, (2.098, -1.201, -0.000)],
['CZ', 5, (2.794, -0.003, -0.001)],
],
'PRO': [
['N', 0, (-0.566, 1.351, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.527, -0.000, 0.000)],
['CB', 0, (-0.546, -0.611, -1.293)],
['O', 3, (0.621, 1.066, 0.000)],
['CG', 4, (0.382, 1.445, 0.0)],
# ['CD', 5, (0.427, 1.440, 0.0)],
['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger
],
'SER': [
['N', 0, (-0.529, 1.360, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, -0.000, -0.000)],
['CB', 0, (-0.518, -0.777, -1.211)],
['O', 3, (0.626, 1.062, -0.000)],
['OG', 4, (0.503, 1.325, 0.000)],
],
'THR': [
['N', 0, (-0.517, 1.364, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.526, 0.000, -0.000)],
['CB', 0, (-0.516, -0.793, -1.215)],
['O', 3, (0.626, 1.062, 0.000)],
['CG2', 4, (0.550, -0.718, -1.228)],
['OG1', 4, (0.472, 1.353, 0.000)],
],
'TRP': [
['N', 0, (-0.521, 1.363, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.525, -0.000, 0.000)],
['CB', 0, (-0.523, -0.776, -1.212)],
['O', 3, (0.627, 1.062, 0.000)],
['CG', 4, (0.609, 1.370, -0.000)],
['CD1', 5, (0.824, 1.091, 0.000)],
['CD2', 5, (0.854, -1.148, -0.005)],
['CE2', 5, (2.186, -0.678, -0.007)],
['CE3', 5, (0.622, -2.530, -0.007)],
['NE1', 5, (2.140, 0.690, -0.004)],
['CH2', 5, (3.028, -2.890, -0.013)],
['CZ2', 5, (3.283, -1.543, -0.011)],
['CZ3', 5, (1.715, -3.389, -0.011)],
],
'TYR': [
['N', 0, (-0.522, 1.362, 0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.524, -0.000, -0.000)],
['CB', 0, (-0.522, -0.776, -1.213)],
['O', 3, (0.627, 1.062, -0.000)],
['CG', 4, (0.607, 1.382, -0.000)],
['CD1', 5, (0.716, 1.195, -0.000)],
['CD2', 5, (0.713, -1.194, -0.001)],
['CE1', 5, (2.107, 1.200, -0.002)],
['CE2', 5, (2.104, -1.201, -0.003)],
['OH', 5, (4.168, -0.002, -0.005)],
['CZ', 5, (2.791, -0.001, -0.003)],
],
'VAL': [
['N', 0, (-0.494, 1.373, -0.000)],
['CA', 0, (0.000, 0.000, 0.000)],
['C', 0, (1.527, -0.000, -0.000)],
['CB', 0, (-0.533, -0.795, -1.213)],
['O', 3, (0.627, 1.062, -0.000)],
['CG1', 4, (0.540, 1.429, -0.000)],
['CG2', 4, (0.533, -0.776, 1.203)],
],
}
# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention.
residue_atoms = {
'ALA': ['C', 'CA', 'CB', 'N', 'O'],
'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'],
'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'],
'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'],
'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'],
'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'],
'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'],
'GLY': ['C', 'CA', 'N', 'O'],
'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'],
'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'],
'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'],
'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'],
'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'],
'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'],
'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'],
'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'],
'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'],
'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3', 'CZ2', 'CZ3',
'CH2', 'N', 'NE1', 'O'],
'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O',
'OH'],
'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O']
}
# Naming swaps for ambiguous atom names.
# Due to symmetries in the amino acids the naming of atoms is ambiguous in
# 4 of the 20 amino acids.
# (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities
# in LEU, VAL and ARG can be resolved by using the 3d constellations of
# the 'ambiguous' atoms and their neighbours)
residue_atom_renaming_swaps = {
'ASP': {'OD1': 'OD2'},
'GLU': {'OE1': 'OE2'},
'PHE': {'CD1': 'CD2', 'CE1': 'CE2'},
'TYR': {'CD1': 'CD2', 'CE1': 'CE2'},
}
# Van der Waals radii [Angstroem] of the atoms (from Wikipedia)
van_der_waals_radius = {
'C': 1.7,
'N': 1.55,
'O': 1.52,
'S': 1.8,
}
Bond = collections.namedtuple(
'Bond', ['atom1_name', 'atom2_name', 'length', 'stddev'])
BondAngle = collections.namedtuple(
'BondAngle',
['atom1_name', 'atom2_name', 'atom3name', 'angle_rad', 'stddev'])
@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]],
Mapping[str, List[Bond]],
Mapping[str, List[BondAngle]]]:
"""Load stereo_chemical_props.txt into a nice structure.
Load literature values for bond lengths and bond angles and translate
bond angles into the length of the opposite edge of the triangle
("residue_virtual_bonds").
Returns:
residue_bonds: dict that maps resname --> list of Bond tuples
residue_virtual_bonds: dict that maps resname --> list of Bond tuples
residue_bond_angles: dict that maps resname --> list of BondAngle tuples
"""
# TODO: this file should be downloaded in a setup script
stereo_chemical_props_path = (
'alphafold/resources/stereo_chemical_props.txt')
with open(stereo_chemical_props_path, 'rt') as f:
stereo_chemical_props = f.read()
lines_iter = iter(stereo_chemical_props.splitlines())
# Load bond lengths.
residue_bonds = {}
next(lines_iter) # Skip header line.
for line in lines_iter:
if line.strip() == '-':
break
bond, resname, length, stddev = line.split()
atom1, atom2 = bond.split('-')
if resname not in residue_bonds:
residue_bonds[resname] = []
residue_bonds[resname].append(
Bond(atom1, atom2, float(length), float(stddev)))
residue_bonds['UNK'] = []
# Load bond angles.
residue_bond_angles = {}
next(lines_iter) # Skip empty line.
next(lines_iter) # Skip header line.
for line in lines_iter:
if line.strip() == '-':
break
bond, resname, angle_degree, stddev_degree = line.split()
atom1, atom2, atom3 = bond.split('-')
if resname not in residue_bond_angles:
residue_bond_angles[resname] = []
residue_bond_angles[resname].append(
BondAngle(atom1, atom2, atom3,
float(angle_degree) / 180. * np.pi,
float(stddev_degree) / 180. * np.pi))
residue_bond_angles['UNK'] = []
def make_bond_key(atom1_name, atom2_name):
"""Unique key to lookup bonds."""
return '-'.join(sorted([atom1_name, atom2_name]))
# Translate bond angles into distances ("virtual bonds").
residue_virtual_bonds = {}
for resname, bond_angles in residue_bond_angles.items():
# Create a fast lookup dict for bond lengths.
bond_cache = {}
for b in residue_bonds[resname]:
bond_cache[make_bond_key(b.atom1_name, b.atom2_name)] = b
residue_virtual_bonds[resname] = []
for ba in bond_angles:
bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)]
bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)]
# Compute distance between atom1 and atom3 using the law of cosines
# c^2 = a^2 + b^2 - 2ab*cos(gamma).
gamma = ba.angle_rad
length = np.sqrt(bond1.length**2 + bond2.length**2
- 2 * bond1.length * bond2.length * np.cos(gamma))
# Propagation of uncertainty assuming uncorrelated errors.
dl_outer = 0.5 / length
dl_dgamma = (2 * bond1.length * bond2.length * np.sin(gamma)) * dl_outer
dl_db1 = (2 * bond1.length - 2 * bond2.length * np.cos(gamma)) * dl_outer
dl_db2 = (2 * bond2.length - 2 * bond1.length * np.cos(gamma)) * dl_outer
stddev = np.sqrt((dl_dgamma * ba.stddev)**2 +
(dl_db1 * bond1.stddev)**2 +
(dl_db2 * bond2.stddev)**2)
residue_virtual_bonds[resname].append(
Bond(ba.atom1_name, ba.atom3name, length, stddev))
return (residue_bonds,
residue_virtual_bonds,
residue_bond_angles)
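# Quick numeric check of the law-of-cosines translation above, with synthetic
# numbers rather than literature values: two 1.5 A bonds at a 109.5 degree
# angle give a virtual bond of about 2.45 A.
def _example_virtual_bond_length():
  a, b = 1.5, 1.5
  gamma = 109.5 / 180. * np.pi
  return np.sqrt(a**2 + b**2 - 2 * a * b * np.cos(gamma))  # ~2.45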
# Between-residue bond lengths for general bonds (first element) and for Proline
# (second element).
between_res_bond_length_c_n = [1.329, 1.341]
between_res_bond_length_stddev_c_n = [0.014, 0.016]
# Between-residue cos_angles.
between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315
between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995
# This mapping is used when we need to store atom data in a format that requires
# fixed atom data size for every residue (e.g. a numpy array).
atom_types = [
'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD',
'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3',
'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2',
'CZ3', 'NZ', 'OXT'
]
atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)}
atom_type_num = len(atom_types) # := 37.
# A compact atom encoding with 14 columns
# pylint: disable=line-too-long
# pylint: disable=bad-whitespace
restype_name_to_atom14_names = {
'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''],
'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''],
'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''],
'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''],
'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''],
'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''],
'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''],
'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''],
'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''],
'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''],
'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''],
'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''],
'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''],
'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''],
'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''],
'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''],
'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''],
'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''],
'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''],
'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],
}
# pylint: enable=line-too-long
# pylint: enable=bad-whitespace
# This is the standard residue order when coding AA type as a number.
# Reproduce it by taking 3-letter AA codes and sorting them alphabetically.
restypes = [
'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
'S', 'T', 'W', 'Y', 'V'
]
restype_order = {restype: i for i, restype in enumerate(restypes)}
restype_num = len(restypes) # := 20.
unk_restype_index = restype_num # Catch-all index for unknown restypes.
restypes_with_x = restypes + ['X']
restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)}
def sequence_to_onehot(
sequence: str,
mapping: Mapping[str, int],
map_unknown_to_x: bool = False) -> np.ndarray:
"""Maps the given sequence into a one-hot encoded matrix.
Args:
sequence: An amino acid sequence.
mapping: A dictionary mapping amino acids to integers.
map_unknown_to_x: If True, any amino acid that is not in the mapping will be
mapped to the unknown amino acid 'X'. If the mapping doesn't contain
amino acid 'X', an error will be thrown. If False, any amino acid not in
the mapping will throw an error.
Returns:
A numpy array of shape (seq_len, num_unique_aas) with one-hot encoding of
the sequence.
Raises:
ValueError: If the mapping doesn't contain values from 0 to
num_unique_aas - 1 without any gaps.
"""
num_entries = max(mapping.values()) + 1
if sorted(set(mapping.values())) != list(range(num_entries)):
raise ValueError('The mapping must have values from 0 to num_unique_aas-1 '
'without any gaps. Got: %s' % sorted(mapping.values()))
one_hot_arr = np.zeros((len(sequence), num_entries), dtype=np.int32)
for aa_index, aa_type in enumerate(sequence):
if map_unknown_to_x:
if aa_type.isalpha() and aa_type.isupper():
aa_id = mapping.get(aa_type, mapping['X'])
else:
raise ValueError(f'Invalid character in the sequence: {aa_type}')
else:
aa_id = mapping[aa_type]
one_hot_arr[aa_index, aa_id] = 1
return one_hot_arr
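# Example use of sequence_to_onehot: 'B' is not a standard restype, so with
# map_unknown_to_x=True (and a mapping that contains 'X') it is encoded as
# 'X'. The result below has shape (4, 21).
def _example_onehot():
  return sequence_to_onehot('ACDB', restype_order_with_x, map_unknown_to_x=True)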
restype_1to3 = {
'A': 'ALA',
'R': 'ARG',
'N': 'ASN',
'D': 'ASP',
'C': 'CYS',
'Q': 'GLN',
'E': 'GLU',
'G': 'GLY',
'H': 'HIS',
'I': 'ILE',
'L': 'LEU',
'K': 'LYS',
'M': 'MET',
'F': 'PHE',
'P': 'PRO',
'S': 'SER',
'T': 'THR',
'W': 'TRP',
'Y': 'TYR',
'V': 'VAL',
}
# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple
# 1-to-1 mapping of 3 letter names to one letter names. The latter contains
# many more, and less common, three letter names as keys and maps many of these
# to the same one letter name (including 'X' and 'U' which we don't use here).
restype_3to1 = {v: k for k, v in restype_1to3.items()}
# Define a restype name for all unknown residues.
unk_restype = 'UNK'
resnames = [restype_1to3[r] for r in restypes] + [unk_restype]
resname_to_idx = {resname: i for i, resname in enumerate(resnames)}
# The mapping here uses hhblits convention, so that B is mapped to D, J and O
# are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the
# remaining 20 amino acids are kept in alphabetical order.
# There are 2 non-amino acid codes, X (representing any amino acid) and
# "-" representing a missing amino acid in an alignment. The id for these
# codes is put at the end (20 and 21) so that they can easily be ignored if
# desired.
HHBLITS_AA_TO_ID = {
'A': 0,
'B': 2,
'C': 1,
'D': 2,
'E': 3,
'F': 4,
'G': 5,
'H': 6,
'I': 7,
'J': 20,
'K': 8,
'L': 9,
'M': 10,
'N': 11,
'O': 20,
'P': 12,
'Q': 13,
'R': 14,
'S': 15,
'T': 16,
'U': 1,
'V': 17,
'W': 18,
'X': 20,
'Y': 19,
'Z': 3,
'-': 21,
}
# Partial inversion of HHBLITS_AA_TO_ID.
ID_TO_HHBLITS_AA = {
0: 'A',
1: 'C', # Also U.
2: 'D', # Also B.
3: 'E', # Also Z.
4: 'F',
5: 'G',
6: 'H',
7: 'I',
8: 'K',
9: 'L',
10: 'M',
11: 'N',
12: 'P',
13: 'Q',
14: 'R',
15: 'S',
16: 'T',
17: 'V',
18: 'W',
19: 'Y',
20: 'X', # Includes J and O.
21: '-',
}
restypes_with_x_and_gap = restypes + ['X', '-']
MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple(
restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[i])
for i in range(len(restypes_with_x_and_gap)))
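# Sketch of remapping an hhblits-encoded sequence to the ordering used here;
# np.take performs the same lookup as indexing with the tuple above.
def _example_hhblits_to_our_aatype(hhblits_aatype: np.ndarray) -> np.ndarray:
  return np.take(np.array(MAP_HHBLITS_AATYPE_TO_OUR_AATYPE), hhblits_aatype)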
def _make_standard_atom_mask() -> np.ndarray:
"""Returns [num_res_types, num_atom_types] mask array."""
# +1 to account for unknown (all 0s).
mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32)
for restype, restype_letter in enumerate(restypes):
restype_name = restype_1to3[restype_letter]
atom_names = residue_atoms[restype_name]
for atom_name in atom_names:
atom_type = atom_order[atom_name]
mask[restype, atom_type] = 1
return mask
STANDARD_ATOM_MASK = _make_standard_atom_mask()
# A one hot representation for the first and second atoms defining the axis
# of rotation for each chi-angle in each residue.
def chi_angle_atom(atom_index: int) -> np.ndarray:
"""Define chi-angle rigid groups via one-hot representations."""
chi_angles_index = {}
one_hots = []
for k, v in chi_angles_atoms.items():
indices = [atom_types.index(s[atom_index]) for s in v]
indices.extend([-1]*(4-len(indices)))
chi_angles_index[k] = indices
for r in restypes:
res3 = restype_1to3[r]
one_hot = np.eye(atom_type_num)[chi_angles_index[res3]]
one_hots.append(one_hot)
one_hots.append(np.zeros([4, atom_type_num])) # Add zeros for residue `X`.
one_hot = np.stack(one_hots, axis=0)
one_hot = np.transpose(one_hot, [0, 2, 1])
return one_hot
chi_atom_1_one_hot = chi_angle_atom(1)
chi_atom_2_one_hot = chi_angle_atom(2)
# An array like chi_angles_atoms but using indices rather than names.
chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes]
chi_angles_atom_indices = tree.map_structure(
lambda atom_name: atom_order[atom_name], chi_angles_atom_indices)
chi_angles_atom_indices = np.array([
chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms)))
for chi_atoms in chi_angles_atom_indices])
# Mapping from (res_name, atom_name) pairs to the atom's chi group index
# and atom index within that group.
chi_groups_for_atom = collections.defaultdict(list)
for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items():
for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
for atom_i, atom in enumerate(chi_group):
chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i))
chi_groups_for_atom = dict(chi_groups_for_atom)
def _make_rigid_transformation_4x4(ex, ey, translation):
"""Create a rigid 4x4 transformation matrix from two axes and transl."""
# Normalize ex.
ex_normalized = ex / np.linalg.norm(ex)
# make ey perpendicular to ex
ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
ey_normalized /= np.linalg.norm(ey_normalized)
# compute ez as cross product
eznorm = np.cross(ex_normalized, ey_normalized)
m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose()
m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0)
return m
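# Sanity sketch: the Gram-Schmidt construction above yields an orthonormal
# rotation block, so the 3x3 part times its transpose is the identity.
def _example_check_rigid_transform():
  m = _make_rigid_transformation_4x4(
      ex=np.array([1., 1., 0.]),
      ey=np.array([0., 1., 0.]),
      translation=np.array([1., 2., 3.]))
  rot = m[:3, :3]
  return np.allclose(rot @ rot.T, np.eye(3))  # True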
# create an array with (restype, atomtype) --> rigid_group_idx
# and an array with (restype, atomtype, coord) for the atom positions
# and compute affine transformation matrices (4,4) from one rigid group to the
# previous group
restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int)
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32)
restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int)
restype_atom14_mask = np.zeros([21, 14], dtype=np.float32)
restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32)
restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32)
def _make_rigid_group_constants():
"""Fill the arrays above."""
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
for atomname, group_idx, atom_position in rigid_group_atom_positions[
resname]:
atomtype = atom_order[atomname]
restype_atom37_to_rigid_group[restype, atomtype] = group_idx
restype_atom37_mask[restype, atomtype] = 1
restype_atom37_rigid_group_positions[restype, atomtype, :] = atom_position
atom14idx = restype_name_to_atom14_names[resname].index(atomname)
restype_atom14_to_rigid_group[restype, atom14idx] = group_idx
restype_atom14_mask[restype, atom14idx] = 1
restype_atom14_rigid_group_positions[restype,
atom14idx, :] = atom_position
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
atom_positions = {name: np.array(pos) for name, _, pos
in rigid_group_atom_positions[resname]}
# backbone to backbone is the identity transform
restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4)
# pre-omega-frame to backbone (currently dummy identity matrix)
restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4)
# phi-frame to backbone
mat = _make_rigid_transformation_4x4(
ex=atom_positions['N'] - atom_positions['CA'],
ey=np.array([1., 0., 0.]),
translation=atom_positions['N'])
restype_rigid_group_default_frame[restype, 2, :, :] = mat
# psi-frame to backbone
mat = _make_rigid_transformation_4x4(
ex=atom_positions['C'] - atom_positions['CA'],
ey=atom_positions['CA'] - atom_positions['N'],
translation=atom_positions['C'])
restype_rigid_group_default_frame[restype, 3, :, :] = mat
# chi1-frame to backbone
if chi_angles_mask[restype][0]:
base_atom_names = chi_angles_atoms[resname][0]
base_atom_positions = [atom_positions[name] for name in base_atom_names]
mat = _make_rigid_transformation_4x4(
ex=base_atom_positions[2] - base_atom_positions[1],
ey=base_atom_positions[0] - base_atom_positions[1],
translation=base_atom_positions[2])
restype_rigid_group_default_frame[restype, 4, :, :] = mat
# chi2-frame to chi1-frame
# chi3-frame to chi2-frame
# chi4-frame to chi3-frame
# luckily all rotation axes for the next frame start at (0,0,0) of the
# previous frame
for chi_idx in range(1, 4):
if chi_angles_mask[restype][chi_idx]:
axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2]
axis_end_atom_position = atom_positions[axis_end_atom_name]
mat = _make_rigid_transformation_4x4(
ex=axis_end_atom_position,
ey=np.array([-1., 0., 0.]),
translation=axis_end_atom_position)
restype_rigid_group_default_frame[restype, 4 + chi_idx, :, :] = mat
_make_rigid_group_constants()
def make_atom14_dists_bounds(overlap_tolerance=1.5,
bond_length_tolerance_factor=15):
"""compute upper and lower bounds for bonds to assess violations."""
restype_atom14_bond_lower_bound = np.zeros([21, 14, 14], np.float32)
restype_atom14_bond_upper_bound = np.zeros([21, 14, 14], np.float32)
restype_atom14_bond_stddev = np.zeros([21, 14, 14], np.float32)
residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
atom_list = restype_name_to_atom14_names[resname]
# create lower and upper bounds for clashes
for atom1_idx, atom1_name in enumerate(atom_list):
if not atom1_name:
continue
atom1_radius = van_der_waals_radius[atom1_name[0]]
for atom2_idx, atom2_name in enumerate(atom_list):
if (not atom2_name) or atom1_idx == atom2_idx:
continue
atom2_radius = van_der_waals_radius[atom2_name[0]]
lower = atom1_radius + atom2_radius - overlap_tolerance
upper = 1e10
restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper
# overwrite lower and upper bounds for bonds and angles
for b in residue_bonds[resname] + residue_virtual_bonds[resname]:
atom1_idx = atom_list.index(b.atom1_name)
atom2_idx = atom_list.index(b.atom2_name)
lower = b.length - bond_length_tolerance_factor * b.stddev
upper = b.length + bond_length_tolerance_factor * b.stddev
restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper
restype_atom14_bond_stddev[restype, atom1_idx, atom2_idx] = b.stddev
restype_atom14_bond_stddev[restype, atom2_idx, atom1_idx] = b.stddev
return {'lower_bound': restype_atom14_bond_lower_bound, # shape (21,14,14)
'upper_bound': restype_atom14_bond_upper_bound, # shape (21,14,14)
'stddev': restype_atom14_bond_stddev, # shape (21,14,14)
}
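# Usage sketch: the returned arrays are indexed as (restype, atom14, atom14).
# Note this calls load_stereo_chemical_props, so it needs
# stereo_chemical_props.txt on disk at the path hard-coded above.
def _example_dists_bounds():
  bounds = make_atom14_dists_bounds()
  ala = resname_to_idx['ALA']
  n_idx = restype_name_to_atom14_names['ALA'].index('N')
  ca_idx = restype_name_to_atom14_names['ALA'].index('CA')
  return (bounds['lower_bound'][ala, n_idx, ca_idx],
          bounds['upper_bound'][ala, n_idx, ca_idx])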
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
# According to DeepMind, this prevents rotation compositions from being
# computed on low-precision tensor cores. I'm personally skeptical that it
# makes a difference, but to get as close as possible to their outputs, I'm
# adding it.
def rot_matmul(a, b):
    """Multiplies two stacks of 3x3 rotation matrices without torch.matmul."""
    e = ...  # Ellipsis: stands in for any leading batch dimensions below.
row_1 = torch.stack([
a[e,0,0]*b[e,0,0] + a[e,0,1]*b[e,1,0] + a[e,0,2]*b[e,2,0],
a[e,0,0]*b[e,0,1] + a[e,0,1]*b[e,1,1] + a[e,0,2]*b[e,2,1],
a[e,0,0]*b[e,0,2] + a[e,0,1]*b[e,1,2] + a[e,0,2]*b[e,2,2],
], dim=-1)
row_2 = torch.stack([
a[e,1,0]*b[e,0,0] + a[e,1,1]*b[e,1,0] + a[e,1,2]*b[e,2,0],
a[e,1,0]*b[e,0,1] + a[e,1,1]*b[e,1,1] + a[e,1,2]*b[e,2,1],
a[e,1,0]*b[e,0,2] + a[e,1,1]*b[e,1,2] + a[e,1,2]*b[e,2,2],
], dim=-1)
row_3 = torch.stack([
a[e,2,0]*b[e,0,0] + a[e,2,1]*b[e,1,0] + a[e,2,2]*b[e,2,0],
a[e,2,0]*b[e,0,1] + a[e,2,1]*b[e,1,1] + a[e,2,2]*b[e,2,1],
a[e,2,0]*b[e,0,2] + a[e,2,1]*b[e,1,2] + a[e,2,2]*b[e,2,2],
], dim=-1)
return torch.stack([row_1, row_2, row_3], dim=-2)
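# Sanity sketch: the unrolled product above computes the same thing as a
# plain batched matmul; the unrolling only changes how it is executed.
def _example_check_rot_matmul():
    a = torch.rand(5, 3, 3)
    b = torch.rand(5, 3, 3)
    return torch.allclose(rot_matmul(a, b), a @ b, atol=1e-6)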
def rot_vec_mul(r, t):
    """Applies a stack of 3x3 rotations r to a stack of 3-vectors t."""
x = t[..., 0]
y = t[..., 1]
z = t[..., 2]
return torch.stack([
r[..., 0, 0]*x + r[..., 0, 1]*y + r[..., 0, 2]*z,
r[..., 1, 0]*x + r[..., 1, 1]*y + r[..., 1, 2]*z,
r[..., 2, 0]*x + r[..., 2, 1]*y + r[..., 2, 2]*z,
], dim=-1)
class T:
def __init__(self, rots, trans):
self.rots = rots
self.trans = trans
if(self.rots is None and self.trans is None):
raise ValueError("Only one of rots and trans can be None")
elif(self.rots is None):
self.rots = T.identity_rot(
self.trans.shape[:-1], self.trans.dtype, self.trans.device
)
elif(self.trans is None):
self.trans = T.identity_trans(
self.rots.shape[:-2], self.rots.dtype, self.rots.device
)
if(self.rots.shape[-2:] != (3, 3) or
self.trans.shape[-1] != 3 or
self.rots.shape[:-2] != self.trans.shape[:-1]):
raise ValueError("Incorrectly shaped input")
def __getitem__(self, index):
if(type(index) != tuple):
index = (index,)
return T(
self.rots[index + (slice(None), slice(None))],
self.trans[index + (slice(None),)]
)
def __eq__(self, obj):
return (
torch.all(self.rots == obj.rots) and
torch.all(self.trans == obj.trans)
)
def __mul__(self, right):
rots = self.rots * right[..., None, None]
trans = self.trans * right[..., None]
return T(rots, trans)
def __rmul__(self, left):
return self.__mul__(left)
@property
def shape(self):
s = self.rots.shape[:-2]
return s if len(s) > 0 else torch.Size([1])
def get_trans(self):
return self.trans
def get_rots(self):
return self.rots
def compose(self, t):
rot_1, trn_1 = self.rots, self.trans
rot_2, trn_2 = t.rots, t.trans
rot = rot_matmul(rot_1, rot_2)
trn = rot_vec_mul(rot_1, trn_2) + trn_1
return T(rot, trn)
def apply(self, pts):
r, t = self.rots, self.trans
rotated = rot_vec_mul(r, pts)
return rotated + t
def invert_apply(self, pts):
r, t = self.rots, self.trans
pts = pts - t
return rot_vec_mul(r.transpose(-1, -2), pts)
def invert(self):
rot_inv = self.rots.transpose(-1, -2)
trn_inv = rot_vec_mul(rot_inv, self.trans)
return T(rot_inv, -1 * trn_inv)
def unsqueeze(self, dim):
if(dim >= len(self.shape)):
raise ValueError("Invalid dimension")
rots = self.rots.unsqueeze(dim if dim >= 0 else dim - 2)
trans = self.trans.unsqueeze(dim if dim >= 0 else dim - 1)
return T(rots, trans)
@staticmethod
def identity_rot(shape, dtype, device, requires_grad=False):
rots = torch.eye(
3, dtype=dtype, device=device, requires_grad=requires_grad
)
rots = rots.view(*((1,) * len(shape)), 3, 3)
rots = rots.expand(*shape, -1, -1)
return rots
@staticmethod
def identity_trans(shape, dtype, device, requires_grad=False):
trans = torch.zeros(
(*shape, 3),
dtype=dtype,
device=device,
requires_grad=requires_grad
)
return trans
@staticmethod
def identity(shape, dtype, device, requires_grad=False):
return T(
T.identity_rot(shape, dtype, device, requires_grad),
T.identity_trans(shape, dtype, device, requires_grad),
)
@staticmethod
def from_4x4(t):
rots = t[..., :3, :3]
trans = t[..., :3, 3]
return T(rots, trans)
def to_4x4(self):
tensor = torch.zeros((*self.shape, 4, 4), device=self.rots.device)
tensor[..., :3, :3] = self.rots
tensor[..., :3, 3] = self.trans
tensor[..., 3, 3] = 1
return tensor
@staticmethod
def from_tensor(t):
return T.from_4x4(t)
@staticmethod
def from_3_points(p_neg_x_axis, origin, p_xy_plane, eps=1e-8):
v1 = origin - p_neg_x_axis
v2 = p_xy_plane - origin
e1 = v1 / torch.sqrt(torch.sum(v1 ** 2, dim=-1) + eps)[..., None]
u2 = v2 - e1 * (torch.einsum('...i,...i->...', v2, e1)[..., None])
e2 = u2 / torch.sqrt(torch.sum(u2 ** 2, dim=-1) + eps)[..., None]
e3 = torch.cross(e1, e2, dim=-1)
rots = torch.cat(
(
e1.unsqueeze(-1),
e2.unsqueeze(-1),
e3.unsqueeze(-1),
), dim=-1,
)
return T(rots, origin)
@staticmethod
def concat(ts, dim):
rots = torch.cat(
[t.rots for t in ts],
dim=dim if dim >= 0 else dim - 2
)
trans = torch.cat(
[t.trans for t in ts],
dim=dim if dim >= 0 else dim - 1
)
return T(rots, trans)
def map_tensor_fn(self, fn):
""" Apply a function that takes a tensor as its only argument to the
rotations and translations, treating the final two/one
dimension(s), respectively, as batch dimensions.
E.g.: Given t, an instance of T of shape [N, M], this function can
be used to sum out the second dimension thereof as follows:
t = t.map_tensor_fn(lambda x: torch.sum(x, dim=-1))
The resulting object has rotations of shape [N, 3, 3] and
translations of shape [N, 3]
"""
rots = self.rots.view(*self.rots.shape[:-2], 9)
rots = torch.stack(list(map(fn, torch.unbind(rots, -1))), dim=-1)
rots = rots.view(*rots.shape[:-1], 3, 3)
trans = torch.stack(list(map(fn, torch.unbind(self.trans, -1))), dim=-1)
return T(rots, trans)
def stop_rot_gradient(self):
return T(self.rots.detach(), self.trans)
def scale_translation(self, factor):
return T(self.rots, self.trans * factor)
@staticmethod
def make_transform_from_reference(n_xyz, ca_xyz, c_xyz, eps=1e-20):
translation = -1 * c_xyz
n_xyz = n_xyz + translation
c_xyz = c_xyz + translation
c_x, c_y, c_z = [c_xyz[...,i] for i in range(3)]
norm = torch.sqrt(eps + c_x**2 + c_y**2)
sin_c1 = -c_y / norm
cos_c1 = c_x / norm
zeros = sin_c1.new_zeros(sin_c1.shape)
ones = sin_c1.new_ones(sin_c1.shape)
c1_rots = sin_c1.new_zeros((*sin_c1.shape, 3, 3))
c1_rots[..., 0, 0] = cos_c1
c1_rots[..., 0, 1] = -1 * sin_c1
c1_rots[..., 1, 0] = sin_c1
c1_rots[..., 1, 1] = cos_c1
c1_rots[..., 2, 2] = 1
norm = torch.sqrt(eps + c_x**2 + c_y**2 + c_z**2)
sin_c2 = c_z / norm
cos_c2 = torch.sqrt(c_x**2 + c_y**2) / norm
c2_rots = sin_c2.new_zeros((*sin_c2.shape, 3, 3))
c2_rots[..., 0, 0] = cos_c2
c2_rots[..., 0, 2] = sin_c2
c2_rots[..., 1, 1] = 1
        c2_rots[..., 2, 0] = -1 * sin_c2
        c2_rots[..., 2, 2] = cos_c2
        c_rots = rot_matmul(c2_rots, c1_rots)
n_xyz = rot_vec_mul(c_rots, n_xyz)
_, n_y, n_z = [n_xyz[..., i] for i in range(3)]
norm = torch.sqrt(eps + n_y**2 + n_z**2)
sin_n = -n_z / norm
cos_n = n_y / norm
n_rots = sin_c2.new_zeros((*sin_c2.shape, 3, 3))
n_rots[..., 0, 0] = 1
n_rots[..., 1, 1] = cos_n
n_rots[..., 1, 2] = -1 * sin_n
n_rots[..., 2, 1] = sin_n
n_rots[..., 2, 2] = cos_n
rots = rot_matmul(n_rots, c_rots)
rots = rots.transpose(-1, -2)
translation = -1 * translation
return T(rots, translation)
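# Usage sketch for T: build a frame from three points (the coordinates below
# are made up) and check that composing a transform with its inverse recovers
# a zero translation.
def _example_compose_invert():
    t = T.from_3_points(
        p_neg_x_axis=torch.tensor([[0., 0., 0.]]),
        origin=torch.tensor([[1., 1., 1.]]),
        p_xy_plane=torch.tensor([[2., 1., 0.]]),
    )
    ident = t.compose(t.invert())
    return torch.allclose(ident.get_trans(), torch.zeros(1, 3), atol=1e-5)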
_quat_elements = ['a', 'b', 'c', 'd']
_qtr_keys = [l1 + l2 for l1 in _quat_elements for l2 in _quat_elements]
_qtr_ind_dict = {key:ind for ind, key in enumerate(_qtr_keys)}
def _to_mat(pairs):
mat = torch.zeros((4, 4))
for pair in pairs:
key, value = pair
ind = _qtr_ind_dict[key]
mat[ind // 4][ind % 4] = value
return mat
_qtr_mat = torch.zeros((4, 4, 3, 3))
_qtr_mat[..., 0, 0] = _to_mat([('aa', 1), ('bb', 1), ('cc', -1), ('dd', -1)])
_qtr_mat[..., 0, 1] = _to_mat([('bc', 2), ('ad', -2)])
_qtr_mat[..., 0, 2] = _to_mat([('bd', 2), ('ac', 2)])
_qtr_mat[..., 1, 0] = _to_mat([('bc', 2), ('ad', 2)])
_qtr_mat[..., 1, 1] = _to_mat([('aa', 1), ('bb', -1), ('cc', 1), ('dd', -1)])
_qtr_mat[..., 1, 2] = _to_mat([('cd', 2), ('ab', -2)])
_qtr_mat[..., 2, 0] = _to_mat([('bd', 2), ('ac', -2)])
_qtr_mat[..., 2, 1] = _to_mat([('cd', 2), ('ab', 2)])
_qtr_mat[..., 2, 2] = _to_mat([('aa', 1), ('bb', -1), ('cc', -1), ('dd', 1)])
def quat_to_rot(
quat # [*, 4]
):
    """Converts a scalar-first quaternion [*, 4] to a rotation matrix [*, 3, 3]."""
# [*, 4, 4]
quat = quat[..., None] * quat[..., None, :]
# [*, 4, 4, 3, 3]
shaped_qtr_mat = _qtr_mat.view((1,) * len(quat.shape[:-2]) + (4, 4, 3, 3))
quat = quat[..., None, None] * shaped_qtr_mat.to(quat.device)
# [*, 3, 3]
return torch.sum(quat, dim=(-3, -4))
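# Sanity sketch: under the coefficient table above, the identity quaternion
# (1, 0, 0, 0) maps to the identity rotation.
def _example_quat_identity():
    quat = torch.tensor([1., 0., 0., 0.])
    return torch.allclose(quat_to_rot(quat), torch.eye(3))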
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import deepspeed
import torch
from typing import Any, Tuple, List, Callable
BLOCK_ARG = Any
BLOCK_ARGS = Tuple[BLOCK_ARG, ...]
def checkpoint_blocks(
blocks: List[Callable[BLOCK_ARGS, BLOCK_ARGS]],
args: BLOCK_ARGS,
blocks_per_ckpt: int,
) -> BLOCK_ARGS:
"""
Chunk a list of blocks and run each chunk with activation
checkpointing. We define a "block" as a callable whose only inputs are
the outputs of the previous block.
This function assumes that deepspeed has already been initialized.
Implements Subsection 1.11.8
Args:
blocks:
List of blocks
args:
Tuple of arguments for the first block.
blocks_per_ckpt:
Size of each chunk. A higher value corresponds to higher memory
consumption but fewer checkpoints. If None, no checkpointing is
performed.
Returns:
The output of the final block
"""
def wrap(a):
return (a,) if type(a) is not tuple else a
def exec(b, a):
for block in b:
a = wrap(block(*a))
return a
def chunker(s, e):
def exec_sliced(a):
return exec(blocks[s:e], a)
return exec_sliced
# Avoids mishaps when the blocks take just one argument
args = wrap(args)
if(blocks_per_ckpt is None):
return exec(blocks, args)
elif(blocks_per_ckpt < 1 or blocks_per_ckpt > len(blocks)):
raise ValueError("blocks_per_ckpt must be between 1 and len(blocks)")
for s in range(0, len(blocks), blocks_per_ckpt):
e = s + blocks_per_ckpt
args = deepspeed.checkpointing.checkpoint(chunker(s, e), args)
return args
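# Usage sketch with blocks_per_ckpt=None, which bypasses deepspeed entirely
# and just runs the blocks in sequence (so no deepspeed init is needed here).
def _example_checkpoint_blocks():
    blocks = [lambda x: x + 1, lambda x: x * 2]
    out, = checkpoint_blocks(blocks, (torch.tensor(1.),), blocks_per_ckpt=None)
    return out  # tensor(4.)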
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import torch
import torch.nn as nn
from typing import Dict
import alphafold.np.residue_constants as residue_constants
from alphafold.utils.affine_utils import T
from alphafold.utils.tensor_utils import (
batched_gather,
one_hot,
)
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
is_gly = (aatype == residue_constants.restype_order['G'])
ca_idx = residue_constants.atom_order['CA']
cb_idx = residue_constants.atom_order['CB']
pseudo_beta = torch.where(
is_gly[..., None].expand(*((-1,) * len(is_gly.shape)), 3),
all_atom_positions[..., ca_idx, :],
all_atom_positions[..., cb_idx, :]
)
if(all_atom_masks is not None):
pseudo_beta_mask = torch.where(
is_gly,
all_atom_masks[..., ca_idx],
all_atom_masks[..., cb_idx],
)
return pseudo_beta, pseudo_beta_mask
else:
return pseudo_beta
def get_chi_atom_indices():
"""Returns atom indices needed to compute chi angles for all residue types.
Returns:
    A nested list of shape [residue_types=21, chis=4, atoms=4]. The residue
    types are in the order specified in residue_constants.restypes + the
    unknown residue type at the end. For chi angles that are not defined on
    the residue, the corresponding atom indices default to 0.
"""
chi_atom_indices = []
for residue_name in residue_constants.restypes:
residue_name = residue_constants.restype_1to3[residue_name]
residue_chi_angles = residue_constants.chi_angles_atoms[residue_name]
atom_indices = []
for chi_angle in residue_chi_angles:
atom_indices.append(
[residue_constants.atom_order[atom] for atom in chi_angle])
for _ in range(4 - len(atom_indices)):
atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA.
chi_atom_indices.append(atom_indices)
chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue.
return chi_atom_indices
def compute_residx(batch):
    """Adds atom14<->atom37 index maps and existence masks to batch in place."""
aatype = batch["aatype"]
    restype_atom14_to_atom37 = []  # mapping (restype, atom14) --> atom37
    restype_atom37_to_atom14 = []  # mapping (restype, atom37) --> atom14
restype_atom14_mask = []
for rt in residue_constants.restypes:
atom_names = residue_constants.restype_name_to_atom14_names[
residue_constants.restype_1to3[rt]]
restype_atom14_to_atom37.append([
(residue_constants.atom_order[name] if name else 0)
for name in atom_names
])
atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
restype_atom37_to_atom14.append([
(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
for name in residue_constants.atom_types
])
restype_atom14_mask.append(
[(1. if name else 0.) for name in atom_names]
)
# Add dummy mapping for restype 'UNK'
restype_atom14_to_atom37.append([0] * 14)
restype_atom37_to_atom14.append([0] * 37)
restype_atom14_mask.append([0.] * 14)
restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32)
restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32)
restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)
residx_atom14_to_atom37 = np.take_along_axis(
restype_atom14_to_atom37,
aatype[..., None],
axis=0
)
residx_atom14_mask = np.take_along_axis(
restype_atom14_mask,
aatype[..., None],
axis=0,
)
batch['atom14_atom_exists'] = residx_atom14_mask
    # The gather above is numpy (this assumes aatype is a numpy array here),
    # so wrap the result before calling the torch .long() method.
    batch['residx_atom14_to_atom37'] = torch.from_numpy(
        residx_atom14_to_atom37).long()
# create the gather indices for mapping back
residx_atom37_to_atom14 = np.take_along_axis(
restype_atom37_to_atom14,
aatype[..., None],
axis=0,
)
    batch['residx_atom37_to_atom14'] = torch.from_numpy(
        residx_atom37_to_atom14).long()
# create the corresponding mask
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
for restype, restype_letter in enumerate(residue_constants.restypes):
restype_name = residue_constants.restype_1to3[restype_letter]
atom_names = residue_constants.residue_atoms[restype_name]
for atom_name in atom_names:
atom_type = residue_constants.atom_order[atom_name]
restype_atom37_mask[restype, atom_type] = 1
residx_atom37_mask = np.take_along_axis(
restype_atom37_mask,
aatype[..., None],
axis=0,
)
batch['atom37_atom_exists'] = residx_atom37_mask
def atom14_to_atom37(atom14, batch):
atom37_data = batched_gather(
atom14,
batch["residx_atom37_to_atom14"],
dim=-2,
no_batch_dims=len(atom14.shape[:-2]),
)
atom37_data *= batch["atom37_atom_exists"][..., None]
return atom37_data
def atom37_to_torsion_angles(
aatype: torch.Tensor,
all_atom_pos: torch.Tensor,
all_atom_mask: torch.Tensor,
eps: float = 1e-8,
) -> Dict[str, torch.Tensor]:
"""
Args:
aatype:
[*, N_res] residue indices
all_atom_pos:
[*, N_res, 37, 3] atom positions (in atom37
format)
all_atom_mask:
[*, N_res, 37] atom position mask
Returns:
Dictionary of the following features:
"torsion_angles_sin_cos" ([*, N_res, 7, 2])
Torsion angles
"alt_torsion_angles_sin_cos" ([*, N_res, 7, 2])
Alternate torsion angles (accounting for 180-degree symmetry)
"torsion_angles_mask" ([*, N_res, 7])
Torsion angles mask
"""
aatype = torch.clamp(aatype, max=20)
pad = all_atom_pos.new_zeros([*all_atom_pos.shape[:-3], 1, 37, 3])
prev_all_atom_pos = torch.cat([pad, all_atom_pos[..., :-1, :, :]], dim=-3)
pad = all_atom_mask.new_zeros([*all_atom_mask.shape[:-2], 1, 37])
prev_all_atom_mask = torch.cat([pad, all_atom_mask[..., :-1, :]], dim=-2)
pre_omega_atom_pos = torch.cat(
[
prev_all_atom_pos[..., 1:3, :],
all_atom_pos[..., :2, :]
], dim=-2
)
phi_atom_pos = torch.cat(
[
prev_all_atom_pos[..., 2:3, :],
all_atom_pos[..., :3, :]
], dim=-2
)
psi_atom_pos = torch.cat(
[
all_atom_pos[..., :3, :],
all_atom_pos[..., 4:5, :]
], dim=-2
)
pre_omega_mask = (
torch.prod(prev_all_atom_mask[..., 1:3], dim=-1) *
torch.prod(all_atom_mask[..., :2], dim=-1)
)
phi_mask = (
prev_all_atom_mask[..., 2] *
torch.prod(all_atom_mask[..., :3], dim=-1)
)
psi_mask = (
torch.prod(all_atom_mask[..., :3], dim=-1) *
all_atom_mask[..., 4]
)
chi_atom_indices = torch.as_tensor(
get_chi_atom_indices(), device=aatype.device
)
atom_indices = chi_atom_indices[..., aatype, :, :]
chis_atom_pos = batched_gather(
all_atom_pos, atom_indices, -2, len(atom_indices.shape[:-2])
)
chi_angles_mask = list(residue_constants.chi_angles_mask)
chi_angles_mask.append([0., 0., 0., 0.])
chi_angles_mask = all_atom_pos.new_tensor(chi_angles_mask)
chis_mask = chi_angles_mask[aatype, :]
chi_angle_atoms_mask = batched_gather(
all_atom_mask,
atom_indices,
dim=-1,
no_batch_dims=len(atom_indices.shape[:-2])
)
chi_angle_atoms_mask = torch.prod(chi_angle_atoms_mask, dim=-1)
chis_mask = chis_mask * chi_angle_atoms_mask
torsions_atom_pos = torch.cat(
[
pre_omega_atom_pos[..., None, :, :],
phi_atom_pos[..., None, :, :],
psi_atom_pos[..., None, :, :],
chis_atom_pos,
], dim=-3
)
torsion_angles_mask = torch.cat(
[
pre_omega_mask[..., None],
phi_mask[..., None],
psi_mask[..., None],
chis_mask,
], dim=-1
)
torsion_frames = T.from_3_points(
torsions_atom_pos[..., 1, :],
torsions_atom_pos[..., 2, :],
torsions_atom_pos[..., 0, :],
)
fourth_atom_rel_pos = torsion_frames.invert().apply(
torsions_atom_pos[..., 3, :]
)
torsion_angles_sin_cos = torch.stack(
[fourth_atom_rel_pos[..., 2], fourth_atom_rel_pos[..., 1]], dim=-1)
denom = torch.sqrt(
torch.sum(
            torch.square(torsion_angles_sin_cos), dim=-1, keepdim=True
) + eps
)
torsion_angles_sin_cos /= denom
torsion_angles_sin_cos *= torch.tensor(
[1., 1., -1., 1., 1., 1., 1.], device=aatype.device,
)[((None,) * len(torsion_angles_sin_cos.shape[:-2])) + (slice(None), None)]
chi_is_ambiguous = torsion_angles_sin_cos.new_tensor(
residue_constants.chi_pi_periodic,
)[aatype, ...]
mirror_torsion_angles = torch.cat(
[
            # float ones so torch.cat dtypes match the chi term below
            torsion_angles_sin_cos.new_ones(*aatype.shape, 3),
1. - 2. * chi_is_ambiguous
], dim=-1
)
alt_torsion_angles_sin_cos = (
torsion_angles_sin_cos * mirror_torsion_angles[..., None]
)
return {
"torsion_angles_sin_cos": torsion_angles_sin_cos,
"alt_torsion_angles_sin_cos": alt_torsion_angles_sin_cos,
"torsion_angles_mask": torsion_angles_mask,
}
def atom37_to_frames(
aatype: torch.Tensor,
all_atom_positions: torch.Tensor,
all_atom_mask: torch.Tensor,
) -> Dict[str, torch.Tensor]:
batch_dims = len(aatype.shape[:-1])
restype_rigidgroup_base_atom_names = np.full([21, 8, 3], '', dtype=object)
restype_rigidgroup_base_atom_names[:, 0, :] = ['C', 'CA', 'N']
restype_rigidgroup_base_atom_names[:, 3, :] = ['CA', 'C', 'O']
for restype, restype_letter in enumerate(residue_constants.restypes):
resname = residue_constants.restype_1to3[restype_letter]
for chi_idx in range(4):
if(residue_constants.chi_angles_mask[restype][chi_idx]):
                names = residue_constants.chi_angles_atoms[resname][chi_idx]
                restype_rigidgroup_base_atom_names[
                    restype, chi_idx + 4, :] = names[1:]
restype_rigidgroup_mask = torch.zeros(
(*aatype.shape[:-1], 21, 8),
dtype=torch.float,
device=aatype.device,
requires_grad=False
)
    restype_rigidgroup_mask[..., 0] = 1
    restype_rigidgroup_mask[..., 3] = 1
    restype_rigidgroup_mask[..., :20, 4:] = all_atom_mask.new_tensor(
        residue_constants.chi_angles_mask
    )
lookuptable = residue_constants.atom_order.copy()
lookuptable[''] = 0
lookup = np.vectorize(lambda x: lookuptable[x])
restype_rigidgroup_base_atom37_idx = lookup(
restype_rigidgroup_base_atom_names,
)
restype_rigidgroup_base_atom37_idx = aatype.new_tensor(
restype_rigidgroup_base_atom37_idx,
)
restype_rigidgroup_base_atom37_idx = (
restype_rigidgroup_base_atom37_idx.view(
*((1,) * batch_dims),
*restype_rigidgroup_base_atom37_idx.shape
)
)
    residx_rigidgroup_base_atom37_idx = batched_gather(
        restype_rigidgroup_base_atom37_idx,
        aatype,
        dim=-3,
        no_batch_dims=batch_dims,
    )
base_atom_pos = batched_gather(
all_atom_positions,
residx_rigidgroup_base_atom37_idx,
dim=-2,
no_batch_dims=len(all_atom_positions.shape[:-2]),
)
gt_frames = T.from_3_points(
point_on_neg_x_axis=base_atom_pos[..., 0, :],
origin=base_atom_pos[..., 1, :],
point_on_xy_plane=base_atom_pos[..., 2, :],
)
group_exists = batched_gather(
restype_rigidgroup_mask,
aatype,
dim=-2,
no_batch_dims=batch_dims,
)
gt_atoms_exist = batched_gather(
all_atom_mask.float(),
residx_rigidgroup_base_atom37_idx,
dim=-1,
no_batch_dims=len(all_atom_mask.shape[:-1])
)
    gt_exists = torch.min(gt_atoms_exist, dim=-1)[0] * group_exists
    rots = torch.eye(3, device=aatype.device, requires_grad=False)
    rots = rots.view(*((1,) * batch_dims), 1, 3, 3)
    # repeat (not expand) so the in-place writes below hit real memory
    rots = rots.repeat(*((1,) * batch_dims), 8, 1, 1)
    rots[..., 0, 0, 0] = -1
    rots[..., 0, 2, 2] = -1
gt_frames = gt_frames.compose(T(rots, None))
restype_rigidgroup_is_ambiguous = all_atom_mask.new_zeros(
*((1,) * batch_dims), 21, 8
)
restype_rigidgroup_rots = torch.eye(
3, device=aatype.device, requires_grad=False
)
restype_rigidgroup_rots = restype_rigidgroup_rots.view(
*((1,) * batch_dims), 1, 1, 3, 3
)
    # repeat (not expand) so the in-place writes in the loop below are valid
    restype_rigidgroup_rots = restype_rigidgroup_rots.repeat(
        *((1,) * batch_dims), 21, 8, 1, 1
    )
    for resname in residue_constants.residue_atom_renaming_swaps:
        restype = residue_constants.restype_order[
            residue_constants.restype_3to1[resname]
        ]
chi_idx = int(sum(residue_constants.chi_angles_mask[restype]) - 1)
restype_rigidgroup_is_ambiguous[..., restype, chi_idx + 4] = 1
restype_rigidgroup_rots[..., restype, chi_idx + 4, 1, 1] = -1
restype_rigidgroup_rots[..., restype, chi_idx + 4, 2, 2] = -1
residx_rigidgroup_is_ambiguous = batched_gather(
restype_rigidgroup_is_ambiguous,
aatype,
dim=-2,
no_batch_dims=batch_dims,
)
    residx_rigidgroup_ambiguity_rot = batched_gather(
restype_rigidgroup_rots,
aatype,
dim=-4,
no_batch_dims=batch_dims,
)
    alt_gt_frames = gt_frames.compose(T(residx_rigidgroup_ambiguity_rot, None))
# TODO: Verify that I can get away with skipping the flat12 format
gt_frames_tensor = gt_frames.to_tensor()
alt_gt_frames_tensor = alt_gt_frames.to_tensor()
return {
'rigidgroups_gt_frames': gt_frames_tensor,
'rigidgroups_gt_exists': gt_exists,
'rigidgroups_group_exists': group_exists,
'rigidgroups_group_is_ambiguous': residx_rigidgroup_is_ambiguous,
'rigidgroups_alt_gt_frames': alt_gt_frames_tensor,
}
def build_template_angle_feat(angle_feats, template_aatype):
torsion_angles_sin_cos = angle_feats["torsion_angles_sin_cos"]
alt_torsion_angles_sin_cos = angle_feats["alt_torsion_angles_sin_cos"]
torsion_angles_mask = angle_feats["torsion_angles_mask"]
template_angle_feat = torch.cat(
[
nn.functional.one_hot(template_aatype, 22),
torsion_angles_sin_cos.reshape(
*torsion_angles_sin_cos.shape[:-2], 14
),
alt_torsion_angles_sin_cos.reshape(
*alt_torsion_angles_sin_cos.shape[:-2], 14
),
torsion_angles_mask,
],
dim=-1,
)
return template_angle_feat
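# Width check (derived from the code above): 22 (aatype one-hot) +
# 14 (7 torsion angles x sin/cos) + 14 (alternate angles) + 7 (mask) = 57
# channels in template_angle_feat.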
def build_template_pair_feat(batch, min_bin, max_bin, no_bins, eps=1e-6, inf=1e8):
template_mask = batch["template_pseudo_beta_mask"]
template_mask_2d = template_mask[..., None] * template_mask[..., None, :]
# Compute distogram (this seems to differ slightly from Alg. 5)
tpb = batch["template_pseudo_beta"]
dgram = torch.sum(
(tpb[..., None, :] - tpb[..., None, :, :]) ** 2, dim=-1, keepdim=True)
lower = torch.linspace(min_bin, max_bin, no_bins, device=tpb.device) ** 2
    upper = torch.cat([lower[1:], lower.new_tensor([inf])], dim=-1)
dgram = ((dgram > lower) * (dgram < upper)).type(dgram.dtype)
to_concat = [dgram, template_mask_2d[..., None]]
aatype_one_hot = nn.functional.one_hot(
batch["template_aatype"], batch["target_feat"].shape[-1]
)
n_res = batch["template_aatype"].shape[-1]
to_concat.append(
aatype_one_hot[..., None, :, :].expand(
*aatype_one_hot.shape[:-2], n_res, -1, -1
)
)
to_concat.append(
aatype_one_hot[..., None, :].expand(
*aatype_one_hot.shape[:-2], -1, n_res, -1
)
)
n, ca, c = [residue_constants.atom_order[a] for a in ['N', 'CA', 'C']]
#t_aa_pos = batch["template_all_atom_positions"]
#affines = T.make_transform_from_reference(
# n_xyz=t_aa_pos[..., n],
# ca_xyz=t_aa_pos[..., ca],
# c_xyz=t_aa_pos[..., c],
#)
#rots = affines.rots
#trans = affines.trans
#affine_vec = rot_mul_vec(
# rots.transpose(-1, -2),
# trans[..., None, :, :] - trans[..., None, :],
#)
#inverted_dists = torch.rsqrt(eps + torch.sum(inverted_dists**2, dim=-1))
t_aa_masks = batch["template_all_atom_masks"]
template_mask = (
t_aa_masks[..., n] * t_aa_masks[..., ca] * t_aa_masks[..., c]
)
template_mask_2d = template_mask[..., None] * template_mask[..., None, :]
#inverted_dists *= template_mask_2d
#unit_vector = affine_vec * inverted_dists.unsqueeze(-1)
#unit_vector = unit_vector.unsqueeze(-2)
unit_vector = template_mask_2d.new_zeros(*template_mask_2d.shape, 3)
to_concat.append(unit_vector)
to_concat.append(template_mask_2d[..., None])
act = torch.cat(to_concat, dim=-1)
act *= template_mask_2d[..., None]
return act
def build_extra_msa_feat(batch):
msa_1hot = nn.functional.one_hot(batch["extra_msa"], 23)
msa_feat = [
msa_1hot,
batch["extra_has_deletion"].unsqueeze(-1),
batch["extra_deletion_value"].unsqueeze(-1),
]
return torch.cat(msa_feat, dim=-1)
# adapted from model/tf/data_transforms.py
def build_msa_feat(batch):
    """Create and concatenate MSA features."""
    # Whether there is a domain break. Always zero for chains, but keeping
    # for compatibility with domain datasets.
    has_break = batch["between_segment_residues"]
    aatype_1hot = nn.functional.one_hot(batch['aatype'], num_classes=21)
    target_feat = [
        has_break.unsqueeze(-1),
        aatype_1hot,  # Everyone gets the original sequence.
    ]
    msa_1hot = nn.functional.one_hot(batch['msa'], num_classes=23)
    has_deletion = batch["deletion_matrix"]
    deletion_value = torch.atan(batch['deletion_matrix'] / 3.) * (2. / math.pi)
    msa_feat = [
        msa_1hot,
        has_deletion.unsqueeze(-1),
        deletion_value.unsqueeze(-1),
    ]
    if 'cluster_profile' in batch:
        deletion_mean_value = (
            torch.atan(batch['cluster_deletion_mean'] / 3.) * (2. / math.pi)
        )
        msa_feat.extend([
            batch['cluster_profile'],
            deletion_mean_value.unsqueeze(-1),
        ])
    if 'extra_deletion_matrix' in batch:
        # TensorFlow calls left over from the adapted source, ported to torch
        batch['extra_has_deletion'] = torch.clamp(
            batch['extra_deletion_matrix'], 0., 1.)
        batch['extra_deletion_value'] = torch.atan(
            batch['extra_deletion_matrix'] / 3.) * (2. / math.pi)
    batch['msa_feat'] = torch.cat(msa_feat, dim=-1)
    batch['target_feat'] = torch.cat(target_feat, dim=-1)
    return batch
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from dataclasses import dataclass
from functools import partial
import numpy as np
import torch
from typing import Union, List
_NPZ_KEY_PREFIX = "alphafold/alphafold_iteration/"
# With ParamType, a poor man's enum with attributes (Rust-style)
class ParamType(Enum):
LinearWeight = partial( # hack: partial prevents fns from becoming methods
lambda w: w.transpose(-1, -2)
)
LinearWeightMHA = partial(
lambda w: w.reshape(*w.shape[:-2], -1).transpose(-1, -2)
)
LinearMHAOutputWeight = partial(
lambda w: w.reshape(*w.shape[:-3], -1, w.shape[-1]).transpose(-1, -2)
)
LinearBiasMHA = partial(
lambda w: w.reshape(*w.shape[:-2], -1)
)
LinearWeightOPM = partial(
lambda w: w.reshape(*w.shape[:-3], -1, w.shape[-1]).transpose(-1, -2)
)
Other = partial(
lambda w: w
)
def __init__(self, fn):
self.transformation = fn
@dataclass
class Param:
param: Union[torch.Tensor, List[torch.Tensor]]
param_type: ParamType = ParamType.Other
stacked: bool = False
def _process_translations_dict(d, top_layer=True):
flat = {}
for k, v in d.items():
if(type(v) == dict):
prefix = _NPZ_KEY_PREFIX if top_layer else ''
sub_flat = {
(prefix + '/'.join([k, k_prime])):v_prime
for k_prime, v_prime in
_process_translations_dict(v, top_layer=False).items()
}
flat.update(sub_flat)
else:
k = '/' + k if not top_layer else k
flat[k] = v
return flat
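# Illustration (hypothetical input), derived from the function above: leaf
# keys gain a leading '/', so flattened keys separate the parameter name with
# a double slash, matching the npz naming scheme.
#   _process_translations_dict({"evoformer": {"linear": {"weights": w}}})
#   == {"alphafold/alphafold_iteration/evoformer/linear//weights": w}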
def stacked(param_dict_list, out=None):
"""
Args:
param_dict_list:
A list of (nested) Param dicts to stack. The structure of
each dict must be the identical (down to the ParamTypes of
"parallel" Params). There must be at least one dict
in the list.
"""
if(out is None):
out = {}
template = param_dict_list[0]
for k, _ in template.items():
v = [d[k] for d in param_dict_list]
if(type(v[0]) is dict):
out[k] = {}
stacked(v, out=out[k])
elif(type(v[0]) is Param):
stacked_param = Param(
param=[param.param for param in v],
param_type=v[0].param_type,
stacked=True
)
out[k] = stacked_param
return out
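# Usage sketch (hypothetical tensors): per-block Param dicts are stacked so
# that one npz array of shape [no_blocks, ...] can be unbound across blocks
# in assign() below.
#   blocks = [{"w": Param(torch.zeros(2))}, {"w": Param(torch.zeros(2))}]
#   s = stacked(blocks)
#   s["w"].stacked    # True; s["w"].param is a list of both tensors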
def assign(translation_dict, orig_weights):
for k, param in translation_dict.items():
with torch.no_grad():
weights = torch.as_tensor(orig_weights[k])
ref, param_type = param.param, param.param_type
if(param.stacked):
weights = torch.unbind(weights, 0)
else:
weights = [weights]
ref = [ref]
try:
weights = list(map(param_type.transformation, weights))
for p, w in zip(ref, weights):
p.copy_(w)
            except Exception:
                print(k)
                print(ref[0].shape)
                print(weights[0].shape)
                raise
def import_jax_weights_(model, npz_path, version="model_1"):
data = np.load(npz_path)
#######################
# Some templates
#######################
LinearWeight = lambda l: (
Param(l, param_type=ParamType.LinearWeight)
)
LinearBias = lambda l: (
Param(l)
)
LinearWeightMHA = lambda l: (
Param(l, param_type=ParamType.LinearWeightMHA)
)
LinearBiasMHA = lambda b: (
Param(b, param_type=ParamType.LinearBiasMHA)
)
LinearWeightOPM = lambda l: (
Param(l, param_type=ParamType.LinearWeightOPM)
)
LinearParams = lambda l: {
"weights": LinearWeight(l.weight),
"bias": LinearBias(l.bias),
}
LayerNormParams = lambda l: {
"scale": Param(l.weight),
"offset": Param(l.bias),
}
AttentionParams = lambda att: {
"query_w": LinearWeightMHA(att.linear_q.weight),
"key_w": LinearWeightMHA(att.linear_k.weight),
"value_w": LinearWeightMHA(att.linear_v.weight),
"output_w": Param(
att.linear_o.weight, param_type=ParamType.LinearMHAOutputWeight,
),
"output_b": LinearBias(att.linear_o.bias),
}
AttentionGatedParams = lambda att: dict(
**AttentionParams(att),
**{
"gating_w": LinearWeightMHA(att.linear_g.weight),
"gating_b": LinearBiasMHA(att.linear_g.bias),
},
)
GlobalAttentionParams = lambda att: dict(
AttentionGatedParams(att),
key_w=LinearWeight(att.linear_k.weight),
value_w=LinearWeight(att.linear_v.weight),
)
TriAttParams = lambda tri_att: {
"query_norm": LayerNormParams(tri_att.layer_norm),
"feat_2d_weights": LinearWeight(tri_att.linear.weight),
"attention": AttentionGatedParams(tri_att.mha),
}
TriMulOutParams = lambda tri_mul: {
"layer_norm_input": LayerNormParams(tri_mul.layer_norm_in),
"left_projection": LinearParams(tri_mul.linear_a_p),
"right_projection": LinearParams(tri_mul.linear_b_p),
"left_gate": LinearParams(tri_mul.linear_a_g),
"right_gate": LinearParams(tri_mul.linear_b_g),
"center_layer_norm": LayerNormParams(tri_mul.layer_norm_out),
"output_projection": LinearParams(tri_mul.linear_z),
"gating_linear": LinearParams(tri_mul.linear_g),
}
# see commit b88f8da on the Alphafold repo
    # AlphaFold swaps the pseudocode's a and b between the incoming/outgoing
    # iterations of triangle multiplication, which is confusing and not
    # reproduced in our implementation.
TriMulInParams = lambda tri_mul: {
"layer_norm_input": LayerNormParams(tri_mul.layer_norm_in),
"left_projection": LinearParams(tri_mul.linear_b_p),
"right_projection": LinearParams(tri_mul.linear_a_p),
"left_gate": LinearParams(tri_mul.linear_b_g),
"right_gate": LinearParams(tri_mul.linear_a_g),
"center_layer_norm": LayerNormParams(tri_mul.layer_norm_out),
"output_projection": LinearParams(tri_mul.linear_z),
"gating_linear": LinearParams(tri_mul.linear_g),
}
PairTransitionParams = lambda pt: {
"input_layer_norm": LayerNormParams(pt.layer_norm),
"transition1": LinearParams(pt.linear_1),
"transition2": LinearParams(pt.linear_2),
}
MSAAttParams = lambda matt: {
"query_norm": LayerNormParams(matt.layer_norm_m),
"attention": AttentionGatedParams(matt.mha),
}
MSAGlobalAttParams = lambda matt: {
"query_norm": LayerNormParams(matt.layer_norm_m),
"attention": GlobalAttentionParams(matt)
}
MSAAttPairBiasParams = lambda matt: dict(
**MSAAttParams(matt),
**{
"feat_2d_norm": LayerNormParams(matt.layer_norm_z),
"feat_2d_weights": LinearWeight(matt.linear_z.weight),
},
)
IPAParams = lambda ipa: {
"q_scalar": LinearParams(ipa.linear_q),
"kv_scalar": LinearParams(ipa.linear_kv),
"q_point_local": LinearParams(ipa.linear_q_points),
"kv_point_local": LinearParams(ipa.linear_kv_points),
"trainable_point_weights":
Param(param=ipa.head_weights, param_type=ParamType.Other),
"attention_2d": LinearParams(ipa.linear_b),
"output_projection": LinearParams(ipa.linear_out),
}
TemplatePairBlockParams = lambda b: {
"triangle_attention_starting_node": TriAttParams(b.tri_att_start),
"triangle_attention_ending_node": TriAttParams(b.tri_att_end),
"triangle_multiplication_outgoing": TriMulOutParams(b.tri_mul_out),
"triangle_multiplication_incoming": TriMulInParams(b.tri_mul_in),
"pair_transition": PairTransitionParams(b.pair_transition),
}
MSATransitionParams = lambda m: {
"input_layer_norm": LayerNormParams(m.layer_norm),
"transition1": LinearParams(m.linear_1),
"transition2": LinearParams(m.linear_2),
}
OuterProductMeanParams = lambda o: {
"layer_norm_input": LayerNormParams(o.layer_norm),
"left_projection": LinearParams(o.linear_1),
"right_projection": LinearParams(o.linear_2),
"output_w": LinearWeightOPM(o.linear_out.weight),
"output_b": LinearBias(o.linear_out.bias),
}
def EvoformerBlockParams(b, is_extra_msa=False):
if(is_extra_msa):
col_att_name = "msa_column_global_attention"
msa_col_att_params = MSAGlobalAttParams(b.msa_att_col)
else:
col_att_name = "msa_column_attention"
msa_col_att_params = MSAAttParams(b.msa_att_col)
d = {
"msa_row_attention_with_pair_bias":
MSAAttPairBiasParams(b.msa_att_row),
col_att_name: msa_col_att_params,
"msa_transition": MSATransitionParams(b.msa_transition),
"outer_product_mean": OuterProductMeanParams(b.outer_product_mean),
"triangle_multiplication_outgoing": TriMulOutParams(b.tri_mul_out),
"triangle_multiplication_incoming": TriMulInParams(b.tri_mul_in),
"triangle_attention_starting_node": TriAttParams(b.tri_att_start),
"triangle_attention_ending_node": TriAttParams(b.tri_att_end),
"pair_transition": PairTransitionParams(b.pair_transition),
}
return d
ExtraMSABlockParams = partial(EvoformerBlockParams, is_extra_msa=True)
FoldIterationParams = lambda sm: {
"invariant_point_attention": IPAParams(sm.ipa),
"attention_layer_norm": LayerNormParams(sm.layer_norm_ipa),
"transition": LinearParams(sm.transition.layers[0].linear_1),
"transition_1": LinearParams(sm.transition.layers[0].linear_2),
"transition_2": LinearParams(sm.transition.layers[0].linear_3),
"transition_layer_norm": LayerNormParams(sm.transition.layer_norm),
"affine_update": LinearParams(sm.bb_update.linear),
"rigid_sidechain": {
"input_projection": LinearParams(sm.angle_resnet.linear_in),
"input_projection_1": LinearParams(sm.angle_resnet.linear_initial),
"resblock1": LinearParams(sm.angle_resnet.layers[0].linear_1),
"resblock2": LinearParams(sm.angle_resnet.layers[0].linear_2),
"resblock1_1": LinearParams(sm.angle_resnet.layers[1].linear_1),
"resblock2_1": LinearParams(sm.angle_resnet.layers[1].linear_2),
"unnormalized_angles": LinearParams(sm.angle_resnet.linear_out),
}
}
############################
# translations dict overflow
############################
tps_blocks = model.template_pair_stack.blocks
tps_blocks_params = stacked(
[TemplatePairBlockParams(b) for b in tps_blocks]
)
ems_blocks = model.extra_msa_stack.stack.blocks
ems_blocks_params = stacked(
[ExtraMSABlockParams(b) for b in ems_blocks]
)
evo_blocks = model.evoformer.blocks
evo_blocks_params = stacked(
[EvoformerBlockParams(b) for b in evo_blocks]
)
translations = {
"evoformer": {
"preprocess_1d": LinearParams(model.input_embedder.linear_tf_m),
"preprocess_msa": LinearParams(model.input_embedder.linear_msa_m),
"left_single": LinearParams(model.input_embedder.linear_tf_z_i),
"right_single": LinearParams(model.input_embedder.linear_tf_z_j),
"prev_pos_linear": LinearParams(model.recycling_embedder.linear),
"prev_msa_first_row_norm":
LayerNormParams(model.recycling_embedder.layer_norm_m),
"prev_pair_norm":
LayerNormParams(model.recycling_embedder.layer_norm_z),
"pair_activiations":
LinearParams(model.input_embedder.linear_relpos),
"template_embedding": {
"single_template_embedding": {
"embedding2d":
LinearParams(model.template_pair_embedder.linear),
"template_pair_stack": {
"__layer_stack_no_state": tps_blocks_params,
},
"output_layer_norm":
LayerNormParams(model.template_pair_stack.layer_norm),
},
"attention": AttentionParams(model.template_pointwise_att.mha),
},
"extra_msa_activations":
LinearParams(model.extra_msa_embedder.linear),
"extra_msa_stack": ems_blocks_params,
"template_single_embedding":
LinearParams(model.template_angle_embedder.linear_1),
"template_projection":
LinearParams(model.template_angle_embedder.linear_2),
"evoformer_iteration": evo_blocks_params,
"single_activations": LinearParams(model.evoformer.linear),
},
"structure_module": {
"single_layer_norm":
LayerNormParams(model.structure_module.layer_norm_s),
"initial_projection":
LinearParams(model.structure_module.linear_in),
"pair_layer_norm":
LayerNormParams(model.structure_module.layer_norm_z),
"fold_iteration": FoldIterationParams(model.structure_module),
},
"predicted_lddt_head": {
"input_layer_norm":
LayerNormParams(model.aux_heads.plddt.layer_norm),
"act_0":
LinearParams(model.aux_heads.plddt.linear_1),
"act_1":
LinearParams(model.aux_heads.plddt.linear_2),
"logits":
LinearParams(model.aux_heads.plddt.linear_3),
},
"distogram_head": {
"half_logits":
LinearParams(model.aux_heads.distogram.linear),
},
"experimentally_resolved_head": {
"logits":
LinearParams(model.aux_heads.experimentally_resolved.linear),
},
"masked_msa_head": {
"logits":
LinearParams(model.aux_heads.masked_msa.linear),
},
}
if(version not in ["model_1", "model_2"]):
evo_dict = translations["evoformer"]
keys = list(evo_dict.keys())
for k in keys:
if("template_" in k):
evo_dict.pop(k)
if("_ptm" in version):
translations["predicted_aligned_error_head"] = {
"logits":
LinearParams(model.aux_heads.tm_score.linear)
}
# Flatten keys and insert missing key prefixes
flat = _process_translations_dict(translations)
# Sanity check
keys = list(data.keys())
flat_keys = list(flat.keys())
incorrect = [k for k in flat_keys if k not in keys]
missing = [k for k in keys if k not in flat_keys]
#print(f"Incorrect: {incorrect}")
#print(f"Missing: {missing}")
assert(len(incorrect) == 0)
# assert(sorted(list(flat.keys())) == sorted(list(data.keys())))
# Set weights
assign(flat, data)
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ml_collections
import numpy as np
import torch
import torch.nn as nn
from typing import Dict, Optional
from alphafold.np import residue_constants
from alphafold.model.primitives import Linear
from alphafold.utils.affine_utils import T
from alphafold.utils.tensor_utils import (
tree_map,
tensor_tree_map,
masked_mean,
)
def softmax_cross_entropy(logits, labels):
loss = -1 * torch.sum(
        labels * torch.nn.functional.log_softmax(logits, dim=-1),
dim=-1,
)
return loss
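# Not in the original excerpt: experimentally_resolved_loss below calls
# sigmoid_cross_entropy, which is never defined here. A minimal numerically
# stable sketch:
def sigmoid_cross_entropy(logits, labels):
    # -labels * log(sigmoid(x)) - (1 - labels) * log(1 - sigmoid(x)),
    # computed via logsigmoid for stability
    log_p = torch.nn.functional.logsigmoid(logits)
    log_not_p = torch.nn.functional.logsigmoid(-logits)
    return -labels * log_p - (1. - labels) * log_not_p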
def torsion_angle_loss(
a, # [*, N, 7, 2]
a_gt, # [*, N, 7, 2]
a_alt_gt, # [*, N, 7, 2]
):
# [*, N, 7]
norm = torch.norm(a, dim=-1)
# [*, N, 7, 2]
a = a / norm.unsqueeze(-1)
# [*, N, 7]
diff_norm_gt = torch.norm(a - a_gt, dim=-1)
diff_norm_alt_gt = torch.norm(a - a_alt_gt, dim=-1)
min_diff = torch.minimum(diff_norm_gt ** 2, diff_norm_alt_gt ** 2)
# [*]
l_torsion = torch.mean(min_diff, dim=(-1, -2))
l_angle_norm = torch.mean(torch.abs(norm - 1), dim=(-1, -2))
an_weight = 0.02
return l_torsion + an_weight * l_angle_norm
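# Note (not in the original): both the ground truth and its pi-rotated
# alternative are scored, and the smaller squared distance is kept per angle,
# so chi angles with 180-degree symmetry are not penalized for either valid
# assignment. The norm term keeps the unnormalized sin/cos near unit length.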
def compute_fape(
pred_frames: T,
target_frames: T,
frames_mask: torch.Tensor,
pred_positions: torch.Tensor,
target_positions: torch.Tensor,
positions_mask: torch.Tensor,
length_scale: float,
l1_clamp_distance: Optional[float] = None,
eps=1e-4
) -> torch.Tensor:
# [*, N_frames, N_pts, 3]
local_pred_pos = pred_frames.invert()[..., None].apply(
pred_positions[..., None, :, :],
)
local_target_pos = target_frames.invert()[..., None].apply(
target_positions[..., None, :, :],
)
    # FAPE is computed on positions expressed in the local frames
    error_dist = torch.sqrt(
        torch.sum((local_pred_pos - local_target_pos)**2, dim=-1) + eps
    )
if(l1_clamp_distance is not None):
error_dist = torch.clamp(error_dist, min=0, max=l1_clamp_distance)
normed_error = error_dist / length_scale
normed_error *= frames_mask.unsqueeze(-1)
normed_error *= positions_mask.unsqueeze(-2)
norm_factor = (
torch.sum(frames_mask, dim=-1) *
torch.sum(positions_mask, dim=-1)
)
normed_error = torch.sum(normed_error, dim=(-1, -2)) / (eps + norm_factor)
return normed_error
def backbone_loss(
batch: Dict[str, torch.Tensor],
pred_aff: T,
clamp_distance: float = 10.,
loss_unit_distance: float = 10.,
) -> torch.Tensor:
gt_aff = T.from_tensor(batch['backbone_affine_tensor'])
backbone_mask = batch['backbone_affine_mask']
fape_loss = compute_fape(
pred_aff,
gt_aff,
backbone_mask,
pred_aff.get_trans(),
gt_aff.get_trans(),
backbone_mask,
l1_clamp_distance=clamp_distance,
length_scale=loss_unit_distance,
)
if('use_clamped_fape' in batch):
use_clamped_fape = batch["use_clamped_fape"]
unclamped_fape_loss = compute_fape(
pred_aff,
gt_aff,
backbone_mask,
pred_aff.get_trans(),
gt_aff.get_trans(),
backbone_mask,
l1_clamp_distance=None,
length_scale=loss_unit_distance,
)
        fape_loss = (
            fape_loss * use_clamped_fape +
            unclamped_fape_loss * (1 - use_clamped_fape)
        )
    return torch.mean(fape_loss)
def sidechain_loss(
sidechain_frames,
sidechain_atom_pos,
gt_frames,
alt_gt_frames,
gt_exists,
renamed_atom14_gt_positions,
renamed_atom14_gt_exists,
alt_naming_is_better,
clamp_distance=10.,
length_scale=10.,
):
renamed_gt_frames = (
(1. - alt_naming_is_better[..., None, None, None, None]) *
gt_frames +
alt_naming_is_better[..., None, None, None, None] *
alt_gt_frames
)
renamed_gt_frames = T.from_4x4(renamed_gt_frames)
fape = compute_fape(
sidechain_frames,
renamed_gt_frames,
gt_exists,
sidechain_atom_pos,
renamed_atom14_gt_positions,
renamed_atom14_gt_exists,
l1_clamp_distance=clamp_distance,
length_scale=length_scale,
)
return fape
def compute_plddt(logits: torch.Tensor) -> torch.Tensor:
num_bins = logits.shape[-1]
bin_width = 1. / num_bins
bounds = torch.arange(
start=0.5 * bin_width, end=1.0, step=bin_width, device=logits.device
)
probs = torch.nn.functional.softmax(logits, dim=-1)
pred_lddt_ca = torch.sum(
probs *
bounds.view(*((1,) * len(probs.shape[:-1])), *bounds.shape),
dim=-1,
)
return pred_lddt_ca * 100
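# Sanity check (not in the original): with uniform logits, the expectation
# over bin centers is 0.5, so the predicted lDDT is ~50.
#   logits = torch.zeros(1, 50)
#   compute_plddt(logits)  # tensor([50.])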
def lddt_loss(
batch: Dict[str, torch.Tensor],
cutoff: float = 15.,
num_bins: int = 50,
min_resolution: float = 0.1,
max_resolution: float = 3.0,
eps: float = 1e-10,
) -> torch.Tensor:
all_atom_pred_pos = batch["sm"]["pred_pos"][-1]
all_atom_true_pos = batch["all_atom_positions"]
all_atom_mask = batch["all_atom_mask"]
logits = batch["predicted_lddt_logits"]
    n = all_atom_mask.shape[-2]  # number of residues
ca_pos = residue_constants.atom_order['CA']
all_atom_pred_pos = all_atom_pred_pos[..., :, ca_pos, :]
all_atom_true_pos = all_atom_true_pos[..., :, ca_pos, :]
all_atom_mask = all_atom_mask[..., :, ca_pos:(ca_pos + 1)] # keep dim
dmat_true = torch.sqrt(
eps +
torch.sum(
(
all_atom_true_pos[..., None] -
all_atom_true_pos[..., None, :]
)**2,
dim=-1,
)
)
dmat_pred = torch.sqrt(
eps +
torch.sum(
(
all_atom_pred_pos[..., None] -
all_atom_pred_pos[..., None, :]
)**2,
dim=-1,
)
)
dists_to_score = (
(dmat_true < cutoff) * all_atom_mask *
permute_final_dims(all_atom_mask, 1, 0) *
(1. - torch.eye(n, device=all_atom_mask.device))
)
dist_l1 = torch.abs(dmat_true - dmat_pred)
    score = (
        (dist_l1 < 0.5).type(dist_l1.dtype) +
        (dist_l1 < 1.0).type(dist_l1.dtype) +
        (dist_l1 < 2.0).type(dist_l1.dtype) +
        (dist_l1 < 4.0).type(dist_l1.dtype)
    )
    score = score * 0.25
norm = 1. / (eps + torch.sum(dists_to_score, dim=-1))
score = norm * (eps + torch.sum(dists_to_score * score, dim=-1))
# TODO: this feels a bit weird, but it's in the source
    score = score.detach()
    # "score" is the per-residue lDDT-CA computed above
    bin_index = torch.floor(score * num_bins).long()
    bin_index = torch.clamp(bin_index, max=(num_bins - 1))
    lddt_ca_one_hot = torch.nn.functional.one_hot(
        bin_index, num_classes=num_bins
    )
    errors = softmax_cross_entropy(logits, lddt_ca_one_hot)
    all_atom_mask = all_atom_mask.squeeze(-1)  # [*, N, 1] -> [*, N]
    loss = torch.sum(errors * all_atom_mask, dim=-1) / (
        torch.sum(all_atom_mask, dim=-1) + eps
    )
loss *= (
(batch["resolution"] >= min_resolution) &
(batch["resolution"] <= max_resolution)
)
return loss
def distogram_loss(
pred_distr,
gt,
mask,
min_bin=2.3125, max_bin=21.6875, no_bins=64, eps=1e-6
):
boundaries = torch.linspace(
min_bin, max_bin, no_bins - 1, device=pred_distr.device,
)
boundaries = boundaries ** 2
    dists = torch.sum(
        (gt[..., None, :] - gt[..., None, :, :]) ** 2, dim=-1, keepdim=True
    )
    true_bins = torch.sum(dists > boundaries, dim=-1)
    errors = softmax_cross_entropy(
        pred_distr,
        torch.nn.functional.one_hot(true_bins, no_bins),
)
square_mask = mask[..., None] * mask[..., None, :]
mean = (
torch.sum(errors * square_mask, dim=(-1, -2)) /
(eps + torch.sum(square_mask, dim=(-1, -2)))
)
return mean
def tm_score(
logits,
t_pred,
t_gt,
mask,
resolution,
max_bin=31,
no_bins=64,
min_resolution: float = 0.1,
max_resolution: float = 3.0,
eps=1e-8
):
    boundaries = torch.linspace(
        0, max_bin, steps=(no_bins - 1), device=logits.device
    )
boundaries = boundaries ** 2
def _points(affine):
pts = affine.trans.unsqueeze(-3)
return affine.invert().apply(pts, addl_dims=1)
sq_diff = torch.sum((_points(t_pred) - _points(t_gt)) ** 2, dim=-1)
sq_diff = sq_diff.detach()
    true_bins = torch.sum(
        sq_diff[..., None] > boundaries, dim=-1
    )
errors = softmax_cross_entropy(
logits,
torch.nn.functional.one_hot(true_bins, no_bins)
)
square_mask = mask[..., None] * mask[..., None, :]
    loss = (
        torch.sum(errors * square_mask, dim=(-1, -2)) /
        (eps + torch.sum(square_mask, dim=(-1, -2)))
    )
loss *= (
(resolution >= min_resolution) &
(resolution <= max_resolution)
)
return loss
def between_residue_bond_loss(
pred_atom_positions: torch.Tensor, # (N, 37(14), 3)
pred_atom_mask: torch.Tensor, # (N, 37(14))
residue_index: torch.Tensor, # (N)
aatype: torch.Tensor, # (N)
tolerance_factor_soft=12.0,
tolerance_factor_hard=12.0,
eps=1e-6,
) -> Dict[str, torch.Tensor]:
"""Flat-bottom loss to penalize structural violations between residues.
This is a loss penalizing any violation of the geometry around the peptide
bond between consecutive amino acids. This loss corresponds to
Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 44, 45.
Args:
pred_atom_positions: Atom positions in atom37/14 representation
pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for the given amino acid; assumed to be
      monotonically increasing.
aatype: Amino acid type of given residue
tolerance_factor_soft: soft tolerance factor measured in standard deviations
of pdb distributions
tolerance_factor_hard: hard tolerance factor measured in standard deviations
of pdb distributions
Returns:
Dict containing:
* 'c_n_loss_mean': Loss for peptide bond length violations
* 'ca_c_n_loss_mean': Loss for violations of bond angle around C spanned
by CA, C, N
* 'c_n_ca_loss_mean': Loss for violations of bond angle around N spanned
by C, N, CA
* 'per_residue_loss_sum': sum of all losses for each residue
* 'per_residue_violation_mask': mask denoting all residues with violation
present.
"""
# Get the positions of the relevant backbone atoms.
this_ca_pos = pred_atom_positions[..., :-1, 1, :]
this_ca_mask = pred_atom_mask[..., :-1, 1]
this_c_pos = pred_atom_positions[..., :-1, 2, :]
this_c_mask = pred_atom_mask[..., :-1, 2]
next_n_pos = pred_atom_positions[..., 1:, 0, :]
next_n_mask = pred_atom_mask[..., 1:, 0]
next_ca_pos = pred_atom_positions[..., 1:, 1, :]
next_ca_mask = pred_atom_mask[..., 1:, 1]
has_no_gap_mask = (
(residue_index[..., 1:] - residue_index[..., :-1]) == 1.0
)
# Compute loss for the C--N bond.
c_n_bond_length = torch.sqrt(
eps +
torch.sum(
(this_c_pos - next_n_pos)**2, dim=-1
)
)
# The C-N bond to proline has slightly different length because of the ring.
next_is_proline = (
aatype[..., 1:] == residue_constants.resname_to_idx['PRO']
)
gt_length = (
(~next_is_proline) * residue_constants.between_res_bond_length_c_n[0]
+ next_is_proline * residue_constants.between_res_bond_length_c_n[1]
)
gt_stddev = (
(~next_is_proline) *
residue_constants.between_res_bond_length_stddev_c_n[0] +
next_is_proline *
residue_constants.between_res_bond_length_stddev_c_n[1]
)
c_n_bond_length_error = torch.sqrt(
eps + (c_n_bond_length - gt_length)**2
)
c_n_loss_per_residue = torch.nn.functional.relu(
c_n_bond_length_error - tolerance_factor_soft * gt_stddev
)
mask = this_c_mask * next_n_mask * has_no_gap_mask
c_n_loss = torch.sum(mask * c_n_loss_per_residue) / (torch.sum(mask) + eps)
c_n_violation_mask = mask * (
c_n_bond_length_error > (tolerance_factor_hard * gt_stddev)
)
# Compute loss for the angles.
ca_c_bond_length = torch.sqrt(
eps + torch.sum((this_ca_pos - this_c_pos)**2, dim=-1)
)
n_ca_bond_length = torch.sqrt(
eps + torch.sum((next_n_pos - next_ca_pos)**2, dim=-1)
)
c_ca_unit_vec = (this_ca_pos - this_c_pos) / ca_c_bond_length[..., None]
c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length[..., None]
n_ca_unit_vec = (next_ca_pos - next_n_pos) / n_ca_bond_length[..., None]
ca_c_n_cos_angle = torch.sum(c_ca_unit_vec * c_n_unit_vec, dim=-1)
gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0]
    gt_stddev = residue_constants.between_res_cos_angles_ca_c_n[1]
ca_c_n_cos_angle_error = torch.sqrt(
eps + (ca_c_n_cos_angle - gt_angle)**2
)
ca_c_n_loss_per_residue = torch.nn.functional.relu(
ca_c_n_cos_angle_error - tolerance_factor_soft * gt_stddev
)
mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask
ca_c_n_loss = (
torch.sum(mask * ca_c_n_loss_per_residue) / (torch.sum(mask) + eps)
)
ca_c_n_violation_mask = mask * (ca_c_n_cos_angle_error >
(tolerance_factor_hard * gt_stddev))
c_n_ca_cos_angle = torch.sum((-c_n_unit_vec) * n_ca_unit_vec, dim=-1)
gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0]
gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1]
c_n_ca_cos_angle_error = torch.sqrt(
eps + torch.square(c_n_ca_cos_angle - gt_angle))
c_n_ca_loss_per_residue = torch.nn.functional.relu(
c_n_ca_cos_angle_error - tolerance_factor_soft * gt_stddev
)
mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask
c_n_ca_loss = (
torch.sum(mask * c_n_ca_loss_per_residue) / (torch.sum(mask) + eps)
)
c_n_ca_violation_mask = mask * (
c_n_ca_cos_angle_error > (tolerance_factor_hard * gt_stddev)
)
# Compute a per residue loss (equally distribute the loss to both
# neighbouring residues).
per_residue_loss_sum = (c_n_loss_per_residue +
ca_c_n_loss_per_residue +
c_n_ca_loss_per_residue)
per_residue_loss_sum = 0.5 * (
torch.nn.functional.pad(per_residue_loss_sum, (0, 1)) +
torch.nn.functional.pad(per_residue_loss_sum, (1, 0))
)
# Compute hard violations.
violation_mask = torch.max(
torch.stack(
[
c_n_violation_mask,
ca_c_n_violation_mask,
c_n_ca_violation_mask
]
),
dim=-2
)[0]
violation_mask = torch.maximum(
torch.nn.functional.pad(violation_mask, (0, 1)),
torch.nn.functional.pad(violation_mask, (1, 0))
)
return {
'c_n_loss_mean': c_n_loss,
'ca_c_n_loss_mean': ca_c_n_loss,
'c_n_ca_loss_mean': c_n_ca_loss,
'per_residue_loss_sum': per_residue_loss_sum,
'per_residue_violation_mask': violation_mask
}
def between_residue_clash_loss(
atom14_pred_positions: torch.Tensor,
atom14_atom_exists: torch.Tensor,
atom14_atom_radius: torch.Tensor,
residue_index: torch.Tensor,
overlap_tolerance_soft=1.5,
overlap_tolerance_hard=1.5,
eps=1e-10,
) -> Dict[str, torch.Tensor]:
"""Loss to penalize steric clashes between residues.
This is a loss penalizing any steric clashes due to non bonded atoms in
different peptides coming too close. This loss corresponds to the part with
different residues of
Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.
Args:
atom14_pred_positions: Predicted positions of atoms in
global prediction frame
atom14_atom_exists: Mask denoting whether atom at positions exists for given
amino acid type
atom14_atom_radius: Van der Waals radius for each atom.
residue_index: Residue index for given amino acid.
overlap_tolerance_soft: Soft tolerance factor.
overlap_tolerance_hard: Hard tolerance factor.
Returns:
Dict containing:
* 'mean_loss': average clash loss
* 'per_atom_loss_sum': sum of all clash losses per atom, shape (N, 14)
* 'per_atom_clash_mask': mask whether atom clashes with any other atom
shape (N, 14)
"""
fp_type = atom14_pred_positions.dtype
# Create the distance matrix.
# (N, N, 14, 14)
dists = torch.sqrt(
eps +
torch.sum(
(
atom14_pred_positions[..., :, None, :, None, :] -
atom14_pred_positions[..., None, :, None, :, :]
)**2,
dim=-1)
)
# Create the mask for valid distances.
# shape (N, N, 14, 14)
dists_mask = (
atom14_atom_exists[..., :, None, :, None] *
atom14_atom_exists[..., None, :, None, :]
).type(fp_type)
# Mask out all the duplicate entries in the lower triangular matrix.
# Also mask out the diagonal (atom-pairs from the same residue) -- these atoms
# are handled separately.
dists_mask *= (
residue_index[..., :, None, None, None] < residue_index[..., None, :, None, None]
)
# Backbone C--N bond between subsequent residues is no clash.
c_one_hot = torch.nn.functional.one_hot(
residue_index.new_tensor(2), num_classes=14
)
c_one_hot = c_one_hot.reshape(
*((1,) * len(residue_index.shape[:-1])), *c_one_hot.shape
)
c_one_hot = c_one_hot.type(fp_type)
n_one_hot = torch.nn.functional.one_hot(
residue_index.new_tensor(0), num_classes=14
)
n_one_hot = n_one_hot.reshape(
*((1,) * len(residue_index.shape[:-1])), *n_one_hot.shape
)
n_one_hot = n_one_hot.type(fp_type)
neighbour_mask = (
(residue_index[..., :, None, None, None] + 1) ==
residue_index[..., None, :, None, None]
)
c_n_bonds = (
neighbour_mask *
c_one_hot[..., None, None, :, None] *
n_one_hot[..., None, None, None, :]
)
dists_mask *= (1. - c_n_bonds)
# Disulfide bridge between two cysteines is no clash.
cys = residue_constants.restype_name_to_atom14_names['CYS']
cys_sg_idx = cys.index('SG')
cys_sg_idx = residue_index.new_tensor(cys_sg_idx)
cys_sg_idx = cys_sg_idx.reshape(
*((1,) * len(residue_index.shape[:-1])), 1
).squeeze(-1)
cys_sg_one_hot = torch.nn.functional.one_hot(
cys_sg_idx, num_classes=14
)
disulfide_bonds = (
cys_sg_one_hot[..., None, None, :, None] *
cys_sg_one_hot[..., None, None, None, :])
dists_mask *= (1. - disulfide_bonds)
# Compute the lower bound for the allowed distances.
# shape (N, N, 14, 14)
dists_lower_bound = dists_mask * (
atom14_atom_radius[..., :, None, :, None] +
atom14_atom_radius[..., None, :, None, :]
)
# Compute the error.
# shape (N, N, 14, 14)
dists_to_low_error = dists_mask * torch.nn.functional.relu(
dists_lower_bound - overlap_tolerance_soft - dists
)
# Compute the mean loss.
# shape ()
mean_loss = (
torch.sum(dists_to_low_error) / (1e-6 + torch.sum(dists_mask))
)
# Compute the per atom loss sum.
# shape (N, 14)
    per_atom_loss_sum = (
        torch.sum(dists_to_low_error, dim=(-4, -2)) +
        torch.sum(dists_to_low_error, dim=(-3, -1))
    )
# Compute the hard clash mask.
# shape (N, N, 14, 14)
clash_mask = dists_mask * (
dists < (dists_lower_bound - overlap_tolerance_hard)
)
# Compute the per atom clash.
# shape (N, 14)
    per_atom_clash_mask = torch.maximum(
        torch.amax(clash_mask, dim=(-4, -2)),
        torch.amax(clash_mask, dim=(-3, -1)),
    )
return {
'mean_loss': mean_loss, # shape ()
'per_atom_loss_sum': per_atom_loss_sum, # shape (N, 14)
'per_atom_clash_mask': per_atom_clash_mask # shape (N, 14)
}
def within_residue_violations(
atom14_pred_positions: torch.Tensor,
atom14_atom_exists: torch.Tensor,
atom14_dists_lower_bound: torch.Tensor,
atom14_dists_upper_bound: torch.Tensor,
tighten_bounds_for_loss=0.0,
eps=1e-10,
) -> Dict[str, torch.Tensor]:
"""Loss to penalize steric clashes within residues.
This is a loss penalizing any steric violations or clashes of non-bonded atoms
in a given peptide. This loss corresponds to the part with
the same residues of
Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.
Args:
atom14_pred_positions ([*, N, 14, 3]):
Predicted positions of atoms in global prediction frame.
atom14_atom_exists ([*, N, 14]):
Mask denoting whether atom at positions exists for given
amino acid type
atom14_dists_lower_bound ([*, N, 14]):
Lower bound on allowed distances.
atom14_dists_upper_bound ([*, N, 14]):
Upper bound on allowed distances
tighten_bounds_for_loss ([*, N]):
Extra factor to tighten loss
Returns:
Dict containing:
* 'per_atom_loss_sum' ([*, N, 14]):
sum of all clash losses per atom, shape
* 'per_atom_clash_mask' ([*, N, 14]):
mask whether atom clashes with any other atom shape
"""
# Compute the mask for each residue.
dists_masks = (
1. - torch.eye(14, device=atom14_atom_exists.device)[None]
)
dists_masks = dists_masks.reshape(
*((1,) * len(atom14_atom_exists.shape[:-2])), *dists_masks.shape
)
dists_masks = (
atom14_atom_exists[..., :, :, None] *
atom14_atom_exists[..., :, None, :] *
dists_masks
)
# Distance matrix
dists = torch.sqrt(
eps +
torch.sum(
(
atom14_pred_positions[..., :, :, None, :] -
atom14_pred_positions[..., :, None, :, :]
)**2,
dim=-1
)
)
# Compute the loss.
dists_to_low_error = torch.nn.functional.relu(
atom14_dists_lower_bound + tighten_bounds_for_loss - dists
)
dists_to_high_error = torch.nn.functional.relu(
dists - (atom14_dists_upper_bound - tighten_bounds_for_loss)
)
loss = dists_masks * (dists_to_low_error + dists_to_high_error)
# Compute the per atom loss sum.
per_atom_loss_sum = (
torch.sum(loss, dim=-2) +
torch.sum(loss, dim=-1)
)
# Compute the violations mask.
violations = (
dists_masks *
(
(dists < atom14_dists_lower_bound) |
(dists > atom14_dists_upper_bound)
)
)
# Compute the per atom violations.
    per_atom_violations = torch.maximum(
        torch.max(violations, dim=-2)[0], torch.max(violations, dim=-1)[0]
    )
return {
'per_atom_loss_sum': per_atom_loss_sum,
'per_atom_violations': per_atom_violations
}
def find_structural_violations(
batch: Dict[str, torch.Tensor],
atom14_pred_positions: torch.Tensor,
config: ml_collections.ConfigDict
) -> Dict[str, torch.Tensor]:
"""Computes several checks for structural violations."""
# Compute between residue backbone violations of bonds and angles.
connection_violations = between_residue_bond_loss(
pred_atom_positions=atom14_pred_positions,
pred_atom_mask=batch['atom14_atom_exists'],
residue_index=batch['residue_index'],
aatype=batch['aatype'],
tolerance_factor_soft=config.violation_tolerance_factor,
tolerance_factor_hard=config.violation_tolerance_factor
)
# Compute the Van der Waals radius for every atom
# (the first letter of the atom name is the element type).
# Shape: (N, 14).
atomtype_radius = [
residue_constants.van_der_waals_radius[name[0]]
for name in residue_constants.atom_types
]
atomtype_radius = atom14_pred_positions.new_tensor(
atomtype_radius
)
atom14_atom_radius = (
batch['atom14_atom_exists'] *
atomtype_radius[batch['residx_atom14_to_atom37']]
)
# Compute the between residue clash loss.
between_residue_clashes = between_residue_clash_loss(
atom14_pred_positions=atom14_pred_positions,
atom14_atom_exists=batch['atom14_atom_exists'],
atom14_atom_radius=atom14_atom_radius,
residue_index=batch['residue_index'],
overlap_tolerance_soft=config.clash_overlap_tolerance,
overlap_tolerance_hard=config.clash_overlap_tolerance
)
# Compute all within-residue violations (clashes,
# bond length and angle violations).
restype_atom14_bounds = residue_constants.make_atom14_dists_bounds(
overlap_tolerance=config.clash_overlap_tolerance,
bond_length_tolerance_factor=config.violation_tolerance_factor
)
atom14_dists_lower_bound = restype_atom14_bounds['lower_bound'][
batch['aatype']
]
atom14_dists_upper_bound = restype_atom14_bounds['upper_bound'][
batch['aatype']
]
atom14_dists_lower_bound = atom14_pred_positions.new_tensor(
atom14_dists_lower_bound
)
atom14_dists_upper_bound = atom14_pred_positions.new_tensor(
atom14_dists_upper_bound
)
residue_violations = within_residue_violations(
atom14_pred_positions=atom14_pred_positions,
atom14_atom_exists=batch['atom14_atom_exists'],
atom14_dists_lower_bound=atom14_dists_lower_bound,
atom14_dists_upper_bound=atom14_dists_upper_bound,
tighten_bounds_for_loss=0.0
)
# Combine them to a single per-residue violation mask (used later for LDDT).
per_residue_violations_mask = torch.max(
torch.stack(
[
connection_violations['per_residue_violation_mask'],
torch.max(
between_residue_clashes['per_atom_clash_mask'], dim=-1
)[0],
torch.max(
residue_violations['per_atom_violations'], dim=-1
)[0],
],
dim=-1,
),
dim=-1,
)[0]
return {
'between_residues': {
'bonds_c_n_loss_mean':
connection_violations['c_n_loss_mean'], # ()
'angles_ca_c_n_loss_mean':
connection_violations['ca_c_n_loss_mean'], # ()
'angles_c_n_ca_loss_mean':
connection_violations['c_n_ca_loss_mean'], # ()
'connections_per_residue_loss_sum':
connection_violations['per_residue_loss_sum'], # (N)
'connections_per_residue_violation_mask':
connection_violations['per_residue_violation_mask'], # (N)
'clashes_mean_loss':
between_residue_clashes['mean_loss'], # ()
'clashes_per_atom_loss_sum':
between_residue_clashes['per_atom_loss_sum'], # (N, 14)
'clashes_per_atom_clash_mask':
between_residue_clashes['per_atom_clash_mask'], # (N, 14)
},
'within_residues': {
'per_atom_loss_sum':
residue_violations['per_atom_loss_sum'], # (N, 14)
'per_atom_violations':
residue_violations['per_atom_violations'], # (N, 14),
},
'total_per_residue_violations_mask':
per_residue_violations_mask, # (N)
}
def find_structural_violations_np(
batch: Dict[str, np.ndarray],
atom14_pred_positions: np.ndarray,
config: ml_collections.ConfigDict
) -> Dict[str, np.ndarray]:
to_tensor = lambda x: torch.tensor(x, requires_grad=False)
batch = tree_map(to_tensor, batch, np.ndarray)
atom14_pred_positions = to_tensor(atom14_pred_positions)
out = find_structural_violations(batch, atom14_pred_positions, config)
to_np = lambda x: np.array(x)
np_out = tensor_tree_map(to_np, out)
return np_out
def extreme_ca_ca_distance_violations(
pred_atom_positions: torch.Tensor, # (N, 37(14), 3)
pred_atom_mask: torch.Tensor, # (N, 37(14))
residue_index: torch.Tensor, # (N)
max_angstrom_tolerance=1.5,
eps=1e-6,
) -> torch.Tensor:
"""Counts residues whose Ca is a large distance from its neighbour.
Measures the fraction of CA-CA pairs between consecutive amino acids that are
more than 'max_angstrom_tolerance' apart.
Args:
pred_atom_positions: Atom positions in atom37/14 representation
pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for the given amino acid; assumed to be
      monotonically increasing.
max_angstrom_tolerance: Maximum distance allowed to not count as violation.
Returns:
Fraction of consecutive CA-CA pairs with violation.
"""
this_ca_pos = pred_atom_positions[..., :-1, 1, :]
this_ca_mask = pred_atom_mask[..., :-1, 1]
next_ca_pos = pred_atom_positions[..., 1:, 1, :]
next_ca_mask = pred_atom_mask[..., 1:, 1]
has_no_gap_mask = ((residue_index[..., 1:] - residue_index[..., :-1]) == 1.0)
ca_ca_distance = torch.sqrt(
eps + torch.sum((this_ca_pos - next_ca_pos)**2, dim=-1)
)
violations = (
(ca_ca_distance - residue_constants.ca_ca) > max_angstrom_tolerance
)
mask = this_ca_mask * next_ca_mask * has_no_gap_mask
mean = masked_mean(mask, violations, -1)
return mean
def compute_violation_metrics(
batch: Dict[str, torch.Tensor],
atom14_pred_positions: torch.Tensor, # (N, 14, 3)
violations: Dict[str, torch.Tensor],
) -> Dict[str, torch.Tensor]:
"""Compute several metrics to assess the structural violations."""
ret = {}
extreme_ca_ca_violations = extreme_ca_ca_distance_violations(
pred_atom_positions=atom14_pred_positions,
pred_atom_mask=batch['atom14_atom_exists'],
residue_index=batch['residue_index']
)
ret['violations_extreme_ca_ca_distance'] = extreme_ca_ca_violations
ret['violations_between_residue_bond'] = masked_mean(
batch['seq_mask'],
violations['between_residues'][
'connections_per_residue_violation_mask'
],
dim=-1,
)
ret['violations_between_residue_clash'] = masked_mean(
mask=batch['seq_mask'],
value=torch.max(
violations['between_residues']['clashes_per_atom_clash_mask'],
dim=-1
)[0],
dim=-1,
)
ret['violations_within_residue'] = masked_mean(
mask=batch['seq_mask'],
value=torch.max(
violations['within_residues']['per_atom_violations'], dim=-1
)[0],
dim=-1,
)
ret['violations_per_residue'] = masked_mean(
mask=batch['seq_mask'],
value=violations['total_per_residue_violations_mask'],
dim=-1,
)
return ret
def compute_violation_metrics_np(
batch: Dict[str, np.ndarray],
atom14_pred_positions: np.ndarray,
violations: Dict[str, np.ndarray],
) -> Dict[str, np.ndarray]:
to_tensor = lambda x: torch.tensor(x, requires_grad=False)
batch = tree_map(to_tensor, batch, np.ndarray)
atom14_pred_positions = to_tensor(atom14_pred_positions)
violations = tree_map(to_tensor, violations, np.ndarray)
out = compute_violation_metrics(batch, atom14_pred_positions, violations)
to_np = lambda x: np.array(x)
return tree_map(to_np, out, torch.Tensor)
def compute_renamed_ground_truth(
batch: Dict[str, torch.Tensor],
atom14_pred_positions: torch.Tensor,
eps=1e-10,
) -> Dict[str, torch.Tensor]:
"""
Find optimal renaming of ground truth based on the predicted positions.
Alg. 26 "renameSymmetricGroundTruthAtoms"
This renamed ground truth is then used for all losses,
such that each loss moves the atoms in the same direction.
Args:
batch: Dictionary containing:
* atom14_gt_positions: Ground truth positions.
* atom14_alt_gt_positions: Ground truth positions with renaming swaps.
* atom14_atom_is_ambiguous: 1.0 for atoms that are affected by
renaming swaps.
* atom14_gt_exists: Mask for which atoms exist in ground truth.
* atom14_alt_gt_exists: Mask for which atoms exist in ground truth
after renaming.
* atom14_atom_exists: Mask for whether each atom is part of the given
amino acid type.
    atom14_pred_positions: Array of atom positions in global frame with shape
      (N, 14, 3).
Returns:
Dictionary containing:
alt_naming_is_better: Array with 1.0 where alternative swap is better.
renamed_atom14_gt_positions: Array of optimal ground truth positions
after renaming swaps are performed.
renamed_atom14_gt_exists: Mask after renaming swap is performed.
"""
pred_dists = torch.sqrt(
eps +
torch.sum(
(
atom14_pred_positions[..., None, :, None, :] -
atom14_pred_positions[..., None, :, None, :, :]
)**2,
dim=-1,
)
)
atom14_gt_positions = batch['atom14_gt_positions']
gt_dists = torch.sqrt(
eps +
torch.sum(
(
atom14_gt_positions[..., None, :, None, :] -
atom14_gt_positions[..., None, :, None, :, :]
)**2,
dim=-1,
)
)
atom14_alt_gt_positions = batch['atom14_alt_gt_positions']
alt_gt_dists = torch.sqrt(
eps +
torch.sum(
(
atom14_alt_gt_positions[..., None, :, None, :] -
atom14_alt_gt_positions[..., None, :, None, :, :]
)**2,
dim=-1,
)
)
lddt = torch.sqrt(eps + (pred_dists - gt_dists)**2)
alt_lddt = torch.sqrt(eps + (pred_dists - alt_gt_dists)**2)
atom14_gt_exists = batch['atom14_gt_exists']
atom14_atom_is_ambiguous = batch['atom14_atom_is_ambiguous']
mask = (
atom14_gt_exists[..., None, :, None] *
atom14_atom_is_ambiguous[..., None, :, None] *
atom14_gt_exists[..., None, :, None, :] *
(1. - atom14_atom_is_ambiguous[..., None, :, None, :])
)
per_res_lddt = torch.sum(mask * lddt, dim=(-1, -2, -3))
alt_per_res_lddt = torch.sum(mask * alt_lddt, dim=(-1, -2, -3))
fp_type = atom14_pred_positions.dtype
alt_naming_is_better = (alt_per_res_lddt < per_res_lddt).type(fp_type)
renamed_atom14_gt_positions = (
(1. - alt_naming_is_better[..., None, None]) *
atom14_gt_positions +
alt_naming_is_better[..., None, None] *
atom14_alt_gt_positions
)
renamed_atom14_gt_mask = (
(1. - alt_naming_is_better[..., None]) * atom14_gt_exists +
alt_naming_is_better[..., None] * batch['atom14_alt_gt_exists']
)
return {
'alt_naming_is_better': alt_naming_is_better,
'renamed_atom14_gt_positions': renamed_atom14_gt_positions,
'renamed_atom14_gt_exists': renamed_atom14_gt_mask,
}
def experimentally_resolved_loss(
logits: torch.Tensor,
atom37_atom_exists: torch.Tensor,
all_atom_mask: torch.Tensor,
eps: float = 1e-8,
) -> torch.Tensor:
errors = sigmoid_cross_entropy(logits, all_atom_mask)
loss_num = torch.sum(errors * atom37_atom_exists, dim=(-1, -2))
loss = loss_num / (eps + torch.sum(atom37_atom_exists, dim=(-1, -2)))
return loss
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import torch
import torch.nn as nn
def permute_final_dims(tensor, *inds):
zero_index = -1 * len(inds)
first_inds = range(len(tensor.shape[:zero_index]))
return tensor.permute(*first_inds, *[zero_index + i for i in inds])
def flatten_final_dims(tensor, no_dims):
return tensor.reshape(*tensor.shape[:-no_dims], -1)
def masked_mean(mask, value, dim, eps=1e-10):
mask = mask.expand(*value.shape)
return torch.sum(mask * value, dim=dim) / (eps + torch.sum(mask, dim=dim))
def pts_to_distogram(pts, min_bin=2.3125, max_bin=21.6875, no_bins=64):
boundaries = torch.linspace(
min_bin, max_bin, no_bins - 1, device=pts.device
)
dists = torch.sqrt(
torch.sum((pts.unsqueeze(-2) - pts.unsqueeze(-3)) ** 2, dim=-1)
)
return torch.bucketize(dists, boundaries)
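# Example (not in the original): distances below min_bin land in bucket 0,
# beyond max_bin in the last bucket.
#   pts = torch.tensor([[0., 0., 0.], [100., 0., 0.]])
#   pts_to_distogram(pts)  # diagonal -> bucket 0, off-diagonal -> bucket 63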
def stack_tensor_dicts(dicts):
first = dicts[0]
new_dict = {}
for k, v in first.items():
all_v = [d[k] for d in dicts]
if(type(v) is dict):
new_dict[k] = stack_tensor_dicts(all_v)
else:
new_dict[k] = torch.stack(all_v)
return new_dict
def one_hot(x, v_bins):
reshaped_bins = v_bins.view(*((1,) * len(x.shape) + (len(v_bins),)))
diffs = x[..., None] - reshaped_bins
am = torch.argmin(torch.abs(diffs), dim=-1)
return nn.functional.one_hot(am, num_classes=len(v_bins)).float()
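# Example (not in the original): each value is assigned to its nearest bin.
#   v_bins = torch.tensor([0., 1., 2.])
#   one_hot(torch.tensor([0.9, 1.6]), v_bins)
#   # tensor([[0., 1., 0.],
#   #         [0., 0., 1.]])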
def batched_gather(data, inds, dim=0, no_batch_dims=0):
ranges = []
for i, s in enumerate(data.shape[:no_batch_dims]):
r = torch.arange(s)
r = r.view(*(*((1,) * i), -1, *((1,) * (len(inds.shape) - i - 1))))
ranges.append(r)
remaining_dims = [
slice(None) for _ in range(len(data.shape) - no_batch_dims)
]
remaining_dims[dim - no_batch_dims if dim >= 0 else dim] = inds
ranges.extend(remaining_dims)
    # index with a tuple; indexing with a plain list is deprecated in torch
    return data[tuple(ranges)]
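# Example (not in the original): with one batch dim, each batch entry is
# indexed by its own row of indices along `dim`.
#   data = torch.arange(6).view(2, 3)   # [[0, 1, 2], [3, 4, 5]]
#   inds = torch.tensor([[2, 0], [1, 1]])
#   batched_gather(data, inds, dim=1, no_batch_dims=1)
#   # tensor([[2, 0],
#   #         [4, 4]])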
# With tree_map, a poor man's JAX tree_map
def dict_map(fn, dic, leaf_type):
new_dict = {}
for k, v in dic.items():
if(type(v) is dict):
new_dict[k] = dict_map(fn, v, leaf_type)
else:
new_dict[k] = tree_map(fn, v, leaf_type)
return new_dict
def tree_map(fn, tree, leaf_type):
tree_type = type(tree)
if(tree_type is dict):
return dict_map(fn, tree, leaf_type)
elif(tree_type is list):
return [tree_map(fn, x, leaf_type) for x in tree]
elif(tree_type is tuple):
return tuple([tree_map(fn, x, leaf_type) for x in tree])
elif(tree_type is leaf_type):
return fn(tree)
else:
raise ValueError("Not supported")
tensor_tree_map = partial(tree_map, leaf_type=torch.Tensor)
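# Example (illustrative): apply a function to every tensor leaf of a nested
# structure of dicts/lists/tuples:
#
#   tree = {"a": torch.zeros(3), "b": [torch.ones(2), torch.ones(2)]}
#   doubled = tensor_tree_map(lambda t: t * 2, tree)  # same structure back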
def chunk_layer(layer, inputs, chunk_size, no_batch_dims):
"""
Implements the "chunking" procedure described in section 1.11.8.
Layer outputs and inputs are interpreted as simplified "pytrees,"
consisting only of (nested) lists, tuples, and dicts with tensor
leaves.
Args:
layer:
The layer to be applied chunk-wise
inputs:
A (nested) dictionary of keyworded inputs. All leaves must be
tensors and must share the same batch dimensions.
chunk_size:
The number of sub-batches per chunk. If multiple batch
dimensions are specified, a "sub-batch" is defined as a single
indexing of all batch dimensions simultaneously (s.t. the
number of sub-batches is the product of the batch dimensions).
no_batch_dims:
How many of the initial dimensions of each input tensor can
be considered batch dimensions.
Returns:
The reassembled output of the layer on the inputs.
"""
if(not (len(inputs) > 0)):
raise ValueError("Must provide at least one input")
def fetch_dims(tree):
shapes = []
tree_type = type(tree)
if(tree_type is dict):
for v in tree.values():
shapes.extend(fetch_dims(v))
elif(tree_type is list or tree_type is tuple):
for t in tree:
shapes.extend(fetch_dims(t))
elif(tree_type is torch.Tensor):
shapes.append(tree.shape)
else:
raise ValueError("Not supported")
return shapes
initial_dims = [shape[:no_batch_dims] for shape in fetch_dims(inputs)]
orig_batch_dims = [max(s) for s in zip(*initial_dims)]
def prep_inputs(t):
t = t.expand(*orig_batch_dims, *t.shape[no_batch_dims:])
t = t.reshape(-1, *t.shape[no_batch_dims:])
return t
flattened_inputs = tensor_tree_map(prep_inputs, inputs)
flat_batch_dim = 1
for d in orig_batch_dims:
flat_batch_dim *= d
no_chunks = (
flat_batch_dim // chunk_size + (flat_batch_dim % chunk_size != 0)
)
i = 0
out = None
for _ in range(no_chunks):
# Chunk the input
select_chunk = lambda t: t[i:i+chunk_size]
chunks = tensor_tree_map(select_chunk, flattened_inputs)
# Run the layer on the chunk
output_chunk = layer(**chunks)
# Allocate space for the output
if(out is None):
allocate = lambda t: t.new_zeros(flat_batch_dim, *t.shape[1:])
out = tensor_tree_map(allocate, output_chunk)
# Put the chunk in its pre-allocated space
out_type = type(output_chunk)
if(out_type is dict):
def assign(d1, d2):
for k,v in d1.items():
if(type(v) is dict):
assign(v, d2[k])
else:
v[i:i+chunk_size] = d2[k]
assign(out, output_chunk)
elif(out_type is tuple):
for x1, x2 in zip(out, output_chunk):
x1[i:i+chunk_size] = x2
elif(out_type is torch.Tensor):
out[i:i+chunk_size] = output_chunk
else:
raise ValueError("Not supported")
i += chunk_size
reshape = lambda t: t.reshape(*orig_batch_dims, *t.shape[1:])
out = tensor_tree_map(reshape, out)
return out
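# Example (illustrative, with a stand-in layer): run a layer over a batch of
# 16 in chunks of 4, reassembling the full output afterwards:
#
#   layer = lambda x, y: {"out": x + y}
#   inputs = {"x": torch.rand(16, 8), "y": torch.rand(16, 8)}
#   out = chunk_layer(layer, inputs, chunk_size=4, no_batch_dims=1)
#   out["out"].shape  # torch.Size([16, 8])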
import copy
import ml_collections as mlc
def model_config(name):
c = copy.deepcopy(config)
if(name == "model_3"):
c.model.template.enabled = False
elif(name == "model_4"):
c.model.template.enabled = False
elif(name == "model_5"):
c.model.template.enabled = False
return c
c_z = mlc.FieldReference(128)
c_m = mlc.FieldReference(256)
c_t = mlc.FieldReference(64)
c_e = mlc.FieldReference(64)
c_s = mlc.FieldReference(384)
chunk_size = mlc.FieldReference(4)
aux_distogram_bins = mlc.FieldReference(64)
config = mlc.ConfigDict({
"model": {
"c_z": c_z,
"c_m": c_m,
"c_t": c_t,
"c_e": c_e,
"c_s": c_s,
"no_cycles": 4,
"_mask_trans": False,
"input_embedder": {
"tf_dim": 22,
"msa_dim": 49,
"c_z": c_z,
"c_m": c_m,
"relpos_k": 32,
},
"recycling_embedder": {
"c_z": c_z,
"c_m": c_m,
"min_bin": 3.25,
"max_bin": 20.75,
"no_bins": 15,
"inf": 1e8,
},
"template": {
"distogram": {
"min_bin": 3.25,
"max_bin": 50.75,
"no_bins": 39,
},
"template_angle_embedder": {
# DISCREPANCY: c_in is supposed to be 51.
"c_in": 57,
"c_out": c_m,
},
"template_pair_embedder": {
"c_in": 88,
"c_out": c_t,
},
"template_pair_stack": {
"c_t": c_t,
# DISCREPANCY: c_hidden_tri_att here is given in the supplement
# as 64. In the code, it's 16.
"c_hidden_tri_att": 16,
"c_hidden_tri_mul": 64,
"no_blocks": 2,
"no_heads": 4,
"pair_transition_n": 2,
"dropout_rate": 0.25,
"blocks_per_ckpt": None,
"chunk_size": chunk_size,
},
"template_pointwise_attention": {
"c_t": c_t,
"c_z": c_z,
# DISCREPANCY: c_hidden here is given in the supplement as 64.
# It's actually 16.
"c_hidden": 16,
"no_heads": 4,
"chunk_size": chunk_size,
},
"eps": 1e-6,
"enabled": True,
"embed_angles": True,
},
"extra_msa": {
"extra_msa_embedder": {
"c_in": 25,
"c_out": c_e,
},
"extra_msa_stack": {
"c_m": c_e,
"c_z": c_z,
"c_hidden_msa_att": 8,
"c_hidden_opm": 32,
"c_hidden_mul": 128,
"c_hidden_pair_att": 32,
"no_heads_msa": 8,
"no_heads_pair": 4,
"no_blocks": 4,
"transition_n": 4,
"msa_dropout": 0.15,
"pair_dropout": 0.25,
"blocks_per_ckpt": None,
"chunk_size": chunk_size,
"inf": 1e9,
"eps": 1e-10,
},
"enabled": True,
},
"evoformer_stack": {
"c_m": c_m,
"c_z": c_z,
"c_hidden_msa_att": 32,
"c_hidden_opm": 32,
"c_hidden_mul": 128,
"c_hidden_pair_att": 32,
"c_s": c_s,
"no_heads_msa": 8,
"no_heads_pair": 4,
"no_blocks": 48,
"transition_n": 4,
"msa_dropout": 0.15,
"pair_dropout": 0.25,
"blocks_per_ckpt": None,
"chunk_size": chunk_size,
"inf": 1e9,
"eps": 1e-10,
},
"structure_module": {
"c_s": c_s,
"c_z": c_z,
"c_ipa": 16,
"c_resnet": 128,
"no_heads_ipa": 12,
"no_qk_points": 4,
"no_v_points": 8,
"dropout_rate": 0.1,
"no_blocks": 8,
"no_transition_layers": 1,
"no_resnet_blocks": 2,
"no_angles": 7,
"trans_scale_factor": 10,
"epsilon": 1e-12,
"inf": 1e5,
},
"heads": {
"lddt": {
"no_bins": 50,
"c_in": c_s,
"c_hidden": 128,
},
"distogram": {
"c_z": c_z,
"no_bins": aux_distogram_bins,
},
"tm_score": {
"c_z": c_z,
"no_bins": aux_distogram_bins,
"enabled": False,
},
"masked_msa": {
"c_m": c_m,
"c_out": 23,
},
"experimentally_resolved": {
"c_s": c_s,
"c_out": 37,
},
},
},
"relax": {
"max_iterations": 0, # no max
"tolerance": 2.39,
"stiffness": 10.0,
"max_outer_iterations": 20,
"exclude_residues": [],
},
})
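# Example (illustrative; relies on ml_collections' documented FieldReference
# semantics): the shared channel sizes above are FieldReferences, so updating
# one field propagates to every sub-config that shares it:
#
#   c = model_config("model_1")
#   c.model.c_z = 64  # input_embedder.c_z, evoformer_stack.c_z, etc. follow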
Index: simtk/openmm/app/topology.py
===================================================================
--- simtk.orig/openmm/app/topology.py
+++ simtk/openmm/app/topology.py
@@ -356,19 +356,35 @@
def isCyx(res):
names = [atom.name for atom in res._atoms]
return 'SG' in names and 'HG' not in names
+ # This function is used to prevent multiple di-sulfide bonds from being
+ # assigned to a given atom. This is a DeepMind modification.
+ def isDisulfideBonded(atom):
+ for b in self._bonds:
+ if (atom in b and b[0].name == 'SG' and
+ b[1].name == 'SG'):
+ return True
+
+ return False
cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)]
atomNames = [[atom.name for atom in res._atoms] for res in cyx]
for i in range(len(cyx)):
sg1 = cyx[i]._atoms[atomNames[i].index('SG')]
pos1 = positions[sg1.index]
+ candidate_distance, candidate_atom = 0.3*nanometers, None
for j in range(i):
sg2 = cyx[j]._atoms[atomNames[j].index('SG')]
pos2 = positions[sg2.index]
delta = [x-y for (x,y) in zip(pos1, pos2)]
distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2])
- if distance < 0.3*nanometers:
- self.addBond(sg1, sg2)
+ if distance < candidate_distance and not isDisulfideBonded(sg2):
+ candidate_distance = distance
+ candidate_atom = sg2
+ # Assign bond to closest pair.
+ if candidate_atom:
+ self.addBond(sg1, candidate_atom)
+
+
class Chain(object):
"""A Chain object represents a chain within a Topology."""
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
sys.path.append("lib/conda/lib/python3.9/site-packages")
import math
import pickle
import time
import torch
import torch.nn as nn
import numpy as np
from config import model_config
from alphafold.model.model import AlphaFold
import alphafold.np.protein as protein
import alphafold.np.relax.relax as relax
from alphafold.np import residue_constants
from alphafold.utils.import_weights import (
import_jax_weights_,
)
from alphafold.utils.tensor_utils import (
tree_map,
tensor_tree_map,
)
MODEL_NAME = "model_1"
MODEL_DEVICE = "cuda:1"
PARAM_PATH = "alphafold/resources/params/params_model_1.npz"
FEAT_PATH = "tests/test_data/sample_feats.pickle"
config = model_config(MODEL_NAME)
model = AlphaFold(config.model)
model = model.eval()
import_jax_weights_(model, PARAM_PATH)
model = model.to(MODEL_DEVICE)
with open(FEAT_PATH, "rb") as f:
    batch = pickle.load(f)
batch = {k: torch.as_tensor(v, device=MODEL_DEVICE) for k, v in batch.items()}
longs = [
"aatype",
"template_aatype",
"extra_msa",
"residx_atom37_to_atom14",
"residx_atom14_to_atom37",
]
for l in longs:
batch[l] = batch[l].long()
# Move the recycling dimension to the end
move_dim = lambda t: t.permute(*range(len(t.shape))[1:], 0).contiguous()
batch = tensor_tree_map(move_dim, batch)
with torch.no_grad():
t = time.time()
out = model(batch)
print(f"Inference time: {time.time() - t}")
# Toss out the recycling dimensions --- we don't need them anymore
batch = tensor_tree_map(lambda x: np.array(x[..., -1].cpu()), batch)
out = tensor_tree_map(lambda x: np.array(x.cpu()), out)
plddt = out["plddt"]
mean_plddt = np.mean(plddt)
plddt_b_factors = np.repeat(
plddt[..., None], residue_constants.atom_type_num, axis=-1
)
unrelaxed_protein = protein.from_prediction(
features=batch,
result=out,
b_factors=plddt_b_factors
)
amber_relaxer = relax.AmberRelaxation(
**config.relax
)
# Relax the prediction.
relaxed_pdb_str, _, _ = amber_relaxer.process(prot=unrelaxed_protein)
# Save the relaxed PDB.
output_dir = '.'
relaxed_output_path = os.path.join(output_dir, f'relaxed_{MODEL_NAME}.pdb')
with open(relaxed_output_path, 'w') as f:
f.write(relaxed_pdb_str)
#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and unzips the AlphaFold parameters.
#
# Usage: bash download_alphafold_params.sh /path/to/download/directory
set -e
if [[ $# -eq 0 ]]; then
echo "Error: download directory must be provided as an input argument."
exit 1
fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/params"
SOURCE_URL="https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \
--directory="${ROOT_DIR}" --preserve-permissions
rm "${ROOT_DIR}/${BASENAME}"
#!/bin/bash
# Install Miniconda locally
rm -rf lib/conda
rm -f /tmp/Miniconda3-latest-Linux-x86_64.sh
wget -q -P /tmp \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p lib/conda \
&& rm /tmp/Miniconda3-latest-Linux-x86_64.sh
# Grab conda-only packages
PATH=lib/conda/bin:$PATH
conda update -qy conda \
&& conda install -qy -c conda-forge \
python=3.9 \
openmm=7.5.1 \
pdbfixer
# Install DeepMind's OpenMM patch
OPENFOLD_DIR=$PWD
pushd lib/conda/lib/python3.9/site-packages/ \
&& patch -p0 < $OPENFOLD_DIR/lib/openmm.patch \
&& popd
# Download folding resources
wget -q -P alphafold/resources \
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
# Download pretrained AlphaFold weights
scripts/download_alphafold_params.sh alphafold/resources
# Decompress test data
gunzip tests/test_data/sample_feats.pickle.gz
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import numpy as np
import unittest
from alphafold.model.embedders import *
class TestInputEmbedder(unittest.TestCase):
def test_shape(self):
tf_dim = 2
msa_dim = 3
c_z = 5
c_m = 7
relpos_k = 11
b = 13
n_res = 17
n_clust = 19
tf = torch.rand((b, n_res, tf_dim))
ri = torch.rand((b, n_res))
msa = torch.rand((b, n_clust, n_res, msa_dim))
ie = InputEmbedder(tf_dim, msa_dim, c_z, c_m, relpos_k)
msa_emb, pair_emb = ie(tf, ri, msa)
self.assertTrue(msa_emb.shape == (b, n_clust, n_res, c_m))
self.assertTrue(pair_emb.shape == (b, n_res, n_res, c_z))
class TestRecyclingEmbedder(unittest.TestCase):
def test_shape(self):
batch_size = 2
n = 3
c_z = 5
c_m = 7
min_bin = 0
max_bin = 10
no_bins = 9
re = RecyclingEmbedder(c_m, c_z, min_bin, max_bin, no_bins)
m_1 = torch.rand((batch_size, n, c_m))
z = torch.rand((batch_size, n, n, c_z))
x = torch.rand((batch_size, n, 3))
m_1, z = re(m_1, z, x)
self.assertTrue(z.shape == (batch_size, n, n, c_z))
self.assertTrue(m_1.shape == (batch_size, n, c_m))
class TestTemplateAngleEmbedder(unittest.TestCase):
def test_shape(self):
template_angle_dim = 51
c_m = 256
batch_size = 4
n_templ = 4
n_res = 256
tae = TemplateAngleEmbedder(
template_angle_dim,
c_m,
)
x = torch.rand((batch_size, n_templ, n_res, template_angle_dim))
x = tae(x)
self.assertTrue(
x.shape == (batch_size, n_templ, n_res, c_m)
)
class TestTemplatePairEmbedder(unittest.TestCase):
def test_shape(self):
batch_size = 2
n_templ = 3
n_res = 5
template_pair_dim = 7
c_t = 11
tpe = TemplatePairEmbedder(
template_pair_dim,
c_t,
)
x = torch.rand((batch_size, n_templ, n_res, n_res, template_pair_dim))
x = tpe(x)
self.assertTrue(
x.shape == (batch_size, n_templ, n_res, n_res, c_t)
)
if __name__ == "__main__":
unittest.main()