Unverified commit ea7a6584, authored by Fazzie-Maqianli, committed by GitHub

Multimer (#55)

* support multimer data pipeline

* module for multimer

* support multimer inference

* add geometry

* delete unused code

* delete debug print
parent 2af21904
from typing import Sequence
import torch
from fastfold.data.data_transforms import curry1
from fastfold.utils.tensor_utils import masked_mean
def gumbel_noise(
shape: Sequence[int],
device: torch.device,
eps=1e-6,
generator=None,
) -> torch.Tensor:
"""Generate Gumbel Noise of given Shape.
This generates samples from Gumbel(0, 1).
Args:
shape: Shape of noise to return.
Returns:
Gumbel noise of given shape.
"""
uniform_noise = torch.rand(
shape, dtype=torch.float32, device=device, generator=generator
)
gumbel = -torch.log(-torch.log(uniform_noise + eps) + eps)
return gumbel
def gumbel_max_sample(logits: torch.Tensor, generator=None) -> torch.Tensor:
"""Samples from a probability distribution given by 'logits'.
This uses the Gumbel-max trick to implement the sampling in an efficient manner.
Args:
logits: Logarithm of probabilities to sample from, probabilities can be
unnormalized.
Returns:
Sample from logprobs in one-hot form.
"""
z = gumbel_noise(logits.shape, device=logits.device, generator=generator)
return torch.nn.functional.one_hot(
torch.argmax(logits + z, dim=-1),
logits.shape[-1],
)
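# Illustrative sanity check (not used by the pipeline; helper name and the
# probabilities below are hypothetical): argmax over Gumbel-perturbed logits
# reproduces the softmax distribution.
def _demo_gumbel_max_sample(n: int = 2000) -> torch.Tensor:
    logits = torch.log(torch.tensor([0.1, 0.3, 0.6]))
    samples = torch.stack([gumbel_max_sample(logits) for _ in range(n)])
    # Empirical frequencies, approximately [0.1, 0.3, 0.6] for large n.
    return samples.float().mean(dim=0)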
def gumbel_argsort_sample_idx(
logits: torch.Tensor,
generator=None
) -> torch.Tensor:
"""Samples with replacement from a distribution given by 'logits'.
This uses Gumbel trick to implement the sampling an efficient manner. For a
distribution over k items this samples k times without replacement, so this
is effectively sampling a random permutation with probabilities over the
permutations derived from the logprobs.
Args:
logits: Logarithm of probabilities to sample from, probabilities can be
unnormalized.
Returns:
Sample from logprobs in one-hot form.
"""
z = gumbel_noise(logits.shape, device=logits.device, generator=generator)
return torch.argsort(logits + z, dim=-1, descending=True)
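# Illustrative check (hypothetical helper): with uniform logits the argsort
# variant returns a uniformly random permutation of all k indices.
def _demo_gumbel_argsort(k: int = 5) -> None:
    perm = gumbel_argsort_sample_idx(torch.zeros(k))
    assert sorted(perm.tolist()) == list(range(k))  # a permutation of 0..k-1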
@curry1
def make_masked_msa(batch, config, replace_fraction, seed, eps=1e-6):
"""Create data for BERT on raw MSA."""
# Add a random amino acid uniformly.
random_aa = torch.tensor(
    [0.05] * 20 + [0., 0.],
    device=batch['msa'].device,
)
categorical_probs = (
config.uniform_prob * random_aa +
config.profile_prob * batch['msa_profile'] +
config.same_prob * torch.nn.functional.one_hot(batch['msa'], 22)
)
# Put all remaining probability on [MASK] which is a new column.
mask_prob = 1. - config.profile_prob - config.same_prob - config.uniform_prob
categorical_probs = torch.nn.functional.pad(
categorical_probs, [0,1], value=mask_prob
)
sh = batch['msa'].shape
mask_position = torch.rand(sh, device=batch['msa'].device) < replace_fraction
mask_position *= batch['msa_mask'].to(mask_position.dtype)
logits = torch.log(categorical_probs + eps)
g = torch.Generator(device=batch["msa"].device)
if seed is not None:
g.manual_seed(seed)
bert_msa = gumbel_max_sample(logits, generator=g)
bert_msa = torch.where(
mask_position,
torch.argmax(bert_msa, dim=-1),
batch['msa']
)
bert_msa *= batch['msa_mask'].to(bert_msa.dtype)
# Mix real and masked MSA.
if 'bert_mask' in batch:
batch['bert_mask'] *= mask_position.to(torch.float32)
else:
batch['bert_mask'] = mask_position.to(torch.float32)
batch['true_msa'] = batch['msa']
batch['msa'] = bert_msa
return batch
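# The four replacement probabilities partition the unit interval. A sketch
# with AlphaFold's published masked-MSA settings (assumed here purely for
# illustration): 10% uniform, 10% profile, 10% unchanged, and the remaining
# 70% of masked positions receive the extra [MASK] column.
def _demo_masked_msa_partition() -> None:
    uniform_prob, profile_prob, same_prob = 0.1, 0.1, 0.1
    mask_prob = 1.0 - uniform_prob - profile_prob - same_prob
    assert abs(mask_prob - 0.7) < 1e-9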
@curry1
def nearest_neighbor_clusters(batch, gap_agreement_weight=0.):
"""Assign each extra MSA sequence to its nearest neighbor in sampled MSA."""
device = batch["msa_mask"].device
# Determine how much weight we assign to each agreement. In theory, we could
# use a full blosum matrix here, but right now let's just down-weight gap
# agreement because it could be spurious.
# Never put weight on agreeing on BERT mask.
weights = torch.tensor(
    [1.] * 21 + [gap_agreement_weight] + [0.],
    device=device,
)
msa_mask = batch['msa_mask']
msa_one_hot = torch.nn.functional.one_hot(batch['msa'], 23)
extra_mask = batch['extra_msa_mask']
extra_one_hot = torch.nn.functional.one_hot(batch['extra_msa'], 23)
msa_one_hot_masked = msa_mask[:, :, None] * msa_one_hot
extra_one_hot_masked = extra_mask[:, :, None] * extra_one_hot
agreement = torch.einsum(
'mrc, nrc->nm',
extra_one_hot_masked,
weights * msa_one_hot_masked
)
cluster_assignment = torch.nn.functional.softmax(1e3 * agreement, dim=0)
cluster_assignment *= torch.einsum('mr, nr->mn', msa_mask, extra_mask)
cluster_count = torch.sum(cluster_assignment, dim=-1)
cluster_count += 1. # We always include the sequence itself.
msa_sum = torch.einsum('nm, mrc->nrc', cluster_assignment, extra_one_hot_masked)
msa_sum += msa_one_hot_masked
cluster_profile = msa_sum / cluster_count[:, None, None]
extra_deletion_matrix = batch['extra_deletion_matrix']
deletion_matrix = batch['deletion_matrix']
del_sum = torch.einsum(
'nm, mc->nc',
cluster_assignment,
extra_mask * extra_deletion_matrix
)
del_sum += deletion_matrix # Original sequence.
cluster_deletion_mean = del_sum / cluster_count[:, None]
batch['cluster_profile'] = cluster_profile
batch['cluster_deletion_mean'] = cluster_deletion_mean
return batch
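# The 1e3 temperature makes the softmax over cluster centres effectively a
# hard argmax, so each extra sequence is assigned to its single nearest
# cluster. A minimal sketch with hypothetical agreement scores:
def _demo_cluster_assignment() -> None:
    # Two sampled clusters (rows) scored against one extra sequence (column).
    agreement = torch.tensor([[5.0], [6.0]])
    assignment = torch.nn.functional.softmax(1e3 * agreement, dim=0)
    assert assignment[1, 0] > 0.999  # nearly all weight on cluster 1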
def create_target_feat(batch):
"""Create the target features"""
batch["target_feat"] = torch.nn.functional.one_hot(
batch["aatype"], 21
).to(torch.float32)
return batch
def create_msa_feat(batch):
"""Create and concatenate MSA features."""
device = batch["msa"]
msa_1hot = torch.nn.functional.one_hot(batch['msa'], 23)
deletion_matrix = batch['deletion_matrix']
has_deletion = torch.clamp(deletion_matrix, min=0., max=1.)[..., None]
pi = torch.acos(torch.zeros(1, device=deletion_matrix.device)) * 2
deletion_value = (torch.atan(deletion_matrix / 3.) * (2. / pi))[..., None]
deletion_mean_value = (
torch.atan(
batch['cluster_deletion_mean'] / 3.) *
(2. / pi)
)[..., None]
msa_feat = torch.cat(
[
msa_1hot,
has_deletion,
deletion_value,
batch['cluster_profile'],
deletion_mean_value
],
dim=-1,
)
batch["msa_feat"] = msa_feat
return batch
def build_extra_msa_feat(batch):
"""Expand extra_msa into 1hot and concat with other extra msa features.
We do this as late as possible as the one_hot extra msa can be very large.
Args:
batch: a dictionary with the following keys:
* 'extra_msa': [num_seq, num_res] MSA that wasn't selected as a cluster
centre. Note - This isn't one-hotted.
* 'extra_deletion_matrix': [num_seq, num_res] Number of deletions at given
position.
Returns:
Concatenated tensor of extra MSA features.
"""
# 23 = 20 amino acids + 'X' for unknown + gap + bert mask
extra_msa = batch['extra_msa']
deletion_matrix = batch['extra_deletion_matrix']
msa_1hot = torch.nn.functional.one_hot(extra_msa, 23)
has_deletion = torch.clamp(deletion_matrix, min=0., max=1.)[..., None]
pi = torch.acos(torch.zeros(1, device=deletion_matrix.device)) * 2
deletion_value = (
(torch.atan(deletion_matrix / 3.) * (2. / pi))[..., None]
)
extra_msa_mask = batch['extra_msa_mask']
catted = torch.cat([msa_1hot, has_deletion, deletion_value], dim=-1)
return catted
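# Channel bookkeeping (a sanity check, not pipeline code): create_msa_feat
# concatenates 23 (one-hot) + 1 (has_deletion) + 1 (deletion_value) +
# 23 (cluster_profile) + 1 (deletion_mean) = 49 channels, while
# build_extra_msa_feat yields 23 + 1 + 1 = 25 channels.
def _demo_msa_feat_channels() -> None:
    assert 23 + 1 + 1 + 23 + 1 == 49  # msa_feat
    assert 23 + 1 + 1 == 25           # extra MSA features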
@curry1
def sample_msa(batch, max_seq, max_extra_msa_seq, seed, inf=1e6):
"""Sample MSA randomly, remaining sequences are stored as `extra_*`.
Args:
batch: batch to sample msa from.
max_seq: number of sequences to sample.
Returns:
Protein with sampled msa.
"""
g = torch.Generator(device=batch["msa"].device)
if seed is not None:
g.manual_seed(seed)
# Sample uniformly among sequences with at least one non-masked position.
logits = (torch.clamp(torch.sum(batch['msa_mask'], dim=-1), 0., 1.) - 1.) * inf
# The cluster_bias_mask can be used to preserve the first row (target
# sequence) for each chain, for example.
if 'cluster_bias_mask' not in batch:
cluster_bias_mask = torch.nn.functional.pad(
batch['msa'].new_zeros(batch['msa'].shape[0] - 1),
(1, 0),
value=1.
)
else:
cluster_bias_mask = batch['cluster_bias_mask']
logits += cluster_bias_mask * inf
index_order = gumbel_argsort_sample_idx(logits, generator=g)
sel_idx = index_order[:max_seq]
extra_idx = index_order[max_seq:][:max_extra_msa_seq]
for k in ['msa', 'deletion_matrix', 'msa_mask', 'bert_mask']:
if k in batch:
batch['extra_' + k] = batch[k][extra_idx]
batch[k] = batch[k][sel_idx]
return batch
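# Hypothetical usage sketch for the curried transform: partition a
# 512-sequence MSA into 128 cluster centres plus the 384 leftovers
# (all tensor contents below are dummies).
def _demo_sample_msa() -> None:
    batch = {
        "msa": torch.randint(0, 22, (512, 100)),
        "msa_mask": torch.ones(512, 100),
        "deletion_matrix": torch.zeros(512, 100),
    }
    batch = sample_msa(128, 1024, seed=0)(batch)
    assert batch["msa"].shape == (128, 100)
    assert batch["extra_msa"].shape == (384, 100)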
def make_msa_profile(batch):
"""Compute the MSA profile."""
# Compute the profile for every residue (over all MSA sequences).
batch["msa_profile"] = masked_mean(
batch['msa_mask'][..., None],
torch.nn.functional.one_hot(batch['msa'], 22),
dim=-3,
)
return batch
......@@ -20,7 +20,7 @@ import ml_collections
import numpy as np
import torch
from fastfold.data import input_pipeline, input_pipeline_multimer
FeatureDict = Mapping[str, np.ndarray]
......@@ -72,10 +72,14 @@ def make_data_config(
def np_example_to_features(
np_example: FeatureDict,
config: ml_collections.ConfigDict,
is_multimer: bool,
mode: str,
):
np_example = dict(np_example)
num_res = int(np_example["seq_length"][0])
if is_multimer:
num_res = int(np_example["seq_length"])
else:
num_res = int(np_example["seq_length"][0])
cfg, feature_names = make_data_config(config, mode=mode, num_res=num_res)
if "deletion_matrix_int" in np_example:
......@@ -86,12 +90,20 @@ def np_example_to_features(
tensor_dict = np_to_tensor_dict(
np_example=np_example, features=feature_names
)
with torch.no_grad():
    if is_multimer:
        features = input_pipeline_multimer.process_tensors_from_config(
            tensor_dict,
            cfg.common,
            cfg[mode],
        )
    else:
        features = input_pipeline.process_tensors_from_config(
            tensor_dict,
            cfg.common,
            cfg[mode],
        )
return {k: v for k, v in features.items()}
......@@ -107,9 +119,11 @@ class FeaturePipeline:
self,
raw_features: FeatureDict,
mode: str = "train",
is_multimer: bool = False,
) -> FeatureDict:
return np_example_to_features(
np_example=raw_features,
config=self.config,
mode=mode,
is_multimer=is_multimer,
)
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import torch
from fastfold.data import (
data_transforms,
data_transforms_multimer,
)
def nonensembled_transform_fns(common_cfg, mode_cfg):
"""Input pipeline data transformers that are not ensembled."""
transforms = [
data_transforms.cast_to_64bit_ints,
data_transforms_multimer.make_msa_profile,
data_transforms_multimer.create_target_feat,
data_transforms.make_atom14_masks,
]
if(common_cfg.use_templates):
transforms.extend([
data_transforms.make_pseudo_beta("template_"),
])
return transforms
def ensembled_transform_fns(common_cfg, mode_cfg, ensemble_seed):
"""Input pipeline data transformers that can be ensembled and averaged."""
transforms = []
pad_msa_clusters = mode_cfg.max_msa_clusters
max_msa_clusters = pad_msa_clusters
max_extra_msa = common_cfg.max_extra_msa
msa_seed = None
if(not common_cfg.resample_msa_in_recycling):
msa_seed = ensemble_seed
transforms.append(
data_transforms_multimer.sample_msa(
max_msa_clusters,
max_extra_msa,
seed=msa_seed,
)
)
if "masked_msa" in common_cfg:
# Masked MSA should come *before* MSA clustering so that
# the clustering and full MSA profile do not leak information about
# the masked locations and secret corrupted locations.
transforms.append(
data_transforms_multimer.make_masked_msa(
common_cfg.masked_msa,
mode_cfg.masked_msa_replace_fraction,
seed=(msa_seed + 1) if msa_seed is not None else None,
)
)
transforms.append(data_transforms_multimer.nearest_neighbor_clusters())
transforms.append(data_transforms_multimer.create_msa_feat)
return transforms
def process_tensors_from_config(tensors, common_cfg, mode_cfg):
"""Based on the config, apply filters and transformations to the data."""
ensemble_seed = torch.Generator().seed()
def wrap_ensemble_fn(data, i):
"""Function to be mapped over the ensemble dimension."""
d = data.copy()
fns = ensembled_transform_fns(
common_cfg,
mode_cfg,
ensemble_seed,
)
fn = compose(fns)
d["ensemble_index"] = i
return fn(d)
no_templates = True
if("template_aatype" in tensors):
no_templates = tensors["template_aatype"].shape[0] == 0
nonensembled = nonensembled_transform_fns(
common_cfg,
mode_cfg,
)
tensors = compose(nonensembled)(tensors)
if("no_recycling_iters" in tensors):
num_recycling = int(tensors["no_recycling_iters"])
else:
num_recycling = common_cfg.max_recycling_iters
tensors = map_fn(
lambda x: wrap_ensemble_fn(tensors, x), torch.arange(num_recycling + 1)
)
return tensors
@data_transforms.curry1
def compose(x, fs):
for f in fs:
x = f(x)
return x
def map_fn(fun, x):
ensembles = [fun(elem) for elem in x]
features = ensembles[0].keys()
ensembled_dict = {}
for feat in features:
ensembled_dict[feat] = torch.stack(
[dict_i[feat] for dict_i in ensembles], dim=-1
)
return ensembled_dict
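# Illustrative check (hypothetical feature name): map_fn evaluates the
# ensemble function once per recycling iteration and stacks every feature
# along a new trailing dimension.
def _demo_map_fn() -> None:
    ens = map_fn(lambda i: {"x": torch.full((2,), float(i))}, torch.arange(3))
    assert ens["x"].shape == (2, 3)  # recycling/ensemble axis is last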
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for extracting identifiers from MSA sequence descriptions."""
import dataclasses
import re
from typing import Optional
# Sequences coming from UniProtKB database come in the
# `db|UniqueIdentifier|EntryName` format, e.g. `tr|A0A146SKV9|A0A146SKV9_FUNHE`
# or `sp|P0C2L1|A3X1_LOXLA` (for TREMBL/Swiss-Prot respectively).
_UNIPROT_PATTERN = re.compile(
r"""
^
# UniProtKB/TrEMBL or UniProtKB/Swiss-Prot
(?:tr|sp)
\|
# A primary accession number of the UniProtKB entry.
(?P<AccessionIdentifier>[A-Za-z0-9]{6,10})
# Occasionally there is a _0 or _1 isoform suffix, which we ignore.
(?:_\d)?
\|
# TREMBL repeats the accession ID here. Swiss-Prot has a mnemonic
# protein ID code.
(?:[A-Za-z0-9]+)
_
# A mnemonic species identification code.
(?P<SpeciesIdentifier>([A-Za-z0-9]){1,5})
# Small BFD uses a final value after an underscore, which we ignore.
(?:_\d+)?
$
""",
re.VERBOSE)
@dataclasses.dataclass(frozen=True)
class Identifiers:
species_id: str = ''
def _parse_sequence_identifier(msa_sequence_identifier: str) -> Identifiers:
"""Gets accession id and species from an msa sequence identifier.
The sequence identifier has the format specified by
_UNIPROT_TREMBL_ENTRY_NAME_PATTERN or _UNIPROT_SWISSPROT_ENTRY_NAME_PATTERN.
An example of a sequence identifier: `tr|A0A146SKV9|A0A146SKV9_FUNHE`
Args:
msa_sequence_identifier: a sequence identifier.
Returns:
An `Identifiers` instance with a uniprot_accession_id and species_id. These
can be empty in the case where no identifier was found.
"""
matches = re.search(_UNIPROT_PATTERN, msa_sequence_identifier.strip())
if matches:
return Identifiers(
species_id=matches.group('SpeciesIdentifier')
)
return Identifiers()
def _extract_sequence_identifier(description: str) -> Optional[str]:
"""Extracts sequence identifier from description. Returns None if no match."""
split_description = description.split()
if split_description:
return split_description[0].partition('/')[0]
else:
return None
def get_identifiers(description: str) -> Identifiers:
"""Computes extra MSA features from the description."""
sequence_identifier = _extract_sequence_identifier(description)
if sequence_identifier is None:
return Identifiers()
else:
return _parse_sequence_identifier(sequence_identifier)
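# Example using the identifier quoted in the comments above; the /11-139
# residue-range suffix (hypothetical) is stripped before matching.
def _demo_get_identifiers() -> None:
    ids = get_identifiers("tr|A0A146SKV9|A0A146SKV9_FUNHE/11-139")
    assert ids.species_id == "FUNHE"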
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pairing logic for multimer data pipeline."""
import collections
import functools
import string
from typing import Any, Dict, Iterable, List, Sequence, Mapping
import numpy as np
import pandas as pd
import scipy.linalg
from fastfold.np import residue_constants
# TODO: This stuff should probably also be in a config
MSA_GAP_IDX = residue_constants.restypes_with_x_and_gap.index('-')
SEQUENCE_GAP_CUTOFF = 0.5
SEQUENCE_SIMILARITY_CUTOFF = 0.9
MSA_PAD_VALUES = {'msa_all_seq': MSA_GAP_IDX,
'msa_mask_all_seq': 1,
'deletion_matrix_all_seq': 0,
'deletion_matrix_int_all_seq': 0,
'msa': MSA_GAP_IDX,
'msa_mask': 1,
'deletion_matrix': 0,
'deletion_matrix_int': 0}
MSA_FEATURES = ('msa', 'msa_mask', 'deletion_matrix', 'deletion_matrix_int')
SEQ_FEATURES = ('residue_index', 'aatype', 'all_atom_positions',
'all_atom_mask', 'seq_mask', 'between_segment_residues',
'has_alt_locations', 'has_hetatoms', 'asym_id', 'entity_id',
'sym_id', 'entity_mask', 'deletion_mean',
'prediction_atom_mask',
'literature_positions', 'atom_indices_to_group_indices',
'rigid_group_default_frame')
TEMPLATE_FEATURES = ('template_aatype', 'template_all_atom_positions',
'template_all_atom_mask')
CHAIN_FEATURES = ('num_alignments', 'seq_length')
def create_paired_features(
chains: Iterable[Mapping[str, np.ndarray]],
) -> List[Mapping[str, np.ndarray]]:
"""Returns the original chains with paired NUM_SEQ features.
Args:
chains: A list of feature dictionaries for each chain.
Returns:
A list of feature dictionaries with sequence features including only
rows to be paired.
"""
chains = list(chains)
chain_keys = chains[0].keys()
if len(chains) < 2:
return chains
else:
updated_chains = []
paired_chains_to_paired_row_indices = pair_sequences(chains)
paired_rows = reorder_paired_rows(
paired_chains_to_paired_row_indices)
for chain_num, chain in enumerate(chains):
new_chain = {k: v for k, v in chain.items() if '_all_seq' not in k}
for feature_name in chain_keys:
if feature_name.endswith('_all_seq'):
feats_padded = pad_features(chain[feature_name], feature_name)
new_chain[feature_name] = feats_padded[paired_rows[:, chain_num]]
new_chain['num_alignments_all_seq'] = np.asarray(
len(paired_rows[:, chain_num]))
updated_chains.append(new_chain)
return updated_chains
def pad_features(feature: np.ndarray, feature_name: str) -> np.ndarray:
"""Add a 'padding' row at the end of the features list.
The padding row will be selected as a 'paired' row in the case of partial
alignment - for the chain that doesn't have paired alignment.
Args:
feature: The feature to be padded.
feature_name: The name of the feature to be padded.
Returns:
The feature with an additional padding row.
"""
assert feature.dtype != np.dtype(np.string_)
if feature_name in ('msa_all_seq', 'msa_mask_all_seq',
'deletion_matrix_all_seq', 'deletion_matrix_int_all_seq'):
num_res = feature.shape[1]
padding = MSA_PAD_VALUES[feature_name] * np.ones([1, num_res],
feature.dtype)
elif feature_name == 'msa_species_identifiers_all_seq':
padding = [b'']
else:
return feature
feats_padded = np.concatenate([feature, padding], axis=0)
return feats_padded
def _make_msa_df(chain_features: Mapping[str, np.ndarray]) -> pd.DataFrame:
"""Makes dataframe with msa features needed for msa pairing."""
chain_msa = chain_features['msa_all_seq']
query_seq = chain_msa[0]
per_seq_similarity = np.sum(
query_seq[None] == chain_msa, axis=-1) / float(len(query_seq))
per_seq_gap = np.sum(chain_msa == 21, axis=-1) / float(len(query_seq))
msa_df = pd.DataFrame({
'msa_species_identifiers':
chain_features['msa_species_identifiers_all_seq'],
'msa_row':
np.arange(len(
chain_features['msa_species_identifiers_all_seq'])),
'msa_similarity': per_seq_similarity,
'gap': per_seq_gap
})
return msa_df
def _create_species_dict(msa_df: pd.DataFrame) -> Dict[bytes, pd.DataFrame]:
"""Creates mapping from species to msa dataframe of that species."""
species_lookup = {}
for species, species_df in msa_df.groupby('msa_species_identifiers'):
species_lookup[species] = species_df
return species_lookup
def _match_rows_by_sequence_similarity(this_species_msa_dfs: List[pd.DataFrame]
) -> List[List[int]]:
"""Finds MSA sequence pairings across chains based on sequence similarity.
Each chain's MSA sequences are first sorted by their sequence similarity to
their respective target sequence. The sequences are then paired, starting
from the sequences most similar to their target sequence.
Args:
this_species_msa_dfs: a list of dataframes containing MSA features for
sequences for a specific species.
Returns:
A list of lists, each containing M indices corresponding to paired MSA rows,
where M is the number of chains.
"""
all_paired_msa_rows = []
num_seqs = [len(species_df) for species_df in this_species_msa_dfs
if species_df is not None]
take_num_seqs = np.min(num_seqs)
sort_by_similarity = (
lambda x: x.sort_values('msa_similarity', axis=0, ascending=False))
for species_df in this_species_msa_dfs:
if species_df is not None:
species_df_sorted = sort_by_similarity(species_df)
msa_rows = species_df_sorted.msa_row.iloc[:take_num_seqs].values
else:
msa_rows = [-1] * take_num_seqs # take the last 'padding' row
all_paired_msa_rows.append(msa_rows)
all_paired_msa_rows = list(np.array(all_paired_msa_rows).transpose())
return all_paired_msa_rows
def pair_sequences(
examples: List[Mapping[str, np.ndarray]],
) -> Dict[int, np.ndarray]:
"""Returns indices for paired MSA sequences across chains."""
num_examples = len(examples)
all_chain_species_dict = []
common_species = set()
for chain_features in examples:
msa_df = _make_msa_df(chain_features)
species_dict = _create_species_dict(msa_df)
all_chain_species_dict.append(species_dict)
common_species.update(set(species_dict))
common_species = sorted(common_species)
common_species.remove(b'') # Remove target sequence species.
all_paired_msa_rows = [np.zeros(len(examples), int)]
all_paired_msa_rows_dict = {k: [] for k in range(num_examples)}
all_paired_msa_rows_dict[num_examples] = [np.zeros(len(examples), int)]
for species in common_species:
if not species:
continue
this_species_msa_dfs = []
species_dfs_present = 0
for species_dict in all_chain_species_dict:
if species in species_dict:
this_species_msa_dfs.append(species_dict[species])
species_dfs_present += 1
else:
this_species_msa_dfs.append(None)
# Skip species that are present in only one chain.
if species_dfs_present <= 1:
continue
if np.any(
np.array([len(species_df) for species_df in
this_species_msa_dfs if
isinstance(species_df, pd.DataFrame)]) > 600):
continue
paired_msa_rows = _match_rows_by_sequence_similarity(this_species_msa_dfs)
all_paired_msa_rows.extend(paired_msa_rows)
all_paired_msa_rows_dict[species_dfs_present].extend(paired_msa_rows)
all_paired_msa_rows_dict = {
num_examples: np.array(paired_msa_rows) for
num_examples, paired_msa_rows in all_paired_msa_rows_dict.items()
}
return all_paired_msa_rows_dict
def reorder_paired_rows(all_paired_msa_rows_dict: Dict[int, np.ndarray]
) -> np.ndarray:
"""Creates a list of indices of paired MSA rows across chains.
Args:
all_paired_msa_rows_dict: a mapping from the number of paired chains to the
paired indices.
Returns:
a list of lists, each containing indices of paired MSA rows across chains.
The paired-index lists are ordered by:
1) the number of chains in the paired alignment, i.e, all-chain pairings
will come first.
2) e-values
"""
all_paired_msa_rows = []
for num_pairings in sorted(all_paired_msa_rows_dict, reverse=True):
paired_rows = all_paired_msa_rows_dict[num_pairings]
paired_rows_product = abs(np.array([np.prod(rows) for rows in paired_rows]))
paired_rows_sort_index = np.argsort(paired_rows_product)
all_paired_msa_rows.extend(paired_rows[paired_rows_sort_index])
return np.array(all_paired_msa_rows)
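# Hypothetical example: two full (2-chain) pairings and one partial pairing.
# Full pairings come first, ordered by the |product| of their row indices.
def _demo_reorder_paired_rows() -> None:
    d = {1: np.array([[0, -1]]), 2: np.array([[3, 2], [1, 1]])}
    assert reorder_paired_rows(d).tolist() == [[1, 1], [3, 2], [0, -1]]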
def block_diag(*arrs: np.ndarray, pad_value: float = 0.0) -> np.ndarray:
"""Like scipy.linalg.block_diag but with an optional padding value."""
ones_arrs = [np.ones_like(x) for x in arrs]
off_diag_mask = 1.0 - scipy.linalg.block_diag(*ones_arrs)
diag = scipy.linalg.block_diag(*arrs)
diag += (off_diag_mask * pad_value).astype(diag.dtype)
return diag
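# Example: MSA blocks from two chains placed on a block diagonal, with
# off-diagonal entries filled by the gap index (21) rather than zero.
def _demo_block_diag() -> None:
    a = np.array([[1, 2]])
    b = np.array([[3]])
    out = block_diag(a, b, pad_value=MSA_GAP_IDX)  # MSA_GAP_IDX == 21
    assert out.tolist() == [[1, 2, 21], [21, 21, 3]]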
def _correct_post_merged_feats(
np_example: Mapping[str, np.ndarray],
np_chains_list: Sequence[Mapping[str, np.ndarray]],
pair_msa_sequences: bool
) -> Mapping[str, np.ndarray]:
"""Adds features that need to be computed/recomputed post merging."""
num_res = np_example['aatype'].shape[0]
np_example['seq_length'] = np.asarray(
[num_res] * num_res,
dtype=np.int32
)
np_example['num_alignments'] = np.asarray(
np_example['msa'].shape[0],
dtype=np.int32
)
if not pair_msa_sequences:
# Generate a bias that is 1 for the first row of every block in the
# block diagonal MSA - i.e. make sure the cluster stack always includes
# the query sequences for each chain (since the first row is the query
# sequence).
cluster_bias_masks = []
for chain in np_chains_list:
mask = np.zeros(chain['msa'].shape[0])
mask[0] = 1
cluster_bias_masks.append(mask)
np_example['cluster_bias_mask'] = np.concatenate(cluster_bias_masks)
# Initialize Bert mask with masked out off diagonals.
msa_masks = [
np.ones(x['msa'].shape, dtype=np.float32)
for x in np_chains_list
]
np_example['bert_mask'] = block_diag(
*msa_masks, pad_value=0
)
else:
np_example['cluster_bias_mask'] = np.zeros(np_example['msa'].shape[0])
np_example['cluster_bias_mask'][0] = 1
# Initialize Bert mask with masked out off diagonals.
msa_masks = [
np.ones(x['msa'].shape, dtype=np.float32) for
x in np_chains_list
]
msa_masks_all_seq = [
np.ones(x['msa_all_seq'].shape, dtype=np.float32) for
x in np_chains_list
]
msa_mask_block_diag = block_diag(
*msa_masks, pad_value=0
)
msa_mask_all_seq = np.concatenate(msa_masks_all_seq, axis=1)
np_example['bert_mask'] = np.concatenate(
[msa_mask_all_seq, msa_mask_block_diag],
axis=0
)
return np_example
def _pad_templates(chains: Sequence[Mapping[str, np.ndarray]],
max_templates: int) -> Sequence[Mapping[str, np.ndarray]]:
"""For each chain pad the number of templates to a fixed size.
Args:
chains: A list of protein chains.
max_templates: Each chain will be padded to have this many templates.
Returns:
The list of chains, updated to have template features padded to
max_templates.
"""
for chain in chains:
for k, v in chain.items():
if k in TEMPLATE_FEATURES:
padding = np.zeros_like(v.shape)
padding[0] = max_templates - v.shape[0]
padding = [(0, p) for p in padding]
chain[k] = np.pad(v, padding, mode='constant')
return chains
def _merge_features_from_multiple_chains(
chains: Sequence[Mapping[str, np.ndarray]],
pair_msa_sequences: bool) -> Mapping[str, np.ndarray]:
"""Merge features from multiple chains.
Args:
chains: A list of feature dictionaries that we want to merge.
pair_msa_sequences: Whether to concatenate MSA features along the
num_res dimension (if True), or to block diagonalize them (if False).
Returns:
A feature dictionary for the merged example.
"""
merged_example = {}
for feature_name in chains[0]:
feats = [x[feature_name] for x in chains]
feature_name_split = feature_name.split('_all_seq')[0]
if feature_name_split in MSA_FEATURES:
if pair_msa_sequences or '_all_seq' in feature_name:
merged_example[feature_name] = np.concatenate(feats, axis=1)
else:
merged_example[feature_name] = block_diag(
*feats, pad_value=MSA_PAD_VALUES[feature_name])
elif feature_name_split in SEQ_FEATURES:
merged_example[feature_name] = np.concatenate(feats, axis=0)
elif feature_name_split in TEMPLATE_FEATURES:
merged_example[feature_name] = np.concatenate(feats, axis=1)
elif feature_name_split in CHAIN_FEATURES:
merged_example[feature_name] = sum(feats).astype(np.int32)
else:
merged_example[feature_name] = feats[0]
return merged_example
def _merge_homomers_dense_msa(
chains: Iterable[Mapping[str, np.ndarray]]) -> Sequence[Mapping[str, np.ndarray]]:
"""Merge all identical chains, making the resulting MSA dense.
Args:
chains: An iterable of features for each chain.
Returns:
A list of feature dictionaries. All features with the same entity_id
will be merged - MSA features will be concatenated along the num_res
dimension - making them dense.
"""
entity_chains = collections.defaultdict(list)
for chain in chains:
entity_id = chain['entity_id'][0]
entity_chains[entity_id].append(chain)
grouped_chains = []
for entity_id in sorted(entity_chains):
chains = entity_chains[entity_id]
grouped_chains.append(chains)
chains = [
_merge_features_from_multiple_chains(chains, pair_msa_sequences=True)
for chains in grouped_chains]
return chains
def _concatenate_paired_and_unpaired_features(
example: Mapping[str, np.ndarray]) -> Mapping[str, np.ndarray]:
"""Merges paired and block-diagonalised features."""
features = MSA_FEATURES
for feature_name in features:
if feature_name in example:
feat = example[feature_name]
feat_all_seq = example[feature_name + '_all_seq']
merged_feat = np.concatenate([feat_all_seq, feat], axis=0)
example[feature_name] = merged_feat
example['num_alignments'] = np.array(example['msa'].shape[0],
dtype=np.int32)
return example
def merge_chain_features(np_chains_list: List[Mapping[str, np.ndarray]],
pair_msa_sequences: bool,
max_templates: int) -> Mapping[str, np.ndarray]:
"""Merges features for multiple chains to single FeatureDict.
Args:
np_chains_list: List of FeatureDicts for each chain.
pair_msa_sequences: Whether to merge paired MSAs.
max_templates: The maximum number of templates to include.
Returns:
Single FeatureDict for entire complex.
"""
np_chains_list = _pad_templates(
np_chains_list, max_templates=max_templates)
np_chains_list = _merge_homomers_dense_msa(np_chains_list)
# Unpaired MSA features will be always block-diagonalised; paired MSA
# features will be concatenated.
np_example = _merge_features_from_multiple_chains(
np_chains_list, pair_msa_sequences=False)
if pair_msa_sequences:
np_example = _concatenate_paired_and_unpaired_features(np_example)
np_example = _correct_post_merged_feats(
np_example=np_example,
np_chains_list=np_chains_list,
pair_msa_sequences=pair_msa_sequences)
return np_example
def deduplicate_unpaired_sequences(
np_chains: List[Mapping[str, np.ndarray]]) -> List[Mapping[str, np.ndarray]]:
"""Removes unpaired sequences which duplicate a paired sequence."""
feature_names = np_chains[0].keys()
msa_features = MSA_FEATURES
for chain in np_chains:
# Convert the msa_all_seq numpy array to a tuple for hashing.
sequence_set = set(tuple(s) for s in chain['msa_all_seq'])
keep_rows = []
# Go through unpaired MSA seqs and remove any rows that correspond to the
# sequences that are already present in the paired MSA.
for row_num, seq in enumerate(chain['msa']):
if tuple(seq) not in sequence_set:
keep_rows.append(row_num)
for feature_name in feature_names:
if feature_name in msa_features:
chain[feature_name] = chain[feature_name][keep_rows]
chain['num_alignments'] = np.array(chain['msa'].shape[0], dtype=np.int32)
return np_chains
......@@ -26,12 +26,12 @@ from fastfold.utils.feats import (
)
from fastfold.model.nn.embedders import (
InputEmbedder,
RecyclingEmbedder,
TemplateAngleEmbedder,
TemplatePairEmbedder,
ExtraMSAEmbedder,
)
from fastfold.model.nn.embedders_multimer import TemplateEmbedderMultimer, InputEmbedderMultimer
from fastfold.model.nn.evoformer import EvoformerStack, ExtraMSAStack
from fastfold.model.nn.heads import AuxiliaryHeads
import fastfold.common.residue_constants as residue_constants
......@@ -74,25 +74,31 @@ class AlphaFold(nn.Module):
self.input_embedder = InputEmbedderMultimer(
**config["input_embedder"],
)
self.template_embedder = TemplateEmbedderMultimer(
template_config,
)
else:
self.input_embedder = InputEmbedder(
**config["input_embedder"],
)
    self.template_angle_embedder = TemplateAngleEmbedder(
        **template_config["template_angle_embedder"],
    )
    self.template_pair_embedder = TemplatePairEmbedder(
        **template_config["template_pair_embedder"],
    )
    self.template_pair_stack = TemplatePairStack(
        **template_config["template_pair_stack"],
    )
    self.template_pointwise_att = TemplatePointwiseAttention(
        **template_config["template_pointwise_attention"],
    )
self.recycling_embedder = RecyclingEmbedder(
    **config["recycling_embedder"],
)
self.extra_msa_embedder = ExtraMSAEmbedder(
**extra_msa_config["extra_msa_embedder"],
)
......@@ -103,6 +109,7 @@ class AlphaFold(nn.Module):
**config["evoformer_stack"],
)
self.structure_module = StructureModule(
is_multimer=self.globals.is_multimer,
**config["structure_module"],
)
......
......@@ -125,146 +125,6 @@ class InputEmbedder(nn.Module):
return msa_emb, pair_emb
class RecyclingEmbedder(nn.Module):
"""
......
from functools import partial
import torch
import torch.nn as nn
from typing import Tuple, Dict
from fastfold.utils import all_atom_multimer
from fastfold.utils.feats import dgram_from_positions
from fastfold.model.nn.primitives import Linear, LayerNorm
from fastfold.model.nn.template import (
TemplatePairStack,
TemplatePointwiseAttention,
)
from fastfold.utils import geometry
from fastfold.utils.tensor_utils import one_hot, tensor_tree_map, dict_multimap
class InputEmbedderMultimer(nn.Module):
"""
Embeds a subset of the input features.
Implements Algorithms 3 (InputEmbedder) and 4 (relpos).
"""
def __init__(
self,
tf_dim: int,
msa_dim: int,
c_z: int,
c_m: int,
max_relative_idx: int,
use_chain_relative: bool,
max_relative_chain: int,
**kwargs,
):
"""
Args:
tf_dim:
Final dimension of the target features
msa_dim:
Final dimension of the MSA features
c_z:
Pair embedding dimension
c_m:
MSA embedding dimension
relpos_k:
Window size used in relative positional encoding
"""
super(InputEmbedderMultimer, self).__init__()
self.tf_dim = tf_dim
self.msa_dim = msa_dim
self.c_z = c_z
self.c_m = c_m
self.linear_tf_z_i = Linear(tf_dim, c_z)
self.linear_tf_z_j = Linear(tf_dim, c_z)
self.linear_tf_m = Linear(tf_dim, c_m)
self.linear_msa_m = Linear(msa_dim, c_m)
# RPE stuff
self.max_relative_idx = max_relative_idx
self.use_chain_relative = use_chain_relative
self.max_relative_chain = max_relative_chain
if self.use_chain_relative:
self.no_bins = 2 * max_relative_idx + 2 + 1 + 2 * max_relative_chain + 2
else:
self.no_bins = 2 * max_relative_idx + 1
self.linear_relpos = Linear(self.no_bins, c_z)
def relpos(self, batch: Dict[str, torch.Tensor]):
pos = batch["residue_index"]
asym_id = batch["asym_id"]
asym_id_same = asym_id[..., None] == asym_id[..., None, :]
offset = pos[..., None] - pos[..., None, :]
clipped_offset = torch.clamp(
offset + self.max_relative_idx, 0, 2 * self.max_relative_idx
)
rel_feats = []
if self.use_chain_relative:
final_offset = torch.where(
asym_id_same,
clipped_offset,
(2 * self.max_relative_idx + 1) * torch.ones_like(clipped_offset),
)
rel_pos = torch.nn.functional.one_hot(
final_offset,
2 * self.max_relative_idx + 2,
)
rel_feats.append(rel_pos)
entity_id = batch["entity_id"]
entity_id_same = entity_id[..., None] == entity_id[..., None, :]
rel_feats.append(entity_id_same[..., None])
sym_id = batch["sym_id"]
rel_sym_id = sym_id[..., None] - sym_id[..., None, :]
max_rel_chain = self.max_relative_chain
clipped_rel_chain = torch.clamp(
rel_sym_id + max_rel_chain,
0,
2 * max_rel_chain,
)
final_rel_chain = torch.where(
entity_id_same,
clipped_rel_chain,
(2 * max_rel_chain + 1) * torch.ones_like(clipped_rel_chain),
)
rel_chain = torch.nn.functional.one_hot(
final_rel_chain.long(),
2 * max_rel_chain + 2,
)
rel_feats.append(rel_chain)
else:
rel_pos = torch.nn.functional.one_hot(
clipped_offset,
2 * self.max_relative_idx + 1,
)
rel_feats.append(rel_pos)
rel_feat = torch.cat(rel_feats, dim=-1).to(self.linear_relpos.weight.dtype)
return self.linear_relpos(rel_feat)
def forward(
self, batch: Dict[str, torch.Tensor]
) -> Tuple[torch.Tensor, torch.Tensor]:
tf = batch["target_feat"]
msa = batch["msa_feat"]
# [*, N_res, c_z]
tf_emb_i = self.linear_tf_z_i(tf)
tf_emb_j = self.linear_tf_z_j(tf)
# [*, N_res, N_res, c_z]
pair_emb = tf_emb_i[..., None, :] + tf_emb_j[..., None, :, :]
pair_emb = pair_emb + self.relpos(batch)
# [*, N_clust, N_res, c_m]
n_clust = msa.shape[-3]
tf_m = (
self.linear_tf_m(tf)
.unsqueeze(-3)
.expand(((-1,) * len(tf.shape[:-2]) + (n_clust, -1, -1)))
)
msa_emb = self.linear_msa_m(msa) + tf_m
return msa_emb, pair_emb
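# Sanity check of the relative-position bin count under AlphaFold-Multimer's
# published hyperparameters (max_relative_idx=32, use_chain_relative=True,
# max_relative_chain=2); the values here are assumptions for illustration.
def _demo_relpos_bins() -> None:
    max_relative_idx, max_relative_chain = 32, 2
    no_bins = 2 * max_relative_idx + 2 + 1 + 2 * max_relative_chain + 2
    assert no_bins == 73  # 66 offset bins + 1 same-entity bit + 6 chain bins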
class TemplatePairEmbedderMultimer(nn.Module):
def __init__(self,
c_z: int,
c_out: int,
c_dgram: int,
c_aatype: int,
):
super().__init__()
self.dgram_linear = Linear(c_dgram, c_out)
self.aatype_linear_1 = Linear(c_aatype, c_out)
self.aatype_linear_2 = Linear(c_aatype, c_out)
self.query_embedding_layer_norm = LayerNorm(c_z)
self.query_embedding_linear = Linear(c_z, c_out)
self.pseudo_beta_mask_linear = Linear(1, c_out)
self.x_linear = Linear(1, c_out)
self.y_linear = Linear(1, c_out)
self.z_linear = Linear(1, c_out)
self.backbone_mask_linear = Linear(1, c_out)
def forward(self,
template_dgram: torch.Tensor,
aatype_one_hot: torch.Tensor,
query_embedding: torch.Tensor,
pseudo_beta_mask: torch.Tensor,
backbone_mask: torch.Tensor,
multichain_mask_2d: torch.Tensor,
unit_vector: geometry.Vec3Array,
) -> torch.Tensor:
act = 0.
pseudo_beta_mask_2d = (
pseudo_beta_mask[..., None] * pseudo_beta_mask[..., None, :]
)
pseudo_beta_mask_2d *= multichain_mask_2d
template_dgram *= pseudo_beta_mask_2d[..., None]
act += self.dgram_linear(template_dgram)
act += self.pseudo_beta_mask_linear(pseudo_beta_mask_2d[..., None])
aatype_one_hot = aatype_one_hot.to(template_dgram.dtype)
act += self.aatype_linear_1(aatype_one_hot[..., None, :, :])
act += self.aatype_linear_2(aatype_one_hot[..., None, :])
backbone_mask_2d = (
backbone_mask[..., None] * backbone_mask[..., None, :]
)
backbone_mask_2d *= multichain_mask_2d
x, y, z = [coord * backbone_mask_2d for coord in unit_vector]
act += self.x_linear(x[..., None])
act += self.y_linear(y[..., None])
act += self.z_linear(z[..., None])
act += self.backbone_mask_linear(backbone_mask_2d[..., None])
query_embedding = self.query_embedding_layer_norm(query_embedding)
act += self.query_embedding_linear(query_embedding)
return act
class TemplateSingleEmbedderMultimer(nn.Module):
def __init__(self,
c_in: int,
c_m: int,
):
super().__init__()
self.template_single_embedder = Linear(c_in, c_m)
self.template_projector = Linear(c_m, c_m)
def forward(self,
batch,
atom_pos,
aatype_one_hot,
):
out = {}
template_chi_angles, template_chi_mask = (
all_atom_multimer.compute_chi_angles(
atom_pos,
batch["template_all_atom_mask"],
batch["template_aatype"],
)
)
template_features = torch.cat(
[
aatype_one_hot,
torch.sin(template_chi_angles) * template_chi_mask,
torch.cos(template_chi_angles) * template_chi_mask,
template_chi_mask,
],
dim=-1,
)
template_mask = template_chi_mask[..., 0]
template_activations = self.template_single_embedder(
template_features
)
template_activations = torch.nn.functional.relu(
template_activations
)
template_activations = self.template_projector(
template_activations,
)
out["template_single_embedding"] = (
template_activations
)
out["template_mask"] = template_mask
return out
class TemplateEmbedderMultimer(nn.Module):
def __init__(self, config):
super(TemplateEmbedderMultimer, self).__init__()
self.config = config
self.template_pair_embedder = TemplatePairEmbedderMultimer(
**config["template_pair_embedder"],
)
self.template_single_embedder = TemplateSingleEmbedderMultimer(
**config["template_single_embedder"],
)
self.template_pair_stack = TemplatePairStack(
**config["template_pair_stack"],
)
self.linear_t = Linear(config.c_t, config.c_z)
def forward(self,
batch,
z,
padding_mask_2d,
templ_dim,
chunk_size,
multichain_mask_2d,
):
template_embeds = []
n_templ = batch["template_aatype"].shape[templ_dim]
for i in range(n_templ):
idx = batch["template_aatype"].new_tensor(i)
single_template_feats = tensor_tree_map(
lambda t: torch.index_select(t, templ_dim, idx),
batch,
)
single_template_embeds = {}
act = 0.
template_positions, pseudo_beta_mask = (
single_template_feats["template_pseudo_beta"],
single_template_feats["template_pseudo_beta_mask"],
)
template_dgram = dgram_from_positions(
template_positions,
inf=self.config.inf,
**self.config.distogram,
)
aatype_one_hot = torch.nn.functional.one_hot(
single_template_feats["template_aatype"], 22,
)
raw_atom_pos = single_template_feats["template_all_atom_positions"]
atom_pos = geometry.Vec3Array.from_array(raw_atom_pos)
rigid, backbone_mask = all_atom_multimer.make_backbone_affine(
atom_pos,
single_template_feats["template_all_atom_mask"],
single_template_feats["template_aatype"],
)
points = rigid.translation
rigid_vec = rigid[..., None].inverse().apply_to_point(points)
unit_vector = rigid_vec.normalized()
pair_act = self.template_pair_embedder(
template_dgram,
aatype_one_hot,
z,
pseudo_beta_mask,
backbone_mask,
multichain_mask_2d,
unit_vector,
)
single_template_embeds["template_pair_embedding"] = pair_act
single_template_embeds.update(
self.template_single_embedder(
single_template_feats,
atom_pos,
aatype_one_hot,
)
)
template_embeds.append(single_template_embeds)
template_embeds = dict_multimap(
partial(torch.cat, dim=templ_dim),
template_embeds,
)
# [*, S_t, N, N, C_z]
t = self.template_pair_stack(
template_embeds["template_pair_embedding"],
padding_mask_2d.unsqueeze(-3).to(dtype=z.dtype),
chunk_size=chunk_size,
_mask_trans=False,
)
# [*, N, N, C_z]
t = torch.sum(t, dim=-4) / n_templ
t = torch.nn.functional.relu(t)
t = self.linear_t(t)
template_embeds["template_pair_embedding"] = t
return template_embeds
......@@ -16,7 +16,7 @@
import math
import torch
import torch.nn as nn
from typing import Optional, Tuple, Union
from fastfold.model.nn.primitives import Linear, LayerNorm, ipa_point_weights_init_
from fastfold.common.residue_constants import (
......@@ -25,6 +25,9 @@ from fastfold.common.residue_constants import (
restype_atom14_mask,
restype_atom14_rigid_group_positions,
)
from fastfold.utils.geometry.quat_rigid import QuatRigid
from fastfold.utils.geometry.rigid_matrix_vector import Rigid3Array
from fastfold.utils.geometry.vector import Vec3Array
from fastfold.utils.feats import (
frames_and_literature_positions_to_atom14_pos,
torsion_angles_to_frames,
......@@ -150,11 +153,47 @@ class AngleResnet(nn.Module):
return unnormalized_s, s
class PointProjection(nn.Module):
def __init__(
self,
c_hidden: int,
num_points: int,
no_heads: int,
return_local_points: bool = False,
):
super().__init__()
self.return_local_points = return_local_points
self.no_heads = no_heads
self.linear = Linear(c_hidden, no_heads * 3 * num_points)
def forward(
self,
activations: torch.Tensor,
rigids: Rigid3Array,
) -> Union[Vec3Array, Tuple[Vec3Array, Vec3Array]]:
# TODO: Needs to run in high precision during training
points_local = self.linear(activations)
points_local = points_local.reshape(
*points_local.shape[:-1],
self.no_heads,
-1,
)
points_local = torch.split(points_local, points_local.shape[-1] // 3, dim=-1)
points_local = Vec3Array(*points_local)
points_global = rigids[..., None, None].apply_to_point(points_local)
if self.return_local_points:
return points_global, points_local
return points_global
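# Shape sketch for PointProjection (hypothetical sizes; assumes the
# Rigid3Array.identity constructor from the DeepMind geometry port):
# projecting c_hidden=16 activations into 4 points per head for 8 heads
# yields a Vec3Array whose x/y/z components have shape [1, 10, 8, 4].
def _demo_point_projection() -> None:
    proj = PointProjection(c_hidden=16, num_points=4, no_heads=8)
    s = torch.randn(1, 10, 16)  # [batch, N_res, c_hidden]
    rigids = Rigid3Array.identity((1, 10), s.device)
    pts = proj(s, rigids)
    assert pts.x.shape == (1, 10, 8, 4)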
class InvariantPointAttention(nn.Module):
"""
Implements Algorithm 22.
"""
def __init__(
self,
c_s: int,
......@@ -165,6 +204,7 @@ class InvariantPointAttention(nn.Module):
no_v_points: int,
inf: float = 1e5,
eps: float = 1e-8,
is_multimer: bool = False,
):
"""
Args:
......@@ -191,23 +231,45 @@ class InvariantPointAttention(nn.Module):
self.no_v_points = no_v_points
self.inf = inf
self.eps = eps
self.is_multimer = is_multimer
# These linear layers differ from their specifications in the
# supplement. There, they lack bias and use Glorot initialization.
# Here as in the official source, they have bias and use the default
# Lecun initialization.
if not self.is_multimer:
    hc = self.c_hidden * self.no_heads
    self.linear_q = Linear(self.c_s, hc, bias=(not is_multimer))
    self.linear_kv = Linear(self.c_s, 2 * hc)

    hpq = self.no_heads * self.no_qk_points * 3
    self.linear_q_points = Linear(self.c_s, hpq)

    hpkv = self.no_heads * (self.no_qk_points + self.no_v_points) * 3
    self.linear_kv_points = Linear(self.c_s, hpkv)
else:
hc = self.c_hidden * self.no_heads
self.linear_q = Linear(self.c_s, hc, bias=(not is_multimer))
self.linear_q_points = PointProjection(
self.c_s, self.no_qk_points, self.no_heads
)
self.linear_k = Linear(self.c_s, hc, bias=False)
self.linear_v = Linear(self.c_s, hc, bias=False)
self.linear_k_points = PointProjection(
self.c_s,
self.no_qk_points,
self.no_heads,
)
self.linear_v_points = PointProjection(
self.c_s,
self.no_v_points,
self.no_heads,
)
self.linear_b = Linear(self.c_z, self.no_heads)
self.head_weights = nn.Parameter(torch.zeros((no_heads)))
......@@ -225,7 +287,7 @@ class InvariantPointAttention(nn.Module):
self,
s: torch.Tensor,
z: torch.Tensor,
r: Union[Rigid, Rigid3Array],
mask: torch.Tensor,
) -> torch.Tensor:
"""
......@@ -244,48 +306,72 @@ class InvariantPointAttention(nn.Module):
#######################################
# Generate scalar and point activations
#######################################
# The following two blocks are equivalent
# They're separated only to preserve compatibility with old AF weights
if self.is_multimer:
    # [*, N_res, H * C_hidden]
    q = self.linear_q(s)

    # [*, N_res, H, C_hidden]
    q = q.view(q.shape[:-1] + (self.no_heads, -1))

    # [*, N_res, H, P_qk]
    q_pts = self.linear_q_points(s, r)

    # [*, N_res, H * C_hidden]
    k = self.linear_k(s)
    v = self.linear_v(s)

    # [*, N_res, H, C_hidden]
    k = k.view(k.shape[:-1] + (self.no_heads, -1))
    v = v.view(v.shape[:-1] + (self.no_heads, -1))

    # [*, N_res, H, P_qk, 3]
    k_pts = self.linear_k_points(s, r)

    # [*, N_res, H, P_v, 3]
    v_pts = self.linear_v_points(s, r)
else:
    # [*, N_res, H * C_hidden]
    q = self.linear_q(s)
    kv = self.linear_kv(s)

    # [*, N_res, H, C_hidden]
    q = q.view(q.shape[:-1] + (self.no_heads, -1))

    # [*, N_res, H, 2 * C_hidden]
    kv = kv.view(kv.shape[:-1] + (self.no_heads, -1))

    # [*, N_res, H, C_hidden]
    k, v = torch.split(kv, self.c_hidden, dim=-1)

    # [*, N_res, H * P_q * 3]
    q_pts = self.linear_q_points(s)

    # This is kind of clunky, but it's how the original does it
    # [*, N_res, H * P_q, 3]
    q_pts = torch.split(q_pts, q_pts.shape[-1] // 3, dim=-1)
    q_pts = torch.stack(q_pts, dim=-1)
    q_pts = r[..., None].apply(q_pts)

    # [*, N_res, H, P_q, 3]
    q_pts = q_pts.view(
        q_pts.shape[:-2] + (self.no_heads, self.no_qk_points, 3)
    )

    # [*, N_res, H * (P_q + P_v) * 3]
    kv_pts = self.linear_kv_points(s)

    # [*, N_res, H * (P_q + P_v), 3]
    kv_pts = torch.split(kv_pts, kv_pts.shape[-1] // 3, dim=-1)
    kv_pts = torch.stack(kv_pts, dim=-1)
    kv_pts = r[..., None].apply(kv_pts)

    # [*, N_res, H, (P_q + P_v), 3]
    kv_pts = kv_pts.view(kv_pts.shape[:-2] + (self.no_heads, -1, 3))

    # [*, N_res, H, P_q/P_v, 3]
    k_pts, v_pts = torch.split(
        kv_pts, [self.no_qk_points, self.no_v_points], dim=-2
    )
##########################
# Compute attention scores
......@@ -299,14 +385,20 @@ class InvariantPointAttention(nn.Module):
permute_final_dims(k, (1, 2, 0)), # [*, H, C_hidden, N_res]
)
a *= math.sqrt(1.0 / (3 * self.c_hidden))
a += math.sqrt(1.0 / 3) * permute_final_dims(b, (2, 0, 1))

if self.is_multimer:
    # [*, N_res, N_res, H, P_q, 3]
    pt_att = q_pts[..., None, :, :] - k_pts[..., None, :, :, :]

    # [*, N_res, N_res, H, P_q]
    pt_att = sum([c**2 for c in pt_att])
else:
    # [*, N_res, N_res, H, P_q, 3]
    pt_att = q_pts.unsqueeze(-4) - k_pts.unsqueeze(-5)
    pt_att = pt_att**2

    # [*, N_res, N_res, H, P_q]
    pt_att = sum(torch.unbind(pt_att, dim=-1))
head_weights = self.softplus(self.head_weights).view(
*((1,) * len(pt_att.shape[:-2]) + (-1, 1))
)
......@@ -323,7 +415,7 @@ class InvariantPointAttention(nn.Module):
# [*, H, N_res, N_res]
pt_att = permute_final_dims(pt_att, (2, 0, 1))
a = a + pt_att
a = a + square_mask.unsqueeze(-3)
a = self.softmax(a)
......@@ -331,35 +423,47 @@ class InvariantPointAttention(nn.Module):
# Compute output
################
# [*, N_res, H, C_hidden]
o = torch.matmul(a, v.transpose(-2, -3).to(dtype=a.dtype)).transpose(-2, -3)
# [*, N_res, H * C_hidden]
o = flatten_final_dims(o, 2)
if self.is_multimer:
    # [*, N_res, H, P_v]
    o_pt = v_pts * permute_final_dims(a, (1, 2, 0)).unsqueeze(-1)
    o_pt = o_pt.sum(dim=-3)

    # [*, N_res, H, P_v]
    o_pt = r[..., None, None].apply_inverse_to_point(o_pt)

    # [*, N_res, H * P_v, 3]
    o_pt = o_pt.reshape(o_pt.shape[:-2] + (-1,))

    # [*, N_res, H * P_v]
    o_pt_norm = o_pt.norm(self.eps)
else:
    # As DeepMind explains, this manual matmul ensures that the operation
    # happens in float32.
    # [*, H, 3, N_res, P_v]
    o_pt = torch.sum(
        (
            a[..., None, :, :, None]
            * permute_final_dims(v_pts, (1, 3, 0, 2))[..., None, :, :]
        ),
        dim=-2,
    )

    # [*, N_res, H, P_v, 3]
    o_pt = permute_final_dims(o_pt, (2, 0, 3, 1))
    o_pt = r[..., None, None].invert_apply(o_pt)

    # [*, N_res, H * P_v]
    o_pt_norm = flatten_final_dims(
        torch.sqrt(torch.sum(o_pt**2, dim=-1) + self.eps), 2
    )

    # [*, N_res, H * P_v, 3]
    o_pt = o_pt.reshape(*o_pt.shape[:-3], -1, 3)
# [*, N_res, H, C_z]
o_pair = torch.matmul(a.transpose(-2, -3), z.to(dtype=a.dtype))
......@@ -368,11 +472,16 @@ class InvariantPointAttention(nn.Module):
o_pair = flatten_final_dims(o_pair, 2)
# [*, N_res, C_s]
if self.is_multimer:
    s = self.linear_out(
        torch.cat((o, *o_pt, o_pt_norm, o_pair), dim=-1).to(dtype=z.dtype)
    )
else:
    s = self.linear_out(
        torch.cat(
            (o, *torch.unbind(o_pt, dim=-1), o_pt_norm, o_pair), dim=-1
        ).to(dtype=z.dtype)
    )
return s
......@@ -476,6 +585,7 @@ class StructureModule(nn.Module):
trans_scale_factor,
epsilon,
inf,
is_multimer=False,
**kwargs,
):
"""
......@@ -529,6 +639,7 @@ class StructureModule(nn.Module):
self.trans_scale_factor = trans_scale_factor
self.epsilon = epsilon
self.inf = inf
self.is_multimer = is_multimer
# To be lazily initialized later
self.default_frames = None
......@@ -550,6 +661,7 @@ class StructureModule(nn.Module):
self.no_v_points,
inf=self.inf,
eps=self.epsilon,
is_multimer=self.is_multimer,
)
self.ipa_dropout = nn.Dropout(self.dropout_rate)
......
import os
import glob
import importlib as importlib
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))
__all__ = [
os.path.basename(f)[:-3]
for f in _files
if os.path.isfile(f) and not f.endswith("__init__.py")
]
_modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__]
for _m in _modules:
globals()[_m[0]] = _m[1]
# Avoid needlessly cluttering the global namespace
del _files, _m, _modules
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Protein data type."""
import dataclasses
import io
from typing import Any, Mapping, Optional
import re
from fastfold.np import residue_constants
from Bio.PDB import PDBParser
import numpy as np
FeatureDict = Mapping[str, np.ndarray]
ModelOutput = Mapping[str, Any] # Is a nested dict.
PICO_TO_ANGSTROM = 0.01
PDB_CHAIN_IDS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
PDB_MAX_CHAINS = len(PDB_CHAIN_IDS)
assert PDB_MAX_CHAINS == 62
@dataclasses.dataclass(frozen=True)
class Protein:
"""Protein structure representation."""
# Cartesian coordinates of atoms in angstroms. The atom types correspond to
    # residue_constants.atom_types, i.e. the first three are N, CA, C.
atom_positions: np.ndarray # [num_res, num_atom_type, 3]
# Amino-acid type for each residue represented as an integer between 0 and
# 20, where 20 is 'X'.
aatype: np.ndarray # [num_res]
# Binary float mask to indicate presence of a particular atom. 1.0 if an atom
# is present and 0.0 if not. This should be used for loss masking.
atom_mask: np.ndarray # [num_res, num_atom_type]
# Residue index as used in PDB. It is not necessarily continuous or 0-indexed.
residue_index: np.ndarray # [num_res]
# 0-indexed number corresponding to the chain in the protein that this
# residue belongs to
chain_index: np.ndarray # [num_res]
# B-factors, or temperature factors, of each residue (in sq. angstroms units),
# representing the displacement of the residue from its ground truth mean
# value.
b_factors: np.ndarray # [num_res, num_atom_type]
def __post_init__(self):
if(len(np.unique(self.chain_index)) > PDB_MAX_CHAINS):
raise ValueError(
f"Cannot build an instance with more than {PDB_MAX_CHAINS} "
"chains because these cannot be written to PDB format"
)
def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
"""Takes a PDB string and constructs a Protein object.
WARNING: All non-standard residue types will be converted into UNK. All
non-standard atoms will be ignored.
Args:
pdb_str: The contents of the pdb file
chain_id: If chain_id is specified (e.g. A), then only that chain is
parsed. Else, all chains are parsed.
Returns:
A new `Protein` parsed from the pdb contents.
"""
pdb_fh = io.StringIO(pdb_str)
parser = PDBParser(QUIET=True)
structure = parser.get_structure("none", pdb_fh)
models = list(structure.get_models())
if len(models) != 1:
raise ValueError(
f"Only single model PDBs are supported. Found {len(models)} models."
)
model = models[0]
atom_positions = []
aatype = []
atom_mask = []
residue_index = []
chain_ids = []
b_factors = []
for chain in model:
if(chain_id is not None and chain.id != chain_id):
continue
for res in chain:
if res.id[2] != " ":
raise ValueError(
f"PDB contains an insertion code at chain {chain.id} and residue "
f"index {res.id[1]}. These are not supported."
)
res_shortname = residue_constants.restype_3to1.get(res.resname, "X")
restype_idx = residue_constants.restype_order.get(
res_shortname, residue_constants.restype_num
)
pos = np.zeros((residue_constants.atom_type_num, 3))
mask = np.zeros((residue_constants.atom_type_num,))
res_b_factors = np.zeros((residue_constants.atom_type_num,))
for atom in res:
if atom.name not in residue_constants.atom_types:
continue
pos[residue_constants.atom_order[atom.name]] = atom.coord
mask[residue_constants.atom_order[atom.name]] = 1.0
res_b_factors[
residue_constants.atom_order[atom.name]
] = atom.bfactor
if np.sum(mask) < 0.5:
# If no known atom positions are reported for the residue then skip it.
continue
aatype.append(restype_idx)
atom_positions.append(pos)
atom_mask.append(mask)
residue_index.append(res.id[1])
chain_ids.append(chain.id)
b_factors.append(res_b_factors)
# Chain IDs are usually characters so map these to ints
unique_chain_ids = np.unique(chain_ids)
chain_id_mapping = {cid: n for n, cid in enumerate(unique_chain_ids)}
chain_index = np.array([chain_id_mapping[cid] for cid in chain_ids])
return Protein(
atom_positions=np.array(atom_positions),
atom_mask=np.array(atom_mask),
aatype=np.array(aatype),
residue_index=np.array(residue_index),
chain_index=chain_index,
b_factors=np.array(b_factors),
)
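# Illustrative usage (editor's sketch, not part of the original module;
# `pdb_str` is a hypothetical variable holding a single-model PDB file):
#
#     prot = from_pdb_string(pdb_str, chain_id="A")
#     num_res = prot.aatype.shape[0]
#     assert prot.atom_positions.shape == (num_res, 37, 3)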
def from_proteinnet_string(proteinnet_str: str) -> Protein:
tag_re = r'(\[[A-Z]+\]\n)'
tags = [
tag.strip() for tag in re.split(tag_re, proteinnet_str) if len(tag) > 0
]
groups = zip(tags[0::2], [l.split('\n') for l in tags[1::2]])
atoms = ['N', 'CA', 'C']
aatype = None
atom_positions = None
atom_mask = None
for g in groups:
if("[PRIMARY]" == g[0]):
seq = g[1][0].strip()
for i in range(len(seq)):
if(seq[i] not in residue_constants.restypes):
seq[i] = 'X'
aatype = np.array([
residue_constants.restype_order.get(
res_symbol, residue_constants.restype_num
) for res_symbol in seq
])
elif("[TERTIARY]" == g[0]):
tertiary = []
for axis in range(3):
tertiary.append(list(map(float, g[1][axis].split())))
tertiary_np = np.array(tertiary)
atom_positions = np.zeros(
(len(tertiary[0])//3, residue_constants.atom_type_num, 3)
).astype(np.float32)
for i, atom in enumerate(atoms):
atom_positions[:, residue_constants.atom_order[atom], :] = (
np.transpose(tertiary_np[:, i::3])
)
atom_positions *= PICO_TO_ANGSTROM
elif("[MASK]" == g[0]):
mask = np.array(list(map({'-': 0, '+': 1}.get, g[1][0].strip())))
atom_mask = np.zeros(
(len(mask), residue_constants.atom_type_num,)
).astype(np.float32)
for i, atom in enumerate(atoms):
atom_mask[:, residue_constants.atom_order[atom]] = 1
atom_mask *= mask[..., None]
    return Protein(
        atom_positions=atom_positions,
        atom_mask=atom_mask,
        aatype=aatype,
        residue_index=np.arange(len(aatype)),
        # ProteinNet records describe a single chain; `chain_index` is a
        # required dataclass field, so fill it with zeros.
        chain_index=np.zeros(len(aatype), dtype=np.int32),
        b_factors=None,
    )
def _chain_end(atom_index, end_resname, chain_name, residue_index) -> str:
chain_end = 'TER'
    # TER is columnar: columns 18-20 hold the residue name, so six spaces
    # follow the atom serial number.
    return (
        f'{chain_end:<6}{atom_index:>5}      {end_resname:>3} '
        f'{chain_name:>1}{residue_index:>4}'
    )
def to_pdb(prot: Protein) -> str:
"""Converts a `Protein` instance to a PDB string.
Args:
prot: The protein to convert to PDB.
Returns:
PDB string.
"""
restypes = residue_constants.restypes + ["X"]
res_1to3 = lambda r: residue_constants.restype_1to3.get(restypes[r], "UNK")
atom_types = residue_constants.atom_types
pdb_lines = []
atom_mask = prot.atom_mask
aatype = prot.aatype
atom_positions = prot.atom_positions
residue_index = prot.residue_index.astype(np.int32)
chain_index = prot.chain_index.astype(np.int32)
b_factors = prot.b_factors
if np.any(aatype > residue_constants.restype_num):
raise ValueError("Invalid aatypes.")
# Construct a mapping from chain integer indices to chain ID strings.
chain_ids = {}
for i in np.unique(chain_index): # np.unique gives sorted output.
if i >= PDB_MAX_CHAINS:
raise ValueError(
f"The PDB format supports at most {PDB_MAX_CHAINS} chains."
)
chain_ids[i] = PDB_CHAIN_IDS[i]
pdb_lines.append("MODEL 1")
atom_index = 1
last_chain_index = chain_index[0]
# Add all atom sites.
for i in range(aatype.shape[0]):
# Close the previous chain if in a multichain PDB.
if last_chain_index != chain_index[i]:
pdb_lines.append(
_chain_end(
atom_index,
res_1to3(aatype[i - 1]),
chain_ids[chain_index[i - 1]],
residue_index[i - 1]
)
)
last_chain_index = chain_index[i]
atom_index += 1 # Atom index increases at the TER symbol.
res_name_3 = res_1to3(aatype[i])
for atom_name, pos, mask, b_factor in zip(
atom_types, atom_positions[i], atom_mask[i], b_factors[i]
):
if mask < 0.5:
continue
record_type = "ATOM"
name = atom_name if len(atom_name) == 4 else f" {atom_name}"
alt_loc = ""
insertion_code = ""
occupancy = 1.00
element = atom_name[
0
] # Protein supports only C, N, O, S, this works.
charge = ""
# PDB is a columnar format, every space matters here!
            atom_line = (
                f"{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}"
                f"{res_name_3:>3} {chain_ids[chain_index[i]]:>1}"
                f"{residue_index[i]:>4}{insertion_code:>1}   "
                f"{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}"
                f"{occupancy:>6.2f}{b_factor:>6.2f}          "
                f"{element:>2}{charge:>2}"
            )
pdb_lines.append(atom_line)
atom_index += 1
# Close the final chain.
pdb_lines.append(
_chain_end(
atom_index,
res_1to3(aatype[-1]),
chain_ids[chain_index[-1]],
residue_index[-1]
)
)
pdb_lines.append("ENDMDL")
pdb_lines.append("END")
# Pad all lines to 80 characters
pdb_lines = [line.ljust(80) for line in pdb_lines]
return '\n'.join(pdb_lines) + '\n' # Add terminating newline.
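# Round-trip sketch (editor's note): `to_pdb` and `from_pdb_string` invert
# each other up to the fixed-point formatting of the PDB columns, e.g.
#
#     prot2 = from_pdb_string(to_pdb(prot))
#     np.testing.assert_allclose(
#         prot2.atom_positions, prot.atom_positions, atol=1e-3
#     )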
def ideal_atom_mask(prot: Protein) -> np.ndarray:
"""Computes an ideal atom mask.
`Protein.atom_mask` typically is defined according to the atoms that are
reported in the PDB. This function computes a mask according to heavy atoms
that should be present in the given sequence of amino acids.
Args:
prot: `Protein` whose fields are `numpy.ndarray` objects.
Returns:
An ideal atom mask.
"""
return residue_constants.STANDARD_ATOM_MASK[prot.aatype]
def from_prediction(
features: FeatureDict,
result: ModelOutput,
b_factors: Optional[np.ndarray] = None,
remove_leading_feature_dimension: bool = True,
) -> Protein:
"""Assembles a protein from a prediction.
Args:
features: Dictionary holding model inputs.
result: Dictionary holding model outputs.
b_factors: (Optional) B-factors to use for the protein.
        remove_leading_feature_dimension: Whether to remove the leading
            dimension of the `features` values.
Returns:
A protein instance.
"""
def _maybe_remove_leading_dim(arr: np.ndarray) -> np.ndarray:
return arr[0] if remove_leading_feature_dimension else arr
if 'asym_id' in features:
chain_index = _maybe_remove_leading_dim(features["asym_id"])
else:
chain_index = np.zeros_like(
_maybe_remove_leading_dim(features["aatype"])
)
if b_factors is None:
b_factors = np.zeros_like(result["final_atom_mask"])
return Protein(
aatype=_maybe_remove_leading_dim(features["aatype"]),
atom_positions=result["final_atom_positions"],
atom_mask=result["final_atom_mask"],
residue_index=_maybe_remove_leading_dim(features["residue_index"]) + 1,
chain_index=chain_index,
b_factors=b_factors,
)
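# Minimal end-to-end sketch (editor's example; `features` and `result` are
# assumed to come from the data pipeline and the model, respectively):
#
#     prot = from_prediction(features, result)
#     with open("prediction.pdb", "w") as f:
#         f.write(to_pdb(prot))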
import os
import glob
import importlib
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))
__all__ = [
os.path.basename(f)[:-3]
for f in _files
if os.path.isfile(f) and not f.endswith("__init__.py")
]
_modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__]
for _m in _modules:
globals()[_m[0]] = _m[1]
# Avoid needlessly cluttering the global namespace
del _files, _m, _modules
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Restrained Amber Minimization of a structure."""
import io
import time
from typing import Collection, Optional, Sequence
from absl import logging
from openfold.np import (
protein,
residue_constants,
)
import openfold.utils.loss as loss
from openfold.np.relax import cleanup, utils
import ml_collections
import numpy as np
from simtk import openmm
from simtk import unit
from simtk.openmm import app as openmm_app
from simtk.openmm.app.internal.pdbstructure import PdbStructure
ENERGY = unit.kilocalories_per_mole
LENGTH = unit.angstroms
def will_restrain(atom: openmm_app.Atom, rset: str) -> bool:
    """Returns True if the atom will be restrained by the given restraint set."""
    if rset == "non_hydrogen":
        return atom.element.name != "hydrogen"
    elif rset == "c_alpha":
        return atom.name == "CA"
    else:
        # Fail loudly instead of silently returning None for unknown sets.
        raise ValueError(f"Unknown restraint set: {rset}")
def _add_restraints(
system: openmm.System,
reference_pdb: openmm_app.PDBFile,
stiffness: unit.Unit,
rset: str,
exclude_residues: Sequence[int],
):
"""Adds a harmonic potential that restrains the system to a structure."""
assert rset in ["non_hydrogen", "c_alpha"]
force = openmm.CustomExternalForce(
"0.5 * k * ((x-x0)^2 + (y-y0)^2 + (z-z0)^2)"
)
force.addGlobalParameter("k", stiffness)
for p in ["x0", "y0", "z0"]:
force.addPerParticleParameter(p)
for i, atom in enumerate(reference_pdb.topology.atoms()):
if atom.residue.index in exclude_residues:
continue
if will_restrain(atom, rset):
force.addParticle(i, reference_pdb.positions[i])
logging.info(
"Restraining %d / %d particles.",
force.getNumParticles(),
system.getNumParticles(),
)
system.addForce(force)
def _openmm_minimize(
pdb_str: str,
max_iterations: int,
tolerance: unit.Unit,
stiffness: unit.Unit,
restraint_set: str,
exclude_residues: Sequence[int],
use_gpu: bool,
):
"""Minimize energy via openmm."""
pdb_file = io.StringIO(pdb_str)
pdb = openmm_app.PDBFile(pdb_file)
force_field = openmm_app.ForceField("amber99sb.xml")
constraints = openmm_app.HBonds
system = force_field.createSystem(pdb.topology, constraints=constraints)
if stiffness > 0 * ENERGY / (LENGTH ** 2):
_add_restraints(system, pdb, stiffness, restraint_set, exclude_residues)
integrator = openmm.LangevinIntegrator(0, 0.01, 0.0)
platform = openmm.Platform.getPlatformByName("CUDA" if use_gpu else "CPU")
simulation = openmm_app.Simulation(
pdb.topology, system, integrator, platform
)
simulation.context.setPositions(pdb.positions)
ret = {}
state = simulation.context.getState(getEnergy=True, getPositions=True)
ret["einit"] = state.getPotentialEnergy().value_in_unit(ENERGY)
ret["posinit"] = state.getPositions(asNumpy=True).value_in_unit(LENGTH)
simulation.minimizeEnergy(maxIterations=max_iterations, tolerance=tolerance)
state = simulation.context.getState(getEnergy=True, getPositions=True)
ret["efinal"] = state.getPotentialEnergy().value_in_unit(ENERGY)
ret["pos"] = state.getPositions(asNumpy=True).value_in_unit(LENGTH)
ret["min_pdb"] = _get_pdb_string(simulation.topology, state.getPositions())
return ret
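# Hedged example of calling `_openmm_minimize` directly. The unit handling
# mirrors `_run_one_iteration` below; the tolerance matches the
# `run_pipeline` default, while the stiffness value is only illustrative:
#
#     ret = _openmm_minimize(
#         pdb_string,
#         max_iterations=0,                         # 0 = no L-BFGS limit
#         tolerance=2.39 * ENERGY,                  # kcal/mol
#         stiffness=10.0 * ENERGY / (LENGTH ** 2),  # kcal/mol/A^2
#         restraint_set="non_hydrogen",
#         exclude_residues=[],
#         use_gpu=False,
#     )
#     print(ret["einit"], "->", ret["efinal"])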
def _get_pdb_string(topology: openmm_app.Topology, positions: unit.Quantity):
"""Returns a pdb string provided OpenMM topology and positions."""
with io.StringIO() as f:
openmm_app.PDBFile.writeFile(topology, positions, f)
return f.getvalue()
def _check_cleaned_atoms(pdb_cleaned_string: str, pdb_ref_string: str):
"""Checks that no atom positions have been altered by cleaning."""
cleaned = openmm_app.PDBFile(io.StringIO(pdb_cleaned_string))
reference = openmm_app.PDBFile(io.StringIO(pdb_ref_string))
cl_xyz = np.array(cleaned.getPositions().value_in_unit(LENGTH))
ref_xyz = np.array(reference.getPositions().value_in_unit(LENGTH))
for ref_res, cl_res in zip(
reference.topology.residues(), cleaned.topology.residues()
):
assert ref_res.name == cl_res.name
for rat in ref_res.atoms():
for cat in cl_res.atoms():
if cat.name == rat.name:
if not np.array_equal(
cl_xyz[cat.index], ref_xyz[rat.index]
):
raise ValueError(
f"Coordinates of cleaned atom {cat} do not match "
f"coordinates of reference atom {rat}."
)
def _check_residues_are_well_defined(prot: protein.Protein):
"""Checks that all residues contain non-empty atom sets."""
if (prot.atom_mask.sum(axis=-1) == 0).any():
raise ValueError(
"Amber minimization can only be performed on proteins with"
" well-defined residues. This protein contains at least"
" one residue with no atoms."
)
def _check_atom_mask_is_ideal(prot):
"""Sanity-check the atom mask is ideal, up to a possible OXT."""
atom_mask = prot.atom_mask
ideal_atom_mask = protein.ideal_atom_mask(prot)
utils.assert_equal_nonterminal_atom_types(atom_mask, ideal_atom_mask)
def clean_protein(prot: protein.Protein, checks: bool = True):
"""Adds missing atoms to Protein instance.
Args:
prot: A `protein.Protein` instance.
checks: A `bool` specifying whether to add additional checks to the cleaning
process.
Returns:
pdb_string: A string of the cleaned protein.
"""
_check_atom_mask_is_ideal(prot)
# Clean pdb.
prot_pdb_string = protein.to_pdb(prot)
pdb_file = io.StringIO(prot_pdb_string)
alterations_info = {}
fixed_pdb = cleanup.fix_pdb(pdb_file, alterations_info)
fixed_pdb_file = io.StringIO(fixed_pdb)
pdb_structure = PdbStructure(fixed_pdb_file)
cleanup.clean_structure(pdb_structure, alterations_info)
logging.info("alterations info: %s", alterations_info)
# Write pdb file of cleaned structure.
as_file = openmm_app.PDBFile(pdb_structure)
pdb_string = _get_pdb_string(as_file.getTopology(), as_file.getPositions())
if checks:
_check_cleaned_atoms(pdb_string, prot_pdb_string)
return pdb_string
def make_atom14_positions(prot):
"""Constructs denser atom positions (14 dimensions instead of 37)."""
restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37
restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14
restype_atom14_mask = []
for rt in residue_constants.restypes:
atom_names = residue_constants.restype_name_to_atom14_names[
residue_constants.restype_1to3[rt]
]
restype_atom14_to_atom37.append(
[
(residue_constants.atom_order[name] if name else 0)
for name in atom_names
]
)
atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
restype_atom37_to_atom14.append(
[
(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
for name in residue_constants.atom_types
]
)
restype_atom14_mask.append(
[(1.0 if name else 0.0) for name in atom_names]
)
# Add dummy mapping for restype 'UNK'.
restype_atom14_to_atom37.append([0] * 14)
restype_atom37_to_atom14.append([0] * 37)
restype_atom14_mask.append([0.0] * 14)
restype_atom14_to_atom37 = np.array(
restype_atom14_to_atom37, dtype=np.int32
)
restype_atom37_to_atom14 = np.array(
restype_atom37_to_atom14, dtype=np.int32
)
restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)
# Create the mapping for (residx, atom14) --> atom37, i.e. an array
# with shape (num_res, 14) containing the atom37 indices for this protein.
residx_atom14_to_atom37 = restype_atom14_to_atom37[prot["aatype"]]
residx_atom14_mask = restype_atom14_mask[prot["aatype"]]
# Create a mask for known ground truth positions.
residx_atom14_gt_mask = residx_atom14_mask * np.take_along_axis(
prot["all_atom_mask"], residx_atom14_to_atom37, axis=1
).astype(np.float32)
# Gather the ground truth positions.
residx_atom14_gt_positions = residx_atom14_gt_mask[:, :, None] * (
np.take_along_axis(
prot["all_atom_positions"],
residx_atom14_to_atom37[..., None],
axis=1,
)
)
prot["atom14_atom_exists"] = residx_atom14_mask
prot["atom14_gt_exists"] = residx_atom14_gt_mask
prot["atom14_gt_positions"] = residx_atom14_gt_positions
prot["residx_atom14_to_atom37"] = residx_atom14_to_atom37.astype(np.int64)
# Create the gather indices for mapping back.
residx_atom37_to_atom14 = restype_atom37_to_atom14[prot["aatype"]]
prot["residx_atom37_to_atom14"] = residx_atom37_to_atom14.astype(np.int64)
# Create the corresponding mask.
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
for restype, restype_letter in enumerate(residue_constants.restypes):
restype_name = residue_constants.restype_1to3[restype_letter]
atom_names = residue_constants.residue_atoms[restype_name]
for atom_name in atom_names:
atom_type = residue_constants.atom_order[atom_name]
restype_atom37_mask[restype, atom_type] = 1
residx_atom37_mask = restype_atom37_mask[prot["aatype"]]
prot["atom37_atom_exists"] = residx_atom37_mask
# As the atom naming is ambiguous for 7 of the 20 amino acids, provide
# alternative ground truth coordinates where the naming is swapped
restype_3 = [
residue_constants.restype_1to3[res]
for res in residue_constants.restypes
]
restype_3 += ["UNK"]
# Matrices for renaming ambiguous atoms.
all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3}
for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
correspondences = np.arange(14)
for source_atom_swap, target_atom_swap in swap.items():
source_index = residue_constants.restype_name_to_atom14_names[
resname
].index(source_atom_swap)
target_index = residue_constants.restype_name_to_atom14_names[
resname
].index(target_atom_swap)
correspondences[source_index] = target_index
correspondences[target_index] = source_index
renaming_matrix = np.zeros((14, 14), dtype=np.float32)
for index, correspondence in enumerate(correspondences):
renaming_matrix[index, correspondence] = 1.0
all_matrices[resname] = renaming_matrix.astype(np.float32)
renaming_matrices = np.stack(
[all_matrices[restype] for restype in restype_3]
)
# Pick the transformation matrices for the given residue sequence
# shape (num_res, 14, 14).
renaming_transform = renaming_matrices[prot["aatype"]]
# Apply it to the ground truth positions. shape (num_res, 14, 3).
alternative_gt_positions = np.einsum(
"rac,rab->rbc", residx_atom14_gt_positions, renaming_transform
)
prot["atom14_alt_gt_positions"] = alternative_gt_positions
# Create the mask for the alternative ground truth (differs from the
# ground truth mask, if only one of the atoms in an ambiguous pair has a
# ground truth position).
alternative_gt_mask = np.einsum(
"ra,rab->rb", residx_atom14_gt_mask, renaming_transform
)
prot["atom14_alt_gt_exists"] = alternative_gt_mask
# Create an ambiguous atoms mask. shape: (21, 14).
restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32)
for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
for atom_name1, atom_name2 in swap.items():
restype = residue_constants.restype_order[
residue_constants.restype_3to1[resname]
]
atom_idx1 = residue_constants.restype_name_to_atom14_names[
resname
].index(atom_name1)
atom_idx2 = residue_constants.restype_name_to_atom14_names[
resname
].index(atom_name2)
restype_atom14_is_ambiguous[restype, atom_idx1] = 1
restype_atom14_is_ambiguous[restype, atom_idx2] = 1
# From this create an ambiguous_mask for the given sequence.
prot["atom14_atom_is_ambiguous"] = restype_atom14_is_ambiguous[
prot["aatype"]
]
return prot
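# Editor's note on the renaming transform above: for ASP the (14, 14) matrix
# swaps columns 6 and 7 (OD1 and OD2 in
# residue_constants.restype_name_to_atom14_names["ASP"]), so the einsum
# exchanges the two carboxylate oxygens in the alternative ground truth.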
def find_violations(prot_np: protein.Protein):
"""Analyzes a protein and returns structural violation information.
Args:
prot_np: A protein.
Returns:
violations: A `dict` of structure components with structural violations.
violation_metrics: A `dict` of violation metrics.
"""
batch = {
"aatype": prot_np.aatype,
"all_atom_positions": prot_np.atom_positions.astype(np.float32),
"all_atom_mask": prot_np.atom_mask.astype(np.float32),
"residue_index": prot_np.residue_index,
}
batch["seq_mask"] = np.ones_like(batch["aatype"], np.float32)
batch = make_atom14_positions(batch)
violations = loss.find_structural_violations_np(
batch=batch,
atom14_pred_positions=batch["atom14_gt_positions"],
config=ml_collections.ConfigDict(
{
"violation_tolerance_factor": 12, # Taken from model config.
"clash_overlap_tolerance": 1.5, # Taken from model config.
}
),
)
violation_metrics = loss.compute_violation_metrics_np(
batch=batch,
atom14_pred_positions=batch["atom14_gt_positions"],
violations=violations,
)
return violations, violation_metrics
def get_violation_metrics(prot: protein.Protein):
"""Computes violation and alignment metrics."""
structural_violations, struct_metrics = find_violations(prot)
violation_idx = np.flatnonzero(
structural_violations["total_per_residue_violations_mask"]
)
struct_metrics["residue_violations"] = violation_idx
struct_metrics["num_residue_violations"] = len(violation_idx)
struct_metrics["structural_violations"] = structural_violations
return struct_metrics
def _run_one_iteration(
*,
pdb_string: str,
max_iterations: int,
tolerance: float,
stiffness: float,
restraint_set: str,
max_attempts: int,
exclude_residues: Optional[Collection[int]] = None,
use_gpu: bool,
):
"""Runs the minimization pipeline.
Args:
pdb_string: A pdb string.
max_iterations: An `int` specifying the maximum number of L-BFGS iterations.
A value of 0 specifies no limit.
tolerance: kcal/mol, the energy tolerance of L-BFGS.
stiffness: kcal/mol A**2, spring constant of heavy atom restraining
potential.
restraint_set: The set of atoms to restrain.
max_attempts: The maximum number of minimization attempts.
exclude_residues: An optional list of zero-indexed residues to exclude from
restraints.
use_gpu: Whether to run relaxation on GPU
Returns:
A `dict` of minimization info.
"""
exclude_residues = exclude_residues or []
# Assign physical dimensions.
tolerance = tolerance * ENERGY
stiffness = stiffness * ENERGY / (LENGTH ** 2)
start = time.perf_counter()
minimized = False
attempts = 0
while not minimized and attempts < max_attempts:
attempts += 1
try:
logging.info(
"Minimizing protein, attempt %d of %d.", attempts, max_attempts
)
ret = _openmm_minimize(
pdb_string,
max_iterations=max_iterations,
tolerance=tolerance,
stiffness=stiffness,
restraint_set=restraint_set,
exclude_residues=exclude_residues,
use_gpu=use_gpu,
)
minimized = True
        except Exception as e:  # pylint: disable=broad-except
            logging.info(e)
if not minimized:
raise ValueError(f"Minimization failed after {max_attempts} attempts.")
ret["opt_time"] = time.perf_counter() - start
ret["min_attempts"] = attempts
return ret
def run_pipeline(
prot: protein.Protein,
stiffness: float,
use_gpu: bool,
max_outer_iterations: int = 1,
place_hydrogens_every_iteration: bool = True,
max_iterations: int = 0,
tolerance: float = 2.39,
restraint_set: str = "non_hydrogen",
max_attempts: int = 100,
checks: bool = True,
exclude_residues: Optional[Sequence[int]] = None,
):
"""Run iterative amber relax.
Successive relax iterations are performed until all violations have been
resolved. Each iteration involves a restrained Amber minimization, with
restraint exclusions determined by violation-participating residues.
Args:
prot: A protein to be relaxed.
stiffness: kcal/mol A**2, the restraint stiffness.
use_gpu: Whether to run on GPU
max_outer_iterations: The maximum number of iterative minimization.
place_hydrogens_every_iteration: Whether hydrogens are re-initialized
prior to every minimization.
max_iterations: An `int` specifying the maximum number of L-BFGS steps
per relax iteration. A value of 0 specifies no limit.
tolerance: kcal/mol, the energy tolerance of L-BFGS.
The default value is the OpenMM default.
restraint_set: The set of atoms to restrain.
max_attempts: The maximum number of minimization attempts per iteration.
checks: Whether to perform cleaning checks.
exclude_residues: An optional list of zero-indexed residues to exclude from
restraints.
Returns:
out: A dictionary of output values.
"""
# `protein.to_pdb` will strip any poorly-defined residues so we need to
# perform this check before `clean_protein`.
_check_residues_are_well_defined(prot)
pdb_string = clean_protein(prot, checks=checks)
exclude_residues = exclude_residues or []
exclude_residues = set(exclude_residues)
violations = np.inf
iteration = 0
while violations > 0 and iteration < max_outer_iterations:
ret = _run_one_iteration(
pdb_string=pdb_string,
exclude_residues=exclude_residues,
max_iterations=max_iterations,
tolerance=tolerance,
stiffness=stiffness,
restraint_set=restraint_set,
max_attempts=max_attempts,
use_gpu=use_gpu,
)
prot = protein.from_pdb_string(ret["min_pdb"])
if place_hydrogens_every_iteration:
pdb_string = clean_protein(prot, checks=True)
else:
pdb_string = ret["min_pdb"]
ret.update(get_violation_metrics(prot))
ret.update(
{
"num_exclusions": len(exclude_residues),
"iteration": iteration,
}
)
violations = ret["violations_per_residue"]
exclude_residues = exclude_residues.union(ret["residue_violations"])
logging.info(
"Iteration completed: Einit %.2f Efinal %.2f Time %.2f s "
"num residue violations %d num residue exclusions %d ",
ret["einit"],
ret["efinal"],
ret["opt_time"],
ret["num_residue_violations"],
ret["num_exclusions"],
)
iteration += 1
return ret
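# Typical entry point (editor's sketch; the stiffness of 10 kcal/mol/A^2 is
# an assumed value, not one this module prescribes):
#
#     out = run_pipeline(prot, stiffness=10.0, use_gpu=False)
#     relaxed = protein.from_pdb_string(out["min_pdb"])
#     print(out["einit"], out["efinal"], out["opt_time"])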
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cleans up a PDB file using pdbfixer in preparation for OpenMM simulations.
fix_pdb uses a third-party tool. We also support fixing some additional edge
cases like removing chains of length one (see clean_structure).
"""
import io
import pdbfixer
from simtk.openmm import app
from simtk.openmm.app import element
def fix_pdb(pdbfile, alterations_info):
"""Apply pdbfixer to the contents of a PDB file; return a PDB string result.
1) Replaces nonstandard residues.
2) Removes heterogens (non protein residues) including water.
3) Adds missing residues and missing atoms within existing residues.
4) Adds hydrogens assuming pH=7.0.
5) KeepIds is currently true, so the fixer must keep the existing chain and
residue identifiers. This will fail for some files in wider PDB that have
invalid IDs.
Args:
pdbfile: Input PDB file handle.
alterations_info: A dict that will store details of changes made.
Returns:
A PDB string representing the fixed structure.
"""
fixer = pdbfixer.PDBFixer(pdbfile=pdbfile)
fixer.findNonstandardResidues()
alterations_info["nonstandard_residues"] = fixer.nonstandardResidues
fixer.replaceNonstandardResidues()
_remove_heterogens(fixer, alterations_info, keep_water=False)
fixer.findMissingResidues()
alterations_info["missing_residues"] = fixer.missingResidues
fixer.findMissingAtoms()
alterations_info["missing_heavy_atoms"] = fixer.missingAtoms
alterations_info["missing_terminals"] = fixer.missingTerminals
fixer.addMissingAtoms(seed=0)
fixer.addMissingHydrogens()
out_handle = io.StringIO()
app.PDBFile.writeFile(
fixer.topology, fixer.positions, out_handle, keepIds=True
)
return out_handle.getvalue()
def clean_structure(pdb_structure, alterations_info):
"""Applies additional fixes to an OpenMM structure, to handle edge cases.
Args:
pdb_structure: An OpenMM structure to modify and fix.
alterations_info: A dict that will store details of changes made.
"""
_replace_met_se(pdb_structure, alterations_info)
_remove_chains_of_length_one(pdb_structure, alterations_info)
def _remove_heterogens(fixer, alterations_info, keep_water):
"""Removes the residues that Pdbfixer considers to be heterogens.
Args:
fixer: A Pdbfixer instance.
alterations_info: A dict that will store details of changes made.
keep_water: If True, water (HOH) is not considered to be a heterogen.
"""
initial_resnames = set()
for chain in fixer.topology.chains():
for residue in chain.residues():
initial_resnames.add(residue.name)
fixer.removeHeterogens(keepWater=keep_water)
final_resnames = set()
for chain in fixer.topology.chains():
for residue in chain.residues():
final_resnames.add(residue.name)
alterations_info["removed_heterogens"] = initial_resnames.difference(
final_resnames
)
def _replace_met_se(pdb_structure, alterations_info):
"""Replace the Se in any MET residues that were not marked as modified."""
modified_met_residues = []
for res in pdb_structure.iter_residues():
name = res.get_name_with_spaces().strip()
if name == "MET":
s_atom = res.get_atom("SD")
if s_atom.element_symbol == "Se":
s_atom.element_symbol = "S"
s_atom.element = element.get_by_symbol("S")
modified_met_residues.append(s_atom.residue_number)
alterations_info["Se_in_MET"] = modified_met_residues
def _remove_chains_of_length_one(pdb_structure, alterations_info):
"""Removes chains that correspond to a single amino acid.
A single amino acid in a chain is both N and C terminus. There is no force
template for this case.
Args:
pdb_structure: An OpenMM pdb_structure to modify and fix.
alterations_info: A dict that will store details of changes made.
"""
removed_chains = {}
for model in pdb_structure.iter_models():
valid_chains = [c for c in model.iter_chains() if len(c) > 1]
invalid_chain_ids = [
c.chain_id for c in model.iter_chains() if len(c) <= 1
]
model.chains = valid_chains
for chain_id in invalid_chain_ids:
model.chains_by_id.pop(chain_id)
removed_chains[model.number] = invalid_chain_ids
alterations_info["removed_chains"] = removed_chains
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Amber relaxation."""
from typing import Any, Dict, Sequence, Tuple
from openfold.np import protein
from openfold.np.relax import amber_minimize, utils
import numpy as np
class AmberRelaxation(object):
"""Amber relaxation."""
def __init__(
self,
*,
max_iterations: int,
tolerance: float,
stiffness: float,
exclude_residues: Sequence[int],
max_outer_iterations: int,
use_gpu: bool,
):
"""Initialize Amber Relaxer.
Args:
max_iterations: Maximum number of L-BFGS iterations. 0 means no max.
tolerance: kcal/mol, the energy tolerance of L-BFGS.
stiffness: kcal/mol A**2, spring constant of heavy atom restraining
potential.
exclude_residues: Residues to exclude from per-atom restraining.
Zero-indexed.
max_outer_iterations: Maximum number of violation-informed relax
iterations. A value of 1 will run the non-iterative procedure used in
CASP14. Use 20 so that >95% of the bad cases are relaxed. Relax finishes
as soon as there are no violations, hence in most cases this causes no
slowdown. In the worst case we do 20 outer iterations.
use_gpu: Whether to run on GPU
"""
self._max_iterations = max_iterations
self._tolerance = tolerance
self._stiffness = stiffness
self._exclude_residues = exclude_residues
self._max_outer_iterations = max_outer_iterations
self._use_gpu = use_gpu
def process(
self, *, prot: protein.Protein
) -> Tuple[str, Dict[str, Any], np.ndarray]:
"""Runs Amber relax on a prediction, adds hydrogens, returns PDB string."""
out = amber_minimize.run_pipeline(
prot=prot,
max_iterations=self._max_iterations,
tolerance=self._tolerance,
stiffness=self._stiffness,
exclude_residues=self._exclude_residues,
max_outer_iterations=self._max_outer_iterations,
use_gpu=self._use_gpu,
)
min_pos = out["pos"]
start_pos = out["posinit"]
rmsd = np.sqrt(np.sum((start_pos - min_pos) ** 2) / start_pos.shape[0])
debug_data = {
"initial_energy": out["einit"],
"final_energy": out["efinal"],
"attempts": out["min_attempts"],
"rmsd": rmsd,
}
pdb_str = amber_minimize.clean_protein(prot)
min_pdb = utils.overwrite_pdb_coordinates(pdb_str, min_pos)
min_pdb = utils.overwrite_b_factors(min_pdb, prot.b_factors)
utils.assert_equal_nonterminal_atom_types(
protein.from_pdb_string(min_pdb).atom_mask, prot.atom_mask
)
violations = out["structural_violations"][
"total_per_residue_violations_mask"
]
return min_pdb, debug_data, violations
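# Construction sketch (editor's example; the argument values follow the
# CASP14-style settings described in the __init__ docstring and are
# assumptions here, not defaults of this class):
#
#     relaxer = AmberRelaxation(
#         max_iterations=0,
#         tolerance=2.39,
#         stiffness=10.0,
#         exclude_residues=[],
#         max_outer_iterations=20,
#         use_gpu=False,
#     )
#     min_pdb, debug_data, violations = relaxer.process(prot=prot)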
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utils for minimization."""
import io
from openfold.np import residue_constants
from Bio import PDB
import numpy as np
from simtk.openmm import app as openmm_app
from simtk.openmm.app.internal.pdbstructure import PdbStructure
def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
pdb_file = io.StringIO(pdb_str)
structure = PdbStructure(pdb_file)
topology = openmm_app.PDBFile(structure).getTopology()
with io.StringIO() as f:
openmm_app.PDBFile.writeFile(topology, pos, f)
return f.getvalue()
def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str:
"""Overwrites the B-factors in pdb_str with contents of bfactors array.
Args:
pdb_str: An input PDB string.
        bfactors: A numpy array with shape [n_residues, 37]. We assume that the
            B-factors are per residue; i.e. that the nonzero entries are
            identical in [i, :].
Returns:
A new PDB string with the B-factors replaced.
"""
if bfactors.shape[-1] != residue_constants.atom_type_num:
raise ValueError(
f"Invalid final dimension size for bfactors: {bfactors.shape[-1]}."
)
parser = PDB.PDBParser(QUIET=True)
handle = io.StringIO(pdb_str)
structure = parser.get_structure("", handle)
curr_resid = ("", "", "")
idx = -1
for atom in structure.get_atoms():
atom_resid = atom.parent.get_id()
if atom_resid != curr_resid:
idx += 1
            if idx >= bfactors.shape[0]:
                raise ValueError(
                    "Index into bfactors exceeds number of residues. "
                    f"B-factors shape: {bfactors.shape}, idx: {idx}."
                )
curr_resid = atom_resid
atom.bfactor = bfactors[idx, residue_constants.atom_order["CA"]]
new_pdb = io.StringIO()
pdb_io = PDB.PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(new_pdb)
return new_pdb.getvalue()
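# Sketch (editor's example): broadcasting a per-residue score such as pLDDT
# into the B-factor columns expected above (`plddt` and `pdb_str` are
# hypothetical):
#
#     bfactors = np.repeat(
#         plddt[:, None], residue_constants.atom_type_num, axis=1
#     )
#     pdb_with_plddt = overwrite_b_factors(pdb_str, bfactors)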
def assert_equal_nonterminal_atom_types(
atom_mask: np.ndarray, ref_atom_mask: np.ndarray
):
"""Checks that pre- and post-minimized proteins have same atom set."""
# Ignore any terminal OXT atoms which may have been added by minimization.
oxt = residue_constants.atom_order["OXT"]
    # `np.bool` is a removed NumPy alias; use the builtin `bool` dtype.
    no_oxt_mask = np.ones(shape=atom_mask.shape, dtype=bool)
no_oxt_mask[..., oxt] = False
np.testing.assert_almost_equal(
ref_atom_mask[no_oxt_mask], atom_mask[no_oxt_mask]
)
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Constants used in AlphaFold."""
import collections
import functools
import os
from typing import Mapping, List, Tuple
from importlib import resources
import numpy as np
import tree
# Distance from one CA to next CA [trans configuration: omega = 180].
ca_ca = 3.80209737096
# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
# chi angles so their chi angle lists are empty.
chi_angles_atoms = {
"ALA": [],
# Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
"ARG": [
["N", "CA", "CB", "CG"],
["CA", "CB", "CG", "CD"],
["CB", "CG", "CD", "NE"],
["CG", "CD", "NE", "CZ"],
],
"ASN": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "OD1"]],
"ASP": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "OD1"]],
"CYS": [["N", "CA", "CB", "SG"]],
"GLN": [
["N", "CA", "CB", "CG"],
["CA", "CB", "CG", "CD"],
["CB", "CG", "CD", "OE1"],
],
"GLU": [
["N", "CA", "CB", "CG"],
["CA", "CB", "CG", "CD"],
["CB", "CG", "CD", "OE1"],
],
"GLY": [],
"HIS": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "ND1"]],
"ILE": [["N", "CA", "CB", "CG1"], ["CA", "CB", "CG1", "CD1"]],
"LEU": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
"LYS": [
["N", "CA", "CB", "CG"],
["CA", "CB", "CG", "CD"],
["CB", "CG", "CD", "CE"],
["CG", "CD", "CE", "NZ"],
],
"MET": [
["N", "CA", "CB", "CG"],
["CA", "CB", "CG", "SD"],
["CB", "CG", "SD", "CE"],
],
"PHE": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
"PRO": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD"]],
"SER": [["N", "CA", "CB", "OG"]],
"THR": [["N", "CA", "CB", "OG1"]],
"TRP": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
"TYR": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
"VAL": [["N", "CA", "CB", "CG1"]],
}
# If chi angles given in fixed-length array, this matrix determines how to mask
# them for each AA type. The order is as per restype_order (see below).
chi_angles_mask = [
[0.0, 0.0, 0.0, 0.0], # ALA
[1.0, 1.0, 1.0, 1.0], # ARG
[1.0, 1.0, 0.0, 0.0], # ASN
[1.0, 1.0, 0.0, 0.0], # ASP
[1.0, 0.0, 0.0, 0.0], # CYS
[1.0, 1.0, 1.0, 0.0], # GLN
[1.0, 1.0, 1.0, 0.0], # GLU
[0.0, 0.0, 0.0, 0.0], # GLY
[1.0, 1.0, 0.0, 0.0], # HIS
[1.0, 1.0, 0.0, 0.0], # ILE
[1.0, 1.0, 0.0, 0.0], # LEU
[1.0, 1.0, 1.0, 1.0], # LYS
[1.0, 1.0, 1.0, 0.0], # MET
[1.0, 1.0, 0.0, 0.0], # PHE
[1.0, 1.0, 0.0, 0.0], # PRO
[1.0, 0.0, 0.0, 0.0], # SER
[1.0, 0.0, 0.0, 0.0], # THR
[1.0, 1.0, 0.0, 0.0], # TRP
[1.0, 1.0, 0.0, 0.0], # TYR
[1.0, 0.0, 0.0, 0.0], # VAL
]
# The following chi angles are pi periodic: they can be rotated by a multiple
# of pi without affecting the structure.
chi_pi_periodic = [
[0.0, 0.0, 0.0, 0.0], # ALA
[0.0, 0.0, 0.0, 0.0], # ARG
[0.0, 0.0, 0.0, 0.0], # ASN
[0.0, 1.0, 0.0, 0.0], # ASP
[0.0, 0.0, 0.0, 0.0], # CYS
[0.0, 0.0, 0.0, 0.0], # GLN
[0.0, 0.0, 1.0, 0.0], # GLU
[0.0, 0.0, 0.0, 0.0], # GLY
[0.0, 0.0, 0.0, 0.0], # HIS
[0.0, 0.0, 0.0, 0.0], # ILE
[0.0, 0.0, 0.0, 0.0], # LEU
[0.0, 0.0, 0.0, 0.0], # LYS
[0.0, 0.0, 0.0, 0.0], # MET
[0.0, 1.0, 0.0, 0.0], # PHE
[0.0, 0.0, 0.0, 0.0], # PRO
[0.0, 0.0, 0.0, 0.0], # SER
[0.0, 0.0, 0.0, 0.0], # THR
[0.0, 0.0, 0.0, 0.0], # TRP
[0.0, 1.0, 0.0, 0.0], # TYR
[0.0, 0.0, 0.0, 0.0], # VAL
[0.0, 0.0, 0.0, 0.0], # UNK
]
# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi,
# psi and chi angles:
# 0: 'backbone group',
# 1: 'pre-omega-group', (empty)
# 2: 'phi-group', (currently empty, because it defines only hydrogens)
# 3: 'psi-group',
# 4,5,6,7: 'chi1,2,3,4-group'
# The atom positions are relative to the axis-end-atom of the corresponding
# rotation axis. The x-axis is in direction of the rotation axis, and the y-axis
# is defined such that the dihedral-angle-defining atom (the last entry in
# chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate).
# format: [atomname, group_idx, rel_position]
rigid_group_atom_positions = {
"ALA": [
["N", 0, (-0.525, 1.363, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, -0.000, -0.000)],
["CB", 0, (-0.529, -0.774, -1.205)],
["O", 3, (0.627, 1.062, 0.000)],
],
"ARG": [
["N", 0, (-0.524, 1.362, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, -0.000, -0.000)],
["CB", 0, (-0.524, -0.778, -1.209)],
["O", 3, (0.626, 1.062, 0.000)],
["CG", 4, (0.616, 1.390, -0.000)],
["CD", 5, (0.564, 1.414, 0.000)],
["NE", 6, (0.539, 1.357, -0.000)],
["NH1", 7, (0.206, 2.301, 0.000)],
["NH2", 7, (2.078, 0.978, -0.000)],
["CZ", 7, (0.758, 1.093, -0.000)],
],
"ASN": [
["N", 0, (-0.536, 1.357, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, -0.000, -0.000)],
["CB", 0, (-0.531, -0.787, -1.200)],
["O", 3, (0.625, 1.062, 0.000)],
["CG", 4, (0.584, 1.399, 0.000)],
["ND2", 5, (0.593, -1.188, 0.001)],
["OD1", 5, (0.633, 1.059, 0.000)],
],
"ASP": [
["N", 0, (-0.525, 1.362, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.527, 0.000, -0.000)],
["CB", 0, (-0.526, -0.778, -1.208)],
["O", 3, (0.626, 1.062, -0.000)],
["CG", 4, (0.593, 1.398, -0.000)],
["OD1", 5, (0.610, 1.091, 0.000)],
["OD2", 5, (0.592, -1.101, -0.003)],
],
"CYS": [
["N", 0, (-0.522, 1.362, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.524, 0.000, 0.000)],
["CB", 0, (-0.519, -0.773, -1.212)],
["O", 3, (0.625, 1.062, -0.000)],
["SG", 4, (0.728, 1.653, 0.000)],
],
"GLN": [
["N", 0, (-0.526, 1.361, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, 0.000, 0.000)],
["CB", 0, (-0.525, -0.779, -1.207)],
["O", 3, (0.626, 1.062, -0.000)],
["CG", 4, (0.615, 1.393, 0.000)],
["CD", 5, (0.587, 1.399, -0.000)],
["NE2", 6, (0.593, -1.189, -0.001)],
["OE1", 6, (0.634, 1.060, 0.000)],
],
"GLU": [
["N", 0, (-0.528, 1.361, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, -0.000, -0.000)],
["CB", 0, (-0.526, -0.781, -1.207)],
["O", 3, (0.626, 1.062, 0.000)],
["CG", 4, (0.615, 1.392, 0.000)],
["CD", 5, (0.600, 1.397, 0.000)],
["OE1", 6, (0.607, 1.095, -0.000)],
["OE2", 6, (0.589, -1.104, -0.001)],
],
"GLY": [
["N", 0, (-0.572, 1.337, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.517, -0.000, -0.000)],
["O", 3, (0.626, 1.062, -0.000)],
],
"HIS": [
["N", 0, (-0.527, 1.360, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, 0.000, 0.000)],
["CB", 0, (-0.525, -0.778, -1.208)],
["O", 3, (0.625, 1.063, 0.000)],
["CG", 4, (0.600, 1.370, -0.000)],
["CD2", 5, (0.889, -1.021, 0.003)],
["ND1", 5, (0.744, 1.160, -0.000)],
["CE1", 5, (2.030, 0.851, 0.002)],
["NE2", 5, (2.145, -0.466, 0.004)],
],
"ILE": [
["N", 0, (-0.493, 1.373, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.527, -0.000, -0.000)],
["CB", 0, (-0.536, -0.793, -1.213)],
["O", 3, (0.627, 1.062, -0.000)],
["CG1", 4, (0.534, 1.437, -0.000)],
["CG2", 4, (0.540, -0.785, -1.199)],
["CD1", 5, (0.619, 1.391, 0.000)],
],
"LEU": [
["N", 0, (-0.520, 1.363, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, -0.000, -0.000)],
["CB", 0, (-0.522, -0.773, -1.214)],
["O", 3, (0.625, 1.063, -0.000)],
["CG", 4, (0.678, 1.371, 0.000)],
["CD1", 5, (0.530, 1.430, -0.000)],
["CD2", 5, (0.535, -0.774, 1.200)],
],
"LYS": [
["N", 0, (-0.526, 1.362, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, 0.000, 0.000)],
["CB", 0, (-0.524, -0.778, -1.208)],
["O", 3, (0.626, 1.062, -0.000)],
["CG", 4, (0.619, 1.390, 0.000)],
["CD", 5, (0.559, 1.417, 0.000)],
["CE", 6, (0.560, 1.416, 0.000)],
["NZ", 7, (0.554, 1.387, 0.000)],
],
"MET": [
["N", 0, (-0.521, 1.364, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, 0.000, 0.000)],
["CB", 0, (-0.523, -0.776, -1.210)],
["O", 3, (0.625, 1.062, -0.000)],
["CG", 4, (0.613, 1.391, -0.000)],
["SD", 5, (0.703, 1.695, 0.000)],
["CE", 6, (0.320, 1.786, -0.000)],
],
"PHE": [
["N", 0, (-0.518, 1.363, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.524, 0.000, -0.000)],
["CB", 0, (-0.525, -0.776, -1.212)],
["O", 3, (0.626, 1.062, -0.000)],
["CG", 4, (0.607, 1.377, 0.000)],
["CD1", 5, (0.709, 1.195, -0.000)],
["CD2", 5, (0.706, -1.196, 0.000)],
["CE1", 5, (2.102, 1.198, -0.000)],
["CE2", 5, (2.098, -1.201, -0.000)],
["CZ", 5, (2.794, -0.003, -0.001)],
],
"PRO": [
["N", 0, (-0.566, 1.351, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.527, -0.000, 0.000)],
["CB", 0, (-0.546, -0.611, -1.293)],
["O", 3, (0.621, 1.066, 0.000)],
["CG", 4, (0.382, 1.445, 0.0)],
# ['CD', 5, (0.427, 1.440, 0.0)],
["CD", 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger
],
"SER": [
["N", 0, (-0.529, 1.360, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, -0.000, -0.000)],
["CB", 0, (-0.518, -0.777, -1.211)],
["O", 3, (0.626, 1.062, -0.000)],
["OG", 4, (0.503, 1.325, 0.000)],
],
"THR": [
["N", 0, (-0.517, 1.364, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.526, 0.000, -0.000)],
["CB", 0, (-0.516, -0.793, -1.215)],
["O", 3, (0.626, 1.062, 0.000)],
["CG2", 4, (0.550, -0.718, -1.228)],
["OG1", 4, (0.472, 1.353, 0.000)],
],
"TRP": [
["N", 0, (-0.521, 1.363, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.525, -0.000, 0.000)],
["CB", 0, (-0.523, -0.776, -1.212)],
["O", 3, (0.627, 1.062, 0.000)],
["CG", 4, (0.609, 1.370, -0.000)],
["CD1", 5, (0.824, 1.091, 0.000)],
["CD2", 5, (0.854, -1.148, -0.005)],
["CE2", 5, (2.186, -0.678, -0.007)],
["CE3", 5, (0.622, -2.530, -0.007)],
["NE1", 5, (2.140, 0.690, -0.004)],
["CH2", 5, (3.028, -2.890, -0.013)],
["CZ2", 5, (3.283, -1.543, -0.011)],
["CZ3", 5, (1.715, -3.389, -0.011)],
],
"TYR": [
["N", 0, (-0.522, 1.362, 0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.524, -0.000, -0.000)],
["CB", 0, (-0.522, -0.776, -1.213)],
["O", 3, (0.627, 1.062, -0.000)],
["CG", 4, (0.607, 1.382, -0.000)],
["CD1", 5, (0.716, 1.195, -0.000)],
["CD2", 5, (0.713, -1.194, -0.001)],
["CE1", 5, (2.107, 1.200, -0.002)],
["CE2", 5, (2.104, -1.201, -0.003)],
["OH", 5, (4.168, -0.002, -0.005)],
["CZ", 5, (2.791, -0.001, -0.003)],
],
"VAL": [
["N", 0, (-0.494, 1.373, -0.000)],
["CA", 0, (0.000, 0.000, 0.000)],
["C", 0, (1.527, -0.000, -0.000)],
["CB", 0, (-0.533, -0.795, -1.213)],
["O", 3, (0.627, 1.062, -0.000)],
["CG1", 4, (0.540, 1.429, -0.000)],
["CG2", 4, (0.533, -0.776, 1.203)],
],
}
# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention.
residue_atoms = {
"ALA": ["C", "CA", "CB", "N", "O"],
"ARG": ["C", "CA", "CB", "CG", "CD", "CZ", "N", "NE", "O", "NH1", "NH2"],
"ASP": ["C", "CA", "CB", "CG", "N", "O", "OD1", "OD2"],
"ASN": ["C", "CA", "CB", "CG", "N", "ND2", "O", "OD1"],
"CYS": ["C", "CA", "CB", "N", "O", "SG"],
"GLU": ["C", "CA", "CB", "CG", "CD", "N", "O", "OE1", "OE2"],
"GLN": ["C", "CA", "CB", "CG", "CD", "N", "NE2", "O", "OE1"],
"GLY": ["C", "CA", "N", "O"],
"HIS": ["C", "CA", "CB", "CG", "CD2", "CE1", "N", "ND1", "NE2", "O"],
"ILE": ["C", "CA", "CB", "CG1", "CG2", "CD1", "N", "O"],
"LEU": ["C", "CA", "CB", "CG", "CD1", "CD2", "N", "O"],
"LYS": ["C", "CA", "CB", "CG", "CD", "CE", "N", "NZ", "O"],
"MET": ["C", "CA", "CB", "CG", "CE", "N", "O", "SD"],
"PHE": ["C", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "N", "O"],
"PRO": ["C", "CA", "CB", "CG", "CD", "N", "O"],
"SER": ["C", "CA", "CB", "N", "O", "OG"],
"THR": ["C", "CA", "CB", "CG2", "N", "O", "OG1"],
"TRP": [
"C",
"CA",
"CB",
"CG",
"CD1",
"CD2",
"CE2",
"CE3",
"CZ2",
"CZ3",
"CH2",
"N",
"NE1",
"O",
],
"TYR": [
"C",
"CA",
"CB",
"CG",
"CD1",
"CD2",
"CE1",
"CE2",
"CZ",
"N",
"O",
"OH",
],
"VAL": ["C", "CA", "CB", "CG1", "CG2", "N", "O"],
}
# Naming swaps for ambiguous atom names.
# Due to symmetries in the amino acids the naming of atoms is ambiguous in
# 4 of the 20 amino acids.
# (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities
# in LEU, VAL and ARG can be resolved by using the 3d constellations of
# the 'ambiguous' atoms and their neighbours)
# TODO: ^ interpret this
residue_atom_renaming_swaps = {
"ASP": {"OD1": "OD2"},
"GLU": {"OE1": "OE2"},
"PHE": {"CD1": "CD2", "CE1": "CE2"},
"TYR": {"CD1": "CD2", "CE1": "CE2"},
}
# Van der Waals radii [Angstroem] of the atoms (from Wikipedia)
van_der_waals_radius = {
"C": 1.7,
"N": 1.55,
"O": 1.52,
"S": 1.8,
}
Bond = collections.namedtuple(
"Bond", ["atom1_name", "atom2_name", "length", "stddev"]
)
BondAngle = collections.namedtuple(
"BondAngle",
["atom1_name", "atom2_name", "atom3name", "angle_rad", "stddev"],
)
@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> Tuple[
Mapping[str, List[Bond]],
Mapping[str, List[Bond]],
Mapping[str, List[BondAngle]],
]:
"""Load stereo_chemical_props.txt into a nice structure.
Load literature values for bond lengths and bond angles and translate
bond angles into the length of the opposite edge of the triangle
("residue_virtual_bonds").
Returns:
residue_bonds: Dict that maps resname -> list of Bond tuples
residue_virtual_bonds: Dict that maps resname -> list of Bond tuples
residue_bond_angles: Dict that maps resname -> list of BondAngle tuples
"""
# TODO: this file should be downloaded in a setup script
stereo_chemical_props = resources.read_text("openfold.resources", "stereo_chemical_props.txt")
lines_iter = iter(stereo_chemical_props.splitlines())
# Load bond lengths.
residue_bonds = {}
next(lines_iter) # Skip header line.
for line in lines_iter:
if line.strip() == "-":
break
bond, resname, length, stddev = line.split()
atom1, atom2 = bond.split("-")
if resname not in residue_bonds:
residue_bonds[resname] = []
residue_bonds[resname].append(
Bond(atom1, atom2, float(length), float(stddev))
)
residue_bonds["UNK"] = []
# Load bond angles.
residue_bond_angles = {}
next(lines_iter) # Skip empty line.
next(lines_iter) # Skip header line.
for line in lines_iter:
if line.strip() == "-":
break
bond, resname, angle_degree, stddev_degree = line.split()
atom1, atom2, atom3 = bond.split("-")
if resname not in residue_bond_angles:
residue_bond_angles[resname] = []
residue_bond_angles[resname].append(
BondAngle(
atom1,
atom2,
atom3,
float(angle_degree) / 180.0 * np.pi,
float(stddev_degree) / 180.0 * np.pi,
)
)
residue_bond_angles["UNK"] = []
def make_bond_key(atom1_name, atom2_name):
"""Unique key to lookup bonds."""
return "-".join(sorted([atom1_name, atom2_name]))
# Translate bond angles into distances ("virtual bonds").
residue_virtual_bonds = {}
for resname, bond_angles in residue_bond_angles.items():
# Create a fast lookup dict for bond lengths.
bond_cache = {}
for b in residue_bonds[resname]:
bond_cache[make_bond_key(b.atom1_name, b.atom2_name)] = b
residue_virtual_bonds[resname] = []
for ba in bond_angles:
bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)]
bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)]
# Compute distance between atom1 and atom3 using the law of cosines
# c^2 = a^2 + b^2 - 2ab*cos(gamma).
gamma = ba.angle_rad
length = np.sqrt(
bond1.length ** 2
+ bond2.length ** 2
- 2 * bond1.length * bond2.length * np.cos(gamma)
)
# Propagation of uncertainty assuming uncorrelated errors.
dl_outer = 0.5 / length
dl_dgamma = (
2 * bond1.length * bond2.length * np.sin(gamma)
) * dl_outer
dl_db1 = (
2 * bond1.length - 2 * bond2.length * np.cos(gamma)
) * dl_outer
dl_db2 = (
2 * bond2.length - 2 * bond1.length * np.cos(gamma)
) * dl_outer
stddev = np.sqrt(
(dl_dgamma * ba.stddev) ** 2
+ (dl_db1 * bond1.stddev) ** 2
+ (dl_db2 * bond2.stddev) ** 2
)
residue_virtual_bonds[resname].append(
Bond(ba.atom1_name, ba.atom3name, length, stddev)
)
return (residue_bonds, residue_virtual_bonds, residue_bond_angles)
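# Worked example of the law-of-cosines step above (illustrative numbers):
# with a = 1.52 A, b = 1.33 A and gamma = 116.6 degrees,
# c = sqrt(a**2 + b**2 - 2*a*b*cos(gamma)) ~ sqrt(5.89) ~ 2.43 A,
# which becomes the `length` of the corresponding virtual Bond.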
# Between-residue bond lengths for general bonds (first element) and for Proline
# (second element).
between_res_bond_length_c_n = [1.329, 1.341]
between_res_bond_length_stddev_c_n = [0.014, 0.016]
# Between-residue cos_angles.
between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315
between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995
# This mapping is used when we need to store atom data in a format that requires
# fixed atom data size for every residue (e.g. a numpy array).
atom_types = [
"N",
"CA",
"C",
"CB",
"O",
"CG",
"CG1",
"CG2",
"OG",
"OG1",
"SG",
"CD",
"CD1",
"CD2",
"ND1",
"ND2",
"OD1",
"OD2",
"SD",
"CE",
"CE1",
"CE2",
"CE3",
"NE",
"NE1",
"NE2",
"OE1",
"OE2",
"CH2",
"NH1",
"NH2",
"OH",
"CZ",
"CZ2",
"CZ3",
"NZ",
"OXT",
]
atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)}
atom_type_num = len(atom_types) # := 37.
# A compact atom encoding with 14 columns
# pylint: disable=line-too-long
# pylint: disable=bad-whitespace
restype_name_to_atom14_names = {
"ALA": ["N", "CA", "C", "O", "CB", "", "", "", "", "", "", "", "", ""],
"ARG": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD",
"NE",
"CZ",
"NH1",
"NH2",
"",
"",
"",
],
"ASN": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"OD1",
"ND2",
"",
"",
"",
"",
"",
"",
],
"ASP": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"OD1",
"OD2",
"",
"",
"",
"",
"",
"",
],
"CYS": ["N", "CA", "C", "O", "CB", "SG", "", "", "", "", "", "", "", ""],
"GLN": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD",
"OE1",
"NE2",
"",
"",
"",
"",
"",
],
"GLU": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD",
"OE1",
"OE2",
"",
"",
"",
"",
"",
],
"GLY": ["N", "CA", "C", "O", "", "", "", "", "", "", "", "", "", ""],
"HIS": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"ND1",
"CD2",
"CE1",
"NE2",
"",
"",
"",
"",
],
"ILE": [
"N",
"CA",
"C",
"O",
"CB",
"CG1",
"CG2",
"CD1",
"",
"",
"",
"",
"",
"",
],
"LEU": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD1",
"CD2",
"",
"",
"",
"",
"",
"",
],
"LYS": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD",
"CE",
"NZ",
"",
"",
"",
"",
"",
],
"MET": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"SD",
"CE",
"",
"",
"",
"",
"",
"",
],
"PHE": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD1",
"CD2",
"CE1",
"CE2",
"CZ",
"",
"",
"",
],
"PRO": ["N", "CA", "C", "O", "CB", "CG", "CD", "", "", "", "", "", "", ""],
"SER": ["N", "CA", "C", "O", "CB", "OG", "", "", "", "", "", "", "", ""],
"THR": [
"N",
"CA",
"C",
"O",
"CB",
"OG1",
"CG2",
"",
"",
"",
"",
"",
"",
"",
],
"TRP": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD1",
"CD2",
"NE1",
"CE2",
"CE3",
"CZ2",
"CZ3",
"CH2",
],
"TYR": [
"N",
"CA",
"C",
"O",
"CB",
"CG",
"CD1",
"CD2",
"CE1",
"CE2",
"CZ",
"OH",
"",
"",
],
"VAL": [
"N",
"CA",
"C",
"O",
"CB",
"CG1",
"CG2",
"",
"",
"",
"",
"",
"",
"",
],
"UNK": ["", "", "", "", "", "", "", "", "", "", "", "", "", ""],
}
# pylint: enable=line-too-long
# pylint: enable=bad-whitespace
# This is the standard residue order when coding AA type as a number.
# Reproduce it by taking 3-letter AA codes and sorting them alphabetically.
restypes = [
"A",
"R",
"N",
"D",
"C",
"Q",
"E",
"G",
"H",
"I",
"L",
"K",
"M",
"F",
"P",
"S",
"T",
"W",
"Y",
"V",
]
restype_order = {restype: i for i, restype in enumerate(restypes)}
restype_num = len(restypes) # := 20.
unk_restype_index = restype_num # Catch-all index for unknown restypes.
restypes_with_x = restypes + ["X"]
restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)}
def sequence_to_onehot(
sequence: str, mapping: Mapping[str, int], map_unknown_to_x: bool = False
) -> np.ndarray:
"""Maps the given sequence into a one-hot encoded matrix.
Args:
sequence: An amino acid sequence.
mapping: A dictionary mapping amino acids to integers.
map_unknown_to_x: If True, any amino acid that is not in the mapping will be
mapped to the unknown amino acid 'X'. If the mapping doesn't contain
amino acid 'X', an error will be thrown. If False, any amino acid not in
the mapping will throw an error.
Returns:
A numpy array of shape (seq_len, num_unique_aas) with one-hot encoding of
the sequence.
Raises:
ValueError: If the mapping doesn't contain values from 0 to
num_unique_aas - 1 without any gaps.
"""
num_entries = max(mapping.values()) + 1
if sorted(set(mapping.values())) != list(range(num_entries)):
raise ValueError(
"The mapping must have values from 0 to num_unique_aas-1 "
"without any gaps. Got: %s" % sorted(mapping.values())
)
one_hot_arr = np.zeros((len(sequence), num_entries), dtype=np.int32)
for aa_index, aa_type in enumerate(sequence):
if map_unknown_to_x:
if aa_type.isalpha() and aa_type.isupper():
aa_id = mapping.get(aa_type, mapping["X"])
else:
raise ValueError(
f"Invalid character in the sequence: {aa_type}"
)
else:
aa_id = mapping[aa_type]
one_hot_arr[aa_index, aa_id] = 1
return one_hot_arr
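# Usage sketch (illustrative only): encode a short sequence, mapping the
# non-standard code 'B' to 'X':
#
#   onehot = sequence_to_onehot(
#       "ACDB", restype_order_with_x, map_unknown_to_x=True
#   )
#   assert onehot.shape == (4, 21)  # 20 standard amino acids + 'X'
#   assert onehot[3, restype_order_with_x["X"]] == 1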
restype_1to3 = {
"A": "ALA",
"R": "ARG",
"N": "ASN",
"D": "ASP",
"C": "CYS",
"Q": "GLN",
"E": "GLU",
"G": "GLY",
"H": "HIS",
"I": "ILE",
"L": "LEU",
"K": "LYS",
"M": "MET",
"F": "PHE",
"P": "PRO",
"S": "SER",
"T": "THR",
"W": "TRP",
"Y": "TYR",
"V": "VAL",
}
# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple
# 1-to-1 mapping of 3 letter names to one letter names. The latter contains
# many more, and less common, three letter names as keys and maps many of these
# to the same one letter name (including 'X' and 'U' which we don't use here).
restype_3to1 = {v: k for k, v in restype_1to3.items()}
# Define a restype name for all unknown residues.
unk_restype = "UNK"
resnames = [restype_1to3[r] for r in restypes] + [unk_restype]
resname_to_idx = {resname: i for i, resname in enumerate(resnames)}
# The mapping here uses hhblits convention, so that B is mapped to D, J and O
# are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the
# remaining 20 amino acids are kept in alphabetical order.
# There are 2 non-amino acid codes, X (representing any amino acid) and
# "-" representing a missing amino acid in an alignment. The id for these
# codes is put at the end (20 and 21) so that they can easily be ignored if
# desired.
HHBLITS_AA_TO_ID = {
"A": 0,
"B": 2,
"C": 1,
"D": 2,
"E": 3,
"F": 4,
"G": 5,
"H": 6,
"I": 7,
"J": 20,
"K": 8,
"L": 9,
"M": 10,
"N": 11,
"O": 20,
"P": 12,
"Q": 13,
"R": 14,
"S": 15,
"T": 16,
"U": 1,
"V": 17,
"W": 18,
"X": 20,
"Y": 19,
"Z": 3,
"-": 21,
}
# Partial inversion of HHBLITS_AA_TO_ID.
ID_TO_HHBLITS_AA = {
0: "A",
1: "C", # Also U.
2: "D", # Also B.
3: "E", # Also Z.
4: "F",
5: "G",
6: "H",
7: "I",
8: "K",
9: "L",
10: "M",
11: "N",
12: "P",
13: "Q",
14: "R",
15: "S",
16: "T",
17: "V",
18: "W",
19: "Y",
20: "X", # Includes J and O.
21: "-",
}
restypes_with_x_and_gap = restypes + ["X", "-"]
MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple(
restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[i])
for i in range(len(restypes_with_x_and_gap))
)
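# Usage sketch (illustrative only): remap hhblits-encoded residue ids to this
# module's alphabet, e.g. hhblits id 2 ('D', also 'B') becomes our id 3:
#
#   assert MAP_HHBLITS_AATYPE_TO_OUR_AATYPE[2] == restype_order["D"]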
def _make_standard_atom_mask() -> np.ndarray:
"""Returns [num_res_types, num_atom_types] mask array."""
# +1 to account for unknown (all 0s).
mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32)
for restype, restype_letter in enumerate(restypes):
restype_name = restype_1to3[restype_letter]
atom_names = residue_atoms[restype_name]
for atom_name in atom_names:
atom_type = atom_order[atom_name]
mask[restype, atom_type] = 1
return mask
STANDARD_ATOM_MASK = _make_standard_atom_mask()
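# Usage sketch (illustrative only): glycine has no CB, so its atom37 mask is
# zero in that slot:
#
#   gly = restype_order["G"]
#   assert STANDARD_ATOM_MASK[gly, atom_order["CB"]] == 0
#   assert STANDARD_ATOM_MASK[gly, atom_order["CA"]] == 1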
# A one hot representation for the first and second atoms defining the axis
# of rotation for each chi-angle in each residue.
def chi_angle_atom(atom_index: int) -> np.ndarray:
"""Define chi-angle rigid groups via one-hot representations."""
chi_angles_index = {}
one_hots = []
for k, v in chi_angles_atoms.items():
indices = [atom_types.index(s[atom_index]) for s in v]
indices.extend([-1] * (4 - len(indices)))
chi_angles_index[k] = indices
for r in restypes:
res3 = restype_1to3[r]
one_hot = np.eye(atom_type_num)[chi_angles_index[res3]]
one_hots.append(one_hot)
one_hots.append(np.zeros([4, atom_type_num])) # Add zeros for residue `X`.
one_hot = np.stack(one_hots, axis=0)
one_hot = np.transpose(one_hot, [0, 2, 1])
return one_hot
chi_atom_1_one_hot = chi_angle_atom(1)
chi_atom_2_one_hot = chi_angle_atom(2)
# An array like chi_angles_atoms but using indices rather than names.
chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes]
chi_angles_atom_indices = tree.map_structure(
lambda atom_name: atom_order[atom_name], chi_angles_atom_indices
)
chi_angles_atom_indices = np.array(
[
chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms)))
for chi_atoms in chi_angles_atom_indices
]
)
# Mapping from (res_name, atom_name) pairs to the atom's chi group index
# and atom index within that group.
chi_groups_for_atom = collections.defaultdict(list)
for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items():
for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
for atom_i, atom in enumerate(chi_group):
chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i))
chi_groups_for_atom = dict(chi_groups_for_atom)
def _make_rigid_transformation_4x4(ex, ey, translation):
"""Create a rigid 4x4 transformation matrix from two axes and transl."""
# Normalize ex.
ex_normalized = ex / np.linalg.norm(ex)
# make ey perpendicular to ex
ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
ey_normalized /= np.linalg.norm(ey_normalized)
# compute ez as cross product
eznorm = np.cross(ex_normalized, ey_normalized)
m = np.stack(
[ex_normalized, ey_normalized, eznorm, translation]
).transpose()
m = np.concatenate([m, [[0.0, 0.0, 0.0, 1.0]]], axis=0)
return m
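# Sanity-check sketch (illustrative only): the result is a proper rigid
# transform, i.e. an orthonormal 3x3 rotation with the translation in the
# last column:
#
#   m = _make_rigid_transformation_4x4(
#       ex=np.array([1.0, 0.0, 0.0]),
#       ey=np.array([0.0, 1.0, 0.0]),
#       translation=np.array([1.0, 2.0, 3.0]),
#   )
#   assert np.allclose(m[:3, :3] @ m[:3, :3].T, np.eye(3))
#   assert np.allclose(m[:3, 3], [1.0, 2.0, 3.0])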
# create an array with (restype, atomtype) --> rigid_group_idx
# and an array with (restype, atomtype, coord) for the atom positions
# and compute affine transformation matrices (4,4) from one rigid group to the
# previous group
restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int)
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32)
restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int)
restype_atom14_mask = np.zeros([21, 14], dtype=np.float32)
restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32)
restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32)
def _make_rigid_group_constants():
"""Fill the arrays above."""
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
for atomname, group_idx, atom_position in rigid_group_atom_positions[
resname
]:
atomtype = atom_order[atomname]
restype_atom37_to_rigid_group[restype, atomtype] = group_idx
restype_atom37_mask[restype, atomtype] = 1
restype_atom37_rigid_group_positions[
restype, atomtype, :
] = atom_position
atom14idx = restype_name_to_atom14_names[resname].index(atomname)
restype_atom14_to_rigid_group[restype, atom14idx] = group_idx
restype_atom14_mask[restype, atom14idx] = 1
restype_atom14_rigid_group_positions[
restype, atom14idx, :
] = atom_position
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
atom_positions = {
name: np.array(pos)
for name, _, pos in rigid_group_atom_positions[resname]
}
# backbone to backbone is the identity transform
restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4)
# pre-omega-frame to backbone (currently dummy identity matrix)
restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4)
# phi-frame to backbone
mat = _make_rigid_transformation_4x4(
ex=atom_positions["N"] - atom_positions["CA"],
ey=np.array([1.0, 0.0, 0.0]),
translation=atom_positions["N"],
)
restype_rigid_group_default_frame[restype, 2, :, :] = mat
# psi-frame to backbone
mat = _make_rigid_transformation_4x4(
ex=atom_positions["C"] - atom_positions["CA"],
ey=atom_positions["CA"] - atom_positions["N"],
translation=atom_positions["C"],
)
restype_rigid_group_default_frame[restype, 3, :, :] = mat
# chi1-frame to backbone
if chi_angles_mask[restype][0]:
base_atom_names = chi_angles_atoms[resname][0]
base_atom_positions = [
atom_positions[name] for name in base_atom_names
]
mat = _make_rigid_transformation_4x4(
ex=base_atom_positions[2] - base_atom_positions[1],
ey=base_atom_positions[0] - base_atom_positions[1],
translation=base_atom_positions[2],
)
restype_rigid_group_default_frame[restype, 4, :, :] = mat
# chi2-frame to chi1-frame
# chi3-frame to chi2-frame
# chi4-frame to chi3-frame
# luckily all rotation axes for the next frame start at (0,0,0) of the
# previous frame
for chi_idx in range(1, 4):
if chi_angles_mask[restype][chi_idx]:
axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2]
axis_end_atom_position = atom_positions[axis_end_atom_name]
mat = _make_rigid_transformation_4x4(
ex=axis_end_atom_position,
ey=np.array([-1.0, 0.0, 0.0]),
translation=axis_end_atom_position,
)
restype_rigid_group_default_frame[
restype, 4 + chi_idx, :, :
] = mat
_make_rigid_group_constants()
def make_atom14_dists_bounds(
overlap_tolerance=1.5, bond_length_tolerance_factor=15
):
"""compute upper and lower bounds for bonds to assess violations."""
restype_atom14_bond_lower_bound = np.zeros([21, 14, 14], np.float32)
restype_atom14_bond_upper_bound = np.zeros([21, 14, 14], np.float32)
restype_atom14_bond_stddev = np.zeros([21, 14, 14], np.float32)
residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
atom_list = restype_name_to_atom14_names[resname]
# create lower and upper bounds for clashes
for atom1_idx, atom1_name in enumerate(atom_list):
if not atom1_name:
continue
atom1_radius = van_der_waals_radius[atom1_name[0]]
for atom2_idx, atom2_name in enumerate(atom_list):
if (not atom2_name) or atom1_idx == atom2_idx:
continue
atom2_radius = van_der_waals_radius[atom2_name[0]]
lower = atom1_radius + atom2_radius - overlap_tolerance
upper = 1e10
restype_atom14_bond_lower_bound[
restype, atom1_idx, atom2_idx
] = lower
restype_atom14_bond_lower_bound[
restype, atom2_idx, atom1_idx
] = lower
restype_atom14_bond_upper_bound[
restype, atom1_idx, atom2_idx
] = upper
restype_atom14_bond_upper_bound[
restype, atom2_idx, atom1_idx
] = upper
# overwrite lower and upper bounds for bonds and angles
for b in residue_bonds[resname] + residue_virtual_bonds[resname]:
atom1_idx = atom_list.index(b.atom1_name)
atom2_idx = atom_list.index(b.atom2_name)
lower = b.length - bond_length_tolerance_factor * b.stddev
upper = b.length + bond_length_tolerance_factor * b.stddev
restype_atom14_bond_lower_bound[
restype, atom1_idx, atom2_idx
] = lower
restype_atom14_bond_lower_bound[
restype, atom2_idx, atom1_idx
] = lower
restype_atom14_bond_upper_bound[
restype, atom1_idx, atom2_idx
] = upper
restype_atom14_bond_upper_bound[
restype, atom2_idx, atom1_idx
] = upper
restype_atom14_bond_stddev[restype, atom1_idx, atom2_idx] = b.stddev
restype_atom14_bond_stddev[restype, atom2_idx, atom1_idx] = b.stddev
return {
"lower_bound": restype_atom14_bond_lower_bound, # shape (21,14,14)
"upper_bound": restype_atom14_bond_upper_bound, # shape (21,14,14)
"stddev": restype_atom14_bond_stddev, # shape (21,14,14)
}
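# Usage sketch (illustrative only):
#
#   bounds = make_atom14_dists_bounds()
#   assert bounds["lower_bound"].shape == (21, 14, 14)
#   # Bonded pairs get length +/- 15 * stddev; all other pairs get a lower
#   # bound of the summed van der Waals radii minus the 1.5 A overlap
#   # tolerance and an effectively unbounded upper bound.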
restype_atom14_ambiguous_atoms = np.zeros((21, 14), dtype=np.float32)
restype_atom14_ambiguous_atoms_swap_idx = np.tile(
np.arange(14, dtype=int), (21, 1)
)
def _make_atom14_ambiguity_feats():
for res, pairs in residue_atom_renaming_swaps.items():
res_idx = restype_order[restype_3to1[res]]
for atom1, atom2 in pairs.items():
atom1_idx = restype_name_to_atom14_names[res].index(atom1)
atom2_idx = restype_name_to_atom14_names[res].index(atom2)
restype_atom14_ambiguous_atoms[res_idx, atom1_idx] = 1
restype_atom14_ambiguous_atoms[res_idx, atom2_idx] = 1
restype_atom14_ambiguous_atoms_swap_idx[
res_idx, atom1_idx
] = atom2_idx
restype_atom14_ambiguous_atoms_swap_idx[
res_idx, atom2_idx
] = atom1_idx
_make_atom14_ambiguity_feats()
def aatype_to_str_sequence(aatype):
return ''.join([
restypes_with_x[aatype[i]]
for i in range(len(aatype))
])
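# Usage sketch (illustrative only): indices follow restypes_with_x, so
#
#   assert aatype_to_str_sequence([0, 4, 20]) == "ACX"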
### ALPHAFOLD MULTIMER STUFF ###
def _make_chi_atom_indices():
"""Returns atom indices needed to compute chi angles for all residue types.
Returns:
A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are
in the order specified in residue_constants.restypes + unknown residue type
at the end. For chi angles which are not defined on the residue, the atom
indices are set to 0 by default.
"""
chi_atom_indices = []
for residue_name in restypes:
residue_name = restype_1to3[residue_name]
residue_chi_angles = chi_angles_atoms[residue_name]
atom_indices = []
for chi_angle in residue_chi_angles:
atom_indices.append(
[atom_order[atom] for atom in chi_angle])
for _ in range(4 - len(atom_indices)):
atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA.
chi_atom_indices.append(atom_indices)
chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue.
return np.array(chi_atom_indices)
def _make_renaming_matrices():
"""Matrices to map atoms to symmetry partners in ambiguous case."""
# As the atom naming is ambiguous for 7 of the 20 amino acids, provide
# alternative groundtruth coordinates where the naming is swapped
restype_3 = [
restype_1to3[res] for res in restypes
]
restype_3 += ['UNK']
# Matrices for renaming ambiguous atoms.
all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3}
for resname, swap in residue_atom_renaming_swaps.items():
correspondences = np.arange(14)
for source_atom_swap, target_atom_swap in swap.items():
source_index = restype_name_to_atom14_names[
resname].index(source_atom_swap)
target_index = restype_name_to_atom14_names[
resname].index(target_atom_swap)
correspondences[source_index] = target_index
correspondences[target_index] = source_index
renaming_matrix = np.zeros((14, 14), dtype=np.float32)
for index, correspondence in enumerate(correspondences):
renaming_matrix[index, correspondence] = 1.
all_matrices[resname] = renaming_matrix.astype(np.float32)
renaming_matrices = np.stack([all_matrices[restype] for restype in restype_3])
return renaming_matrices
def _make_restype_atom37_mask():
"""Mask of which atoms are present for which residue type in atom37."""
# create the corresponding mask
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
for restype, restype_letter in enumerate(restypes):
restype_name = restype_1to3[restype_letter]
atom_names = residue_atoms[restype_name]
for atom_name in atom_names:
atom_type = atom_order[atom_name]
restype_atom37_mask[restype, atom_type] = 1
return restype_atom37_mask
def _make_restype_atom14_mask():
"""Mask of which atoms are present for which residue type in atom14."""
restype_atom14_mask = []
for rt in restypes:
atom_names = restype_name_to_atom14_names[
restype_1to3[rt]]
restype_atom14_mask.append([(1. if name else 0.) for name in atom_names])
restype_atom14_mask.append([0.] * 14)
restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)
return restype_atom14_mask
def _make_restype_atom37_to_atom14():
"""Map from atom37 to atom14 per residue type."""
restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14
for rt in restypes:
atom_names = restype_name_to_atom14_names[
restype_1to3[rt]]
atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
restype_atom37_to_atom14.append([
(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
for name in atom_types
])
restype_atom37_to_atom14.append([0] * 37)
restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32)
return restype_atom37_to_atom14
def _make_restype_atom14_to_atom37():
"""Map from atom14 to atom37 per residue type."""
restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37
for rt in restypes:
atom_names = restype_name_to_atom14_names[
restype_1to3[rt]]
restype_atom14_to_atom37.append([
(atom_order[name] if name else 0)
for name in atom_names
])
# Add dummy mapping for restype 'UNK'
restype_atom14_to_atom37.append([0] * 14)
restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32)
return restype_atom14_to_atom37
def _make_restype_atom14_is_ambiguous():
"""Mask which atoms are ambiguous in atom14."""
# create an ambiguous atoms mask. shape: (21, 14)
restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32)
for resname, swap in residue_atom_renaming_swaps.items():
for atom_name1, atom_name2 in swap.items():
restype = restype_order[
restype_3to1[resname]]
atom_idx1 = restype_name_to_atom14_names[resname].index(
atom_name1)
atom_idx2 = restype_name_to_atom14_names[resname].index(
atom_name2)
restype_atom14_is_ambiguous[restype, atom_idx1] = 1
restype_atom14_is_ambiguous[restype, atom_idx2] = 1
return restype_atom14_is_ambiguous
def _make_restype_rigidgroup_base_atom37_idx():
"""Create Map from rigidgroups to atom37 indices."""
# Create an array with the atom names.
# shape (num_restypes, num_rigidgroups, 3_atoms): (21, 8, 3)
base_atom_names = np.full([21, 8, 3], '', dtype=object)
# 0: backbone frame
base_atom_names[:, 0, :] = ['C', 'CA', 'N']
# 3: 'psi-group'
base_atom_names[:, 3, :] = ['CA', 'C', 'O']
# 4,5,6,7: 'chi1,2,3,4-group'
for restype, restype_letter in enumerate(restypes):
resname = restype_1to3[restype_letter]
for chi_idx in range(4):
if chi_angles_mask[restype][chi_idx]:
atom_names = chi_angles_atoms[resname][chi_idx]
base_atom_names[restype, chi_idx + 4, :] = atom_names[1:]
# Translate atom names into atom37 indices.
lookuptable = atom_order.copy()
lookuptable[''] = 0
restype_rigidgroup_base_atom37_idx = np.vectorize(lambda x: lookuptable[x])(
base_atom_names)
return restype_rigidgroup_base_atom37_idx
CHI_ATOM_INDICES = _make_chi_atom_indices()
RENAMING_MATRICES = _make_renaming_matrices()
RESTYPE_ATOM14_TO_ATOM37 = _make_restype_atom14_to_atom37()
RESTYPE_ATOM37_TO_ATOM14 = _make_restype_atom37_to_atom14()
RESTYPE_ATOM37_MASK = _make_restype_atom37_mask()
RESTYPE_ATOM14_MASK = _make_restype_atom14_mask()
RESTYPE_ATOM14_IS_AMBIGUOUS = _make_restype_atom14_is_ambiguous()
RESTYPE_RIGIDGROUP_BASE_ATOM37_IDX = _make_restype_rigidgroup_base_atom37_idx()
# Create mask for existing rigid groups.
RESTYPE_RIGIDGROUP_MASK = np.zeros([21, 8], dtype=np.float32)
RESTYPE_RIGIDGROUP_MASK[:, 0] = 1
RESTYPE_RIGIDGROUP_MASK[:, 3] = 1
RESTYPE_RIGIDGROUP_MASK[:20, 4:] = chi_angles_mask
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ops for all atom representations."""
from functools import partial
from typing import Dict, Text, Tuple
import torch
from fastfold.np import residue_constants as rc
from fastfold.utils import geometry, tensor_utils
import numpy as np
def squared_difference(x, y):
return torch.square(x - y)
def get_rc_tensor(rc_np, aatype):
return torch.tensor(rc_np, device=aatype.device)[aatype]
def atom14_to_atom37(
atom14_data: torch.Tensor, # (*, N, 14, ...)
aatype: torch.Tensor # (*, N)
) -> torch.Tensor: # (*, N, 37, ...)
"""Convert atom14 to atom37 representation."""
idx_atom37_to_atom14 = get_rc_tensor(rc.RESTYPE_ATOM37_TO_ATOM14, aatype)
no_batch_dims = len(aatype.shape) - 1
atom37_data = tensor_utils.batched_gather(
atom14_data,
idx_atom37_to_atom14,
dim=no_batch_dims + 1,
no_batch_dims=no_batch_dims + 1
)
atom37_mask = get_rc_tensor(rc.RESTYPE_ATOM37_MASK, aatype)
if len(atom14_data.shape) == no_batch_dims + 2:
atom37_data *= atom37_mask
elif len(atom14_data.shape) == no_batch_dims + 3:
atom37_data *= atom37_mask[..., None].to(atom37_data.dtype)
else:
raise ValueError("Incorrectly shaped data")
return atom37_data
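# Usage sketch (illustrative only; random values, shapes only):
#
#   import torch
#   n_res = 8
#   aatype = torch.randint(0, 21, (n_res,))
#   atom14 = torch.randn(n_res, 14, 3)
#   atom37 = atom14_to_atom37(atom14, aatype)
#   assert atom37.shape == (n_res, 37, 3)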
def atom37_to_atom14(aatype, all_atom_pos, all_atom_mask):
"""Convert Atom37 positions to Atom14 positions."""
residx_atom14_to_atom37 = get_rc_tensor(
rc.RESTYPE_ATOM14_TO_ATOM37, aatype
)
no_batch_dims = len(aatype.shape) - 1
atom14_mask = tensor_utils.batched_gather(
all_atom_mask,
residx_atom14_to_atom37,
dim=no_batch_dims + 1,
no_batch_dims=no_batch_dims + 1,
).to(torch.float32)
# create a mask for known groundtruth positions
atom14_mask *= get_rc_tensor(rc.RESTYPE_ATOM14_MASK, aatype)
# gather the groundtruth positions
atom14_positions = tensor_utils.batched_gather(
all_atom_pos,
residx_atom14_to_atom37,
dim=no_batch_dims + 1,
no_batch_dims=no_batch_dims + 1,
)
atom14_positions = atom14_mask * atom14_positions
return atom14_positions, atom14_mask
def get_alt_atom14(aatype, positions: torch.Tensor, mask):
"""Get alternative atom14 positions."""
# pick the transformation matrices for the given residue sequence
# shape (num_res, 14, 14)
renaming_transform = get_rc_tensor(rc.RENAMING_MATRICES, aatype)
alternative_positions = torch.sum(
positions[..., None, :] * renaming_transform[..., None],
dim=-3  # sum over the source-atom axis, not the target-atom axis
)
# Create the mask for the alternative ground truth (differs from the
# ground truth mask, if only one of the atoms in an ambiguous pair has a
# ground truth position)
alternative_mask = torch.sum(mask[..., None] * renaming_transform, dim=-2)
return alternative_positions, alternative_mask
def atom37_to_frames(
aatype: torch.Tensor, # (...)
all_atom_positions: torch.Tensor, # (..., 37)
all_atom_mask: torch.Tensor, # (..., 37)
) -> Dict[Text, torch.Tensor]:
"""Computes the frames for the up to 8 rigid groups for each residue."""
# 0: 'backbone group',
# 1: 'pre-omega-group', (empty)
# 2: 'phi-group', (currently empty, because it defines only hydrogens)
# 3: 'psi-group',
# 4,5,6,7: 'chi1,2,3,4-group'
no_batch_dims = len(aatype.shape) - 1
# Compute the gather indices for all residues in the chain.
# shape (N, 8, 3)
residx_rigidgroup_base_atom37_idx = get_rc_tensor(
rc.RESTYPE_RIGIDGROUP_BASE_ATOM37_IDX, aatype
)
# Gather the base atom positions for each rigid group.
base_atom_pos = tensor_utils.batched_gather(
all_atom_positions,
residx_rigidgroup_base_atom37_idx,
dim=no_batch_dims + 1,
no_batch_dims=no_batch_dims + 1,
)
# Compute the Rigids.
point_on_neg_x_axis = base_atom_pos[..., :, :, 0]
origin = base_atom_pos[..., :, :, 1]
point_on_xy_plane = base_atom_pos[..., :, :, 2]
gt_rotation = geometry.Rot3Array.from_two_vectors(
origin - point_on_neg_x_axis, point_on_xy_plane - origin
)
gt_frames = geometry.Rigid3Array(gt_rotation, origin)
# Compute a mask whether the group exists.
# (N, 8)
group_exists = get_rc_tensor(rc.RESTYPE_RIGIDGROUP_MASK, aatype)
# Compute a mask whether ground truth exists for the group
gt_atoms_exist = tensor_utils.batched_gather( # shape (N, 8, 3)
all_atom_mask.to(dtype=torch.float32),
residx_rigidgroup_base_atom37_idx,
dim=no_batch_dims + 1,
no_batch_dims=no_batch_dims + 1,
)
gt_exists = torch.min(gt_atoms_exist, dim=-1).values * group_exists # (N, 8)
# Adapt backbone frame to old convention (mirror x-axis and z-axis).
rots = np.tile(np.eye(3, dtype=np.float32), [8, 1, 1])
rots[0, 0, 0] = -1
rots[0, 2, 2] = -1
gt_frames = gt_frames.compose_rotation(
geometry.Rot3Array.from_array(
torch.tensor(rots, device=aatype.device)
)
)
# The frames for ambiguous rigid groups are just rotated by 180 degree around
# the x-axis. The ambiguous group is always the last chi-group.
restype_rigidgroup_is_ambiguous = np.zeros([21, 8], dtype=np.float32)
restype_rigidgroup_rots = np.tile(
np.eye(3, dtype=np.float32), [21, 8, 1, 1]
)
for resname, _ in rc.residue_atom_renaming_swaps.items():
restype = rc.restype_order[
rc.restype_3to1[resname]
]
chi_idx = int(sum(rc.chi_angles_mask[restype]) - 1)
restype_rigidgroup_is_ambiguous[restype, chi_idx + 4] = 1
restype_rigidgroup_rots[restype, chi_idx + 4, 1, 1] = -1
restype_rigidgroup_rots[restype, chi_idx + 4, 2, 2] = -1
# Gather the ambiguity information for each residue.
residx_rigidgroup_is_ambiguous = torch.tensor(
restype_rigidgroup_is_ambiguous,
device=aatype.device,
)[aatype]
ambiguity_rot = torch.tensor(
restype_rigidgroup_rots,
device=aatype.device,
)[aatype]
ambiguity_rot = geometry.Rot3Array.from_array(ambiguity_rot)
# Create the alternative ground truth frames.
alt_gt_frames = gt_frames.compose_rotation(ambiguity_rot)
fix_shape = lambda x: x.reshape(aatype.shape + (8,))
# reshape back to original residue layout
gt_frames = fix_shape(gt_frames)
gt_exists = fix_shape(gt_exists)
group_exists = fix_shape(group_exists)
residx_rigidgroup_is_ambiguous = fix_shape(residx_rigidgroup_is_ambiguous)
alt_gt_frames = fix_shape(alt_gt_frames)
return {
'rigidgroups_gt_frames': gt_frames, # Rigid (..., 8)
'rigidgroups_gt_exists': gt_exists, # (..., 8)
'rigidgroups_group_exists': group_exists, # (..., 8)
'rigidgroups_group_is_ambiguous':
residx_rigidgroup_is_ambiguous, # (..., 8)
'rigidgroups_alt_gt_frames': alt_gt_frames, # Rigid (..., 8)
}
def torsion_angles_to_frames(
aatype: torch.Tensor, # (N)
backb_to_global: geometry.Rigid3Array, # (N)
torsion_angles_sin_cos: torch.Tensor # (N, 7, 2)
) -> geometry.Rigid3Array: # (N, 8)
"""Compute rigid group frames from torsion angles."""
# Gather the default frames for all rigid groups.
# geometry.Rigid3Array with shape (N, 8)
m = get_rc_tensor(rc.restype_rigid_group_default_frame, aatype)
default_frames = geometry.Rigid3Array.from_array4x4(m)
# Create the rotation matrices according to the given angles (each frame is
# defined such that its rotation is around the x-axis).
sin_angles = torsion_angles_sin_cos[..., 0]
cos_angles = torsion_angles_sin_cos[..., 1]
# insert zero rotation for backbone group.
sin_angles = torch.cat(
[
torch.zeros_like(sin_angles[..., :1]),
sin_angles,
],
dim=-1
)
cos_angles = torch.cat(
[
torch.ones_like(cos_angles[..., :1]),
cos_angles
],
dim=-1
)
zeros = torch.zeros_like(sin_angles)
ones = torch.ones_like(sin_angles)
# all_rots are geometry.Rot3Array with shape (..., N, 8)
all_rots = geometry.Rot3Array(
ones, zeros, zeros,
zeros, cos_angles, -sin_angles,
zeros, sin_angles, cos_angles
)
# Apply rotations to the frames.
all_frames = default_frames.compose_rotation(all_rots)
# chi2, chi3, and chi4 frames do not transform to the backbone frame but to
# the previous frame. So chain them up accordingly.
chi1_frame_to_backb = all_frames[..., 4]
chi2_frame_to_backb = chi1_frame_to_backb @ all_frames[..., 5]
chi3_frame_to_backb = chi2_frame_to_backb @ all_frames[..., 6]
chi4_frame_to_backb = chi3_frame_to_backb @ all_frames[..., 7]
all_frames_to_backb = geometry.Rigid3Array.cat(
[
all_frames[..., 0:5],
chi2_frame_to_backb[..., None],
chi3_frame_to_backb[..., None],
chi4_frame_to_backb[..., None]
],
dim=-1
)
# Create the global frames.
# shape (N, 8)
all_frames_to_global = backb_to_global[..., None] @ all_frames_to_backb
return all_frames_to_global
def frames_and_literature_positions_to_atom14_pos(
aatype: torch.Tensor, # (*, N)
all_frames_to_global: geometry.Rigid3Array # (N, 8)
) -> geometry.Vec3Array: # (*, N, 14)
"""Put atom literature positions (atom14 encoding) in each rigid group."""
# Pick the appropriate transform for every atom.
residx_to_group_idx = get_rc_tensor(
rc.restype_atom14_to_rigid_group,
aatype
)
group_mask = torch.nn.functional.one_hot(
residx_to_group_idx,
num_classes=8
) # shape (*, N, 14, 8)
# geometry.Rigid3Array with shape (N, 14)
map_atoms_to_global = all_frames_to_global[..., None, :] * group_mask
map_atoms_to_global = map_atoms_to_global.map_tensor_fn(
partial(torch.sum, dim=-1)
)
# Gather the literature atom positions for each residue.
# geometry.Vec3Array with shape (N, 14)
lit_positions = geometry.Vec3Array.from_array(
get_rc_tensor(
rc.restype_atom14_rigid_group_positions,
aatype
)
)
# Transform each atom from its local frame to the global frame.
# geometry.Vec3Array with shape (N, 14)
pred_positions = map_atoms_to_global.apply_to_point(lit_positions)
# Mask out non-existing atoms.
mask = get_rc_tensor(rc.restype_atom14_mask, aatype)
pred_positions = pred_positions * mask
return pred_positions
def extreme_ca_ca_distance_violations(
positions: geometry.Vec3Array, # (N, 37(14))
mask: torch.Tensor, # (N, 37(14))
residue_index: torch.Tensor, # (N)
max_angstrom_tolerance=1.5,
eps: float = 1e-6
) -> torch.Tensor:
"""Counts residues whose Ca is a large distance from its neighbor."""
this_ca_pos = positions[..., :-1, 1] # (N - 1,)
this_ca_mask = mask[..., :-1, 1] # (N - 1)
next_ca_pos = positions[..., 1:, 1] # (N - 1,)
next_ca_mask = mask[..., 1:, 1] # (N - 1)
has_no_gap_mask = (
(residue_index[..., 1:] - residue_index[..., :-1]) == 1.0
).to(torch.float32)
ca_ca_distance = geometry.euclidean_distance(this_ca_pos, next_ca_pos, eps)
violations = (ca_ca_distance - rc.ca_ca) > max_angstrom_tolerance
mask = this_ca_mask * next_ca_mask * has_no_gap_mask
return tensor_utils.masked_mean(mask=mask, value=violations, dim=-1)
def get_chi_atom_indices(device: torch.device):
"""Returns atom indices needed to compute chi angles for all residue types.
Returns:
A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are
in the order specified in rc.restypes + unknown residue type
at the end. For chi angles which are not defined on the residue, the atom
indices are set to 0 by default.
"""
chi_atom_indices = []
for residue_name in rc.restypes:
residue_name = rc.restype_1to3[residue_name]
residue_chi_angles = rc.chi_angles_atoms[residue_name]
atom_indices = []
for chi_angle in residue_chi_angles:
atom_indices.append(
[rc.atom_order[atom] for atom in chi_angle]
)
for _ in range(4 - len(atom_indices)):
atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA.
chi_atom_indices.append(atom_indices)
chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue.
return torch.tensor(chi_atom_indices, device=device)
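# Usage sketch (illustrative only): one row per residue type (20 standard
# plus UNK), four atoms for each of the four chi angles:
#
#   indices = get_chi_atom_indices(torch.device("cpu"))
#   assert indices.shape == (21, 4, 4)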
def compute_chi_angles(
positions: geometry.Vec3Array,
mask: torch.Tensor,
aatype: torch.Tensor
):
"""Computes the chi angles given all atom positions and the amino acid type.
Args:
positions: A Vec3Array of shape
[num_res, rc.atom_type_num], with positions of
atoms needed to calculate chi angles. Supports up to 1 batch dimension.
mask: An optional tensor of shape
[num_res, rc.atom_type_num] that masks which atom
positions are set for each residue. If given, then the chi mask will be
set to 1 for a chi angle only if the amino acid has that chi angle and all
the chi atoms needed to calculate that chi angle are set. If not given
(set to None), the chi mask will be set to 1 for every chi angle the amino
acid has, regardless of whether the atoms needed to calculate it are set.
aatype: A tensor of shape [num_res] with amino acid type integer
code (0 to 21). Supports up to 1 batch dimension.
Returns:
A tuple of tensors (chi_angles, mask), where both have shape
[num_res, 4]. The mask masks out unused chi angles for amino acid
types that have fewer than 4 chi angles. If mask is given, the
chi mask will also mask out uncomputable chi angles.
"""
# Don't assert on the num_res and batch dimensions as they might be unknown.
assert positions.shape[-1] == rc.atom_type_num
assert mask.shape[-1] == rc.atom_type_num
no_batch_dims = len(aatype.shape) - 1
# Compute the table of chi angle indices. Shape: [restypes, chis=4, atoms=4].
chi_atom_indices = get_chi_atom_indices(aatype.device)
# DISCREPANCY: DeepMind doesn't remove the gaps here. I don't know why
# theirs works.
aatype_gapless = torch.clamp(aatype, max=20)
# Select atoms to compute chis. Shape: [*, num_res, chis=4, atoms=4].
atom_indices = chi_atom_indices[aatype_gapless]
# Gather atom positions. Shape: [num_res, chis=4, atoms=4, xyz=3].
chi_angle_atoms = positions.map_tensor_fn(
partial(
tensor_utils.batched_gather,
inds=atom_indices,
dim=-1,
no_batch_dims=no_batch_dims + 1
)
)
a, b, c, d = [chi_angle_atoms[..., i] for i in range(4)]
chi_angles = geometry.dihedral_angle(a, b, c, d)
# Copy the chi angle mask, add the UNKNOWN residue. Shape: [restypes, 4].
chi_angles_mask = list(rc.chi_angles_mask)
chi_angles_mask.append([0.0, 0.0, 0.0, 0.0])
chi_angles_mask = torch.tensor(chi_angles_mask, device=aatype.device)
# Compute the chi angle mask. Shape [num_res, chis=4].
chi_mask = chi_angles_mask[aatype_gapless]
# The chi_mask is set to 1 only when all necessary chi angle atoms were set.
# Gather the chi angle atoms mask. Shape: [num_res, chis=4, atoms=4].
chi_angle_atoms_mask = tensor_utils.batched_gather(
mask,
atom_indices,
dim=-1,
no_batch_dims=no_batch_dims + 1
)
# Check if all 4 chi angle atoms were set. Shape: [num_res, chis=4].
chi_angle_atoms_mask = torch.prod(chi_angle_atoms_mask, dim=-1)
chi_mask = chi_mask * chi_angle_atoms_mask.to(torch.float32)
return chi_angles, chi_mask
def make_transform_from_reference(
a_xyz: geometry.Vec3Array,
b_xyz: geometry.Vec3Array,
c_xyz: geometry.Vec3Array
) -> geometry.Rigid3Array:
"""Returns rotation and translation matrices to convert from reference.
Note that this method does not take care of symmetries. If you provide the
coordinates in the non-standard way, the A atom will end up in the negative
y-axis rather than in the positive y-axis. You need to take care of such
cases in your code.
Args:
a_xyz: A Vec3Array.
b_xyz: A Vec3Array.
c_xyz: A Vec3Array.
Returns:
A Rigid3Array which, when applied to coordinates in a canonicalized
reference frame, will give coordinates approximately equal
the original coordinates (in the global frame).
"""
rotation = geometry.Rot3Array.from_two_vectors(
c_xyz - b_xyz,
a_xyz - b_xyz
)
return geometry.Rigid3Array(rotation, b_xyz)
def make_backbone_affine(
positions: geometry.Vec3Array,
mask: torch.Tensor,
aatype: torch.Tensor,
) -> Tuple[geometry.Rigid3Array, torch.Tensor]:
a = rc.atom_order['N']
b = rc.atom_order['CA']
c = rc.atom_order['C']
rigid_mask = (mask[..., a] * mask[..., b] * mask[..., c])
rigid = make_transform_from_reference(
a_xyz=positions[..., a],
b_xyz=positions[..., b],
c_xyz=positions[..., c],
)
return rigid, rigid_mask
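# Usage sketch (illustrative only; assumes `positions` is a geometry.Vec3Array
# of shape (..., 37) with matching `mask` and `aatype` tensors):
#
#   rigid, rigid_mask = make_backbone_affine(positions, mask, aatype)
#   # `rigid` puts each residue's frame at CA with the x-axis along CA->C;
#   # `rigid_mask` zeroes out residues missing any of N, CA, or C.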
@@ -30,6 +30,22 @@ from fastfold.utils.tensor_utils import (
tensor_tree_map,
)
def dgram_from_positions(
pos: torch.Tensor,
min_bin: float = 3.25,
max_bin: float = 50.75,
no_bins: int = 39,
inf: float = 1e8,
):
dgram = torch.sum(
(pos[..., None, :] - pos[..., None, :, :]) ** 2, dim=-1, keepdim=True
)
lower = torch.linspace(min_bin, max_bin, no_bins, device=pos.device) ** 2
upper = torch.cat([lower[1:], lower.new_tensor([inf])], dim=-1)
dgram = ((dgram > lower) * (dgram < upper)).type(dgram.dtype)
return dgram
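# Usage sketch (illustrative only): one-hot distance bins over pairwise
# squared distances:
#
#   import torch
#   pos = torch.randn(10, 3) * 10.0  # hypothetical CA coordinates
#   dgram = dgram_from_positions(pos)
#   assert dgram.shape == (10, 10, 39)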
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
is_gly = aatype == rc.restype_order["G"]
......
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Geometry Module."""
from fastfold.utils.geometry import rigid_matrix_vector
from fastfold.utils.geometry import rotation_matrix
from fastfold.utils.geometry import vector
Rot3Array = rotation_matrix.Rot3Array
Rigid3Array = rigid_matrix_vector.Rigid3Array
Vec3Array = vector.Vec3Array
square_euclidean_distance = vector.square_euclidean_distance
euclidean_distance = vector.euclidean_distance
dihedral_angle = vector.dihedral_angle
dot = vector.dot
cross = vector.cross