# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for protein."""

import os

from absl.testing import absltest
from absl.testing import parameterized
from alphafold.common import protein
from alphafold.common import residue_constants
import numpy as np
# Internal import (7716).

# Directory holding the PDB fixtures; the tests join it onto
# absltest.get_default_test_srcdir() to build the full path.
TEST_DATA_DIR = 'alphafold/common/testdata/'


class ProteinTest(parameterized.TestCase):
  """Tests for the `protein` module: PDB parsing, writing and validation."""

  def _read_test_pdb(self, pdb_file):
    """Returns the text of a PDB fixture from the test data directory.

    Args:
      pdb_file: File name relative to TEST_DATA_DIR, e.g. '2rbg.pdb'.

    Returns:
      The complete file contents as one string.
    """
    pdb_path = os.path.join(
        absltest.get_default_test_srcdir(), TEST_DATA_DIR, pdb_file)
    with open(pdb_path) as f:
      return f.read()

  def _check_shapes(self, prot, num_res):
    """Asserts that every array field of `prot` has the expected shape.

    Args:
      prot: Object exposing the atom_positions, aatype, atom_mask,
        residue_index, chain_index and b_factors arrays.
      num_res: Expected number of residues (leading dimension of each array).
    """
    num_atoms = residue_constants.atom_type_num
    self.assertEqual((num_res, num_atoms, 3), prot.atom_positions.shape)
    self.assertEqual((num_res,), prot.aatype.shape)
    self.assertEqual((num_res, num_atoms), prot.atom_mask.shape)
    self.assertEqual((num_res,), prot.residue_index.shape)
    self.assertEqual((num_res,), prot.chain_index.shape)
    self.assertEqual((num_res, num_atoms), prot.b_factors.shape)

  @parameterized.named_parameters(
      dict(testcase_name='chain_A',
           pdb_file='2rbg.pdb', chain_id='A', num_res=282, num_chains=1),
      dict(testcase_name='chain_B',
           pdb_file='2rbg.pdb', chain_id='B', num_res=282, num_chains=1),
      dict(testcase_name='multichain',
           pdb_file='2rbg.pdb', chain_id=None, num_res=564, num_chains=2))
  def test_from_pdb_str(self, pdb_file, chain_id, num_res, num_chains):
    """Parses a PDB fixture and checks shapes, residue types and chains.

    Args:
      pdb_file: Name of the PDB fixture inside TEST_DATA_DIR.
      chain_id: Chain passed to `protein.from_pdb_string`; None in the
        multichain case.
      num_res: Expected number of parsed residues.
      num_chains: Expected number of distinct values in `chain_index`.
    """
    pdb_string = self._read_test_pdb(pdb_file)
    prot = protein.from_pdb_string(pdb_string, chain_id)
    self._check_shapes(prot, num_res)
    self.assertGreaterEqual(prot.aatype.min(), 0)
    # Allow equal since unknown restypes have index equal to restype_num.
    self.assertLessEqual(prot.aatype.max(), residue_constants.restype_num)
    self.assertLen(np.unique(prot.chain_index), num_chains)

  def test_to_pdb(self):
    """Round-trips 2rbg through `to_pdb` and `from_pdb_string`."""
    pdb_string = self._read_test_pdb('2rbg.pdb')
    prot = protein.from_pdb_string(pdb_string)
    pdb_string_reconstr = protein.to_pdb(prot)

    # Every emitted line must be exactly 80 characters wide.
    for line in pdb_string_reconstr.splitlines():
      self.assertLen(line, 80)

    prot_reconstr = protein.from_pdb_string(pdb_string_reconstr)

    # Integer-valued fields must match exactly; the remaining fields are
    # compared approximately.
    np.testing.assert_array_equal(prot_reconstr.aatype, prot.aatype)
    np.testing.assert_array_almost_equal(
        prot_reconstr.atom_positions, prot.atom_positions)
    np.testing.assert_array_almost_equal(
        prot_reconstr.atom_mask, prot.atom_mask)
    np.testing.assert_array_equal(
        prot_reconstr.residue_index, prot.residue_index)
    np.testing.assert_array_equal(
        prot_reconstr.chain_index, prot.chain_index)
    np.testing.assert_array_almost_equal(
        prot_reconstr.b_factors, prot.b_factors)

  def test_ideal_atom_mask(self):
    """Compares the parsed atom mask of 2rbg with `ideal_atom_mask`."""
    pdb_string = self._read_test_pdb('2rbg.pdb')
    prot = protein.from_pdb_string(pdb_string)
    ideal_mask = protein.ideal_atom_mask(prot)
    # Residue indices whose parsed mask is expected to differ from the ideal
    # mask; every other residue must match it exactly.
    non_ideal_residues = {102, *range(127, 286)}
    for i, (res, atom_mask) in enumerate(
        zip(prot.residue_index, prot.atom_mask)):
      if res in non_ideal_residues:
        self.assertFalse(np.all(atom_mask == ideal_mask[i]), msg=f'{res}')
      else:
        self.assertTrue(np.all(atom_mask == ideal_mask[i]), msg=f'{res}')

  def test_too_many_chains(self):
    """Checks that `Protein` raises ValueError above PDB_MAX_CHAINS chains."""
    # One residue per chain, so chain_index holds PDB_MAX_CHAINS + 1 distinct
    # values.
    num_res = protein.PDB_MAX_CHAINS + 1
    num_atom_type = residue_constants.atom_type_num
    # The per-atom fields use the [num_res, num_atom_type] layout asserted in
    # _check_shapes, so the chain count is the only invalid part of the input.
    # randint's upper bound is exclusive: aatype spans 0..restype_num, where
    # restype_num is the index used for unknown residue types.
    with self.assertRaises(ValueError):
      _ = protein.Protein(
          atom_positions=np.random.random([num_res, num_atom_type, 3]),
          aatype=np.random.randint(
              0, residue_constants.restype_num + 1, [num_res]),
          atom_mask=np.random.randint(
              0, 2, [num_res, num_atom_type]).astype(np.float32),
          residue_index=np.arange(1, num_res+1),
          chain_index=np.arange(num_res),
          b_factors=np.random.uniform(1, 100, [num_res, num_atom_type]))


# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
  absltest.main()