test_permutation.py 5.72 KB
Newer Older
Geoffrey Yu's avatar
Geoffrey Yu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
import pickle
import torch
import torch.nn as nn
import numpy as np
import unittest
from openfold.config import model_config
from openfold.data import data_transforms
from openfold.model.model import AlphaFold
Geoffrey Yu's avatar
Geoffrey Yu committed
24
from openfold.utils.loss import AlphaFoldMultimerLoss
Geoffrey Yu's avatar
Geoffrey Yu committed
25
26
from openfold.utils.tensor_utils import tensor_tree_map
from tests.config import consts
27
28
29
import logging
logger = logging.getLogger(__name__)
import os
Geoffrey Yu's avatar
Geoffrey Yu committed
30
import io, contextlib
Geoffrey Yu's avatar
Geoffrey Yu committed
31
32
33
from tests.data_utils import (
    random_template_feats,
    random_extra_msa_feats,
Geoffrey Yu's avatar
Geoffrey Yu committed
34
    random_affines_vector, random_affines_4x4
Geoffrey Yu's avatar
Geoffrey Yu committed
35
)
Geoffrey Yu's avatar
Geoffrey Yu committed
36
37
38
39
40
41
42
from openfold.utils.rigid_utils import (
    Rotation,
    Rigid,
)

class TestPermutation:
    """Dry-run check of the multimer permutation path in AlphaFoldMultimerLoss.

    NOTE(review): this class defines ``__init__`` and is driven from the
    ``__main__`` guard of this file; pytest does not collect test classes
    that define ``__init__`` -- confirm that running it as a script is the
    intended workflow.
    """

    def __init__(self) -> None:
        """
        Firstly setup model configs and model as in
        test_model.py

        In the test case, use PDB ID 1e4k as the label
        """
        # Resolved against the current working directory, so the script is
        # expected to be launched from the repository root.
        self.test_data_dir = os.path.join(os.getcwd(),"tests/test_data")
        # One pickled label file name (without ".pkl") per chain.
        self.label_ids = ['label_1', 'label_1', 'label_2', 'label_2', 'label_2']
        # Per-residue chain index: two chains of 9 residues followed by three
        # chains of 13 residues (57 residues in total).
        self.asym_id = [1] * 9 + [2] * 9 + [3] * 13 + [4] * 13 + [5] * 13

    def _load_labels(self) -> list:
        """Unpickle ``<test_data_dir>/<label_id>.pkl`` for every label id.

        Returns:
            The unpickled objects, in the same order as ``self.label_ids``
            (repeated ids are loaded again, once per occurrence).
        """
        labels = []
        for label_id in self.label_ids:
            label_path = os.path.join(self.test_data_dir, f"{label_id}.pkl")
            # SECURITY: pickle executes arbitrary code on load; only ever
            # point this at fixture files that ship with the repository.
            with open(label_path, "rb") as handle:
                labels.append(pickle.load(handle))
        return labels

    def affine_vector_to_4x4(self, affine):
        """Convert ``affine`` with ``Rigid.from_tensor_7(...).to_tensor_4x4()``.

        Args:
            affine: tensor accepted by ``Rigid.from_tensor_7`` -- presumably
                with a trailing dimension of 7; TODO confirm against
                ``openfold.utils.rigid_utils``.

        Returns:
            Whatever ``Rigid.to_tensor_4x4`` produces for that rigid.
        """
        return Rigid.from_tensor_7(affine).to_tensor_4x4()

    def test_dry_run(self) -> None:
        """Run one forward pass and one multimer-loss call on random features.

        Builds a random feature batch for ``len(self.asym_id)`` residues,
        appends a recycling dimension and prepends a batch dimension to every
        tensor, runs ``AlphaFold`` under ``torch.no_grad()``, and then calls
        ``AlphaFoldMultimerLoss`` with ``(batch, labels)``. Nothing is
        asserted: the run passes if no exception is raised.
        """
        # Restrict CUDA to device 0 and request blocking kernel launches.
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

        n_seq = consts.n_seq
        n_templ = consts.n_templ
        n_res = len(self.asym_id)
        n_extra_seq = consts.n_extra

        c = model_config(consts.model, train=True)

        c.loss.masked_msa.num_classes = 22 # somehow need overwrite this part in multimer loss config
        c.model.evoformer_stack.no_blocks = 4  # no need to go overboard here
        c.model.evoformer_stack.blocks_per_ckpt = None  # don't want to set up
        # deepspeed for this test

        model = AlphaFold(c)
        multimer_loss = AlphaFoldMultimerLoss(c.loss)
        example_label = self._load_labels()

        batch = {}
        tf = torch.randint(c.model.input_embedder.tf_dim - 1, size=(n_res,))
        batch["target_feat"] = nn.functional.one_hot(
            tf, c.model.input_embedder.tf_dim
        ).float()
        batch["aatype"] = torch.argmax(batch["target_feat"], dim=-1)
        batch["residue_index"] = torch.arange(n_res)

        # Random backbone frames: generated in the 7-vector form, stored in
        # the batch in 4x4 form.
        backbone_dict = {
            "backbone_affine_tensor": torch.tensor(random_affines_vector((n_res,))),
            "backbone_affine_mask": torch.from_numpy(
                np.random.randint(0, 2, (n_res,)).astype(np.float32)
            ),
            "use_clamped_fape": torch.from_numpy(np.array(0.0)),
        }
        batch['backbone_rigid_tensor'] = self.affine_vector_to_4x4(
            backbone_dict['backbone_affine_tensor']
        )
        batch['backbone_rigid_mask'] = backbone_dict['backbone_affine_mask']

        true_msa_dict = {
            "true_msa": torch.tensor(np.random.randint(0, 21, (n_res, n_seq))),
            "bert_mask": torch.tensor(
                np.random.randint(0, 2, (n_res, n_seq)).astype(np.float32)
            ),
        }
        batch.update(true_msa_dict)

        batch["msa_feat"] = torch.rand((n_seq, n_res, c.model.input_embedder.msa_dim))
        t_feats = random_template_feats(n_templ, n_res)
        batch.update({k: torch.tensor(v) for k, v in t_feats.items()})
        extra_feats = random_extra_msa_feats(n_extra_seq, n_res)
        batch.update({k: torch.tensor(v) for k, v in extra_feats.items()})
        batch["msa_mask"] = torch.randint(
            low=0, high=2, size=(n_seq, n_res)
        ).float()
        batch["seq_mask"] = torch.randint(low=0, high=2, size=(n_res,)).float()
        batch.update(data_transforms.make_atom14_masks(batch))
        batch["no_recycling_iters"] = torch.tensor(2.)

        if consts.is_multimer:
            # Chain features: asym_id labels 5 chains; entity_id groups them
            # into 2 entities (chains 1-2 and chains 3-5).
            asym_id = self.asym_id
            batch["asym_id"] = torch.tensor(asym_id, dtype=torch.float64)
            # 18 = 9 + 9 residues of entity 1, 39 = 3 * 13 residues of
            # entity 2; the total must stay equal to n_res.
            batch['entity_id'] = torch.tensor([1] * 18 + [2] * 39, dtype=torch.float64)
            batch["sym_id"] = torch.tensor(asym_id, dtype=torch.float64)
            batch["extra_deletion_matrix"] = torch.randint(0, 2, size=(n_extra_seq, n_res))

        def add_recycling_dims(t):
            # Append a trailing dimension of size max_recycling_iters.
            return t.unsqueeze(-1).expand(*t.shape, c.data.common.max_recycling_iters)

        def add_batch_size_dimension(t):
            # Prepend a batch dimension of size 1.
            return t.unsqueeze(0)

        batch = tensor_tree_map(add_recycling_dims, batch)
        batch = tensor_tree_map(add_batch_size_dimension, batch)

        with torch.no_grad():
            out = model(batch)
            print("finished foward on batch with batch_size dim")
            # The return value is not inspected; the dry run only checks that
            # the loss call completes.
            multimer_loss(out, (batch, example_label))

if __name__ == "__main__":
    # Script entry point: build the fixture object and execute the dry run.
    TestPermutation().test_dry_run()