"torchvision/models/vision_transformer.py" did not exist on "b9da6db4f923e49ab7431c7b6915037101622611"
neighbor_stat.py 5.42 KB
Newer Older
zhangqha's avatar
zhangqha committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import math
import logging
import numpy as np
from deepmd.env import tf
from typing import Tuple, List
from deepmd.env import op_module
from deepmd.env import default_tf_session_config
from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
from deepmd.utils.data_system import DeepmdDataSystem
from deepmd.utils.parallel_op import ParallelOp

# Module-level logger, named after this module; used by NeighborStat for
# warnings and for the summary of the collected statistics.
log = logging.getLogger(__name__)

class NeighborStat:
    """
    Class for getting training data information.

    It loads frames from a DeepmdDataSystem object, runs the ``neighbor_stat``
    op on every frame, and measures the nearest nbor distance between atoms
    and the max nbor size of atoms.

    Parameters
    ----------
    ntypes
            The num of atom types
    rcut
            The cut-off radius
    one_type : bool, optional, default=False
        Treat all types as a single type.
    """
    def __init__(self,
                 ntypes : int,
                 rcut: float,
                 one_type : bool = False,
                 ) -> None:
        """
        Constructor

        Builds the ``neighbor_stat`` op inside a dedicated graph (wrapped in a
        ParallelOp) and opens a session bound to that graph.
        """
        self.rcut = rcut
        self.ntypes = ntypes
        self.one_type = one_type
        # A private graph keeps these placeholders/ops out of the default graph.
        sub_graph = tf.Graph()

        def builder():
            # Returns (placeholders, fetched tensors) for one op instance.
            place_holders = {}
            for ii in ['coord', 'box']:
                place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name='t_'+ii)
            place_holders['type'] = tf.placeholder(tf.int32, [None, None], name='t_type')
            place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms')
            place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh')
            t_type = place_holders['type']
            t_natoms = place_holders['natoms_vec']
            if self.one_type:
                # all types = 0, natoms_vec = [natoms, natoms, natoms]
                t_type = tf.zeros_like(t_type, dtype=tf.int32)
                t_natoms = tf.repeat(t_natoms[0], 3)

            _max_nbor_size, _min_nbor_dist \
                = op_module.neighbor_stat(place_holders['coord'],
                                         t_type,
                                         t_natoms,
                                         place_holders['box'],
                                         place_holders['default_mesh'],
                                         rcut = self.rcut)
            # The set directory is fed in and fetched back unchanged so that
            # each result can be traced to the data it came from.
            place_holders['dir'] = tf.placeholder(tf.string)
            return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders['dir'])

        with sub_graph.as_default():
            self.p = ParallelOp(builder, config=default_tf_session_config)

        self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config)

    def get_stat(self,
                 data : DeepmdDataSystem) -> Tuple[float, List[int]]:
        """
        get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms

        Parameters
        ----------
        data
                Class for manipulating many data systems. It is implemented with the help of DeepmdData.

        Returns
        -------
        min_nbor_dist
                The nearest distance between neighbor atoms. Starts at 100.0
                and is only ever lowered, so 100.0 is returned when no frame
                yields a smaller value.
        max_nbor_size
                The actual achieved max sel: ntypes integers, or a single
                integer when ``one_type`` is set. It is a numpy array once at
                least one frame has been processed, otherwise a list of zeros.

        Raises
        ------
        RuntimeError
                If the smallest neighbor distance of a frame is within 1e-6
                of zero, i.e. some atoms overlap.
        """
        self.min_nbor_dist = 100.0
        self.max_nbor_size = [0]
        if not self.one_type:
            self.max_nbor_size *= self.ntypes

        def feed():
            # Yield one feed dict per frame of every set of every system.
            for ii in range(len(data.system_dirs)):
                system = data.data_systems[ii]
                for jj in system.dirs:
                    data_set = system._load_set(jj)
                    # Convert once per set; redoing this for every frame
                    # would make the cost quadratic in the number of frames.
                    coord = np.asarray(data_set['coord'])
                    atype = np.asarray(data_set['type'])
                    box = np.asarray(data_set['box'])
                    natoms = data.natoms[ii]
                    natoms_vec = np.array(data.natoms_vec[ii])
                    default_mesh = np.array(data.default_mesh[ii])
                    set_dir = str(jj)
                    for kk in range(atype.shape[0]):
                        yield {
                            'coord': coord[kk].reshape([-1, natoms * 3]),
                            'type': atype[kk].reshape([-1, natoms]),
                            'natoms_vec': natoms_vec,
                            'box': box[kk].reshape([-1, 9]),
                            'default_mesh': default_mesh,
                            'dir': set_dir,
                        }

        for mn, dt, jj in self.p.generate(self.sub_sess, feed()):
            if dt.size != 0:
                dt = np.min(dt)
            else:
                # No distances were reported for this frame: fall back to rcut.
                dt = self.rcut
                log.warning("Atoms with no neighbors found in %s. Please make sure it's what you expected.", jj)
            if dt < self.min_nbor_dist:
                # abs_tol is required here: with only rel_tol, isclose() against
                # 0 is true solely for an exact 0, so near-zero distances would
                # slip through.
                if math.isclose(dt, 0., abs_tol=1e-6):
                    # it's unexpected that the distance between two atoms is zero
                    # zero distance will cause nan (#874)
                    raise RuntimeError(
                        "Some atoms are overlapping in %s. Please check your"
                        " training data to remove duplicated atoms." % jj
                    )
                self.min_nbor_dist = dt
            # Reduce over the first axis of the op output, then keep the
            # running element-wise maximum.
            var = np.max(mn, axis=0)
            self.max_nbor_size = np.maximum(var, self.max_nbor_size)

        log.info('training data with min nbor dist: %s', self.min_nbor_dist)
        log.info('training data with max nbor size: %s', self.max_nbor_size)
        return self.min_nbor_dist, self.max_nbor_size