Commit 6b33aeb8 authored by zhangqha

BladeDISC DeePMD code

#!/usr/bin/env python3
from deepmd.descriptor.descriptor import Descriptor
import logging
import os
import glob
import platform
import time
import shutil
import google.protobuf.message
import numpy as np
from packaging.version import Version
from deepmd.env import tf, tfv2
from deepmd.env import get_tf_session_config
from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
from deepmd.fit import EnerFitting, WFCFitting, PolarFittingLocFrame, PolarFittingSeA, GlobalPolarFittingSeA, DipoleFittingSeA
from deepmd.descriptor import Descriptor
from deepmd.model import EnerModel, WFCModel, DipoleModel, PolarModel, GlobalPolarModel
from deepmd.loss import EnerStdLoss, EnerDipoleLoss, TensorLoss
from deepmd.utils.errors import GraphTooLargeError
from deepmd.utils.learning_rate import LearningRateExp
from deepmd.utils.neighbor_stat import NeighborStat
from deepmd.utils.sess import run_sess
from deepmd.utils.type_embed import TypeEmbedNet
from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph
from deepmd.utils.argcheck import type_embedding_args
from tensorflow.python.client import timeline
from deepmd.env import op_module, TF_VERSION
from deepmd.utils.errors import GraphWithoutTensorError
# load grad of force module
import deepmd.op
from deepmd.common import j_must_have, ClassArg, data_requirement, get_precision
log = logging.getLogger(__name__)
# nvnmd
from deepmd.nvnmd.utils.config import nvnmd_cfg
def _is_subdir(path, directory):
path = os.path.realpath(path)
directory = os.path.realpath(directory)
if path == directory:
return False
relative = os.path.relpath(path, directory) + os.sep
return not relative.startswith(os.pardir + os.sep)
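# A quick illustration of _is_subdir (hypothetical paths):
#   _is_subdir("/work/run/ckpt", "/work/run")  -> True
#   _is_subdir("/work/run", "/work/run")       -> False  (a directory is not its own subdirectory)
#   _is_subdir("/work/other", "/work/run")     -> False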
class DPTrainer (object):
def __init__(self,
jdata,
run_opt,
is_compress = False):
self.run_opt = run_opt
self._init_param(jdata)
self.is_compress = is_compress
def _init_param(self, jdata):
# model config
model_param = j_must_have(jdata, 'model')
descrpt_param = j_must_have(model_param, 'descriptor')
fitting_param = j_must_have(model_param, 'fitting_net')
typeebd_param = model_param.get('type_embedding', None)
self.model_param = model_param
self.descrpt_param = descrpt_param
# nvnmd
self.nvnmd_param = jdata.get('nvnmd', {})
nvnmd_cfg.init_from_jdata(self.nvnmd_param)
if nvnmd_cfg.enable:
nvnmd_cfg.init_from_deepmd_input(model_param)
nvnmd_cfg.disp_message()
nvnmd_cfg.save()
# descriptor
try:
descrpt_type = descrpt_param['type']
self.descrpt_type = descrpt_type
except KeyError:
raise KeyError('the type of descriptor should be set by `type`')
if descrpt_param['type'] in ['se_atten']:
descrpt_param['ntypes'] = len(model_param['type_map'])
self.descrpt = Descriptor(**descrpt_param)
# fitting net
fitting_type = fitting_param.get('type', 'ener')
self.fitting_type = fitting_type
fitting_param.pop('type', None)
fitting_param['descrpt'] = self.descrpt
if fitting_type == 'ener':
self.fitting = EnerFitting(**fitting_param)
# elif fitting_type == 'wfc':
# self.fitting = WFCFitting(fitting_param, self.descrpt)
elif fitting_type == 'dipole':
if descrpt_type == 'se_e2_a':
self.fitting = DipoleFittingSeA(**fitting_param)
else :
raise RuntimeError('fitting dipole only supports descriptors: se_e2_a')
elif fitting_type == 'polar':
# if descrpt_type == 'loc_frame':
# self.fitting = PolarFittingLocFrame(fitting_param, self.descrpt)
if descrpt_type == 'se_e2_a':
self.fitting = PolarFittingSeA(**fitting_param)
else :
raise RuntimeError('fitting polar only supports descriptors: loc_frame and se_e2_a')
elif fitting_type == 'global_polar':
if descrpt_type == 'se_e2_a':
self.fitting = GlobalPolarFittingSeA(**fitting_param)
else :
raise RuntimeError('fitting global_polar only supports descriptors: loc_frame and se_e2_a')
else :
raise RuntimeError('unknown fitting type ' + fitting_type)
# type embedding
padding = False
if descrpt_type == 'se_atten':
padding = True
if typeebd_param is not None:
self.typeebd = TypeEmbedNet(
neuron=typeebd_param['neuron'],
resnet_dt=typeebd_param['resnet_dt'],
activation_function=typeebd_param['activation_function'],
precision=typeebd_param['precision'],
trainable=typeebd_param['trainable'],
seed=typeebd_param['seed'],
padding=padding
)
elif descrpt_type == 'se_atten':
default_args = type_embedding_args()
default_args_dict = {i.name: i.default for i in default_args}
self.typeebd = TypeEmbedNet(
neuron=default_args_dict['neuron'],
resnet_dt=default_args_dict['resnet_dt'],
activation_function=None,
precision=default_args_dict['precision'],
trainable=default_args_dict['trainable'],
seed=default_args_dict['seed'],
padding=padding
)
else:
self.typeebd = None
# init model
# infer model type by fitting_type
if fitting_type == 'ener':
self.model = EnerModel(
self.descrpt,
self.fitting,
self.typeebd,
model_param.get('type_map'),
model_param.get('data_stat_nbatch', 10),
model_param.get('data_stat_protect', 1e-2),
model_param.get('use_srtab'),
model_param.get('smin_alpha'),
model_param.get('sw_rmin'),
model_param.get('sw_rmax')
)
# elif fitting_type == 'wfc':
# self.model = WFCModel(model_param, self.descrpt, self.fitting)
elif fitting_type == 'dipole':
self.model = DipoleModel(
self.descrpt,
self.fitting,
model_param.get('type_map'),
model_param.get('data_stat_nbatch', 10),
model_param.get('data_stat_protect', 1e-2)
)
elif fitting_type == 'polar':
self.model = PolarModel(
self.descrpt,
self.fitting,
model_param.get('type_map'),
model_param.get('data_stat_nbatch', 10),
model_param.get('data_stat_protect', 1e-2)
)
elif fitting_type == 'global_polar':
self.model = GlobalPolarModel(
self.descrpt,
self.fitting,
model_param.get('type_map'),
model_param.get('data_stat_nbatch', 10),
model_param.get('data_stat_protect', 1e-2)
)
else :
raise RuntimeError('get unknown fitting type when building model')
# learning rate
lr_param = j_must_have(jdata, 'learning_rate')
scale_by_worker = lr_param.get('scale_by_worker', 'linear')
if scale_by_worker == 'linear':
self.scale_lr_coef = float(self.run_opt.world_size)
elif scale_by_worker == 'sqrt':
self.scale_lr_coef = np.sqrt(self.run_opt.world_size).real
else:
self.scale_lr_coef = 1.
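# Illustration of the worker scaling above (hypothetical world_size = 4):
#   'linear' -> scale_lr_coef = 4.0, 'sqrt' -> scale_lr_coef = 2.0,
#   any other value leaves the learning rate unscaled (coef = 1.0).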
lr_type = lr_param.get('type', 'exp')
if lr_type == 'exp':
self.lr = LearningRateExp(lr_param['start_lr'],
lr_param['stop_lr'],
lr_param['decay_steps'])
else :
raise RuntimeError('unknown learning_rate type ' + lr_type)
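# Sketch of the exponential schedule (assuming LearningRateExp decays the rate by a
# fixed factor every `decay_steps` steps so that start_lr reaches stop_lr at the end
# of training): lr(step) ~= start_lr * (stop_lr / start_lr) ** (step / stop_batch).
# E.g. start_lr=1e-3, stop_lr=1e-8, stop_batch=1e6 gives lr(5e5) ~= 3.2e-6.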
# loss
# infer loss type by fitting_type
loss_param = jdata.get('loss', None)
loss_type = loss_param.get('type', 'ener')
if fitting_type == 'ener':
loss_param.pop('type', None)
loss_param['starter_learning_rate'] = self.lr.start_lr()
if loss_type == 'ener':
self.loss = EnerStdLoss(**loss_param)
elif loss_type == 'ener_dipole':
self.loss = EnerDipoleLoss(**loss_param)
else:
raise RuntimeError('unknown loss type')
elif fitting_type == 'wfc':
self.loss = TensorLoss(loss_param,
model = self.model,
tensor_name = 'wfc',
tensor_size = self.model.get_out_size(),
label_name = 'wfc')
elif fitting_type == 'dipole':
self.loss = TensorLoss(loss_param,
model = self.model,
tensor_name = 'dipole',
tensor_size = 3,
label_name = 'dipole')
elif fitting_type == 'polar':
self.loss = TensorLoss(loss_param,
model = self.model,
tensor_name = 'polar',
tensor_size = 9,
label_name = 'polarizability')
elif fitting_type == 'global_polar':
self.loss = TensorLoss(loss_param,
model = self.model,
tensor_name = 'global_polar',
tensor_size = 9,
atomic = False,
label_name = 'polarizability')
else :
raise RuntimeError('get unknown fitting type when building loss function')
# training
tr_data = jdata['training']
self.disp_file = tr_data.get('disp_file', 'lcurve.out')
self.disp_freq = tr_data.get('disp_freq', 1000)
self.save_freq = tr_data.get('save_freq', 1000)
self.save_ckpt = tr_data.get('save_ckpt', 'model.ckpt')
self.display_in_training = tr_data.get('disp_training', True)
self.timing_in_training = tr_data.get('time_training', True)
self.profiling = self.run_opt.is_chief and tr_data.get('profiling', False)
self.profiling_file = tr_data.get('profiling_file', 'timeline.json')
self.enable_profiler = tr_data.get('enable_profiler', False)
self.tensorboard = self.run_opt.is_chief and tr_data.get('tensorboard', False)
self.tensorboard_log_dir = tr_data.get('tensorboard_log_dir', 'log')
self.tensorboard_freq = tr_data.get('tensorboard_freq', 1)
self.mixed_prec = tr_data.get('mixed_precision', None)
if self.mixed_prec is not None:
if (self.mixed_prec['compute_prec'] != 'float16' or self.mixed_prec['output_prec'] != 'float32'):
raise RuntimeError(
"Unsupported mixed precision option [output_prec, compute_prec]: [%s, %s], "
" Supported: [float32, float16], Please set mixed precision option correctly!"
% (self.mixed_prec['output_prec'], self.mixed_prec['compute_prec']))
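# Hence the only accepted mixed-precision setting is, as a sketch of the
# corresponding "training" section entry:
#   "mixed_precision": {"compute_prec": "float16", "output_prec": "float32"}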
# self.sys_probs = tr_data['sys_probs']
# self.auto_prob_style = tr_data['auto_prob']
self.useBN = False
if fitting_type == 'ener' and self.fitting.get_numb_fparam() > 0 :
self.numb_fparam = self.fitting.get_numb_fparam()
else :
self.numb_fparam = 0
if tr_data.get("validation_data", None) is not None:
self.valid_numb_batch = tr_data["validation_data"].get("numb_btch", 1)
else:
self.valid_numb_batch = 1
# whether to initialize the graph from a frozen model
self.frz_model = None
self.model_type = None
def build (self,
data = None,
stop_batch = 0,
suffix = "") :
self.ntypes = self.model.get_ntypes()
self.stop_batch = stop_batch
if not self.is_compress and data.mixed_type:
assert self.descrpt_type in ['se_atten'], 'Data in mixed_type format must use attention descriptor!'
assert self.fitting_type in ['ener'], 'Data in mixed_type format must use ener fitting!'
if self.numb_fparam > 0 :
log.info("training with %d frame parameter(s)" % self.numb_fparam)
else:
log.info("training without frame parameter")
if not self.is_compress:
# Usually, the type number of the model should be equal to that of the data
# However, nt_model > nt_data should be allowed, since users may only want to
# train using a dataset that only has some of the elements
if self.ntypes < data.get_ntypes():
raise ValueError(
"The number of types of the training data is %d, but that of the "
"model is only %d. The latter must be no less than the former. "
"You may need to reset one or both of them. Usually, the former "
"is given by `model/type_map` in the training parameter (if set) "
"or the maximum number in the training data. The latter is given "
"by `model/descriptor/sel` in the training parameter." % (
data.get_ntypes(), self.ntypes
))
self.type_map = data.get_type_map()
self.batch_size = data.get_batch_size()
if self.run_opt.init_mode not in ('init_from_model', 'restart', 'init_from_frz_model'):
# self.saver.restore (in self._init_session) will restore avg and std variables, so data_stat is useless
# init_from_frz_model will restore data_stat variables in `init_variables` method
log.info("data stating... (this step may take long time)")
self.model.data_stat(data)
# config the init_frz_model command
if self.run_opt.init_mode == 'init_from_frz_model':
self._init_from_frz_model()
# neighbor_stat has been moved to train.py to avoid duplication
# TODO: this is a simple fix but we should have a clear
# architecture to call neighbor stat
else :
graph, graph_def = load_graph_def(self.model_param['compress']['model_file'])
self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3])
self.fitting.init_variables(graph, graph_def)
# for fparam or aparam settings in 'ener' type fitting net
if self.fitting_type == 'ener':
self.fitting.enable_compression(self.model_param['compress']['model_file'])
if self.is_compress or self.model_type == 'compressed_model':
tf.constant("compressed_model", name = 'model_type', dtype = tf.string)
else:
tf.constant("original_model", name = 'model_type', dtype = tf.string)
if self.mixed_prec is not None:
self.descrpt.enable_mixed_precision(self.mixed_prec)
self.fitting.enable_mixed_precision(self.mixed_prec)
self._build_lr()
self._build_network(data, suffix)
self._build_training()
def _build_lr(self):
self._extra_train_ops = []
self.global_step = tf.train.get_or_create_global_step()
self.learning_rate = self.lr.build(self.global_step, self.stop_batch)
log.info("built lr")
def _build_network(self, data, suffix=""):
self.place_holders = {}
if self.is_compress :
for kk in ['coord', 'box']:
self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk)
self._get_place_horders(data_requirement)
else :
self._get_place_horders(data.get_data_dict())
self.place_holders['type'] = tf.placeholder(tf.int32, [None], name='t_type')
self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms')
self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh')
self.place_holders['is_training'] = tf.placeholder(tf.bool)
self.model_pred\
= self.model.build (self.place_holders['coord'],
self.place_holders['type'],
self.place_holders['natoms_vec'],
self.place_holders['box'],
self.place_holders['default_mesh'],
self.place_holders,
self.frz_model,
suffix = suffix,
reuse = False)
self.l2_l, self.l2_more\
= self.loss.build (self.learning_rate,
self.place_holders['natoms_vec'],
self.model_pred,
self.place_holders,
suffix = "test")
if self.mixed_prec is not None:
self.l2_l = tf.cast(self.l2_l, get_precision(self.mixed_prec['output_prec']))
log.info("built network")
def _build_training(self):
trainable_variables = tf.trainable_variables()
if self.run_opt.is_distrib:
if self.scale_lr_coef > 1.:
log.info('Scale learning rate by coef: %f', self.scale_lr_coef)
optimizer = tf.train.AdamOptimizer(self.learning_rate*self.scale_lr_coef)
else:
optimizer = tf.train.AdamOptimizer(self.learning_rate)
optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer)
else:
optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
if self.mixed_prec is not None:
_TF_VERSION = Version(TF_VERSION)
# check the TF_VERSION, when TF < 1.12, mixed precision is not allowed
if _TF_VERSION < Version('1.14.0'):
raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION)
elif _TF_VERSION < Version('2.4.0'):
optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
else:
optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(optimizer)
apply_op = optimizer.minimize(loss=self.l2_l,
global_step=self.global_step,
var_list=trainable_variables,
name='train_step')
train_ops = [apply_op] + self._extra_train_ops
self.train_op = tf.group(*train_ops)
log.info("built training")
def _init_session(self):
config = get_tf_session_config()
device, idx = self.run_opt.my_device.split(":", 1)
if device == "gpu":
config.gpu_options.visible_device_list = idx
self.sess = tf.Session(config=config)
# Initialize or restore global variables
init_op = tf.global_variables_initializer()
if self.run_opt.is_chief:
self.saver = tf.train.Saver(save_relative_paths=True)
if self.run_opt.init_mode == 'init_from_scratch' :
log.info("initialize model from scratch")
run_sess(self.sess, init_op)
if not self.is_compress:
fp = open(self.disp_file, "w")
fp.close ()
elif self.run_opt.init_mode == 'init_from_model' :
log.info("initialize from model %s" % self.run_opt.init_model)
run_sess(self.sess, init_op)
self.saver.restore (self.sess, self.run_opt.init_model)
run_sess(self.sess, self.global_step.assign(0))
fp = open(self.disp_file, "w")
fp.close ()
elif self.run_opt.init_mode == 'restart' :
log.info("restart from model %s" % self.run_opt.restart)
run_sess(self.sess, init_op)
self.saver.restore (self.sess, self.run_opt.restart)
elif self.run_opt.init_mode == 'init_from_frz_model' :
log.info("initialize training from the frozen model")
run_sess(self.sess, init_op)
fp = open(self.disp_file, "w")
fp.close ()
else :
raise RuntimeError ("unkown init mode")
else:
run_sess(self.sess, init_op)
self.saver = None
# Ensure variable consistency among tasks when training starts
if self.run_opt.is_distrib:
bcast_op = self.run_opt._HVD.broadcast_global_variables(0)
if self.run_opt.is_chief:
log.info('broadcast global variables to other tasks')
else:
log.info('receive global variables from task#0')
run_sess(self.sess, bcast_op)
def train (self, train_data = None, valid_data=None) :
# if valid_data is None: # no validation set specified.
# valid_data = train_data # using training set as validation set.
stop_batch = self.stop_batch
self._init_session()
# Before data sharding is enabled, only the chief does evaluation and records it
# self.print_head()
fp = None
if self.run_opt.is_chief :
fp = open(self.disp_file, "a")
cur_batch = run_sess(self.sess, self.global_step)
is_first_step = True
self.cur_batch = cur_batch
log.info("start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" %
(run_sess(self.sess, self.learning_rate),
self.lr.value(cur_batch),
self.lr.decay_steps_,
self.lr.decay_rate_,
self.lr.value(stop_batch))
)
prf_options = None
prf_run_metadata = None
if self.profiling:
prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
prf_run_metadata = tf.RunMetadata()
# set tensorboard execution environment
if self.tensorboard:
summary_merged_op = tf.summary.merge_all()
# Remove the old TB logging directory from the previous run
try:
shutil.rmtree(self.tensorboard_log_dir)
except FileNotFoundError:
pass # directory does not exist, this is OK
except Exception as e:
# general error when removing directory, warn user
log.exception(
f"Could not remove old tensorboard logging directory: "
f"{self.tensorboard_log_dir}. Error: {e}"
)
else:
log.debug("Removing old tensorboard log directory.")
tb_train_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/train', self.sess.graph)
tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/test')
else:
tb_train_writer = None
tb_valid_writer = None
if self.enable_profiler:
# https://www.tensorflow.org/guide/profiler
tfv2.profiler.experimental.start(self.tensorboard_log_dir)
train_time = 0
while cur_batch < stop_batch :
# first round validation:
train_batch = train_data.get_batch()
if self.display_in_training and is_first_step:
if self.run_opt.is_chief:
valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None
self.valid_on_the_fly(fp, [train_batch], valid_batches, print_header=True)
is_first_step = False
if self.timing_in_training: tic = time.time()
train_feed_dict = self.get_feed_dict(train_batch, is_training=True)
# use tensorboard to visualize the training of deepmd-kit
# it will take some extra execution time to generate the tensorboard data
if self.tensorboard and (cur_batch % self.tensorboard_freq == 0):
summary, _ = run_sess(self.sess, [summary_merged_op, self.train_op], feed_dict=train_feed_dict,
options=prf_options, run_metadata=prf_run_metadata)
tb_train_writer.add_summary(summary, cur_batch)
else:
run_sess(self.sess, [self.train_op], feed_dict=train_feed_dict,
options=prf_options, run_metadata=prf_run_metadata)
if self.timing_in_training: toc = time.time()
if self.timing_in_training: train_time += toc - tic
cur_batch = run_sess(self.sess, self.global_step)
self.cur_batch = cur_batch
# on-the-fly validation
if self.display_in_training and (cur_batch % self.disp_freq == 0):
if self.timing_in_training:
tic = time.time()
if self.run_opt.is_chief:
valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None
self.valid_on_the_fly(fp, [train_batch], valid_batches)
if self.timing_in_training:
toc = time.time()
test_time = toc - tic
log.info("batch %7d training time %.2f s, testing time %.2f s"
% (cur_batch, train_time, test_time))
train_time = 0
if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None:
self.save_checkpoint(cur_batch)
if (self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0) and self.saver is not None:
self.save_checkpoint(cur_batch)
if self.run_opt.is_chief:
fp.close ()
if self.profiling and self.run_opt.is_chief :
fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats)
chrome_trace = fetched_timeline.generate_chrome_trace_format()
with open(self.profiling_file, 'w') as f:
f.write(chrome_trace)
if self.enable_profiler and self.run_opt.is_chief:
tfv2.profiler.experimental.stop()
def save_checkpoint(self, cur_batch: int):
try:
ckpt_prefix = self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt), global_step=cur_batch)
except google.protobuf.message.DecodeError as e:
raise GraphTooLargeError(
"The graph size exceeds 2 GB, the hard limitation of protobuf."
" Then a DecodeError was raised by protobuf. You should "
"reduce the size of your model."
) from e
# make symlinks from the step-suffixed prefix to the plain prefix so that nothing breaks
# get all checkpoint files
original_files = glob.glob(ckpt_prefix + ".*")
for ori_ff in original_files:
new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix):]
try:
# remove old one
os.remove(new_ff)
except OSError:
pass
if platform.system() != 'Windows':
# by default one does not have permission to create symlinks on Windows
os.symlink(ori_ff, new_ff)
else:
shutil.copyfile(ori_ff, new_ff)
log.info("saved checkpoint %s" % self.save_ckpt)
def get_feed_dict(self, batch, is_training):
feed_dict = {}
for kk in batch.keys():
if kk == 'find_type' or kk == 'type' or kk == 'real_natoms_vec':
continue
if 'find_' in kk:
feed_dict[self.place_holders[kk]] = batch[kk]
else:
feed_dict[self.place_holders[kk]] = np.reshape(batch[kk], [-1])
for ii in ['type']:
feed_dict[self.place_holders[ii]] = np.reshape(batch[ii], [-1])
for ii in ['natoms_vec', 'default_mesh']:
feed_dict[self.place_holders[ii]] = batch[ii]
feed_dict[self.place_holders['is_training']] = is_training
return feed_dict
def get_global_step(self):
return run_sess(self.sess, self.global_step)
# def print_head (self) : # deprecated
# if self.run_opt.is_chief:
# fp = open(self.disp_file, "a")
# print_str = "# %5s" % 'batch'
# print_str += self.loss.print_header()
# print_str += ' %8s\n' % 'lr'
# fp.write(print_str)
# fp.close ()
def valid_on_the_fly(self,
fp,
train_batches,
valid_batches,
print_header=False):
train_results = self.get_evaluation_results(train_batches)
valid_results = self.get_evaluation_results(valid_batches)
cur_batch = self.cur_batch
current_lr = run_sess(self.sess, self.learning_rate)
if print_header:
self.print_header(fp, train_results, valid_results)
self.print_on_training(fp, train_results, valid_results, cur_batch, current_lr)
@staticmethod
def print_header(fp, train_results, valid_results):
print_str = ''
print_str += "# %5s" % 'step'
if valid_results is not None:
prop_fmt = ' %11s %11s'
for k in train_results.keys():
print_str += prop_fmt % (k + '_val', k + '_trn')
else:
prop_fmt = ' %11s'
for k in train_results.keys():
print_str += prop_fmt % (k + '_trn')
print_str += ' %8s\n' % 'lr'
fp.write(print_str)
fp.flush()
@staticmethod
def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr):
print_str = ''
print_str += "%7d" % cur_batch
if valid_results is not None:
prop_fmt = " %11.2e %11.2e"
for k in valid_results.keys():
# assert k in train_results.keys()
print_str += prop_fmt % (valid_results[k], train_results[k])
else:
prop_fmt = " %11.2e"
for k in train_results.keys():
print_str += prop_fmt % (train_results[k])
print_str += " %8.1e\n" % cur_lr
fp.write(print_str)
fp.flush()
def get_evaluation_results(self, batch_list):
if batch_list is None: return None
numb_batch = len(batch_list)
sum_results = {} # sum of losses on all atoms
sum_natoms = 0
for i in range(numb_batch):
batch = batch_list[i]
natoms = batch["natoms_vec"]
feed_dict = self.get_feed_dict(batch, is_training=False)
results = self.loss.eval(self.sess, feed_dict, natoms)
for k, v in results.items():
if k == "natoms":
sum_natoms += v
else:
sum_results[k] = sum_results.get(k, 0.) + v * results["natoms"]
avg_results = {k: v / sum_natoms for k, v in sum_results.items() if not k == "natoms"}
return avg_results
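# A worked (hypothetical) example of the averaging above: two batches with
# natoms = 100 and 300 and per-atom losses 0.02 and 0.01 give
#   (0.02 * 100 + 0.01 * 300) / (100 + 300) = 0.0125,
# i.e. batches are weighted by their atom counts rather than averaged uniformly.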
def save_compressed(self):
"""
Save the compressed graph
"""
self._init_session()
if self.is_compress:
self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt))
def _get_place_horders(self, data_dict):
for kk in data_dict.keys():
if kk == 'type':
continue
prec = GLOBAL_TF_FLOAT_PRECISION
if data_dict[kk]['high_prec'] :
prec = GLOBAL_ENER_FLOAT_PRECISION
self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk)
self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk)
def _init_from_frz_model(self):
try:
graph, graph_def = load_graph_def(self.run_opt.init_frz_model)
except FileNotFoundError as e:
# throw runtime error if there's no frozen model
raise RuntimeError(
"The input frozen model %s (%s) does not exist! Please check the path of the frozen model. " % (self.run_opt.init_frz_model, os.path.abspath(self.run_opt.init_frz_model))
) from e
# get the model type from the frozen model (self.run_opt.init_frz_model)
try:
t_model_type = get_tensor_by_name_from_graph(graph, 'model_type')
except GraphWithoutTensorError as e:
# throw runtime error if the frozen_model has no model type information...
raise RuntimeError(
"The input frozen model: %s has no 'model_type' information, "
"which is not supported by the 'dp train init-frz-model' interface. " % self.run_opt.init_frz_model
) from e
else:
self.model_type = bytes.decode(t_model_type)
if self.model_type == 'compressed_model':
self.frz_model = self.run_opt.init_frz_model
self.model.init_variables(graph, graph_def, model_type=self.model_type)
#
from .data import DeepmdData
from .data_system import DeepmdDataSystem
# out-of-date
from .data import DataSets
from .data_system import DataSystem
from .pair_tab import PairTab
from .learning_rate import LearningRateExp
from .plugin import Plugin, PluginVariant
from typing import List, Callable
from dargs import dargs, Argument, Variant, ArgumentEncoder
from deepmd import descriptor
from deepmd.common import ACTIVATION_FN_DICT, PRECISION_DICT
from deepmd.utils.plugin import Plugin
import json
from deepmd.nvnmd.utils.argcheck import nvnmd_args
def list_to_doc(xx):
items = []
for ii in xx:
if len(items) == 0:
items.append(f'"{ii}"')
else:
items.append(f', "{ii}"')
items.append('.')
return ''.join(items)
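# For instance (following the loop above):
#   list_to_doc(["tanh", "gelu"]) -> '"tanh", "gelu".'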
def make_link(content, ref_key):
return f'`{content} <{ref_key}_>`_' if not dargs.RAW_ANCHOR \
else f'`{content} <#{ref_key}>`_'
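# For example, make_link('se_e2_a', 'model/descriptor[se_e2_a]') yields the RST link
# '`se_e2_a <model/descriptor[se_e2_a]_>`_', or the raw-anchor form
# '`se_e2_a <#model/descriptor[se_e2_a]>`_' when dargs.RAW_ANCHOR is set.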
def type_embedding_args():
doc_neuron = 'Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_seed = 'Random seed for parameter initialization'
doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_trainable = 'If the parameters in the embedding net are trainable'
return [
Argument("neuron", list, optional = True, default = [8], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
Argument("precision", str, optional = True, default = "default", doc = doc_precision),
Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
Argument("seed", [int,None], optional = True, default = None, doc = doc_seed),
]
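# A minimal "type_embedding" input section consistent with the defaults above
# (sketch only, values are just the defaults):
#   "type_embedding": {
#       "neuron": [8], "activation_function": "tanh", "resnet_dt": false,
#       "precision": "default", "trainable": true, "seed": null
#   }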
# --- Descriptor configurations: --- #
class ArgsPlugin:
def __init__(self) -> None:
self.__plugin = Plugin()
def register(self, name : str, alias : List[str] = None) -> Callable[[], List[Argument]]:
"""Regiester a descriptor argument plugin.
Parameters
----------
name : str
the name of a descriptor
alias : List[str], optional
the list of aliases of this descriptor
Returns
-------
Callable[[], List[Argument]]
the registered descriptor argument method
Examples
--------
>>> some_plugin = ArgsPlugin()
>>> @some_plugin.register("some_descrpt")
... def descrpt_some_descrpt_args():
...     return []
"""
# convert alias to hashed item
if isinstance(alias, list):
alias = tuple(alias)
return self.__plugin.register((name, alias))
def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]:
"""Get all arguments.
Parameters
----------
exclude_hybrid : bool
exclude hybrid descriptor to prevent circular calls
Returns
-------
List[Argument]
all arguments
"""
arguments = []
for (name, alias), metd in self.__plugin.plugins.items():
if exclude_hybrid and name == "hybrid":
continue
arguments.append(Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias))
return arguments
descrpt_args_plugin = ArgsPlugin()
@descrpt_args_plugin.register("loc_frame")
def descrpt_local_frame_args ():
doc_sel_a = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor.'
doc_sel_r = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only the relative distances of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximum possible number of type-i neighbors within the cut-off radius.'
doc_rcut = 'The cut-off radius. The default value is 6.0'
doc_axis_rule = 'A list of integers. The length should be 6 times the number of types. \n\n\
- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\
- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\
- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\
- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance.'
return [
Argument("sel_a", list, optional = False, doc = doc_sel_a),
Argument("sel_r", list, optional = False, doc = doc_sel_r),
Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
Argument("axis_rule", list, optional = False, doc = doc_axis_rule)
]
@descrpt_args_plugin.register("se_e2_a", alias=["se_a"])
def descrpt_se_a_args():
doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
- `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximum possible number of type-i neighbors within the cut-off radius. Note that the total sel value must be less than 4096 in a GPU environment.\n\n\
- `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to be divisible by 4. The option "auto" is equivalent to "auto:1.1".'
doc_rcut = 'The cut-off radius.'
doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
doc_neuron = 'Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).'
doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_type_one_side = 'If true, build N_types embedding nets; otherwise, build N_types^2 embedding nets.'
doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_trainable = 'If the parameters in the embedding net are trainable'
doc_seed = 'Random seed for parameter initialization'
doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.'
doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
return [
Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
Argument("axis_neuron", int, optional = True, default = 4, alias = ['n_axis_neuron'], doc = doc_axis_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
Argument("precision", str, optional = True, default = "default", doc = doc_precision),
Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero)
]
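# An example "descriptor" input section built from the se_e2_a defaults above
# (illustrative only, not a recommendation):
#   "descriptor": {
#       "type": "se_e2_a", "sel": "auto", "rcut": 6.0, "rcut_smth": 0.5,
#       "neuron": [10, 20, 40], "axis_neuron": 4, "resnet_dt": false
#   }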
@descrpt_args_plugin.register("se_e3", alias=['se_at', 'se_a_3be', 'se_t'])
def descrpt_se_t_args():
doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
- `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximum possible number of type-i neighbors within the cut-off radius. Note that the total sel value must be less than 4096 in a GPU environment.\n\n\
- `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to be divisible by 4. The option "auto" is equivalent to "auto:1.1".'
doc_rcut = 'The cut-off radius.'
doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
doc_neuron = 'Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_trainable = 'If the parameters in the embedding net are trainable'
doc_seed = 'Random seed for parameter initialization'
doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
return [
Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
Argument("precision", str, optional = True, default = "default", doc = doc_precision),
Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero)
]
@descrpt_args_plugin.register("se_a_tpe", alias=['se_a_ebd'])
def descrpt_se_a_tpe_args():
doc_type_nchanl = 'Number of channels for the type embedding'
doc_type_nlayer = 'Number of hidden layers of the type embedding net'
doc_numb_aparam = 'Dimension of the atomic parameter. If set to a value > 0, the atomic parameters are embedded.'
return descrpt_se_a_args() + [
Argument("type_nchanl", int, optional = True, default = 4, doc = doc_type_nchanl),
Argument("type_nlayer", int, optional = True, default = 2, doc = doc_type_nlayer),
Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam)
]
@descrpt_args_plugin.register("se_e2_r", alias=['se_r'])
def descrpt_se_r_args():
doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
- `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximum possible number of type-i neighbors within the cut-off radius. Note that the total sel value must be less than 4096 in a GPU environment.\n\n\
- `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to be divisible by 4. The option "auto" is equivalent to "auto:1.1".'
doc_rcut = 'The cut-off radius.'
doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
doc_neuron = 'Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_type_one_side = 'If true, build N_types embedding nets; otherwise, build N_types^2 embedding nets.'
doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_trainable = 'If the parameters in the embedding net are trainable'
doc_seed = 'Random seed for parameter initialization'
doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.'
doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
return [
Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
Argument("precision", str, optional = True, default = "default", doc = doc_precision),
Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero)
]
@descrpt_args_plugin.register("hybrid")
def descrpt_hybrid_args():
doc_list = 'A list of descriptor definitions.'
return [
Argument("list", list, optional = False, doc = doc_list)
]
@descrpt_args_plugin.register("se_atten")
def descrpt_se_atten_args():
doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atom, only the summation matters. This number is closely related to the efficiency, so one should not make it too large. Usually 200 or less is enough, well below the GPU limit of 4096. It can be:\n\n\
- `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
- `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\n\n\
- `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to be divisible by 4. The option "auto" is equivalent to "auto:1.1".'
doc_rcut = 'The cut-off radius.'
doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
doc_neuron = 'Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).'
doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_type_one_side = 'Whether to consider the information from only one side or both sides.'
doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_trainable = 'If the parameters in the embedding net are trainable'
doc_seed = 'Random seed for parameter initialization'
doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.'
doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
doc_attn = 'The length of hidden vectors in attention layers'
doc_attn_layer = 'The number of attention layers'
doc_attn_dotr = 'Whether to do dot product with the normalized relative coordinates'
doc_attn_mask = 'Whether to do mask on the diagonal in the attention matrix'
return [
Argument("sel", [int, list, str], optional=True, default="auto", doc=doc_sel),
Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
Argument("axis_neuron", int, optional=True, default=4, alias=['n_axis_neuron'], doc=doc_axis_neuron),
Argument("activation_function", str, optional=True, default='tanh', doc=doc_activation_function),
Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
Argument("type_one_side", bool, optional=True, default=False, doc=doc_type_one_side),
Argument("precision", str, optional=True, default="default", doc=doc_precision),
Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
Argument("seed", [int, None], optional=True, doc=doc_seed),
Argument("exclude_types", list, optional=True, default=[], doc=doc_exclude_types),
Argument("set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero),
Argument("attn", int, optional=True, default=128, doc=doc_attn),
Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask)
]
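# Sketch of an attention ("se_atten") descriptor section using the defaults above:
#   "descriptor": {
#       "type": "se_atten", "rcut": 6.0, "rcut_smth": 0.5, "neuron": [10, 20, 40],
#       "attn": 128, "attn_layer": 2, "attn_dotr": true, "attn_mask": false
#   }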
def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
link_lf = make_link('loc_frame', 'model/descriptor[loc_frame]')
link_se_e2_a = make_link('se_e2_a', 'model/descriptor[se_e2_a]')
link_se_e2_r = make_link('se_e2_r', 'model/descriptor[se_e2_r]')
link_se_e3 = make_link('se_e3', 'model/descriptor[se_e3]')
link_se_a_tpe = make_link('se_a_tpe', 'model/descriptor[se_a_tpe]')
link_hybrid = make_link('hybrid', 'model/descriptor[hybrid]')
link_se_atten = make_link('se_atten', 'model/descriptor[se_atten]')
doc_descrpt_type = f'The type of the descriptor. See explanation below. \n\n\
- `loc_frame`: Defines a local frame at each atom, and then computes the descriptor as local coordinates under this frame.\n\n\
- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
- `hybrid`: Concatenate a list of descriptors to form a new descriptor.'
return Variant("type", descrpt_args_plugin.get_all_argument(), doc = doc_descrpt_type)
# --- Fitting net configurations: --- #
def fitting_ener():
doc_numb_fparam = 'The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provide the input fparams.'
doc_numb_aparam = 'The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provide the input aparams.'
doc_neuron = 'The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built.'
doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_trainable = 'Whether the parameters in the fitting net are trainable. This option can be\n\n\
- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
- list of bool: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1.'
doc_rcond = 'The condition number used to determine the initial energy shift for each type of atoms.'
doc_seed = 'Random seed for parameter initialization of the fitting net'
doc_atom_ener = 'Specify the atomic energy in vacuum for each type'
return [
Argument("numb_fparam", int, optional = True, default = 0, doc = doc_numb_fparam),
Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam),
Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("precision", str, optional = True, default = 'default', doc = doc_precision),
Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable),
Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener)
]
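# Example "fitting_net" section for an energy model, using only the defaults above:
#   "fitting_net": {"type": "ener", "neuron": [120, 120, 120], "resnet_dt": true}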
def fitting_polar():
doc_neuron = 'The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built.'
doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_scale = 'The output of the fitting net (polarizability matrix) will be scaled by ``scale``'
#doc_diag_shift = 'The diagonal part of the polarizability matrix will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.'
doc_fit_diag = 'Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix.'
doc_sel_type = 'The atom types for which the atomic polarizability will be provided. If not set, all types will be selected.'
doc_seed = 'Random seed for parameter initialization of the fitting net'
# YWolfeee: user can decide whether to use shift diag
doc_shift_diag = 'Whether to shift the diagonal of polar, which is beneficial to training. Default is true.'
return [
Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
Argument("precision", str, optional = True, default = 'default', doc = doc_precision),
Argument("fit_diag", bool, optional = True, default = True, doc = doc_fit_diag),
Argument("scale", [list,float], optional = True, default = 1.0, doc = doc_scale),
#Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
Argument("shift_diag", bool, optional = True, default = True, doc = doc_shift_diag),
Argument("sel_type", [list,int,None], optional = True, alias = ['pol_type'], doc = doc_sel_type),
Argument("seed", [int,None], optional = True, doc = doc_seed)
]
#def fitting_global_polar():
# return fitting_polar()
def fitting_dipole():
doc_neuron = 'The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built.'
doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.'
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.'
doc_sel_type = 'The atom types for which the atomic dipole will be provided. If not set, all types will be selected.'
doc_seed = 'Random seed for parameter initialization of the fitting net'
return [
Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
Argument("precision", str, optional = True, default = 'default', doc = doc_precision),
Argument("sel_type", [list,int,None], optional = True, alias = ['dipole_type'], doc = doc_sel_type),
Argument("seed", [int,None], optional = True, doc = doc_seed)
]
# YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support.
def fitting_variant_type_args():
doc_descrpt_type = 'The type of the fitting. See explanation below. \n\n\
- `ener`: Fit an energy model (potential energy surface).\n\n\
- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file has either (number of frames) lines with (3 times the number of selected atoms) columns, or (number of frames) lines with 3 columns. See the `loss` parameter.\n\n\
- `polar`: Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file has either (number of frames) lines with (9 times the number of selected atoms) columns, or (number of frames) lines with 9 columns. See the `loss` parameter.\n\n'
return Variant("type", [Argument("ener", dict, fitting_ener()),
Argument("dipole", dict, fitting_dipole()),
Argument("polar", dict, fitting_polar()),
],
optional = True,
default_tag = 'ener',
doc = doc_descrpt_type)
# --- Modifier configurations: --- #
def modifier_dipole_charge():
doc_model_name = "The name of the frozen dipole model file."
doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
doc_ewald_h = f"The grid spacing of the FFT grid. Unit is A"
doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
return [
Argument("model_name", str, optional = False, doc = doc_model_name),
Argument("model_charge_map", list, optional = False, doc = doc_model_charge_map),
Argument("sys_charge_map", list, optional = False, doc = doc_sys_charge_map),
Argument("ewald_beta", float, optional = True, default = 0.4, doc = doc_ewald_beta),
Argument("ewald_h", float, optional = True, default = 1.0, doc = doc_ewald_h),
]
def modifier_variant_type_args():
doc_modifier_type = "The type of modifier. See explanation below.\n\n\
- `dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction."
return Variant("type",
[
Argument("dipole_charge", dict, modifier_dipole_charge()),
],
optional = False,
doc = doc_modifier_type)
# --- model compression configurations: --- #
def model_compression():
doc_model_file = f"The input model file, which will be compressed by the DeePMD-kit."
doc_table_config = f"The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)."
doc_min_nbor_dist = f"The nearest distance between neighbor atoms saved in the frozen model."
return [
Argument("model_file", str, optional = False, doc = doc_model_file),
Argument("table_config", list, optional = False, doc = doc_table_config),
Argument("min_nbor_dist", float, optional = False, doc = doc_min_nbor_dist),
]
# --- model compression configurations: --- #
def model_compression_type_args():
doc_compress_type = "The type of model compression, which should be consistent with the descriptor type."
return Variant("type", [
Argument("se_e2_a", dict, model_compression(), alias = ['se_a'])
],
optional = True,
default_tag = 'se_e2_a',
doc = doc_compress_type)
def model_args ():
doc_type_map = 'A list of strings. Give the name to each type of atoms. Note that the number of atom types in the training system must be less than 128 in a GPU environment.'
doc_data_stat_nbatch = 'The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics.'
doc_data_stat_protect = 'Protect parameter for atomic energy regression.'
doc_type_embedding = "The type embedding."
doc_descrpt = 'The descriptor of atomic environment.'
doc_fitting = 'The fitting of physical properties.'
doc_modifier = 'The modifier of model output.'
doc_use_srtab = 'The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columns. The first column is the distance between atoms. The columns from the second to the last are energies for pairs of certain types. For example, when we have two atom types, 0 and 1, the columns from the 2nd to the 4th correspond to the 0-0, 0-1 and 1-1 pairs, respectively.'
doc_smin_alpha = 'The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.'
doc_sw_rmin = 'The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.'
doc_sw_rmax = 'The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.'
doc_compress_config = 'Model compression configurations'
ca = Argument("model", dict,
[Argument("type_map", list, optional = True, doc = doc_type_map),
Argument("data_stat_nbatch", int, optional = True, default = 10, doc = doc_data_stat_nbatch),
Argument("data_stat_protect", float, optional = True, default = 1e-2, doc = doc_data_stat_protect),
Argument("use_srtab", str, optional = True, doc = doc_use_srtab),
Argument("smin_alpha", float, optional = True, doc = doc_smin_alpha),
Argument("sw_rmin", float, optional = True, doc = doc_sw_rmin),
Argument("sw_rmax", float, optional = True, doc = doc_sw_rmax),
Argument("type_embedding", dict, type_embedding_args(), [], optional = True, doc = doc_type_embedding),
Argument("descriptor", dict, [], [descrpt_variant_type_args()], doc = doc_descrpt),
Argument("fitting_net", dict, [], [fitting_variant_type_args()], doc = doc_fitting),
Argument("modifier", dict, [], [modifier_variant_type_args()], optional = True, doc = doc_modifier),
Argument("compress", dict, [], [model_compression_type_args()], optional = True, doc = doc_compress_config)
])
# print(ca.gen_doc())
return ca
# --- Learning rate configurations: --- #
def learning_rate_exp():
doc_start_lr = 'The learning rate at the start of the training.'
doc_stop_lr = 'The desired learning rate at the end of the training.'
doc_decay_steps = 'The learning rate decays every this number of training steps.'
args = [
Argument("start_lr", float, optional = True, default = 1e-3, doc = doc_start_lr),
Argument("stop_lr", float, optional = True, default = 1e-8, doc = doc_stop_lr),
Argument("decay_steps", int, optional = True, default = 5000, doc = doc_decay_steps)
]
return args
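# Illustrative sketch (not part of the original file): with the arguments above, the
# exponential schedule roughly behaves as below, where `stop_steps` is the total number of
# training steps; the helper name and the exact rounding are assumptions for clarity only.
#
#     def _sketch_exp_lr(step, start_lr=1e-3, stop_lr=1e-8, decay_steps=5000, stop_steps=1000000):
#         decay_rate = (stop_lr / start_lr) ** (decay_steps / stop_steps)
#         return start_lr * decay_rate ** (step // decay_steps)
#
# i.e. the learning rate decays from `start_lr` toward `stop_lr` once every `decay_steps` steps.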
def learning_rate_variant_type_args():
doc_lr = 'The type of the learning rate.'
return Variant("type",
[Argument("exp", dict, learning_rate_exp())],
optional = True,
default_tag = 'exp',
doc = doc_lr)
def learning_rate_args():
doc_scale_by_worker = 'How to alter the learning rate during parallel training or when the batch size is scaled. Valid values are `linear` (default), `sqrt` or `none`.'
doc_lr = "The definition of learning rate"
return Argument("learning_rate", dict,
[Argument("scale_by_worker", str, optional=True, default='linear', doc=doc_scale_by_worker)],
[learning_rate_variant_type_args()],
doc = doc_lr)
# --- Loss configurations: --- #
def start_pref(item):
return f'The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the {item} label should be provided by file {item}.npy in each data system. If both start_pref_{item} and limit_pref_{item} are set to 0, then the {item} will be ignored.'
def limit_pref(item):
return f'The prefactor of {item} loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.'
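# Illustrative sketch (not part of the original file): the start/limit prefactors above are
# typically interpolated with the instantaneous learning rate, roughly as
#
#     pref(step) = limit_pref + (start_pref - limit_pref) * lr(step) / start_lr
#
# so a loss term is weighted by `start_pref` at the beginning of training and approaches
# `limit_pref` as the learning rate decays.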
def loss_ener():
doc_start_pref_e = start_pref('energy')
doc_limit_pref_e = limit_pref('energy')
doc_start_pref_f = start_pref('force')
doc_limit_pref_f = limit_pref('force')
doc_start_pref_v = start_pref('virial')
doc_limit_pref_v = limit_pref('virial')
doc_start_pref_ae = start_pref('atom_ener')
doc_limit_pref_ae = limit_pref('atom_ener')
doc_start_pref_pf = start_pref('atom_pref')
doc_limit_pref_pf = limit_pref('atom_pref')
doc_relative_f = 'If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label.'
doc_enable_atom_ener_coeff = "If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
return [
Argument("start_pref_e", [float,int], optional = True, default = 0.02, doc = doc_start_pref_e),
Argument("limit_pref_e", [float,int], optional = True, default = 1.00, doc = doc_limit_pref_e),
Argument("start_pref_f", [float,int], optional = True, default = 1000, doc = doc_start_pref_f),
Argument("limit_pref_f", [float,int], optional = True, default = 1.00, doc = doc_limit_pref_f),
Argument("start_pref_v", [float,int], optional = True, default = 0.00, doc = doc_start_pref_v),
Argument("limit_pref_v", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_v),
Argument("start_pref_ae", [float,int], optional = True, default = 0.00, doc = doc_start_pref_ae),
Argument("limit_pref_ae", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_ae),
Argument("start_pref_pf", [float,int], optional = True, default = 0.00, doc = doc_start_pref_pf),
Argument("limit_pref_pf", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_pf),
Argument("relative_f", [float,None], optional = True, doc = doc_relative_f),
Argument("enable_atom_ener_coeff", [bool], optional=True, default=False, doc=doc_enable_atom_ener_coeff),
]
# YWolfeee: Modified to support tensor type of loss args.
def loss_tensor():
#doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]."
#doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well."
doc_global_weight = "The prefactor of the weight of the global loss. It should be larger than or equal to 0. It controls the weight of the loss corresponding to the global label, i.e. `polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it is larger than 0.0, this npy file should be included."
doc_local_weight = "The prefactor of the weight of the atomic loss. It should be larger than or equal to 0. It controls the weight of the loss corresponding to the atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it is larger than 0.0, this npy file should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0."
return [
Argument("pref", [float,int], optional = False, default = None, doc = doc_global_weight),
Argument("pref_atomic", [float,int], optional = False, default = None, doc = doc_local_weight),
]
def loss_variant_type_args():
doc_loss = 'The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`.'
return Variant("type",
[Argument("ener", dict, loss_ener()),
Argument("tensor", dict, loss_tensor()),
#Argument("polar", dict, loss_tensor()),
#Argument("global_polar", dict, loss_tensor("global"))
],
optional = True,
default_tag = 'ener',
doc = doc_loss)
def loss_args():
doc_loss = 'The definition of the loss function. The loss type should be set to `tensor`, `ener` or left unset.'
ca = Argument('loss', dict, [],
[loss_variant_type_args()],
optional = True,
doc = doc_loss)
return ca
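# Illustrative sketch (not part of the original file): a possible "loss" section for an
# energy fitting and one for a tensor (dipole/polar) fitting; the numbers are hypothetical.
#
#     "loss": {"type": "ener", "start_pref_e": 0.02, "limit_pref_e": 1.0,
#              "start_pref_f": 1000, "limit_pref_f": 1.0}
#
#     "loss": {"type": "tensor", "pref": 1.0, "pref_atomic": 1.0}
#
# For the tensor loss, setting either prefactor to 0.0 drops the corresponding global or
# atomic label from the loss.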
# --- Training configurations: --- #
def training_data_args(): # ! added by Ziyao: new specification style for data systems.
link_sys = make_link("systems", "training/training_data/systems")
doc_systems = 'The data systems for training. ' \
'This key can be provided with a list that specifies the systems, or be provided with a string ' \
'by which the prefix of all systems is given and the list of the systems is automatically generated.'
doc_set_prefix = f'The prefix of the sets in the {link_sys}.'
doc_batch_size = f'This key can be \n\n\
- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
- int: all {link_sys} use the same batch size.\n\n\
- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
doc_sys_probs = "A list of float if specified. " \
"Should be of the same length as `systems`, " \
"specifying the probability of each system."
args = [
Argument("systems", [list, str], optional=False, default=".", doc=doc_systems),
Argument("set_prefix", str, optional=True, default='set', doc=doc_set_prefix),
Argument("batch_size", [list, int, str], optional=True, default='auto', doc=doc_batch_size),
Argument("auto_prob", str, optional=True, default="prob_sys_size",
doc=doc_auto_prob_style, alias=["auto_prob_style",]),
Argument("sys_probs", list, optional=True, default=None, doc=doc_sys_probs, alias=["sys_weights"]),
]
doc_training_data = "Configurations of training data."
return Argument("training_data", dict, optional=False,
sub_fields=args, sub_variants=[], doc=doc_training_data)
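# Illustrative sketch (not part of the original file): a possible "training_data" section
# using the keys above; the system paths are hypothetical placeholders.
#
#     "training_data": {
#         "systems":    ["../data/system_0", "../data/system_1"],
#         "set_prefix": "set",
#         "batch_size": "auto:32"
#     }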
def validation_data_args(): # ! added by Ziyao: new specification style for data systems.
link_sys = make_link("systems", "training/validation_data/systems")
doc_systems = 'The data systems for validation. ' \
'This key can be provided with a list that specifies the systems, or be provided with a string ' \
'by which the prefix of all systems is given and the list of the systems is automatically generated.'
doc_set_prefix = f'The prefix of the sets in the {link_sys}.'
doc_batch_size = f'This key can be \n\n\
- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
- int: all {link_sys} use the same batch size.\n\n\
- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
doc_sys_probs = "A list of float if specified. " \
"Should be of the same length as `systems`, " \
"specifying the probability of each system."
doc_numb_btch = "An integer that specifies the number of systems to be sampled for each validation period."
args = [
Argument("systems", [list, str], optional=False, default=".", doc=doc_systems),
Argument("set_prefix", str, optional=True, default='set', doc=doc_set_prefix),
Argument("batch_size", [list, int, str], optional=True, default='auto', doc=doc_batch_size),
Argument("auto_prob", str, optional=True, default="prob_sys_size",
doc=doc_auto_prob_style, alias=["auto_prob_style", ]),
Argument("sys_probs", list, optional=True, default=None, doc=doc_sys_probs, alias=["sys_weights"]),
Argument("numb_btch", int, optional=True, default=1, doc=doc_numb_btch, alias=["numb_batch", ])
]
doc_validation_data = "Configurations of validation data. Similar to that of training data, " \
"except that a `numb_btch` argument may be configured"
return Argument("validation_data", dict, optional=True, default=None,
sub_fields=args, sub_variants=[], doc=doc_validation_data)
def mixed_precision_args(): # ! added by Denghui.
doc_output_prec = "The precision of mixed precision parameters, i.e. the precision of the " \
"trainable variables during the mixed precision training process. " \
"Only float32 is currently supported."
doc_compute_prec = "The precision of mixed precision computation, i.e. the compute precision " \
"during the mixed precision training process. " \
"Only float16 is currently supported."
args = [
Argument("output_prec", str, optional=True, default="float32", doc=doc_output_prec),
Argument("compute_prec", str, optional=False, default="float16", doc=doc_compute_prec),
]
doc_mixed_precision = "Configurations of mixed precision."
return Argument("mixed_precision", dict, optional=True,
sub_fields=args, sub_variants=[], doc=doc_mixed_precision)
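# Illustrative sketch (not part of the original file): enabling mixed-precision training
# with the only options currently documented above.
#
#     "mixed_precision": {
#         "output_prec":  "float32",
#         "compute_prec": "float16"
#     }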
def training_args(): # ! modified by Ziyao: data configuration isolated.
doc_numb_steps = 'The number of training steps. Each training step uses one batch of data.'
doc_seed = 'The random seed for getting frames from the training data set.'
doc_disp_file = 'The file for printing learning curve.'
doc_disp_freq = 'The frequency of printing learning curve.'
doc_save_freq = 'The frequency of saving check point.'
doc_save_ckpt = 'The file name of saving check point.'
doc_disp_training = 'Displaying verbose information during training.'
doc_time_training = 'Timing during training.'
doc_profiling = 'Profiling during training.'
doc_profiling_file = 'Output file for profiling.'
doc_enable_profiler = 'Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`.'
doc_tensorboard = 'Enable tensorboard'
doc_tensorboard_log_dir = 'The log directory of tensorboard outputs'
doc_tensorboard_freq = 'The frequency of writing tensorboard events.'
arg_training_data = training_data_args()
arg_validation_data = validation_data_args()
mixed_precision_data = mixed_precision_args()
args = [
arg_training_data,
arg_validation_data,
mixed_precision_data,
Argument("numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]),
Argument("seed", [int,None], optional=True, doc=doc_seed),
Argument("disp_file", str, optional=True, default='lcurve.out', doc=doc_disp_file),
Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq),
Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq),
Argument("save_ckpt", str, optional=True, default='model.ckpt', doc=doc_save_ckpt),
Argument("disp_training", bool, optional=True, default=True, doc=doc_disp_training),
Argument("time_training", bool, optional=True, default=True, doc=doc_time_training),
Argument("profiling", bool, optional=True, default=False, doc=doc_profiling),
Argument("profiling_file", str, optional=True, default='timeline.json', doc=doc_profiling_file),
Argument("enable_profiler", bool, optional=True, default=False, doc=doc_enable_profiler),
Argument("tensorboard", bool, optional=True, default=False, doc=doc_tensorboard),
Argument("tensorboard_log_dir", str, optional=True, default='log', doc=doc_tensorboard_log_dir),
Argument("tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq),
]
doc_training = 'The training options.'
return Argument("training", dict, args, [], doc = doc_training)
def make_index(keys):
ret = []
for ii in keys:
ret.append(make_link(ii, ii))
return ', '.join(ret)
def gen_doc(*, make_anchor=True, make_link=True, **kwargs):
if make_link:
make_anchor = True
ma = model_args()
lra = learning_rate_args()
la = loss_args()
ta = training_args()
nvnmda = nvnmd_args()
ptr = []
ptr.append(ma.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
ptr.append(la.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
ptr.append(lra.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
ptr.append(ta.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
ptr.append(nvnmda.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
key_words = []
for ii in "\n\n".join(ptr).split('\n'):
if 'argument path' in ii:
key_words.append(ii.split(':')[1].replace('`','').strip())
#ptr.insert(0, make_index(key_words))
return "\n\n".join(ptr)
def gen_json(**kwargs):
return json.dumps((
model_args(),
learning_rate_args(),
loss_args(),
training_args(),
nvnmd_args(),
), cls=ArgumentEncoder)
def normalize_hybrid_list(hy_list):
new_list = []
base = Argument("base", dict, [], [descrpt_variant_type_args()], doc = "")
for ii in range(len(hy_list)):
data = base.normalize_value(hy_list[ii], trim_pattern="_*")
base.check_value(data, strict=True)
new_list.append(data)
return new_list
def normalize(data):
if "hybrid" == data["model"]["descriptor"]["type"]:
data["model"]["descriptor"]["list"] \
= normalize_hybrid_list(data["model"]["descriptor"]["list"])
ma = model_args()
lra = learning_rate_args()
la = loss_args()
ta = training_args()
nvnmda = nvnmd_args()
base = Argument("base", dict, [ma, lra, la, ta, nvnmda])
data = base.normalize_value(data, trim_pattern="_*")
base.check_value(data, strict=True)
return data
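# Illustrative sketch (not part of the original file): how `normalize` is typically applied
# to a user-supplied input file before training; the file name is a hypothetical placeholder.
#
#     import json
#     with open("input.json") as fp:
#         jdata = json.load(fp)
#     jdata = normalize(jdata)   # fills in defaults and raises on unknown or ill-typed keys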
if __name__ == '__main__':
gen_doc()
import logging
from typing import Callable, Tuple
import numpy as np
from deepmd.utils.errors import OutOfMemoryError
class AutoBatchSize:
"""This class allows DeePMD-kit to automatically decide the maximum
batch size that will not cause an OOM error.
Notes
-----
We assume all OOM errors will raise :class:`OutOfMemoryError`.
Parameters
----------
initial_batch_size : int, default: 1024
initial batch size (number of total atoms)
factor : float, default: 2.
the factor by which the batch size is increased
Attributes
----------
current_batch_size : int
current batch size (number of total atoms)
maximum_working_batch_size : int
maximum working batch size
minimal_not_working_batch_size : int
minimal not working batch size
"""
def __init__(self, initial_batch_size: int = 1024, factor: float = 2.) -> None:
# See also PyTorchLightning/pytorch-lightning#1638
# TODO: discuss a proper initial batch size
self.current_batch_size = initial_batch_size
self.maximum_working_batch_size = 0
self.minimal_not_working_batch_size = 2**31
self.factor = factor
def execute(self, callable: Callable, start_index: int, natoms: int) -> Tuple[int, tuple]:
"""Excuate a method with given batch size.
Parameters
----------
callable : Callable
The method should accept the batch size and start_index as parameters,
and return the executed batch size and the data.
start_index : int
start index
natoms : int
natoms
Returns
-------
int
the executed batch size returned by the callable, or 0 if the execution failed with OOM
tuple
result from callable, None if failing to execute
Raises
------
OutOfMemoryError
OOM when batch size is 1
"""
try:
n_batch, result = callable(max(self.current_batch_size // natoms, 1), start_index)
except OutOfMemoryError as e:
# TODO: it's very slow to catch OOM error; I don't know what TF is doing here
# but luckily we only need to catch once
self.minimal_not_working_batch_size = min(self.minimal_not_working_batch_size, self.current_batch_size)
if self.maximum_working_batch_size >= self.minimal_not_working_batch_size:
self.maximum_working_batch_size = int(self.minimal_not_working_batch_size / self.factor)
if self.minimal_not_working_batch_size <= natoms:
raise OutOfMemoryError("The callable still throws an out-of-memory (OOM) error even when batch size is 1!") from e
# adjust the next batch size
self._adjust_batch_size(1./self.factor)
return 0, None
else:
n_tot = n_batch * natoms
self.maximum_working_batch_size = max(self.maximum_working_batch_size, n_tot)
# adjust the next batch size
if n_tot + natoms > self.current_batch_size and self.current_batch_size * self.factor < self.minimal_not_working_batch_size:
self._adjust_batch_size(self.factor)
return n_batch, result
def _adjust_batch_size(self, factor: float):
old_batch_size = self.current_batch_size
self.current_batch_size = int(self.current_batch_size * factor)
logging.info("Adjust batch size from %d to %d" % (old_batch_size, self.current_batch_size))
def execute_all(self, callable: Callable, total_size: int, natoms: int, *args, **kwargs) -> Tuple[np.ndarray]:
"""Excuate a method with all given data.
Parameters
----------
callable : Callable
The method should accept *args and **kwargs as input and return a similar array.
total_size : int
Total size
natoms : int
The number of atoms
*args, **kwargs
Any argument that is an np.ndarray with more than one dimension is assumed to have the batch on its first axis and is sliced per batch; other arguments are passed through unchanged.
"""
def execute_with_batch_size(batch_size: int, start_index: int) -> Tuple[int, Tuple[np.ndarray]]:
end_index = start_index + batch_size
end_index = min(end_index, total_size)
return (end_index - start_index), callable(
*[(vv[start_index:end_index] if isinstance(vv, np.ndarray) and vv.ndim > 1 else vv) for vv in args],
**{kk: (vv[start_index:end_index] if isinstance(vv, np.ndarray) and vv.ndim > 1 else vv) for kk, vv in kwargs.items()},
)
index = 0
results = []
while index < total_size:
n_batch, result = self.execute(execute_with_batch_size, index, natoms)
if not isinstance(result, tuple):
result = (result,)
index += n_batch
if n_batch:
# np.ndarray.reshape returns a new array, so collect the reshaped results explicitly
result = tuple(rr.reshape((n_batch, -1)) for rr in result)
results.append(result)
r = tuple([np.concatenate(r, axis=0) for r in zip(*results)])
if len(r) == 1:
# avoid returning tuple if callable doesn't return tuple
r = r[0]
return r
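# Illustrative sketch (not part of the original file): typical use of AutoBatchSize, where
# `eval_func` is a hypothetical callable that evaluates one batch of the (sliced) input
# arrays and raises OutOfMemoryError when the batch does not fit in memory.
#
#     auto_batch = AutoBatchSize(initial_batch_size=1024)
#     energies = auto_batch.execute_all(eval_func, nframes, natoms, coords)
#
# The working batch size is grown by `factor` while evaluations succeed and reduced after an
# OOM, so the largest batch size that fits in memory is found automatically.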