network.py


import numpy as np

from deepmd.env import tf
from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
from deepmd.env import op_module

from deepmd.nvnmd.utils.config import nvnmd_cfg
from deepmd.nvnmd.utils.weight import get_constant_initializer
from deepmd.utils.network import variable_summaries


def get_sess():
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    return sess


def matmul2_qq(a, b, nbit):
    r"""Quantized matmul operation for 2d tensor.
    a and b is input tensor, nbit represent quantification precision
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, 1, sh_a[1]])
    b = tf.reshape(tf.transpose(b), [1, sh_b[1], sh_b[0]])
    y = a * b
    y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=2)
    return y


def matmul3_qq(a, b, nbit):
    r"""Quantized matmul operation for 3d tensor.
    a and b is input tensor, nbit represent quantification precision
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, sh_a[1], 1, sh_a[2]])
    b = tf.reshape(tf.transpose(b, [0, 2, 1]), [-1, 1, sh_b[2], sh_b[1]])
    y = a * b
    if nbit == -1:
        y = y
    else:
        y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=3)
    return y


def qf(x, nbit):
    r"""Quantize and floor tensor `x` with quantification precision `nbit`.
    """
    prec = 2**nbit

    y = tf.floor(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


def qr(x, nbit):
    r"""Quantize and round tensor `x` with quantification precision `nbit`.
    """
    prec = 2**nbit

    y = tf.round(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


# fitting_net
def tanh2(x, nbit=-1, nbit2=-1):
    r"""User-defined activation function tanh2

    Parameter
    ---------
    x
        input tensor
    nbit
        quantification precision for forward calculation
    nbit2
        quantification precision for backward calculation
    """
    y = op_module.tanh2_nvnmd(x, 0, nbit, nbit2, -1)
    return y


def tanh4(x, nbit=-1, nbit2=-1):
    r"""User-defined activation function tanh4

    Parameter
    ---------
    x
        input tensor
    nbit
        quantification precision for forward calculation
    nbit2
        quantification precision for backward calculation
    """
    y = op_module.tanh4_nvnmd(x, 0, nbit, nbit2, -1)
    return y


def one_layer_wb(
    shape,
    outputs_size,
    bavg,
    stddev,
    precision,
    trainable,
    initial_variables,
    seed,
    uniform_seed,
    name
):
    if nvnmd_cfg.restore_fitting_net:
        # initializer
        w_initializer = get_constant_initializer(nvnmd_cfg.weight, 'matrix')
        b_initializer = get_constant_initializer(nvnmd_cfg.weight, 'bias')
    else:
        w_initializer = tf.random_normal_initializer(
            stddev=stddev / np.sqrt(shape[1] + outputs_size),
            seed=seed if (seed is None or uniform_seed) else seed + 0)
        b_initializer = tf.random_normal_initializer(
            stddev=stddev,
            mean=bavg,
            seed=seed if (seed is None or uniform_seed) else seed + 1)
        if initial_variables is not None:
            w_initializer = tf.constant_initializer(initial_variables[name + '/matrix'])
            b_initializer = tf.constant_initializer(initial_variables[name + '/bias'])
    # variable
    w = tf.get_variable('matrix',
                        [shape[1], outputs_size],
                        precision,
                        w_initializer,
                        trainable=trainable)
    variable_summaries(w, 'matrix')
    b = tf.get_variable('bias',
                        [outputs_size],
                        precision,
                        b_initializer,
                        trainable=trainable)
    variable_summaries(b, 'bias')

    return w, b


def one_layer(inputs,
              outputs_size,
              activation_fn=tf.nn.tanh,
              precision=GLOBAL_TF_FLOAT_PRECISION,
              stddev=1.0,
              bavg=0.0,
              name='linear',
              reuse=None,
              seed=None,
              use_timestep=False,
              trainable=True,
              useBN=False,
              uniform_seed=False,
              initial_variables=None,
              mixed_prec=None,
              final_layer=False):
    r"""Build one layer with continuous or quantized value.
    Its weight and bias can be initialed with random or constant value.
    """
    if activation_fn is not None:
        activation_fn = tanh4
    with tf.variable_scope(name, reuse=reuse):
        shape = inputs.get_shape().as_list()
        w, b = one_layer_wb(shape, outputs_size, bavg, stddev, precision, trainable, initial_variables, seed, uniform_seed, name)
        if nvnmd_cfg.quantize_fitting_net:
            NBIT_DATA_FL = nvnmd_cfg.nbit['NBIT_DATA_FL']
            NBIT_WEIGHT_FL = nvnmd_cfg.nbit['NBIT_WEIGHT_FL']
            #
            inputs = qf(inputs, NBIT_DATA_FL)
            w = qr(w, NBIT_WEIGHT_FL)
            with tf.variable_scope('wx', reuse=reuse):
                wx = op_module.matmul_nvnmd(inputs, w, 0, NBIT_DATA_FL, NBIT_DATA_FL, -1)
            #
            b = qr(b, NBIT_DATA_FL)
            with tf.variable_scope('wxb', reuse=reuse):
                hidden = wx + b
            #
            with tf.variable_scope('actfun', reuse=reuse):
                if activation_fn is not None:
                    y = activation_fn(hidden, NBIT_DATA_FL, NBIT_DATA_FL)
                else:
                    y = hidden
        else:
            hidden = tf.matmul(inputs, w) + b
            y = activation_fn(hidden, -1, -1) if (activation_fn is not None) else hidden
    # 'reshape' is necessary
    # the next layer needs shape of input tensor to build weight
    y = tf.reshape(y, [-1, outputs_size])
    return y