Initial commit

17d17806 · yongshk · 17d17806 · 17d17806 · 17d17806 · 17d17806
Commit 17d17806 authored Apr 24, 2023 by yongshk
18 changed files
--- a/tensorlayer/__pycache__/ops.cpython-36.pyc
+++ b/tensorlayer/__pycache__/ops.cpython-36.pyc
--- a/tensorlayer/__pycache__/prepro.cpython-36.pyc
+++ b/tensorlayer/__pycache__/prepro.cpython-36.pyc
--- a/tensorlayer/__pycache__/rein.cpython-36.pyc
+++ b/tensorlayer/__pycache__/rein.cpython-36.pyc
--- a/tensorlayer/__pycache__/utils.cpython-36.pyc
+++ b/tensorlayer/__pycache__/utils.cpython-36.pyc
--- a/tensorlayer/__pycache__/visualize.cpython-36.pyc
+++ b/tensorlayer/__pycache__/visualize.cpython-36.pyc
--- a/tensorlayer/activation.py
+++ b/tensorlayer/activation.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import tensorflow as tf
+
+def identity(x, name=None):
+    """The identity activation function, Shortcut is ``linear``.
+
+    Parameters
+    ----------
+    x : a tensor input
+        input(s)
+
+
+    Returns
+    --------
+    A `Tensor` with the same type as `x`.
+    """
+    return x
+
+# Shortcut
+linear = identity
+
+def ramp(x=None, v_min=0, v_max=1, name=None):
+    """The ramp activation function.
+
+    Parameters
+    ----------
+    x : a tensor input
+        input(s)
+    v_min : float
+        if input(s) smaller than v_min, change inputs to v_min
+    v_max : float
+        if input(s) greater than v_max, change inputs to v_max
+    name : a string or None
+        An optional name to attach to this activation function.
+
+
+    Returns
+    --------
+    A `Tensor` with the same type as `x`.
+    """
+    return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name)
+
+def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"):
+    """The LeakyReLU, Shortcut is ``lrelu``.
+
+    Modified version of ReLU, introducing a nonzero gradient for negative
+    input.
+
+    Parameters
+    ----------
+    x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
+        `int16`, or `int8`.
+    alpha : `float`. slope.
+    name : a string or None
+        An optional name to attach to this activation function.
+
+    Examples
+    ---------
+    >>> network = tl.layers.DenseLayer(network, n_units=100, name = 'dense_lrelu',
+    ...                 act= lambda x : tl.act.lrelu(x, 0.2))
+
+    References
+    ------------
+    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) <http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf>`_
+    """
+    with tf.name_scope(name) as scope:
+        # x = tf.nn.relu(x)
+        # m_x = tf.nn.relu(-x)
+        # x -= alpha * m_x
+        x = tf.maximum(x, alpha * x)
+    return x
+
+#Shortcut
+lrelu = leaky_relu
+
+def pixel_wise_softmax(output, name='pixel_wise_softmax'):
+    """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1.
+    Usually be used for image segmentation.
+
+    Parameters
+    ------------
+    output : tensor
+        - For 2d image, 4D tensor [batch_size, height, weight, channel], channel >= 2.
+        - For 3d image, 5D tensor [batch_size, depth, height, weight, channel], channel >= 2.
+
+    Examples
+    ---------
+    >>> outputs = pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
+
+    References
+    -----------
+    - `tf.reverse <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#reverse>`_
+    """
+    with tf.name_scope(name) as scope:
+        return tf.nn.softmax(output)
+        ## old implementation
+        # exp_map = tf.exp(output)
+        # if output.get_shape().ndims == 4:   # 2d image
+        #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True]))
+        # elif output.get_shape().ndims == 5: # 3d image
+        #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True]))
+        # else:
+        #     raise Exception("output parameters should be 2d or 3d image, not %s" % str(output._shape))
+        # return tf.div(exp_map, evidence)
--- a/tensorlayer/cost.py
+++ b/tensorlayer/cost.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import tensorflow as tf
+import numbers
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import standard_ops
+
+## Cost Functions
+
+def cross_entropy(output, target, name=None):
+    """It is a softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy of two distributions, implement
+    softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
+
+    Parameters
+    ----------
+    output : Tensorflow variable
+        A distribution with shape: [batch_size, n_feature].
+    target : Tensorflow variable
+        A batch of index with shape: [batch_size, ].
+    name : string
+        Name of this loss.
+
+    Examples
+    --------
+    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss')
+
+    References
+    -----------
+    - About cross-entropy: `wiki <https://en.wikipedia.org/wiki/Cross_entropy>`_.\n
+    - The code is borrowed from: `here <https://en.wikipedia.org/wiki/Cross_entropy>`_.
+    """
+    try: # old
+        return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, targets=target))
+    except: # TF 1.0
+        assert name is not None, "Please give a unique name to tl.cost.cross_entropy for TF1.0+"
+        return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output, name=name))
+
+def sigmoid_cross_entropy(output, target, name=None):
+    """It is a sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.
+    """
+    try: # TF 1.0
+        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output, name=name))
+    except:
+        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, targets=target))
+
+
+def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
+    """Computes binary cross entropy given `output`.
+
+    For brevity, let `x = output`, `z = target`.  The binary cross entropy loss is
+
+        loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))
+
+    Parameters
+    ----------
+    output : tensor of type `float32` or `float64`.
+    target : tensor of the same type and shape as `output`.
+    epsilon : float
+        A small value to avoid output is zero.
+    name : string
+        An optional name to attach to this layer.
+
+    References
+    -----------
+    - `DRAW <https://github.com/ericjang/draw/blob/master/draw.py#L73>`_
+    """
+#     from tensorflow.python.framework import ops
+#     with ops.op_scope([output, target], name, "bce_loss") as name:
+#         output = ops.convert_to_tensor(output, name="preds")
+#         target = ops.convert_to_tensor(targets, name="target")
+    with tf.name_scope(name):
+        return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) +
+                              (1. - target) * tf.log(1. - output + epsilon)), axis=1))
+
+
+def mean_squared_error(output, target, is_mean=False):
+    """Return the TensorFlow expression of mean-squre-error of two distributions.
+
+    Parameters
+    ----------
+    output : 2D or 4D tensor.
+    target : 2D or 4D tensor.
+    is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default).
+
+    References
+    ------------
+    - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`_
+    """
+    with tf.name_scope("mean_squared_error_loss"):
+        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
+            if is_mean:
+                mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), 1))
+            else:
+                mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), 1))
+        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
+            if is_mean:
+                mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2, 3]))
+            else:
+                mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2, 3]))
+        return mse
+
+def normalized_mean_square_error(output, target):
+    """Return the TensorFlow expression of normalized mean-squre-error of two distributions.
+
+    Parameters
+    ----------
+    output : 2D or 4D tensor.
+    target : 2D or 4D tensor.
+    """
+    with tf.name_scope("mean_squared_error_loss"):
+        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=1))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=1))
+        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1,2,3]))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1,2,3]))
+        nmse = tf.reduce_mean(nmse_a / nmse_b)
+    return nmse
+
+
+def dice_coe(output, target, epsilon=1e-10):
+    """Sørensen–Dice coefficient for comparing the similarity of two distributions,
+    usually be used for binary image segmentation i.e. labels are binary.
+    The coefficient = [0, 1], 1 if totally match.
+
+    Parameters
+    -----------
+    output : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    target : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    epsilon : float
+        An optional name to attach to this layer.
+
+    Examples
+    ---------
+    >>> outputs = tl.act.pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_, epsilon=1e-5)
+
+    References
+    -----------
+    - `wiki-dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
+    """
+    # inse = tf.reduce_sum( tf.mul(output, target) )
+    # l = tf.reduce_sum( tf.mul(output, output) )
+    # r = tf.reduce_sum( tf.mul(target, target) )
+    inse = tf.reduce_sum( output * target )
+    l = tf.reduce_sum( output * output )
+    r = tf.reduce_sum( target * target )
+    dice = 2 * (inse) / (l + r)
+    if epsilon == 0:
+        return dice
+    else:
+        return tf.clip_by_value(dice, 0, 1.0-epsilon)
+
+
+def dice_hard_coe(output, target, epsilon=1e-10):
+    """Non-differentiable Sørensen–Dice coefficient for comparing the similarity of two distributions,
+    usually be used for binary image segmentation i.e. labels are binary.
+    The coefficient = [0, 1], 1 if totally match.
+
+    Parameters
+    -----------
+    output : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    target : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    epsilon : float
+        An optional name to attach to this layer.
+
+    Examples
+    ---------
+    >>> outputs = pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)
+
+    References
+    -----------
+    - `wiki-dice <https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient>`_
+    """
+    output = tf.cast(output > 0.5, dtype=tf.float32)
+    target = tf.cast(target > 0.5, dtype=tf.float32)
+    inse = tf.reduce_sum( output * target )
+    l = tf.reduce_sum( output * output )
+    r = tf.reduce_sum( target * target )
+    dice = 2 * (inse) / (l + r)
+    if epsilon == 0:
+        return dice
+    else:
+        return tf.clip_by_value(dice, 0, 1.0-epsilon)
+
+def iou_coe(output, target, threshold=0.5, epsilon=1e-10):
+    """Non-differentiable Intersection over Union, usually be used for evaluating binary image segmentation.
+    The coefficient = [0, 1], 1 means totally match.
+
+    Parameters
+    -----------
+    output : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    target : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    threshold : float
+        The threshold value to be true.
+    epsilon : float
+        A small value to avoid zero denominator when both output and target output nothing.
+
+    Examples
+    ---------
+    >>> outputs = tl.act.pixel_wise_softmax(network.outputs)
+    >>> iou = tl.cost.iou_coe(outputs[:,:,:,0], y_[:,:,:,0])
+
+    Notes
+    ------
+    - IOU cannot be used as training loss, people usually use dice coefficient for training, and IOU for evaluating.
+    """
+    pre = tf.cast(output > threshold, dtype=tf.float32)
+    truth = tf.cast(target > threshold, dtype=tf.float32)
+    intersection = tf.reduce_sum(pre * truth)
+    union = tf.reduce_sum(tf.cast((pre + truth) > threshold, dtype=tf.float32))
+    return tf.reduce_sum(intersection) / (tf.reduce_sum(union) + epsilon)
+
+
+def cross_entropy_seq(logits, target_seqs, batch_size=None):#, batch_size=1, num_steps=None):
+    """Returns the expression of cross-entropy of two sequences, implement
+    softmax internally. Normally be used for Fixed Length RNN outputs.
+
+    Parameters
+    ----------
+    logits : Tensorflow variable
+        2D tensor, ``network.outputs``, [batch_size*n_steps (n_examples), number of output units]
+    target_seqs : Tensorflow variable
+        target : 2D tensor [batch_size, n_steps], if the number of step is dynamic, please use ``cross_entropy_seq_with_mask`` instead.
+    batch_size : None or int.
+        If not None, the return cost will be divided by batch_size.
+
+    Examples
+    --------
+    >>> see PTB tutorial for more details
+    >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
+    >>> targets = tf.placeholder(tf.int32, [batch_size, num_steps])
+    >>> cost = tl.cost.cross_entropy_seq(network.outputs, targets)
+    """
+    try: # TF 1.0
+        sequence_loss_by_example_fn = tf.contrib.legacy_seq2seq.sequence_loss_by_example
+    except:
+        sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example
+
+    loss = sequence_loss_by_example_fn(
+        [logits],
+        [tf.reshape(target_seqs, [-1])],
+        [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
+        # [tf.ones([batch_size * num_steps])])
+    cost = tf.reduce_sum(loss) #/ batch_size
+    if batch_size is not None:
+        cost = cost / batch_size
+    return cost
+
+
+def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False, name=None):
+    """Returns the expression of cross-entropy of two sequences, implement
+    softmax internally. Normally be used for Dynamic RNN outputs.
+
+    Parameters
+    -----------
+    logits : network identity outputs
+        2D tensor, ``network.outputs``, [batch_size, number of output units].
+    target_seqs : int of tensor, like word ID.
+        [batch_size, ?]
+    input_mask : the mask to compute loss
+        The same size with target_seqs, normally 0 and 1.
+    return_details : boolean
+        - If False (default), only returns the loss.
+        - If True, returns the loss, losses, weights and targets (reshape to one vetcor).
+
+    Examples
+    --------
+    - see Image Captioning Example.
+    """
+    targets = tf.reshape(target_seqs, [-1])   # to one vector
+    weights = tf.to_float(tf.reshape(input_mask, [-1]))   # to one vector like targets
+    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights
+    #losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others
+
+    try: ## TF1.0
+        loss = tf.divide(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
+                        tf.reduce_sum(weights),
+                        name="seq_loss_with_mask")
+    except: ## TF0.12
+        loss = tf.div(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
+                        tf.reduce_sum(weights),
+                        name="seq_loss_with_mask")
+    if return_details:
+        return loss, losses, weights, targets
+    else:
+        return loss
+
+
+def cosine_similarity(v1, v2):
+    """Cosine similarity [-1, 1], `wiki <https://en.wikipedia.org/wiki/Cosine_similarity>`_.
+
+    Parameters
+    -----------
+    v1, v2 : tensor of [batch_size, n_feature], with the same number of features.
+
+    Returns
+    -----------
+    a tensor of [batch_size, ]
+    """
+    try: ## TF1.0
+        cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1)))
+    except: ## TF0.12
+        cost = tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) / (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) * tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1)))
+    return cost
+
+
+## Regularization Functions
+def li_regularizer(scale, scope=None):
+  """li regularization removes the neurons of previous layer, `i` represents `inputs`.\n
+  Returns a function that can be used to apply group li regularization to weights.\n
+  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
+
+  Parameters
+  ----------
+  scale : float
+    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+  scope: An optional scope name for TF12+.
+
+  Returns
+  --------
+  A function with signature `li(weights, name=None)` that apply Li regularization.
+
+  Raises
+  ------
+  ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
+  """
+  import numbers
+  from tensorflow.python.framework import ops
+  from tensorflow.python.ops import standard_ops
+  # from tensorflow.python.platform import tf_logging as logging
+
+  if isinstance(scale, numbers.Integral):
+    raise ValueError('scale cannot be an integer: %s' % scale)
+  if isinstance(scale, numbers.Real):
+    if scale < 0.:
+      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                       scale)
+    if scale >= 1.:
+      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+                       scale)
+    if scale == 0.:
+      logging.info('Scale of 0 disables regularizer.')
+      return lambda _, name=None: None
+
+  def li(weights, name=None):
+    """Applies li regularization to weights."""
+    with tf.name_scope('li_regularizer') as scope:
+        my_scale = ops.convert_to_tensor(scale,
+                                           dtype=weights.dtype.base_dtype,
+                                           name='scale')
+        if tf.__version__ <= '0.12':
+            standard_ops_fn = standard_ops.mul
+        else:
+            standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(
+              my_scale,
+              standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))),
+              name=scope)
+  return li
+
+
+
+def lo_regularizer(scale, scope=None):
+  """lo regularization removes the neurons of current layer, `o` represents `outputs`\n
+  Returns a function that can be used to apply group lo regularization to weights.\n
+  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
+
+  Parameters
+  ----------
+  scale : float
+    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+  scope: An optional scope name for TF12+.
+
+  Returns
+  -------
+  A function with signature `lo(weights, name=None)` that apply Lo regularization.
+
+  Raises
+  ------
+  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
+  """
+  import numbers
+  from tensorflow.python.framework import ops
+  from tensorflow.python.ops import standard_ops
+  # from tensorflow.python.platform import tf_logging as logging
+
+  if isinstance(scale, numbers.Integral):
+    raise ValueError('scale cannot be an integer: %s' % scale)
+  if isinstance(scale, numbers.Real):
+    if scale < 0.:
+      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                       scale)
+    if scale >= 1.:
+      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+                       scale)
+    if scale == 0.:
+      logging.info('Scale of 0 disables regularizer.')
+      return lambda _, name=None: None
+
+  def lo(weights, name='lo_regularizer'):
+    """Applies group column regularization to weights."""
+    with tf.name_scope(name) as scope:
+        my_scale = ops.convert_to_tensor(scale,
+                                       dtype=weights.dtype.base_dtype,
+                                       name='scale')
+        if tf.__version__ <= '0.12':
+            standard_ops_fn = standard_ops.mul
+        else:
+            standard_ops_fn = standard_ops.multiply
+        return standard_ops_fn(
+          my_scale,
+          standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))),
+          name=scope)
+  return lo
+
+def maxnorm_regularizer(scale=1.0, scope=None):
+  """Max-norm regularization returns a function that can be used
+  to apply max-norm regularization to weights.
+  About max-norm: `wiki <https://en.wikipedia.org/wiki/Matrix_norm#Max_norm>`_.\n
+  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
+
+  Parameters
+  ----------
+  scale : float
+    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+  scope: An optional scope name.
+
+  Returns
+  ---------
+  A function with signature `mn(weights, name=None)` that apply Lo regularization.
+
+  Raises
+  --------
+  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
+  """
+  import numbers
+  from tensorflow.python.framework import ops
+  from tensorflow.python.ops import standard_ops
+
+  if isinstance(scale, numbers.Integral):
+    raise ValueError('scale cannot be an integer: %s' % scale)
+  if isinstance(scale, numbers.Real):
+    if scale < 0.:
+      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                       scale)
+    # if scale >= 1.:
+    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+    #                    scale)
+    if scale == 0.:
+      logging.info('Scale of 0 disables regularizer.')
+      return lambda _, name=None: None
+
+  def mn(weights, name='max_regularizer'):
+    """Applies max-norm regularization to weights."""
+    with tf.name_scope(name) as scope:
+          my_scale = ops.convert_to_tensor(scale,
+                                           dtype=weights.dtype.base_dtype,
+                                           name='scale')
+          if tf.__version__ <= '0.12':
+              standard_ops_fn = standard_ops.mul
+          else:
+              standard_ops_fn = standard_ops.multiply
+          return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
+  return mn
+
+def maxnorm_o_regularizer(scale, scope):
+  """Max-norm output regularization removes the neurons of current layer.\n
+  Returns a function that can be used to apply max-norm regularization to each column of weight matrix.\n
+  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
+
+  Parameters
+  ----------
+  scale : float
+    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+  scope: An optional scope name.
+
+  Returns
+  ---------
+  A function with signature `mn_o(weights, name=None)` that apply Lo regularization.
+
+  Raises
+  ---------
+  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
+  """
+  import numbers
+  from tensorflow.python.framework import ops
+  from tensorflow.python.ops import standard_ops
+
+  if isinstance(scale, numbers.Integral):
+    raise ValueError('scale cannot be an integer: %s' % scale)
+  if isinstance(scale, numbers.Real):
+    if scale < 0.:
+      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                       scale)
+    # if scale >= 1.:
+    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+    #                    scale)
+    if scale == 0.:
+      logging.info('Scale of 0 disables regularizer.')
+      return lambda _, name=None: None
+
+  def mn_o(weights, name='maxnorm_o_regularizer'):
+     """Applies max-norm regularization to weights."""
+     with tf.name_scope(name) as scope:
+          my_scale = ops.convert_to_tensor(scale,
+                                           dtype=weights.dtype.base_dtype,
+                                                   name='scale')
+          if tf.__version__ <= '0.12':
+             standard_ops_fn = standard_ops.mul
+          else:
+             standard_ops_fn = standard_ops.multiply
+          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope)
+  return mn_o
+
+def maxnorm_i_regularizer(scale, scope=None):
+  """Max-norm input regularization removes the neurons of previous layer.\n
+  Returns a function that can be used to apply max-norm regularization to each row of weight matrix.\n
+  The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
+
+  Parameters
+  ----------
+  scale : float
+    A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+  scope: An optional scope name.
+
+  Returns
+  ---------
+  A function with signature `mn_i(weights, name=None)` that apply Lo regularization.
+
+  Raises
+  ---------
+  ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
+  """
+  import numbers
+  from tensorflow.python.framework import ops
+  from tensorflow.python.ops import standard_ops
+
+  if isinstance(scale, numbers.Integral):
+    raise ValueError('scale cannot be an integer: %s' % scale)
+  if isinstance(scale, numbers.Real):
+    if scale < 0.:
+      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                       scale)
+    # if scale >= 1.:
+    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+    #                    scale)
+    if scale == 0.:
+      logging.info('Scale of 0 disables regularizer.')
+      return lambda _, name=None: None
+
+  def mn_i(weights, name='maxnorm_i_regularizer'):
+     """Applies max-norm regularization to weights."""
+     with tf.name_scope(name) as scope:
+          my_scale = ops.convert_to_tensor(scale,
+                                           dtype=weights.dtype.base_dtype,
+                                                   name='scale')
+          if tf.__version__ <= '0.12':
+             standard_ops_fn = standard_ops.mul
+          else:
+             standard_ops_fn = standard_ops.multiply
+          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
+  return mn_i
+
+
+
+
+
+#
--- a/tensorlayer/db.py
+++ b/tensorlayer/db.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+"""
+Experimental Database Management System.
+
+Latest Version
+"""
+
+
+import tensorflow as tf
+import tensorlayer as tl
+import numpy as np
+import time
+import math
+
+
+import uuid
+
+import pymongo
+import gridfs
+import pickle
+from pymongo import MongoClient
+from datetime import datetime
+
+import inspect
+
+def AutoFill(func):
+    def func_wrapper(self,*args,**kwargs):
+        d=inspect.getcallargs(func,self,*args,**kwargs)
+        d['args'].update({"studyID":self.studyID})
+        return  func(**d)
+    return func_wrapper
+
+
+
+
+
+
+class TensorDB(object):
+    """TensorDB is a MongoDB based manager that help you to manage data, network topology, parameters and logging.
+
+    Parameters
+    -------------
+    ip : string, localhost or IP address.
+    port : int, port number.
+    db_name : string, database name.
+    user_name : string, set to None if authentication is not needed.
+    password : string.
+
+    Properties
+    ------------
+    db : ``pymongo.MongoClient[db_name]``, xxxxxx
+    datafs : ``gridfs.GridFS(self.db, collection="datafs")``, xxxxxxxxxx
+    modelfs : ``gridfs.GridFS(self.db, collection="modelfs")``,
+    paramsfs : ``gridfs.GridFS(self.db, collection="paramsfs")``,
+    db.Params : Collection for
+    db.TrainLog : Collection for
+    db.ValidLog : Collection for
+    db.TestLog : Collection for
+    studyID : string, unique ID, if None random generate one.
+
+    Dependencies
+    -------------
+    1 : MongoDB, as TensorDB is based on MongoDB, you need to install it in your
+       local machine or remote machine.
+    2 : pip install pymongo, for MongoDB python API.
+
+    Optional Tools
+    ----------------
+    1 : You may like to install MongoChef or the Mongo Management Studio app for
+       visualizing or testing your MongoDB.
+    """
+    def __init__(
+        self,
+        ip = 'localhost',
+        port = 27017,
+        db_name = 'db_name',
+        user_name = None,
+        password = 'password',
+        studyID=None
+    ):
+        ## connect mongodb
+        client = MongoClient(ip, port)
+        self.db = client[db_name]
+        if user_name != None:
+            self.db.authenticate(user_name, password)
+
+
+        if studyID is None:
+            self.studyID=str(uuid.uuid1())
+        else:
+            self.studyID=studyID
+
+        ## define file system (Buckets)
+        self.datafs = gridfs.GridFS(self.db, collection="datafs")
+        self.modelfs = gridfs.GridFS(self.db, collection="modelfs")
+        self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs")
+        self.archfs=gridfs.GridFS(self.db,collection="ModelArchitecture")
+        ##
+        print("[TensorDB] Connect SUCCESS {}:{} {} {} {}".format(ip, port, db_name, user_name, studyID))
+
+        self.ip = ip
+        self.port = port
+        self.db_name = db_name
+        self.user_name = user_name
+
+    def __autofill(self,args):
+        return args.update({'studyID':self.studyID})
+
+    def __serialization(self,ps):
+        return pickle.dumps(ps, protocol=2)
+
+    def __deserialization(self,ps):
+        return pickle.loads(ps)
+
+    def save_params(self, params=[], args={}):#, file_name='parameters'):
+        """ Save parameters into MongoDB Buckets, and save the file ID into Params Collections.
+
+        Parameters
+        ----------
+        params : a list of parameters
+        args : dictionary, item meta data.
+
+        Returns
+        ---------
+        f_id : the Buckets ID of the parameters.
+        """
+        self.__autofill(args)
+        s = time.time()
+        f_id = self.paramsfs.put(self.__serialization(params))#, file_name=file_name)
+        args.update({'f_id': f_id, 'time': datetime.utcnow()})
+        self.db.Params.insert_one(args)
+        # print("[TensorDB] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2)))
+        print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time()-s, 2)))
+        return f_id
+
+    @AutoFill
+    def find_one_params(self, args={},sort=None):
+        """ Find one parameter from MongoDB Buckets.
+
+        Parameters
+        ----------
+        args : dictionary, find items.
+        sort : forwarded unchanged as the ``sort`` argument of ``Params.find_one``.
+
+        Returns
+        --------
+        params : the parameters, return False if nothing found or loading failed.
+        f_id : the Buckets ID of the parameters, return False if nothing found or loading failed.
+        """
+
+        s = time.time()
+        d = self.db.Params.find_one(filter=args,sort=sort)
+
+        if d is None:
+            print("[TensorDB] FAIL! Cannot find: {}".format(args))
+            return False, False
+        f_id = d['f_id']
+
+        try:
+            params = self.__deserialization(self.paramsfs.get(f_id).read())
+        except Exception as e:
+            # Keep the (False, False) contract but report the cause; a bare
+            # ``except`` would also trap KeyboardInterrupt and SystemExit.
+            print("[TensorDB] FAIL! Cannot load params {}: {}".format(f_id, e))
+            return False, False
+
+        print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time()-s, 2)))
+        return params, f_id
+
+    @AutoFill
+    def find_all_params(self, args={}):
+        """ Find all parameter from MongoDB Buckets
+
+        Parameters
+        ----------
+        args : dictionary, find items
+
+        Returns
+        --------
+        params : a list with the parameters of every matching Buckets file,
+            return False if nothing found.
+
+        """
+
+        s = time.time()
+        pc = self.db.Params.find(args)
+        f_id_list = pc.distinct('f_id')
+
+        # find() always hands back a cursor, never None, so "nothing found"
+        # has to be detected from the list of file IDs.
+        if not f_id_list:
+            print("[TensorDB] FAIL! Cannot find any: {}".format(args))
+            return False
+
+        # you may have multiple Buckets files
+        params = [self.__deserialization(self.paramsfs.get(f_id).read())
+                  for f_id in f_id_list]
+
+        print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time()-s, 2)))
+        return params
+
+    @AutoFill
+    def del_params(self, args={}):
+        """ Delete params in MongoDB Buckets.
+
+        Parameters
+        -----------
+        args : dictionary, find items to delete, leave it empty to delete all parameters.
+        """
+
+        pc = self.db.Params.find(args)
+        f_id_list = pc.distinct('f_id')
+        # remove from Buckets
+        for f in f_id_list:
+            self.paramsfs.delete(f)
+        # remove from Collections; delete_many is the supported replacement
+        # for the deprecated Collection.remove and matches the log deleters.
+        self.db.Params.delete_many(args)
+
+        print("[TensorDB] Delete params SUCCESS: {}".format(args))
+
+    def _print_dict(self, args):
+        """Format ``args`` as ``"key: value / "`` pairs, skipping ``_id``.
+
+        Parameters
+        ----------
+        args : dictionary to format.
+
+        Returns
+        --------
+        string : the concatenated pairs; every pair, including the last one,
+            is followed by ``" / "``. Empty when nothing is left to print.
+        """
+        # Compare by value: ``is not`` tests object identity, which is not
+        # guaranteed for equal strings.
+        return "".join(
+            str(key) + ": " + str(value) + " / "
+            for key, value in args.items()
+            if key != '_id'
+        )
+
+    ## =========================== LOG =================================== ##
+    @AutoFill
+    def train_log(self, args={}):
+        """Save the training log.
+
+        Parameters
+        -----------
+        args : dictionary, items to save.
+
+        Returns
+        --------
+        The result object of ``TrainLog.insert_one``.
+
+        Examples
+        ---------
+        >>> db.train_log({'loss': loss, 'acc': acc, 'time': time.time()})
+        """
+        # Unlike valid_log / test_log, nothing is printed here, so no log
+        # string is built either.
+        return self.db.TrainLog.insert_one(args)
+
+    @AutoFill
+    def del_train_log(self, args={}):
+        """Remove every TrainLog document that matches ``args``.
+
+        Parameters
+        -----------
+        args : dictionary, find items to delete, leave it empty to delete all log.
+        """
+        train_logs = self.db.TrainLog
+        train_logs.delete_many(args)
+        print("[TensorDB] Delete TrainLog SUCCESS")
+
+    @AutoFill
+    def valid_log(self, args={}):
+        """Insert ``args`` into the ValidLog collection and print it.
+
+        Parameters
+        -----------
+        args : dictionary, items to save.
+
+        Returns
+        --------
+        The result object of ``ValidLog.insert_one``.
+
+        Examples
+        ---------
+        >>> db.valid_log({'loss': loss, 'acc': acc, 'time': time.time()})
+        """
+        outcome = self.db.ValidLog.insert_one(args)
+        # Format after the insert, as before; ``_id`` is left out of the text.
+        print("[TensorDB] ValidLog: " + self._print_dict(args))
+        return outcome
+
+    @AutoFill
+    def del_valid_log(self, args={}):
+        """Remove every ValidLog document that matches ``args``.
+
+        Parameters
+        -----------
+        args : dictionary, find items to delete, leave it empty to delete all log.
+        """
+        valid_logs = self.db.ValidLog
+        valid_logs.delete_many(args)
+        print("[TensorDB] Delete ValidLog SUCCESS")
+
+    @AutoFill
+    def test_log(self, args={}):
+        """Insert ``args`` into the TestLog collection and print it.
+
+        Parameters
+        -----------
+        args : dictionary, items to save.
+
+        Returns
+        --------
+        The result object of ``TestLog.insert_one``.
+
+        Examples
+        ---------
+        >>> db.test_log({'loss': loss, 'acc': acc, 'time': time.time()})
+        """
+        outcome = self.db.TestLog.insert_one(args)
+        # Format after the insert, as before; ``_id`` is left out of the text.
+        print("[TensorDB] TestLog: " + self._print_dict(args))
+        return outcome
+
+    @AutoFill
+    def del_test_log(self, args={}):
+        """Remove every TestLog document that matches ``args``.
+
+        Parameters
+        -----------
+        args : dictionary, find items to delete, leave it empty to delete all log.
+        """
+        test_logs = self.db.TestLog
+        test_logs.delete_many(args)
+        print("[TensorDB] Delete TestLog SUCCESS")
+
+    ## =========================== Network Architecture ================== ##
+    @AutoFill
+    def save_model_architecture(self,s,args={}):
+        """Store a model architecture in the architecture Buckets.
+
+        The content ``s`` is written to the Buckets under the file name
+        ``modelarchitecture``; its file ID is added to ``args`` under the key
+        ``fid`` and ``args`` is inserted into the ``march`` collection.
+
+        Parameters
+        -----------
+        s : the architecture content to store.
+        args : dictionary, item meta data; updated in place.
+
+        Returns
+        --------
+        fid : the Buckets ID of the stored architecture.
+        """
+        self.__autofill(args)
+        fid=self.archfs.put(s,filename="modelarchitecture")
+        args.update({"fid":fid})
+        self.db.march.insert_one(args)
+        # Hand the ID back, as save_params does, so callers can reference it.
+        return fid
+
+    @AutoFill
+    def load_model_architecture(self,args={}):
+        """Find one architecture record and read its content from the Buckets.
+
+        Parameters
+        -----------
+        args : dictionary, find items in the ``march`` collection.
+
+        Returns
+        --------
+        archs : the stored architecture content, False if nothing was found
+            or reading failed.
+        fid : the Buckets ID of the architecture, False in the same cases.
+        """
+        record = self.db.march.find_one(args)
+        if record is None:
+            print("[TensorDB] FAIL! Cannot find: {}".format(args))
+            print ("no idtem")
+            return False, False
+
+        fid = record['fid']
+        print(record)
+        print(fid)
+
+        try:
+            content = self.archfs.get(fid).read()
+        except Exception as e:
+            print("exception")
+            print(e)
+            return False, False
+        return content, fid
+
+    @AutoFill
+    def save_job(self, script=None, args={}):
+        """Save the job.
+
+        When ``script`` is given, the file's bytes are stored in the job
+        document under ``script`` and its name under ``script_name``. The
+        document is written with an upsert that uses ``args`` as both filter
+        and replacement.
+
+        Parameters
+        -----------
+        script : a script file name or None.
+        args : dictionary, items to save; updated in place.
+
+        Returns
+        --------
+        The result object of ``Job.replace_one``.
+
+        Examples
+        ---------
+        >>> # Save your job
+        >>> db.save_job('your_script.py', {'job_id': 1, 'learning_rate': 0.01, 'n_units': 100})
+        >>> # Run your job
+        >>> temp = db.find_one_job(args={'job_id': 1})
+        >>> print(temp['learning_rate'])
+        ... 0.01
+        >>> import _your_script
+        ... running your script
+        """
+        self.__autofill(args)
+        if script is not None:
+            # Close the handle deterministically instead of leaving it to
+            # the garbage collector.
+            with open(script, 'rb') as f:
+                _script = f.read()
+            args.update({'script': _script, 'script_name': script})
+        _result = self.db.Job.replace_one(args, args, upsert=True)
+        print("[TensorDB] Save Job: script={}, args={}".format(script, args))
+        return _result
+
+    @AutoFill
+    def find_one_job(self, args={}):
+        """ Find one job from MongoDB Job Collections.
+
+        If the job document has a ``script_name``, the stored ``script`` is
+        written to a file named ``'_' + script_name``.
+
+        Parameters
+        ----------
+        args : dictionary, find items.
+
+        Returns
+        --------
+        dictionary : contains all meta data and script, return False if
+            nothing found.
+        """
+        temp = self.db.Job.find_one(args)
+
+        if temp is None:
+            print("[TensorDB] FAIL! Cannot find any: {}".format(args))
+            return False
+
+        if 'script_name' in temp:
+            # ``with`` closes the file even when the write raises.
+            with open('_' + temp['script_name'], 'wb') as f:
+                f.write(temp['script'])
+        print("[TensorDB] Find Job: {}".format(args))
+
+        return temp
+
+    def push_job(self,margs, wargs,dargs,epoch):
+        """Queue a job document in the ``JOBS`` collection.
+
+        Parameters
+        -----------
+        margs : dictionary, query passed to ``load_model_architecture``.
+        wargs : dictionary, query passed to ``find_one_params``.
+        dargs : stored unchanged under ``dargs``.
+        epoch : stored unchanged under ``epoch``.
+        """
+        # Only the Buckets IDs are stored; the loaded contents are not used.
+        _, mid = self.load_model_architecture(margs)
+        _, wid = self.find_one_params(wargs)
+        job = {
+            "weight": wid,
+            "model": mid,
+            "dargs": dargs,
+            "epoch": epoch,
+            "time": datetime.utcnow(),
+            "Running": False,
+        }
+        self.__autofill(job)
+        self.db.JOBS.insert_one(job)
+
+    def peek_job(self):
+        """Fetch one job of this study whose ``Running`` flag is False.
+
+        Returns
+        --------
+        False if no such job exists, otherwise the tuple
+        ``(job id, architecture content, parameters, dargs, epoch)``.
+        """
+        query = {'Running': False}
+        self.__autofill(query)
+        job = self.db.JOBS.find_one(query)
+        print(job)
+        if job is None:
+            return False
+
+        raw_weights = self.paramsfs.get(job['weight']).read()
+        weights = self.__deserialization(raw_weights)
+        architecture = self.archfs.get(job['model']).read()
+
+        return job['_id'], architecture, weights, job["dargs"], job['epoch']
+
+    def run_job(self,jid):
+        """Mark job ``jid`` as running and record the current UTC time in ``Since``."""
+        selector = {'_id': jid}
+        change = {'$set': {'Running': True, "Since": datetime.utcnow()}}
+        self.db.JOBS.find_one_and_update(selector, change)
+
+    def del_job(self,jid):
+        """Record the current UTC time in ``Finished`` for job ``jid``.
+
+        The document is not removed; its ``Running`` flag is set to True.
+        """
+        selector = {'_id': jid}
+        change = {'$set': {'Running': True, "Finished": datetime.utcnow()}}
+        self.db.JOBS.find_one_and_update(selector, change)
+
+    def __str__(self):
+        """Return a two-line summary: a header, then the indented database object."""
+        header = "[TensorDB] Info:\n"
+        return header + "    " + str(self.db)
+
+    # def save_bulk_data(self, data=None, filename='filename'):
+    #     """ Put bulk data into TensorDB.datafs, return file ID.
+    #     When you have a very large data, you may like to save it into GridFS Buckets
+    #     instead of Collections, then when you want to load it, XXXX
+    #
+    #     Parameters
+    #     -----------
+    #     data : serialized data.
+    #     filename : string, GridFS Buckets.
+    #
+    #     References
+    #     -----------
+    #     - MongoDB find, xxxxx
+    #     """
+    #     s = time.time()
+    #     f_id = self.datafs.put(data, filename=filename)
+    #     print("[TensorDB] save_bulk_data: {} took: {}s".format(filename, round(time.time()-s, 2)))
+    #     return f_id
+    #
+    # def save_collection(self, data=None, collect_name='collect_name'):
+    #     """ Insert data into MongoDB Collections, return xx.
+    #
+    #     Parameters
+    #     -----------
+    #     data : serialized data.
+    #     collect_name : string, MongoDB collection name.
+    #
+    #     References
+    #     -----------
+    #     - MongoDB find, xxxxx
+    #     """
+    #     s = time.time()
+    #     rl = self.db[collect_name].insert_many(data)
+    #     print("[TensorDB] save_collection: {} took: {}s".format(collect_name, round(time.time()-s, 2)))
+    #     return rl
+    #
+    # def find(self, args={}, collect_name='collect_name'):
+    #     """ Find data from MongoDB Collections.
+    #
+    #     Parameters
+    #     -----------
+    #     args : dictionary, arguments for finding.
+    #     collect_name : string, MongoDB collection name.
+    #
+    #     References
+    #     -----------
+    #     - MongoDB find, xxxxx
+    #     """
+    #     s = time.time()
+    #
+    #     pc = self.db[collect_name].find(args)  # pymongo.cursor.Cursor object
+    #     flist = pc.distinct('f_id')
+    #     fldict = {}
+    #     for f in flist: # you may have multiple Buckets files
+    #         # fldict[f] = pickle.loads(self.datafs.get(f).read())
+    #         # s2 = time.time()
+    #         tmp = self.datafs.get(f).read()
+    #         # print(time.time()-s2)
+    #         fldict[f] = pickle.loads(tmp)
+    #         # print(time.time()-s2)
+    #         # exit()
+    #     # print(round(time.time()-s, 2))
+    #     data = [fldict[x['f_id']][x['id']] for x in pc]
+    #     data = np.asarray(data)
+    #     print("[TensorDB] find: {} get: {} took: {}s".format(collect_name, pc.count(), round(time.time()-s, 2)))
+    #     return data
+
+
+
+class DBLogger:
+    """Training callbacks that write per-batch and per-epoch logs to a database.
+
+    Parameters
+    ----------
+    db : object providing ``save_params``, ``valid_log`` and ``train_log``.
+    model : object whose ``Params`` attribute holds the parameters to save.
+    """
+    def __init__(self,db,model):
+        self.db=db
+        self.model=model
+
+    @staticmethod
+    def _to_scalar(value):
+        """Convert a one-element NumPy value into the matching Python scalar."""
+        # np.asscalar is deprecated and absent from current NumPy releases;
+        # ``.item()`` is its documented replacement.
+        return np.asarray(value).item()
+
+    def on_train_begin(self,logs=None):
+        """Print a start marker; ``logs`` is not used."""
+        print("start")
+
+    def on_train_end(self,logs=None):
+        """Print an end marker; ``logs`` is not used."""
+        print("end")
+
+    def on_epoch_begin(self,epoch,logs=None):
+        """Remember the epoch number and the time at which it started."""
+        self.epoch=epoch
+        self.et=time.time()
+        return
+
+    def on_epoch_end(self, epoch, logs=None):
+        """Save the model parameters and write a validation log entry.
+
+        ``logs`` must contain ``acc``; it is updated in place with ``epoch``,
+        ``time``, ``stepTime`` and ``params`` (the saved parameters' ID).
+        """
+        # A shared ``{}`` default would be mutated below and leak into later calls.
+        if logs is None:
+            logs = {}
+        # Duration of the epoch, measured from on_epoch_begin.
+        self.et=time.time()-self.et
+        print("ending")
+        print(epoch)
+        logs['epoch']=epoch
+        logs['time']=datetime.utcnow()
+        logs['stepTime']=self.et
+        logs['acc']=self._to_scalar(logs['acc'])
+        print(logs)
+
+        w=self.model.Params
+        fid=self.db.save_params(w,logs)
+        logs.update({'params':fid})
+        self.db.valid_log(logs)
+
+    def on_batch_begin(self, batch,logs=None):
+        """Remember the batch number and the time at which it started."""
+        self.t=time.time()
+        self.losses = []
+        self.batch=batch
+
+    def on_batch_end(self, batch, logs=None):
+        """Write a training log entry for the batch that just finished.
+
+        ``logs`` must contain ``acc``; it is updated in place with
+        ``step_time``, ``time``, ``epoch`` and ``batch``.
+        """
+        if logs is None:
+            logs = {}
+        # Duration of the batch, measured from on_batch_begin.
+        self.t2=time.time()-self.t
+        logs['acc']=self._to_scalar(logs['acc'])
+        logs['step_time']=self.t2
+        logs['time']=datetime.utcnow()
+        logs['epoch']=self.epoch
+        logs['batch']=self.batch
+        self.db.train_log(logs)
--- a/tensorlayer/files.py
+++ b/tensorlayer/files.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+import tensorflow as tf
+import os
+import numpy as np
+import re
+import sys
+import tarfile
+import gzip
+import zipfile
+from . import visualize
+from . import nlp
+import pickle
+from six.moves import urllib
+from six.moves import cPickle
+from six.moves import zip
+from tensorflow.python.platform import gfile
+
+
+## Load dataset functions
+def load_mnist_dataset(shape=(-1,784), path="data/mnist/"):
+    """Download MNIST if necessary and return it split into training,
+    validation and test sets.
+
+    The last 10000 examples of the training files are used as the validation
+    set. Images are returned as float32 (byte values divided by 256), labels
+    as int32.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of digit images, defaults to (-1,784)
+    path : string
+        Path to download data to, defaults to data/mnist/
+
+    Returns
+    --------
+    X_train, y_train, X_val, y_val, X_test, y_test
+
+    Examples
+    --------
+    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784))
+    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
+    """
+    mnist_url = 'http://yann.lecun.com/exdb/mnist/'
+
+    def load_mnist_images(path, filename):
+        # Fetch the archive if needed, then skip the 16-byte header.
+        filepath = maybe_download_and_extract(filename, path, mnist_url)
+        print(filepath)
+        with gzip.open(filepath, 'rb') as f:
+            raw = f.read()
+        pixels = np.frombuffer(raw, np.uint8, offset=16).reshape(shape)
+        # Scale the bytes by 1/256, i.e. into [0, 255/256].
+        return pixels / np.float32(256)
+
+    def load_mnist_labels(path, filename):
+        # Fetch the archive if needed, then skip the 8-byte header.
+        filepath = maybe_download_and_extract(filename, path, mnist_url)
+        with gzip.open(filepath, 'rb') as f:
+            raw = f.read()
+        return np.frombuffer(raw, np.uint8, offset=8)
+
+    print("Load or Download MNIST > {}".format(path))
+    train_images = load_mnist_images(path, 'train-images-idx3-ubyte.gz')
+    train_labels = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz')
+    test_images = load_mnist_images(path, 't10k-images-idx3-ubyte.gz')
+    test_labels = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz')
+
+    # The last 10000 training examples become the validation set.
+    val_images, val_labels = train_images[-10000:], train_labels[-10000:]
+    train_images, train_labels = train_images[:-10000], train_labels[:-10000]
+
+    return (
+        np.asarray(train_images, dtype=np.float32),
+        np.asarray(train_labels, dtype=np.int32),
+        np.asarray(val_images, dtype=np.float32),
+        np.asarray(val_labels, dtype=np.int32),
+        np.asarray(test_images, dtype=np.float32),
+        np.asarray(test_labels, dtype=np.int32),
+    )
+
+
+def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data/cifar10/', plotable=False, second=3):
+    """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
+    6000 images per class. There are 50000 training images and 10000 test images.
+
+    The dataset is divided into five training batches and one test batch, each with
+    10000 images. The test batch contains exactly 1000 randomly-selected images from
+    each class. The training batches contain the remaining images in random order,
+    but some training batches may contain more images from one class than another.
+    Between them, the training batches contain exactly 5000 images from each class.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of digit images: e.g. (-1, 3, 32, 32) , (-1, 32, 32, 3) , (-1, 32*32*3)
+    plotable : True, False
+        Whether to plot some image examples.
+    second : int
+        If ``plotable`` is True, ``second`` is the display time.
+    path : string
+        Path to download data to, defaults to data/cifar10/
+
+    Returns
+    --------
+    X_train, y_train, X_test, y_test
+        Images as float32 arrays of the requested ``shape``, labels as int32 arrays.
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=True)
+
+    Notes
+    ------
+    CIFAR-10 images can only be display without color change under uint8.
+    >>> X_train = np.asarray(X_train, dtype=np.uint8)
+    >>> plt.ion()
+    >>> fig = plt.figure(1232)
+    >>> count = 1
+    >>> for row in range(10):
+    >>>     for col in range(10):
+    >>>         a = fig.add_subplot(10, 10, count)
+    >>>         plt.imshow(X_train[count-1], interpolation='nearest')
+    >>>         plt.gca().xaxis.set_major_locator(plt.NullLocator())    # hide the ticks
+    >>>         plt.gca().yaxis.set_major_locator(plt.NullLocator())
+    >>>         count = count + 1
+    >>> plt.draw()
+    >>> plt.pause(3)
+
+    References
+    ----------
+    - `CIFAR website <https://www.cs.toronto.edu/~kriz/cifar.html>`_
+    - `Data download link <https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz>`_
+    - `Code references <https://teratail.com/questions/28932>`_
+    """
+
+    print("Load or Download cifar10 > {}".format(path))
+
+    #Helper function to unpickle the data
+    def unpickle(file):
+        # ``with`` closes the file even if unpickling fails.
+        with open(file, 'rb') as fp:
+            if sys.version_info.major == 2:
+                return pickle.load(fp)
+            # Python 3 needs an explicit encoding for these batch files.
+            return pickle.load(fp, encoding='latin-1')
+
+    filename = 'cifar-10-python.tar.gz'
+    url = 'https://www.cs.toronto.edu/~kriz/'
+    #Download and uncompress file
+    maybe_download_and_extract(filename, path, url, extract=True)
+
+    #Unpickle file and fill in data
+    X_train = None
+    y_train = []
+    for i in range(1,6):
+        data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i)))
+        if i == 1:
+            X_train = data_dic['data']
+        else:
+            X_train = np.vstack((X_train, data_dic['data']))
+        y_train += data_dic['labels']
+
+    test_data_dic = unpickle(os.path.join(path,  'cifar-10-batches-py/', "test_batch"))
+    X_test = test_data_dic['data']
+    y_test = np.array(test_data_dic['labels'])
+
+    if shape == (-1, 32, 32, 3):
+        # Channels-last layout: Fortran-order reshape, then swap the two
+        # spatial axes.
+        X_test = X_test.reshape(shape, order='F')
+        X_train = X_train.reshape(shape, order='F')
+        X_test = np.transpose(X_test, (0, 2, 1, 3))
+        X_train = np.transpose(X_train, (0, 2, 1, 3))
+    else:
+        # (-1, 3, 32, 32) and every other shape are plain reshapes.
+        X_test = X_test.reshape(shape)
+        X_train = X_train.reshape(shape)
+
+    y_train = np.array(y_train)
+
+    if plotable == True:
+        print('\nCIFAR-10')
+        import matplotlib.pyplot as plt
+        fig = plt.figure(1)
+
+        print('Shape of a training image: X_train[0]',X_train[0].shape)
+
+        plt.ion()       # interactive mode
+        count = 1
+        for row in range(10):
+            for col in range(10):
+                a = fig.add_subplot(10, 10, count)
+                if shape == (-1, 3, 32, 32):
+                    # imshow expects channels last
+                    plt.imshow(np.transpose(X_train[count-1], (1, 2, 0)), interpolation='nearest')
+                elif shape == (-1, 32, 32, 3):
+                    plt.imshow(X_train[count-1], interpolation='nearest')
+                else:
+                    raise Exception("Do not support the given 'shape' to plot the image examples")
+                plt.gca().xaxis.set_major_locator(plt.NullLocator())    # hide the ticks
+                plt.gca().yaxis.set_major_locator(plt.NullLocator())
+                count = count + 1
+        plt.draw()      # interactive mode
+        # Honour the documented display time instead of a fixed 3 seconds.
+        plt.pause(second)   # interactive mode
+
+        print("X_train:",X_train.shape)
+        print("y_train:",y_train.shape)
+        print("X_test:",X_test.shape)
+        print("y_test:",y_test.shape)
+
+    X_train = np.asarray(X_train, dtype=np.float32)
+    X_test = np.asarray(X_test, dtype=np.float32)
+    y_train = np.asarray(y_train, dtype=np.int32)
+    y_test = np.asarray(y_test, dtype=np.int32)
+
+    return X_train, y_train, X_test, y_test
+
+
+def load_ptb_dataset(path='data/ptb/'):
+    """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers,
+    including "Empirical Evaluation and Combination of Advanced Language
+    Modeling Techniques", "Recurrent Neural Network Regularization".
+
+    It consists of 929k training words, 73k validation words, and 82k test
+    words. It has 10k words in its vocabulary.
+
+    In "Recurrent Neural Network Regularization", regularized LSTMs of two
+    sizes are trained, denoted the medium LSTM and the large LSTM. Both have
+    two layers and are unrolled for 35 steps. The hidden states are
+    initialized to zero, and the final hidden states of the current minibatch
+    are used as the initial hidden state of the subsequent minibatch
+    (successive minibatches sequentially traverse the training set).
+    The size of each minibatch is 20.
+
+    The medium LSTM has 650 units per layer and its parameters are initialized
+    uniformly in [-0.05, 0.05]. 50% dropout is applied on the non-recurrent
+    connections. It is trained for 39 epochs with a learning rate of 1, and
+    after 6 epochs the rate is decreased by a factor of 1.2 after each epoch.
+    The norm of the gradients (normalized by minibatch size) is clipped at 5.
+
+    The large LSTM has 1500 units per layer and its parameters are initialized
+    uniformly in [-0.04, 0.04]. 65% dropout is applied on the non-recurrent
+    connections. It is trained for 55 epochs with a learning rate of 1; after
+    14 epochs the rate is reduced by a factor of 1.15 after each epoch. The
+    norm of the gradients (normalized by minibatch size) is clipped at 10.
+
+    Parameters
+    ----------
+    path : string
+        Path to download data to, defaults to data/ptb/
+
+    Returns
+    --------
+    train_data, valid_data, test_data, vocabulary size
+
+    Examples
+    --------
+    >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset()
+
+    Code References
+    ---------------
+    - ``tensorflow.models.rnn.ptb import reader``
+
+    Download Links
+    ---------------
+    - `Manual download <http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz>`_
+    """
+    print("Load or Download Penn TreeBank (PTB) dataset > {}".format(path))
+
+    # Download and unpack the archive, or reuse existing files.
+    maybe_download_and_extract('simple-examples.tgz', path,
+                               'http://www.fit.vutbr.cz/~imikolov/rnnlm/',
+                               extract=True)
+
+    data_path = os.path.join(path, 'simple-examples', 'data')
+    split_paths = [os.path.join(data_path, name)
+                   for name in ("ptb.train.txt", "ptb.valid.txt", "ptb.test.txt")]
+
+    # The vocabulary is built from the training split only.
+    word_to_id = nlp.build_vocab(nlp.read_words(split_paths[0]))
+
+    train_data, valid_data, test_data = [
+        nlp.words_to_word_ids(nlp.read_words(split_path), word_to_id)
+        for split_path in split_paths
+    ]
+
+    return train_data, valid_data, test_data, len(word_to_id)
+
+
+def load_matt_mahoney_text8_dataset(path='data/mm_test8/'):
+    """Download text8.zip from Matt Mahoney's website if it is not present,
+    checking it against the expected size, and return the first file in the
+    archive split into a list of words.
+    This dataset can be used for Word Embedding.
+
+    Parameters
+    ----------
+    path : string
+        Path to download data to, defaults to data/mm_test8/
+
+    Returns
+    --------
+    word_list : a list
+        the whitespace-separated words of the file.\n
+        e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...]
+
+    Examples
+    --------
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> print('Data size', len(words))
+    """
+
+    print("Load or Download matt_mahoney_text8 Dataset> {}".format(path))
+
+    filename = 'text8.zip'
+    maybe_download_and_extract(filename, path, 'http://mattmahoney.net/dc/',
+                               expected_bytes=31344016)
+
+    archive_path = os.path.join(path, filename)
+    with zipfile.ZipFile(archive_path) as archive:
+        first_member = archive.namelist()[0]
+        # NOTE(review): ZipFile.read returns bytes on Python 3, so the words
+        # are bytes objects there - confirm callers expect that.
+        word_list = archive.read(first_member).split()
+
+    return word_list
+
+
+def load_imdb_dataset(path='data/imdb/', nb_words=None, skip_top=0,
+              maxlen=None, test_split=0.2, seed=113,
+              start_char=1, oov_char=2, index_from=3):
+    """Load IMDB dataset
+
+    Parameters
+    ----------
+    path : string
+        Path to download data to, defaults to data/imdb/
+    nb_words : int or None
+        Word ids greater than or equal to this value count as
+        out-of-vocabulary. When omitted, the largest id in the data is used.
+    skip_top : int
+        Word ids smaller than this value count as out-of-vocabulary.
+    maxlen : int or None
+        When given, only sequences shorter than ``maxlen`` are kept.
+    test_split : float
+        Fraction of the data, taken from the end, returned as the test set.
+    seed : int
+        Seed used to shuffle sequences and labels identically.
+    start_char : int or None
+        When not None, prepended to every sequence.
+    oov_char : int or None
+        Replacement for out-of-vocabulary ids; when None those ids are
+        dropped from the sequences instead.
+    index_from : int
+        Offset added to every word id.
+
+    Returns
+    --------
+    X_train, y_train, X_test, y_test
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset(
+    ...                                 nb_words=20000, test_split=0.2)
+    >>> print('X_train.shape', X_train.shape)
+    ... (20000,)  [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..]
+    >>> print('y_train.shape', y_train.shape)
+    ... (20000,)  [1 0 0 ..., 1 0 1]
+
+    References
+    -----------
+    - `Modified from keras. <https://github.com/fchollet/keras/blob/master/keras/datasets/imdb.py>`_
+    """
+
+    filename = "imdb.pkl"
+    url = 'https://s3.amazonaws.com/text-datasets/'
+    maybe_download_and_extract(filename, path, url)
+
+    # ``with`` closes the file even if unpickling fails.
+    opener = gzip.open if filename.endswith(".gz") else open
+    with opener(os.path.join(path, filename), 'rb') as f:
+        X, labels = cPickle.load(f)
+
+    # Re-seeding before each shuffle applies the same permutation to both.
+    np.random.seed(seed)
+    np.random.shuffle(X)
+    np.random.seed(seed)
+    np.random.shuffle(labels)
+
+    if start_char is not None:
+        X = [[start_char] + [w + index_from for w in x] for x in X]
+    elif index_from:
+        X = [[w + index_from for w in x] for x in X]
+
+    if maxlen:
+        new_X = []
+        new_labels = []
+        for x, y in zip(X, labels):
+            if len(x) < maxlen:
+                new_X.append(x)
+                new_labels.append(y)
+        X = new_X
+        labels = new_labels
+    if not X:
+        raise Exception('After filtering for sequences shorter than maxlen=' +
+                        str(maxlen) + ', no sequence was kept. '
+                        'Increase maxlen.')
+    if not nb_words:
+        nb_words = max([max(x) for x in X])
+
+    # by convention, use 2 as OOV word
+    # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
+    if oov_char is not None:
+        X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X]
+    else:
+        # Without a replacement character, out-of-vocabulary ids are dropped,
+        # so keep exactly the ids the branch above would have left untouched.
+        X = [[w for w in x if skip_top <= w < nb_words] for x in X]
+
+    split_at = int(len(X) * (1 - test_split))
+    X_train = np.array(X[:split_at])
+    y_train = np.array(labels[:split_at])
+
+    X_test = np.array(X[split_at:])
+    y_test = np.array(labels[split_at:])
+
+    return X_train, y_train, X_test, y_test
+
+def load_nietzsche_dataset(path='data/nietzsche/'):
+    """Download the Nietzsche text if necessary and return its whole
+    content as one string.
+
+    Parameters
+    ----------
+    path : string
+        Path to download data to, defaults to data/nietzsche/
+
+    Returns
+    --------
+    words : string
+        The complete content of nietzsche.txt.
+
+    Examples
+    --------
+    >>> # see tutorial_generate_text.py
+    >>> words = tl.files.load_nietzsche_dataset()
+    >>> words = basic_clean_str(words)
+    >>> words = words.split()
+    """
+    print("Load or Download nietzsche dataset > {}".format(path))
+
+    filepath = maybe_download_and_extract(
+        "nietzsche.txt", path, 'https://s3.amazonaws.com/text-datasets/')
+
+    with open(filepath, "r") as text_file:
+        words = text_file.read()
+    return words
+
+def load_wmt_en_fr_dataset(path='data/wmt_en_fr/'):
+    """Download the English-to-French translation data from the WMT'15
+    Website (10^9-French-English corpus), plus the 2013 news test from
+    the same site as development set.
+    Returns the path prefixes of the training data and the development data;
+    the ``.fr`` and ``.en`` files share each prefix.
+
+    Parameters
+    ----------
+    path : string
+        Path to download data to, defaults to data/wmt_en_fr/
+
+    Returns
+    --------
+    train_path, dev_path
+
+    References
+    ----------
+    - Code modified from /tensorflow/models/rnn/translation/data_utils.py
+
+    Notes
+    -----
+    Usually, it will take a long time to download this dataset.
+    """
+    # URLs for WMT data.
+    _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/"
+    _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/"
+
+    def gunzip_file(gz_path, new_path):
+        """Decompress gz_path into new_path."""
+        print("Unpacking %s to %s" % (gz_path, new_path))
+        with gzip.open(gz_path, "rb") as compressed, open(new_path, "wb") as plain:
+            for chunk in compressed:
+                plain.write(chunk)
+
+    def get_wmt_enfr_train_set(path):
+        """Download and unpack the WMT en-fr training corpus."""
+        maybe_download_and_extract("training-giga-fren.tar", path,
+                                   _WMT_ENFR_TRAIN_URL, extract=True)
+        train_path = os.path.join(path, "giga-fren.release2.fixed")
+        for suffix in (".fr", ".en"):
+            gunzip_file(train_path + suffix + ".gz", train_path + suffix)
+        return train_path
+
+    def get_wmt_enfr_dev_set(path):
+        """Download the WMT en-fr development set and extract newstest2013."""
+        dev_file = maybe_download_and_extract("dev-v2.tgz", path,
+                                              _WMT_ENFR_DEV_URL, extract=False)
+        dev_name = "newstest2013"
+        dev_path = os.path.join(path, "newstest2013")
+        already_extracted = (gfile.Exists(dev_path + ".fr")
+                             and gfile.Exists(dev_path + ".en"))
+        if not already_extracted:
+            print("Extracting tgz file %s" % dev_file)
+            with tarfile.open(dev_file, "r:gz") as dev_tar:
+                fr_member = dev_tar.getmember("dev/" + dev_name + ".fr")
+                en_member = dev_tar.getmember("dev/" + dev_name + ".en")
+                # Rename the members so they extract without the "dev/" prefix.
+                fr_member.name = dev_name + ".fr"
+                en_member.name = dev_name + ".en"
+                dev_tar.extract(fr_member, path)
+                dev_tar.extract(en_member, path)
+        return dev_path
+
+    print("Load or Download WMT English-to-French translation > {}".format(path))
+
+    train_path = get_wmt_enfr_train_set(path)
+    dev_path = get_wmt_enfr_dev_set(path)
+
+    return train_path, dev_path
+
+
+## Load and save network
+def save_npz(save_list=None, name='model.npz', sess=None):
+    """Save a list of parameters into one .npz file. Use tl.files.load_npz() to restore.
+
+    Parameters
+    ----------
+    save_list : a list or None
+        Parameters to be saved, e.g. ``network.all_params``. None means an empty list.
+    name : a string
+        The name of the .npz file.
+    sess : None or Session
+        If given, the values are fetched with ``sess.run(save_list)``.
+        Otherwise ``eval()`` is called on every parameter.
+
+    Examples
+    --------
+    >>> tl.files.save_npz(network.all_params, name='model_test.npz', sess=sess)
+    ... [*] model_test.npz saved
+    >>> load_params = tl.files.load_npz(name='model_test.npz')
+    >>> put parameters into a TensorLayer network, please see assign_params()
+
+    Notes
+    -----
+    - All values are stored in the file under the single key ``params``.
+    - If evaluating a parameter fails (usually because no default session is
+      available), a hint and the error are printed and nothing is written, so an
+      existing file is not replaced by an empty one.
+
+    References
+    ----------
+    - `Saving dictionary using numpy <http://stackoverflow.com/questions/22315595/saving-dictionary-of-header-information-using-numpy-savez>`_
+    """
+    if save_list is None:
+        save_list = []
+    if sess is not None:
+        save_list_var = sess.run(save_list)
+    else:
+        try:
+            save_list_var = [value.eval() for value in save_list]
+        except Exception as err:
+            print(" Fail to save model, Hint: pass the session into this function, save_npz(network.all_params, name='model.npz', sess=sess)")
+            print(" Error: %s" % err)
+            return
+    np.savez(name, params=save_list_var)
+    print("[*] %s saved" % name)
+
+def save_npz_dict(save_list=None, name='model.npz', sess=None):
+    """Save parameters as a dictionary into a .npz file. Use tl.files.load_npz_dict() to restore.
+
+    Parameters
+    ----------
+    save_list : a list or None
+        Parameters to be saved. None means an empty list.
+    name : a string
+        The name of the .npz file.
+    sess : None or Session
+        If given, the values are fetched with ``sess.run(save_list)``.
+        Otherwise ``eval()`` is called on every parameter.
+
+    Notes
+    -----
+    - Every parameter is stored under its own key, the string form of its index
+      in ``save_list`` ('0', '1', ...). This avoids a potential broadcasting
+      error raised by numpy when arrays of different shapes are packed together.
+    - If evaluating a parameter fails (usually because no default session is
+      available), a hint and the error are printed and nothing is written.
+    """
+    if save_list is None:
+        save_list = []
+    if sess is not None:
+        save_list_var = sess.run(save_list)
+    else:
+        try:
+            save_list_var = [value.eval() for value in save_list]
+        except Exception as err:
+            print(" Fail to save model, Hint: pass the session into this function, save_npz_dict(network.all_params, name='model.npz', sess=sess)")
+            print(" Error: %s" % err)
+            return
+    save_var_dict = {str(idx): val for idx, val in enumerate(save_list_var)}
+    np.savez(name, **save_var_dict)
+    print("[*] %s saved" % name)
+
+def load_npz(path='', name='model.npz'):
+    """Load the parameters of a Model saved by tl.files.save_npz().
+
+    Parameters
+    ----------
+    path : a string
+        Folder path to .npz file. A trailing path separator is optional.
+    name : a string
+        The name of the .npz file.
+
+    Returns
+    --------
+    params : numpy array
+        The array stored under the key ``params``; iterating over it gives the
+        parameters in order.
+
+    Examples
+    --------
+    - See save_npz and assign_params
+
+    Notes
+    -----
+    The file is opened with ``allow_pickle=True`` because parameters of different
+    shapes are stored as an object array, which numpy can only restore through
+    pickle. Only load files from sources you trust.
+
+    References
+    ----------
+    - `Saving dictionary using numpy <http://stackoverflow.com/questions/22315595/saving-dictionary-of-header-information-using-numpy-savez>`_
+    """
+    # os.path.join keeps path='' and path='folder/' working as before and also
+    # accepts a folder path without a trailing separator.
+    with np.load(os.path.join(path, name), allow_pickle=True) as d:
+        # Indexing reads the whole array, so the file can be closed afterwards.
+        return d['params']
+
+def load_npz_dict(path='', name='model.npz'):
+    """Load the parameters of a Model saved by tl.files.save_npz_dict().
+
+    Parameters
+    ----------
+    path : a string
+        Folder path to .npz file. A trailing path separator is optional.
+    name : a string
+        The name of the .npz file.
+
+    Returns
+    --------
+    params : list
+        A list of parameters, ordered by the integer value of their keys.
+    """
+    with np.load(os.path.join(path, name)) as d:
+        # Keys are the stringified indices written by save_npz_dict; sort them
+        # numerically so that '10' comes after '2'.
+        ordered_keys = sorted(d.files, key=int)
+        return [d[key] for key in ordered_keys]
+
+def assign_params(sess, params, network):
+    """Build (and optionally run) the ops that copy ``params`` into a TensorLayer network.
+
+    Parameters
+    ----------
+    sess : TensorFlow Session or None
+        When not None, the assign ops are run with ``sess.run``.
+    params : a list
+        A list of parameters in order.
+    network : a :class:`Layer` class
+        The network to be assigned; ``params[i]`` goes to ``network.all_params[i]``.
+
+    Returns
+    --------
+    ops : list
+        A list of tf ops in order that assign params. Support sess.run(ops) manually.
+
+    Examples
+    --------
+    >>> Save your network as follow:
+    >>> tl.files.save_npz(network.all_params, name='model_test.npz')
+    >>> network.print_params()
+    ...
+    ... Next time, load and assign your network as follow:
+    >>> tl.layers.initialize_global_variables(sess)
+    >>> load_params = tl.files.load_npz(name='model_test.npz')
+    >>> tl.files.assign_params(sess, load_params, network)
+    >>> network.print_params()
+
+    References
+    ----------
+    - `Assign value to a TensorFlow variable <http://stackoverflow.com/questions/34220532/how-to-assign-value-to-a-tensorflow-variable>`_
+    """
+    # Index into all_params (rather than zip) so that surplus params still raise.
+    assign_ops = [network.all_params[position].assign(value)
+                  for position, value in enumerate(params)]
+    if sess is None:
+        return assign_ops
+    sess.run(assign_ops)
+    return assign_ops
+
+def load_and_assign_npz(sess=None, name=None, network=None):
+    """Load model from npz and assign to a network.
+
+    Parameters
+    -------------
+    sess : TensorFlow Session
+        Must not be None.
+    name : string
+        Model path.
+    network : a :class:`Layer` class
+        The network to be assigned. Must not be None.
+
+    Returns
+    --------
+    The ``network`` after its parameters were assigned, or False if the file
+    ``name`` does not exist.
+
+    Examples
+    ---------
+    >>> tl.files.load_and_assign_npz(sess=sess, name='net.npz', network=net)
+    """
+    assert network is not None
+    assert sess is not None
+    if os.path.exists(name):
+        assign_params(sess, load_npz(name=name), network)
+        print("[*] Load {} SUCCESS!".format(name))
+        return network
+    print("[!] Load {} failed!".format(name))
+    return False
+
+# Load and save variables
+def save_any_to_npy(save_dict={}, name='file.npy'):
+    """Write ``save_dict`` to the .npy file ``name`` with ``np.save``.
+
+    Examples
+    ---------
+    >>> tl.files.save_any_to_npy(save_dict={'data': ['a','b']}, name='test.npy')
+    >>> data = tl.files.load_npy_to_any(name='test.npy')
+    >>> print(data)
+    ... {'data': ['a','b']}
+    """
+    np.save(file=name, arr=save_dict)
+
+def load_npy_to_any(path='', name='file.npy'):
+    """Load a .npy file written by save_any_to_npy().
+
+    Parameters
+    ----------
+    path : a string
+        Folder path to the .npy file.
+    name : a string
+        The name of the .npy file.
+
+    Returns
+    --------
+    The stored Python object when the file holds a single item (e.g. a dict),
+    otherwise the loaded array itself.
+
+    Raises
+    ------
+    SystemExit : if the file cannot be loaded; a message is printed first.
+
+    Notes
+    -----
+    The file is opened with ``allow_pickle=True`` because arbitrary Python
+    objects are stored through pickle. Only load files from sources you trust.
+
+    Examples
+    ---------
+    - see save_any_to_npy()
+    """
+    file_path = os.path.join(path, name)
+    try:
+        loaded = np.load(file_path, allow_pickle=True)
+    except Exception:
+        print("[!] Fail to load %s" % file_path)
+        # Still terminates via SystemExit as before, but with a failure status.
+        raise SystemExit(1)
+    try:
+        # A single stored object comes back wrapped in an array; unwrap it.
+        return loaded.item()
+    except (AttributeError, ValueError):
+        # More than one element (or not an array at all): return it unchanged.
+        return loaded
+
+
+# Visualizing npz files
+def npz_to_W_pdf(path=None, regx=r'w1pre_[0-9]+\.(npz)'):
+    r"""Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().
+
+    Parameters
+    ----------
+    path : a string or None
+        A folder path to npz files. None means the current working directory.
+    regx : a string
+        Regx for the file name.
+
+    Examples
+    --------
+    >>> Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.
+    >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)')
+    """
+    if path is None:
+        path = os.getcwd()
+    # Make sure the folder ends with a separator so that the file name is
+    # appended correctly however load_npz combines folder and name.
+    folder = os.path.join(path, '')
+    file_list = load_file_list(path=path, regx=regx)
+    for f in file_list:
+        W = load_npz(folder, f)[0]
+        base_name = f.split('.')[0]
+        print("%s --> %s" % (f, base_name + '.pdf'))
+        visualize.W(W, second=10, saveable=True, name=base_name, fig_idx=2012)
+
+
+## Helper functions
+def load_file_list(path=None, regx=r'\.npz', printable=True):
+    r"""Return a file list in a folder by given a path and regular expression.
+
+    Parameters
+    ----------
+    path : a string or None
+        A folder path. None (or any other empty value) means the current
+        working directory.
+    regx : a string
+        The regx of file name; a name is kept when ``re.search`` finds a match.
+    printable : boolean, whether to print the files information.
+
+    Returns
+    --------
+    A list of the matching names, in the order given by ``os.listdir``.
+
+    Examples
+    ----------
+    >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)')
+    """
+    if not path:
+        path = os.getcwd()
+    pattern = re.compile(regx)
+    return_list = [f for f in os.listdir(path) if pattern.search(f)]
+    if printable:
+        print('Match file list = %s' % return_list)
+        print('Number of files = %d' % len(return_list))
+    return return_list
+
+def load_folder_list(path=""):
+    """List the sub-folders of a folder.
+
+    Parameters
+    ----------
+    path : a string
+        A folder path.
+
+    Returns
+    --------
+    A list with ``path`` joined to the name of every entry that is a directory.
+    """
+    folders = []
+    for entry in os.listdir(path):
+        candidate = os.path.join(path, entry)
+        if os.path.isdir(candidate):
+            folders.append(candidate)
+    return folders
+
+def exists_or_mkdir(path, verbose=True):
+    """Make sure the folder ``path`` exists and report whether it was already there.
+
+    Parameters
+    ----------
+    path : a string
+        A folder path.
+    verbose : boolean
+        If True, prints results, default is True
+
+    Returns
+    --------
+    True if the path already exists; otherwise the folder (including missing
+    parent folders) is created and False is returned.
+
+    Examples
+    --------
+    >>> tl.files.exists_or_mkdir("checkpoints/train")
+    """
+    if os.path.exists(path):
+        if verbose:
+            print("[!] %s exists ..." % path)
+        return True
+    if verbose:
+        print("[*] creates %s ..." % path)
+    os.makedirs(path)
+    return False
+
+def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None):
+    """Checks if file exists in working_directory otherwise tries to download the file,
+    and optionally also tries to extract the file if format is ".zip" or ".tar"
+
+    Parameters
+    ----------
+    filename : string
+        The name of the (to be) downloaded file.
+    working_directory : string
+        A folder path to search for the file in and download the file to.
+        It is created if it does not exist.
+    url_source : string
+        The URL prefix to download from; ``filename`` is appended to it.
+    extract : bool, defaults to False
+        If True, tries to uncompress the downloaded file if it is a tar
+        (".tar", ".tar.gz", ".tar.bz2") or ".zip" file
+    expected_bytes : int/None
+        If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
+        defaults to None which corresponds to no check being performed
+
+    Returns
+    ----------
+    filepath of the downloaded file, i.e. ``working_directory`` joined with ``filename``
+
+    Notes
+    -----
+    - The size check and the extraction only happen right after a download;
+      a file that is already present is returned untouched.
+    - Archives are unpacked with ``extractall``, which trusts the member paths
+      stored in the archive. Only use this with archives from trusted sources.
+
+    Examples
+    --------
+    >>> down_file = tl.files.maybe_download_and_extract(filename = 'train-images-idx3-ubyte.gz',
+                                                        working_directory = 'data/',
+                                                        url_source = 'http://yann.lecun.com/exdb/mnist/')
+    >>> tl.files.maybe_download_and_extract(filename = 'ADEChallengeData2016.zip',
+                                            working_directory = 'data/',
+                                            url_source = 'http://sceneparsing.csail.mit.edu/data/',
+                                            extract=True)
+    """
+    # We first define a download function, supporting both Python 2 and 3.
+    def _download(filename, working_directory, url_source):
+        def _dlProgress(count, blockSize, totalSize):
+            if(totalSize != 0):
+                percent = float(count * blockSize) / float(totalSize) * 100.0
+                sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent)
+                sys.stdout.flush()
+        if sys.version_info[0] == 2:
+            from urllib import urlretrieve
+        else:
+            from urllib.request import urlretrieve
+        filepath = os.path.join(working_directory, filename)
+        urlretrieve(url_source+filename, filepath, reporthook=_dlProgress)
+
+    exists_or_mkdir(working_directory, verbose=False)
+    filepath = os.path.join(working_directory, filename)
+
+    if not os.path.exists(filepath):
+        _download(filename, working_directory, url_source)
+        print()
+        statinfo = os.stat(filepath)
+        print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
+        if expected_bytes is not None and expected_bytes != statinfo.st_size:
+            raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
+        if extract:
+            if tarfile.is_tarfile(filepath):
+                print('Trying to extract tar file')
+                # Use a context manager so the archive handle is always closed.
+                with tarfile.open(filepath, 'r') as tar:
+                    tar.extractall(working_directory)
+                print('... Success!')
+            elif zipfile.is_zipfile(filepath):
+                print('Trying to extract zip file')
+                with zipfile.ZipFile(filepath) as zf:
+                    zf.extractall(working_directory)
+                print('... Success!')
+            else:
+                print("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported")
+    return filepath
--- a/tensorlayer/iterate.py
+++ b/tensorlayer/iterate.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import numpy as np
+from six.moves import xrange
+
+def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
+    """Generate a generator that takes a group of examples in numpy.array and
+    their labels, and yields the examples and labels batch by batch.
+
+    Parameters
+    ----------
+    inputs : numpy.array
+        (X) The input features, every row is a example.
+    targets : numpy.array
+        (y) The labels of inputs, every row is a example.
+    batch_size : int
+        The batch size.
+    shuffle : boolean
+        Whether to visit the examples in a random order (one permutation drawn
+        with ``np.random.shuffle``) instead of the given order.
+
+    Notes
+    -----
+    Only full batches are produced; examples left over after the last full
+    batch are not returned.
+
+    Hints
+    -------
+    - If you have two inputs, e.g. X1 (1000, 100) and X2 (1000, 80), you can ``np.hstack((X1, X2))``
+    into (1000, 180) and feed into ``inputs``, then you can split a batch of X1 and X2.
+
+    Examples
+    --------
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> y = np.asarray([0,1,2,3,4,5])
+    >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False):
+    >>>     print(batch)
+    ... (array([['a', 'a'],
+    ...        ['b', 'b']],
+    ...         dtype='<U1'), array([0, 1]))
+    ... (array([['c', 'c'],
+    ...        ['d', 'd']],
+    ...         dtype='<U1'), array([2, 3]))
+    ... (array([['e', 'e'],
+    ...        ['f', 'f']],
+    ...         dtype='<U1'), array([4, 5]))
+    """
+    assert len(inputs) == len(targets)
+    n_examples = len(inputs)
+    order = None
+    if shuffle:
+        order = np.arange(n_examples)
+        np.random.shuffle(order)
+    for lo in range(0, n_examples - batch_size + 1, batch_size):
+        hi = lo + batch_size
+        # Shuffled: pick rows through the permutation; otherwise take a plain slice.
+        picked = order[lo:hi] if shuffle else slice(lo, hi)
+        yield inputs[picked], targets[picked]
+
+def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
+    """Generate a generator that return a batch of sequence inputs and targets.
+    If ``batch_size = 100, seq_length = 5``, one return will have ``500`` rows (examples).
+
+    Every batch holds ``batch_size`` windows of ``seq_length`` consecutive rows;
+    consecutive windows start ``stride`` rows apart, and the windows are
+    concatenated along the first axis before being yielded.
+
+    Examples
+    --------
+    - Synced sequence input and output.
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> y = np.asarray([0, 1, 2, 3, 4, 5])
+    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1):
+    >>>     print(batch)
+    ... (array([['a', 'a'],
+    ...         ['b', 'b'],
+    ...         ['b', 'b'],
+    ...         ['c', 'c']],
+    ...         dtype='<U1'), array([0, 1, 1, 2]))
+    ... (array([['c', 'c'],
+    ...         ['d', 'd'],
+    ...         ['d', 'd'],
+    ...         ['e', 'e']],
+    ...         dtype='<U1'), array([2, 3, 3, 4]))
+
+    - Many to One
+    >>> return_last = True
+    >>> num_steps = 2
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> Y = np.asarray([0,1,2,3,4,5])
+    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1):
+    >>>     x, y = batch
+    >>>     if return_last:
+    >>>         tmp_y = y.reshape((-1, num_steps) + y.shape[1:])
+    >>>     y = tmp_y[:, -1]
+    >>>     print(x, y)
+    ... [['a' 'a']
+    ... ['b' 'b']
+    ... ['b' 'b']
+    ... ['c' 'c']] [1 2]
+    ... [['c' 'c']
+    ... ['d' 'd']
+    ... ['d' 'd']
+    ... ['e' 'e']] [3 4]
+    """
+    assert len(inputs) == len(targets)
+    hop = batch_size * stride  # rows advanced between two consecutive batches
+    n_loads = hop + (seq_length - stride)  # rows touched by one batch
+    for batch_start in range(0, len(inputs) - n_loads + 1, hop):
+        input_tail, target_tail = inputs.shape[1:], targets.shape[1:]
+        window_shape = (batch_size, seq_length)
+        seq_inputs = np.zeros(window_shape + input_tail, dtype=inputs.dtype)
+        seq_targets = np.zeros(window_shape + target_tail, dtype=targets.dtype)
+        for row in range(batch_size):
+            first = batch_start + row * stride
+            window = slice(first, first + seq_length)
+            seq_inputs[row] = inputs[window]
+            seq_targets[row] = targets[window]
+        # Merge the (batch, sequence) axes into one leading axis.
+        yield (seq_inputs.reshape((-1,) + input_tail),
+               seq_targets.reshape((-1,) + target_tail))
+
+def seq_minibatches2(inputs, targets, batch_size, num_steps):
+    """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and
+    the target context by the given batch_size and num_steps (sequence_length),
+    see ``PTB tutorial``. In TensorFlow's tutorial, this generates the batch_size pointers into the raw
+    PTB data, and allows minibatch iteration along these pointers.
+
+    Parameters
+    ----------
+    inputs : a list or numpy.array
+            the context in list format; note that context usually be
+            represented by splitting by space, and then convert to unique
+            word IDs. Extra dimensions after the first one (e.g. images) are kept.
+    targets : a list or numpy.array
+            the context in list format; note that context usually be
+            represented by splitting by space, and then convert to unique
+            word IDs.
+    batch_size : int
+            the batch size.
+    num_steps : int
+            the number of unrolls. i.e. sequence_length
+
+    Yields
+    ------
+    Pairs of the batched data. The inputs have shape [batch_size, num_steps] plus
+    any extra input dimensions and keep the dtype of ``inputs``; the targets are
+    a float matrix of shape [batch_size, num_steps].
+
+    Raises
+    ------
+    ValueError : if batch_size or num_steps are too high.
+
+    Examples
+    --------
+    >>> X = [i for i in range(20)]
+    >>> Y = [i for i in range(20,40)]
+    >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3):
+    ...     x, y = batch
+    ...     print(x, y)
+    ...
+    ... [[ 0  1  2]
+    ... [10 11 12]]
+    ... [[ 20.  21.  22.]
+    ... [ 30.  31.  32.]]
+    ...
+    ... [[ 3  4  5]
+    ... [13 14 15]]
+    ... [[ 23.  24.  25.]
+    ... [ 33.  34.  35.]]
+    ...
+    ... [[ 6  7  8]
+    ... [16 17 18]]
+    ... [[ 26.  27.  28.]
+    ... [ 36.  37.  38.]]
+
+    Code References
+    ---------------
+    - ``tensorflow/models/rnn/ptb/reader.py``
+    """
+    assert len(inputs) == len(targets)
+    # Accept plain Python lists as documented: .shape and .dtype are needed below.
+    inputs = np.asarray(inputs)
+    data_len = len(inputs)
+    batch_len = data_len // batch_size
+    data = np.zeros((batch_size, batch_len) + inputs.shape[1:],
+                    dtype=inputs.dtype)
+    data2 = np.zeros([batch_size, batch_len])
+
+    # Row i holds the i-th contiguous chunk of batch_len examples.
+    for i in range(batch_size):
+        data[i] = inputs[batch_len * i:batch_len * (i + 1)]
+        data2[i] = targets[batch_len * i:batch_len * (i + 1)]
+
+    epoch_size = (batch_len - 1) // num_steps
+
+    if epoch_size == 0:
+        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
+
+    for i in range(epoch_size):
+        x = data[:, i*num_steps:(i+1)*num_steps]
+        x2 = data2[:, i*num_steps:(i+1)*num_steps]
+        yield (x, x2)
+
+
+def ptb_iterator(raw_data, batch_size, num_steps):
+    """
+    Generate a generator that iterates on a list of words, see PTB tutorial. Yields (Returns) the source contexts and
+    the target context by the given batch_size and num_steps (sequence_length).
+
+    e.g. x = [0, 1, 2]  y = [1, 2, 3] , when batch_size = 1, num_steps = 3,
+    raw_data = [i for i in range(100)]
+
+    In TensorFlow's tutorial, this generates batch_size pointers into the raw
+    PTB data, and allows minibatch iteration along these pointers.
+
+    Parameters
+    ----------
+    raw_data : a list
+            the context in list format; note that context usually be
+            represented by splitting by space, and then convert to unique
+            word IDs. It is converted to an int32 array.
+    batch_size : int
+            the batch size.
+    num_steps : int
+            the number of unrolls. i.e. sequence_length
+
+    Yields
+    ------
+    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
+    The second element of the tuple is the same data time-shifted to the
+    right by one.
+
+    Raises
+    ------
+    ValueError : if batch_size or num_steps are too high.
+
+    Examples
+    --------
+    >>> train_data = [i for i in range(20)]
+    >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3):
+    >>>     x, y = batch
+    >>>     print(x, y)
+    ... [[ 0  1  2] <---x                       1st subset/ iteration
+    ...  [10 11 12]]
+    ... [[ 1  2  3] <---y
+    ...  [11 12 13]]
+    ...
+    ... [[ 3  4  5]  <--- 1st batch input       2nd subset/ iteration
+    ...  [13 14 15]] <--- 2nd batch input
+    ... [[ 4  5  6]  <--- 1st batch target
+    ...  [14 15 16]] <--- 2nd batch target
+    ...
+    ... [[ 6  7  8]                             3rd subset/ iteration
+    ...  [16 17 18]]
+    ... [[ 7  8  9]
+    ...  [17 18 19]]
+
+    Code References
+    ----------------
+    - ``tensorflow/models/rnn/ptb/reader.py``
+    """
+    word_ids = np.array(raw_data, dtype=np.int32)
+
+    batch_len = len(word_ids) // batch_size
+    # Drop the tail that does not fill a whole row, then lay the words out as
+    # batch_size contiguous rows of batch_len words each.
+    data = word_ids[:batch_size * batch_len].reshape(batch_size, batch_len)
+
+    # One column is held back so that the shifted target window always exists.
+    epoch_size = (batch_len - 1) // num_steps
+
+    if epoch_size == 0:
+        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
+
+    for step in range(epoch_size):
+        lo = step * num_steps
+        hi = lo + num_steps
+        yield (data[:, lo:hi], data[:, lo + 1:hi + 1])
+
+
+
+# def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1):
+#     """
+#     Input a group of example in 2D numpy.array and their labels.
+#     Return the examples and labels by the given batchsize, sequence_length.
+#     Use for RNN.
+#
+#     Parameters
+#     ----------
+#     inputs : numpy.array
+#         (X) The input features, every row is a example.
+#     targets : numpy.array
+#         (y) The labels of inputs, every row is a example.
+#     batchsize : int
+#         The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0
+#     sequence_length : int
+#         The sequence length
+#     stride : int
+#         The stride step
+#
+#     Examples
+#     --------
+#     >>> sequence_length = 2
+#     >>> batch_size = 4
+#     >>> stride = 1
+#     >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]])
+#     >>> y_train = np.asarray(['0','1','2','3','4','5','6','7'])
+#     >>> print('X_train = %s' % X_train)
+#     >>> print('y_train = %s' % y_train)
+#     >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride):
+#     >>>     inputs, targets = batch
+#     >>>     print(inputs)
+#     >>>     print(targets)
+#     ... [[ 1.  2.  3.]
+#     ... [ 4.  5.  6.]
+#     ... [ 4.  5.  6.]
+#     ... [ 7.  8.  9.]]
+#     ... [1 2]
+#     ... [[  4.   5.   6.]
+#     ... [  7.   8.   9.]
+#     ... [  7.   8.   9.]
+#     ... [ 10.  11.  12.]]
+#     ... [2 3]
+#     ... ...
+#     ... [[ 16.  17.  18.]
+#     ... [ 19.  20.  21.]
+#     ... [ 19.  20.  21.]
+#     ... [ 22.  23.  24.]]
+#     ... [6 7]
+#     """
+#     print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride))
+#     assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length
+#     # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\
+#     # batch_size is number of examples rather than number of targets'
+#
+#     # print(inputs.shape, len(inputs), len(inputs[0]))
+#
+#     n_targets = int(batch_size/sequence_length)
+#     # n_targets = int(np.ceil(batch_size/sequence_length))
+#     X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32)
+#     y = np.zeros(shape=(1, n_targets), dtype=np.int32)
+#
+#     for idx in range(sequence_length, len(inputs), stride):  # go through all example during 1 epoch
+#         for n in range(n_targets):   # for num of target
+#             X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n]))
+#             y[0][n] = targets[idx-1+n]
+#             # y = np.vstack((y, targets[idx-1+n]))
+#         yield X, y[0]
+#         X = np.empty(shape=(0,len(inputs[0])))
+#         # y = np.empty(shape=(1,0))
+#
+#
+# def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): #
+#     """
+#     Input a group of example in 4D numpy.array and their labels.
+#     Return the examples and labels by the given batchsize, sequence_length.
+#     Use for RNN.
+#
+#     Parameters
+#     ----------
+#     inputs : numpy.array
+#         (X) The input features, every row is a example.
+#     targets : numpy.array
+#         (y) The labels of inputs, every row is a example.
+#     batchsize : int
+#         The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0
+#     sequence_length : int
+#         The sequence length
+#     stride : int
+#         The stride step
+#
+#     Examples
+#     --------
+#     >>> sequence_length = 2
+#     >>> batch_size = 2
+#     >>> stride = 1
+#     >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]])
+#     >>> y_train = np.asarray(['0','1','2','3','4','5','6','7'])
+#     >>> X_train = np.expand_dims(X_train, axis=1)
+#     >>> X_train = np.expand_dims(X_train, axis=3)
+#     >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride):
+#     >>>     inputs, targets = batch
+#     >>>     print(inputs)
+#     >>>     print(targets)
+#     ... [[[[ 1.]
+#     ...    [ 2.]
+#     ...    [ 3.]]]
+#     ... [[[ 4.]
+#     ...   [ 5.]
+#     ...   [ 6.]]]]
+#     ... [1]
+#     ... [[[[ 4.]
+#     ...    [ 5.]
+#     ...    [ 6.]]]
+#     ... [[[ 7.]
+#     ...   [ 8.]
+#     ...   [ 9.]]]]
+#     ... [2]
+#     ... ...
+#     ... [[[[ 19.]
+#     ...    [ 20.]
+#     ...    [ 21.]]]
+#     ... [[[ 22.]
+#     ...   [ 23.]
+#     ...   [ 24.]]]]
+#     ... [7]
+#     """
+#     print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride))
+#     assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length
+#     # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\
+#     # batch_size is number of X_train rather than number of targets'
+#     assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation'
+#
+#     n_example, n_channels, width, height = inputs.shape
+#     print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height))
+#
+#     n_targets = int(np.ceil(batch_size/sequence_length)) # actually batchsize/sequence_length + 1
+#     print(n_targets)
+#     X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32)
+#     # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32)
+#     y = np.zeros(shape=(1,n_targets), dtype=np.int32)
+#     # y = np.empty(shape=(0,1), dtype=np.float32)
+#     # time.sleep(2)
+#     for idx in range(sequence_length, n_example-n_targets+2, stride):  # go through all example during 1 epoch
+#         for n in range(n_targets):   # for num of target
+#             # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape)
+#             X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n]
+#             # X[n] = inputs[idx-sequence_length+n:idx+n]
+#             y[0][n] = targets[idx+n-1]
+#             # y = np.vstack((y, targets[idx-1+n]))
+#         # y = targets[idx: idx+n_targets]
+#         yield X, y[0]
--- a/tensorlayer/layers.py
+++ b/tensorlayer/layers.py
--- a/tensorlayer/nlp.py
+++ b/tensorlayer/nlp.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+
+import tensorflow as tf
+import os
+from sys import platform as _platform
+import collections
+import random
+import numpy as np
+import warnings
+from six.moves import xrange
+from tensorflow.python.platform import gfile
+import re
+
+## Iteration functions
+def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0):
+    """Generate a training batch for the Skip-Gram model.
+
+    Parameters
+    ----------
+    data : a list
+        To present context.
+    batch_size : an int
+        Batch size to return.
+    num_skips : an int
+        How many times to reuse an input to generate a label.
+    skip_window : an int
+        How many words to consider left and right.
+    data_index : an int
+        Index of the context location.
+        without using yield, this code use data_index to instead.
+
+    Returns
+    --------
+    batch : a list
+        Inputs
+    labels : a list
+        Labels
+    data_index : an int
+        Index of the context location.
+
+    Examples
+    --------
+    >>> Setting num_skips=2, skip_window=1, use the right and left words.
+    >>> In the same way, num_skips=4, skip_window=2 means use the nearby 4 words.
+
+    >>> data = [1,2,3,4,5,6,7,8,9,10,11]
+    >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0)
+    >>> print(batch)
+    ... [2 2 3 3 4 4 5 5]
+    >>> print(labels)
+    ... [[3]
+    ... [1]
+    ... [4]
+    ... [2]
+    ... [5]
+    ... [3]
+    ... [4]
+    ... [6]]
+
+    References
+    -----------
+    - `TensorFlow word2vec tutorial <https://www.tensorflow.org/versions/r0.9/tutorials/word2vec/index.html#vector-representations-of-words>`_
+    """
+    # global data_index   # you can put data_index outside the function, then
+    #       modify the global data_index in the function without return it.
+    # note: without using yield, this code use data_index to instead.
+    assert batch_size % num_skips == 0
+    assert num_skips <= 2 * skip_window
+    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
+    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
+    span = 2 * skip_window + 1 # [ skip_window target skip_window ]
+    buffer = collections.deque(maxlen=span)
+    for _ in range(span):
+        buffer.append(data[data_index])
+        data_index = (data_index + 1) % len(data)
+    for i in range(batch_size // num_skips):
+        target = skip_window  # target label at the center of the buffer
+        targets_to_avoid = [ skip_window ]
+        for j in range(num_skips):
+            while target in targets_to_avoid:
+                target = random.randint(0, span - 1)
+            targets_to_avoid.append(target)
+            batch[i * num_skips + j] = buffer[skip_window]
+            labels[i * num_skips + j, 0] = buffer[target]
+        buffer.append(data[data_index])
+        data_index = (data_index + 1) % len(data)
+    return batch, labels, data_index
+
+
+## Sampling functions
+def sample(a=[], temperature=1.0):
+    """Sample an index from a probability array.
+
+    Parameters
+    ----------
+    a : a list
+        List of probabilities.
+    temperature : float or None
+        The higher the more uniform.\n
+        When a = [0.1, 0.2, 0.7],\n
+            temperature = 0.7, the distribution will be sharpen [ 0.05048273  0.13588945  0.81362782]\n
+            temperature = 1.0, the distribution will be the same [0.1    0.2    0.7]\n
+            temperature = 1.5, the distribution will be filtered [ 0.16008435  0.25411807  0.58579758]\n
+        If None, it will be ``np.argmax(a)``
+
+    Notes
+    ------
+    No matter what is the temperature and input list, the sum of all probabilities will be one.
+    Even if input list = [1, 100, 200], the sum of all probabilities will still be one.
+
+    For large vocabulary_size, choice a higher temperature to avoid error.
+    """
+    b = np.copy(a)
+    try:
+        if temperature == 1:
+            return np.argmax(np.random.multinomial(1, a, 1))
+        if temperature is None:
+            return np.argmax(a)
+        else:
+            a = np.log(a) / temperature
+            a = np.exp(a) / np.sum(np.exp(a))
+            return np.argmax(np.random.multinomial(1, a, 1))
+    except:
+        # np.set_printoptions(threshold=np.nan)
+        # print(a)
+        # print(np.sum(a))
+        # print(np.max(a))
+        # print(np.min(a))
+        # exit()
+        message = "For large vocabulary_size, choice a higher temperature\
+         to avoid log error. Hint : use ``sample_top``. "
+        warnings.warn(message, Warning)
+        # print(a)
+        # print(b)
+        return np.argmax(np.random.multinomial(1, b, 1))
+
+def sample_top(a=[], top_k=10):
+    """Sample from ``top_k`` probabilities.
+
+    Parameters
+    ----------
+    a : a list
+        List of probabilities.
+    top_k : int
+        Number of candidates to be considered.
+    """
+    idx = np.argpartition(a, -top_k)[-top_k:]
+    probs = a[idx]
+    # print("new", probs)
+    probs = probs / np.sum(probs)
+    choice = np.random.choice(idx, p=probs)
+    return choice
+    ## old implementation
+    # a = np.array(a)
+    # idx = np.argsort(a)[::-1]
+    # idx = idx[:top_k]
+    # # a = a[idx]
+    # probs = a[idx]
+    # print("prev", probs)
+    # # probs = probs / np.sum(probs)
+    # # choice = np.random.choice(idx, p=probs)
+    # # return choice
+
+
+## Vector representations of words (advanced, undocumented)
+class SimpleVocabulary(object):
+  """Simple vocabulary wrapper, see create_vocab().
+
+  Parameters
+  ------------
+  vocab : A dictionary of word to word_id.
+  unk_id : Id of the special 'unknown' word.
+  """
+
+  def __init__(self, vocab, unk_id):
+    """Initializes the vocabulary."""
+
+
+    self._vocab = vocab
+    self._unk_id = unk_id
+
+  def word_to_id(self, word):
+    """Returns the integer id of a word string."""
+    if word in self._vocab:
+      return self._vocab[word]
+    else:
+      return self._unk_id
+
+class Vocabulary(object):
+  """Create Vocabulary class from a given vocabulary and its id-word, word-id convert,
+  see create_vocab() and ``tutorial_tfrecord3.py``.
+
+  Parameters
+  -----------
+  vocab_file : File containing the vocabulary, where the words are the first
+        whitespace-separated token on each line (other tokens are ignored) and
+        the word ids are the corresponding line numbers.
+  start_word : Special word denoting sentence start.
+  end_word : Special word denoting sentence end.
+  unk_word : Special word denoting unknown words.
+
+  Properties
+  ------------
+  vocab : a dictionary from word to id.
+  reverse_vocab : a list from id to word.
+  start_id : int of start id
+  end_id : int of end id
+  unk_id : int of unk id
+  pad_id : int of padding id
+
+  Vocab_files
+  -------------
+  >>> Look as follow, includes `start_word` , `end_word` but no `unk_word` .
+  >>> a 969108
+  >>> <S> 586368
+  >>> </S> 586368
+  >>> . 440479
+  >>> on 213612
+  >>> of 202290
+  >>> the 196219
+  >>> in 182598
+  >>> with 152984
+  >>> and 139109
+  >>> is 97322
+  """
+
+  def __init__(self,
+               vocab_file,
+               start_word="<S>",
+               end_word="</S>",
+               unk_word="<UNK>",
+               pad_word="<PAD>"):
+    if not tf.gfile.Exists(vocab_file):
+      tf.logging.fatal("Vocab file %s not found.", vocab_file)
+    tf.logging.info("Initializing vocabulary from file: %s", vocab_file)
+
+    with tf.gfile.GFile(vocab_file, mode="r") as f:
+      reverse_vocab = list(f.readlines())
+    reverse_vocab = [line.split()[0] for line in reverse_vocab]
+    assert start_word in reverse_vocab
+    assert end_word in reverse_vocab
+    if unk_word not in reverse_vocab:
+      reverse_vocab.append(unk_word)
+    vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
+
+    print("  [TL] Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
+    print("    vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
+    # tf.logging.info("     vocabulary with %d words" % len(vocab))
+
+    self.vocab = vocab  # vocab[word] = id
+    self.reverse_vocab = reverse_vocab  # reverse_vocab[id] = word
+
+    # Save special word ids.
+    self.start_id = vocab[start_word]
+    self.end_id = vocab[end_word]
+    self.unk_id = vocab[unk_word]
+    self.pad_id = vocab[pad_word]
+    print("      start_id: %d" % self.start_id)
+    print("      end_id: %d" % self.end_id)
+    print("      unk_id: %d" % self.unk_id)
+    print("      pad_id: %d" % self.pad_id)
+
+  def word_to_id(self, word):
+    """Returns the integer word id of a word string."""
+    if word in self.vocab:
+      return self.vocab[word]
+    else:
+      return self.unk_id
+
+  def id_to_word(self, word_id):
+    """Returns the word string of an integer word id."""
+    if word_id >= len(self.reverse_vocab):
+      return self.reverse_vocab[self.unk_id]
+    else:
+      return self.reverse_vocab[word_id]
+
+def process_sentence(sentence, start_word="<S>", end_word="</S>"):
+    """Converts a sentence string into a list of string words, add start_word and end_word,
+    see ``create_vocab()`` and ``tutorial_tfrecord3.py``.
+
+    Parameter
+    ---------
+    sentence : a sentence in string.
+    start_word : a string or None, if None, non start word will be appended.
+    end_word : a string or None, if None, non end word will be appended.
+
+    Returns
+    ---------
+    A list of strings; the processed caption.
+
+    Examples
+    -----------
+    >>> c = "how are you?"
+    >>> c = tl.nlp.process_sentence(c)
+    >>> print(c)
+    ... ['<S>', 'how', 'are', 'you', '?', '</S>']
+
+    Notes
+    -------
+    - You have to install the following package.
+    - `Installing NLTK <http://www.nltk.org/install.html>`_
+    - `Installing NLTK data <http://www.nltk.org/data.html>`_
+    """
+    try:
+        import nltk
+    except:
+        raise Exception("Hint : NLTK is required.")
+    if start_word is not None:
+        process_sentence = [start_word]
+    else:
+        process_sentence = []
+    process_sentence.extend(nltk.tokenize.word_tokenize(sentence.lower()))
+    if end_word is not None:
+        process_sentence.append(end_word)
+    return process_sentence
+
+def create_vocab(sentences, word_counts_output_file, min_word_count=1):
+    """Creates the vocabulary of word to word_id, see create_vocab() and ``tutorial_tfrecord3.py``.
+
+    The vocabulary is saved to disk in a text file of word counts. The id of each
+    word in the file is its corresponding 0-based line number.
+
+    Parameters
+    ------------
+    sentences : a list of lists of strings.
+    word_counts_output_file : A string
+        The file name.
+    min_word_count : a int
+        Minimum number of occurrences for a word.
+
+    Returns
+    --------
+    - tl.nlp.SimpleVocabulary object.
+
+    Mores
+    -----
+    - ``tl.nlp.build_vocab()``
+
+    Examples
+    --------
+    >>> captions = ["one two , three", "four five five"]
+    >>> processed_capts = []
+    >>> for c in captions:
+    >>>     c = tl.nlp.process_sentence(c, start_word="<S>", end_word="</S>")
+    >>>     processed_capts.append(c)
+    >>> print(processed_capts)
+    ...[['<S>', 'one', 'two', ',', 'three', '</S>'], ['<S>', 'four', 'five', 'five', '</S>']]
+
+    >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1)
+    ...   [TL] Creating vocabulary.
+    ...   Total words: 8
+    ...   Words in vocabulary: 8
+    ...   Wrote vocabulary file: vocab.txt
+    >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="<S>", end_word="</S>", unk_word="<UNK>")
+    ... INFO:tensorflow:Initializing vocabulary from file: vocab.txt
+    ... [TL] Vocabulary from vocab.txt : <S> </S> <UNK>
+    ... vocabulary with 10 words (includes start_word, end_word, unk_word)
+    ...     start_id: 2
+    ...     end_id: 3
+    ...     unk_id: 9
+    ...     pad_id: 0
+    """
+    from collections import Counter
+    print("  [TL] Creating vocabulary.")
+    counter = Counter()
+    for c in sentences:
+        counter.update(c)
+        # print('c',c)
+    print("    Total words: %d" % len(counter))
+
+    # Filter uncommon words and sort by descending count.
+    word_counts = [x for x in counter.items() if x[1] >= min_word_count]
+    word_counts.sort(key=lambda x: x[1], reverse=True)
+    word_counts = [("<PAD>", 0)] + word_counts # 1st id should be reserved for padding
+    # print(word_counts)
+    print("    Words in vocabulary: %d" % len(word_counts))
+
+    # Write out the word counts file.
+    with tf.gfile.FastGFile(word_counts_output_file, "w") as f:
+        f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
+    print("    Wrote vocabulary file: %s" % word_counts_output_file)
+
+    # Create the vocabulary dictionary.
+    reverse_vocab = [x[0] for x in word_counts]
+    unk_id = len(reverse_vocab)
+    vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
+    vocab = SimpleVocabulary(vocab_dict, unk_id)
+
+    return vocab
+
+
+## Vector representations of words
+def simple_read_words(filename="nietzsche.txt"):
+    """Read context from file without any preprocessing.
+
+    Parameters
+    ----------
+    filename : a string
+        A file path (like .txt file)
+
+    Returns
+    --------
+    The context in a string
+    """
+    with open("nietzsche.txt", "r") as f:
+        words = f.read()
+        return words
+
+def read_words(filename="nietzsche.txt", replace = ['\n', '<eos>']):
+    """File to list format context. Note that, this script can not handle punctuations.
+    For customized read_words method, see ``tutorial_generate_text.py``.
+
+    Parameters
+    ----------
+    filename : a string
+        A file path (like .txt file),
+    replace : a list
+        [original string, target string], to disable replace use ['', '']
+
+    Returns
+    --------
+    The context in a list, split by space by default, and use ``'<eos>'`` to represent ``'\n'``,
+    e.g. ``[... 'how', 'useful', 'it', "'s" ... ]``.
+
+    Code References
+    ---------------
+    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
+    """
+    with tf.gfile.GFile(filename, "r") as f:
+        try:    # python 3.4 or older
+            context_list = f.read().replace(*replace).split()
+        except: # python 3.5
+            f.seek(0)
+            replace = [x.encode('utf-8') for x in replace]
+            context_list = f.read().replace(*replace).split()
+        return context_list
+
+def read_analogies_file(eval_file='questions-words.txt', word2id={}):
+    """Reads through an analogy question file, return its id format.
+
+    Parameters
+    ----------
+    eval_data : a string
+        The file name.
+    word2id : a dictionary
+        Mapping words to unique IDs.
+
+    Returns
+    --------
+    analogy_questions : a [n, 4] numpy array containing the analogy question's
+             word ids.
+             questions_skipped: questions skipped due to unknown words.
+
+    Examples
+    ---------
+    >>> eval_file should be in this format :
+    >>> : capital-common-countries
+    >>> Athens Greece Baghdad Iraq
+    >>> Athens Greece Bangkok Thailand
+    >>> Athens Greece Beijing China
+    >>> Athens Greece Berlin Germany
+    >>> Athens Greece Bern Switzerland
+    >>> Athens Greece Cairo Egypt
+    >>> Athens Greece Canberra Australia
+    >>> Athens Greece Hanoi Vietnam
+    >>> Athens Greece Havana Cuba
+    ...
+
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> data, count, dictionary, reverse_dictionary = \
+                tl.nlp.build_words_dataset(words, vocabulary_size, True)
+    >>> analogy_questions = tl.nlp.read_analogies_file( \
+                eval_file='questions-words.txt', word2id=dictionary)
+    >>> print(analogy_questions)
+    ... [[ 3068  1248  7161  1581]
+    ... [ 3068  1248 28683  5642]
+    ... [ 3068  1248  3878   486]
+    ... ...,
+    ... [ 1216  4309 19982 25506]
+    ... [ 1216  4309  3194  8650]
+    ... [ 1216  4309   140   312]]
+    """
+    questions = []
+    questions_skipped = 0
+    with open(eval_file, "rb") as analogy_f:
+      for line in analogy_f:
+          if line.startswith(b":"):  # Skip comments.
+                continue
+          words = line.strip().lower().split(b" ")  # lowercase
+          ids = [word2id.get(w.strip()) for w in words]
+          if None in ids or len(ids) != 4:
+              questions_skipped += 1
+          else:
+              questions.append(np.array(ids))
+    print("Eval analogy file: ", eval_file)
+    print("Questions: ", len(questions))
+    print("Skipped: ", questions_skipped)
+    analogy_questions = np.array(questions, dtype=np.int32)
+    return analogy_questions
+
+def build_vocab(data):
+    """Build vocabulary.
+    Given the context in list format.
+    Return the vocabulary, which is a dictionary for word to id.
+    e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... }
+
+    Parameters
+    ----------
+    data : a list of string
+        the context in list format
+
+    Returns
+    --------
+    word_to_id : a dictionary
+        mapping words to unique IDs. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... }
+
+    Code References
+    ---------------
+    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
+
+    Examples
+    --------
+    >>> data_path = os.getcwd() + '/simple-examples/data'
+    >>> train_path = os.path.join(data_path, "ptb.train.txt")
+    >>> word_to_id = build_vocab(read_txt_words(train_path))
+    """
+    # data = _read_words(filename)
+    counter = collections.Counter(data)
+    # print('counter', counter)   # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1
+    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
+    # print('count_pairs',count_pairs)  # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1)
+    words, _ = list(zip(*count_pairs))
+    word_to_id = dict(zip(words, range(len(words))))
+    # print(words)    # list of words
+    # print(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746
+    return word_to_id
+
+def build_reverse_dictionary(word_to_id):
+    """Given a dictionary for converting word to integer id.
+    Returns a reverse dictionary for converting a id to word.
+
+    Parameters
+    ----------
+    word_to_id : dictionary
+        mapping words to unique ids
+
+    Returns
+    --------
+    reverse_dictionary : a dictionary
+        mapping ids to words
+    """
+    reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys()))
+    return reverse_dictionary
+
+def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key = 'UNK'):
+    """Build the words dictionary and replace rare words with 'UNK' token.
+    The most common word has the smallest integer id.
+
+    Parameters
+    ----------
+    words : a list of string or byte
+        The context in list format. You may need to do preprocessing on the words,
+        such as lower case, remove marks etc.
+    vocabulary_size : an int
+        The maximum vocabulary size, limiting the vocabulary size.
+        Then the script replaces rare words with 'UNK' token.
+    printable : boolean
+        Whether to print the read vocabulary size of the given words.
+    unk_key : a string
+        Unknown words = unk_key
+
+    Returns
+    --------
+    data : a list of integer
+        The context in a list of ids
+    count : a list of tuple and list
+        count[0] is a list : the number of rare words\n
+        count[1:] are tuples : the number of occurrence of each word\n
+        e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)]
+    dictionary : a dictionary
+        word_to_id, mapping words to unique IDs.
+    reverse_dictionary : a dictionary
+        id_to_word, mapping id to unique word.
+
+    Examples
+    --------
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> vocabulary_size = 50000
+    >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size)
+
+    Code References
+    -----------------
+    - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`_
+    """
+    import collections
+    count = [[unk_key, -1]]
+    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
+    dictionary = dict()
+    for word, _ in count:
+        dictionary[word] = len(dictionary)
+    data = list()
+    unk_count = 0
+    for word in words:
+        if word in dictionary:
+            index = dictionary[word]
+        else:
+            index = 0  # dictionary['UNK']
+            unk_count += 1
+        data.append(index)
+    count[0][1] = unk_count
+    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
+    if printable:
+        print('Real vocabulary size    %d' % len(collections.Counter(words).keys()))
+        print('Limited vocabulary size {}'.format(vocabulary_size))
+    assert len(collections.Counter(words).keys()) >= vocabulary_size , \
+            "the limited vocabulary_size must be less than or equal to the read vocabulary_size"
+    return data, count, dictionary, reverse_dictionary
+
+def words_to_word_ids(data=[], word_to_id={}, unk_key = 'UNK'):
+    """Given a context (words) in list format and the vocabulary,
+    Returns a list of IDs to represent the context.
+
+    Parameters
+    ----------
+    data : a list of string or byte
+        the context in list format
+    word_to_id : a dictionary
+        mapping words to unique IDs.
+    unk_key : a string
+        Unknown words = unk_key
+
+    Returns
+    --------
+    A list of IDs to represent the context.
+
+    Examples
+    --------
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> vocabulary_size = 50000
+    >>> data, count, dictionary, reverse_dictionary = \
+    ...         tl.nlp.build_words_dataset(words, vocabulary_size, True)
+    >>> context = [b'hello', b'how', b'are', b'you']
+    >>> ids = tl.nlp.words_to_word_ids(words, dictionary)
+    >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary)
+    >>> print(ids)
+    ... [6434, 311, 26, 207]
+    >>> print(context)
+    ... [b'hello', b'how', b'are', b'you']
+
+    Code References
+    ---------------
+    - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
+    """
+    # if isinstance(data[0], six.string_types):
+    #     print(type(data[0]))
+    #     # exit()
+    #     print(data[0])
+    #     print(word_to_id)
+    #     return [word_to_id[str(word)] for word in data]
+    # else:
+
+    word_ids = []
+    for word in data:
+        if word_to_id.get(word) is not None:
+            word_ids.append(word_to_id[word])
+        else:
+            word_ids.append(word_to_id[unk_key])
+    return word_ids
+    # return [word_to_id[word] for word in data]    # this one
+
+    # if isinstance(data[0], str):
+    #     # print('is a string object')
+    #     return [word_to_id[word] for word in data]
+    # else:#if isinstance(s, bytes):
+    #     # print('is a unicode object')
+    #     # print(data[0])
+    #     return [word_to_id[str(word)] f
+
+def word_ids_to_words(data, id_to_word):
+    """Given a context (ids) in list format and the vocabulary,
+    Returns a list of words to represent the context.
+
+    Parameters
+    ----------
+    data : a list of integer
+        the context in list format
+    id_to_word : a dictionary
+        mapping id to unique word.
+
+    Returns
+    --------
+    A list of string or byte to represent the context.
+
+    Examples
+    ---------
+    >>> see words_to_word_ids
+    """
+    return [id_to_word[i] for i in data]
+
+def save_vocab(count=[], name='vocab.txt'):
+    """Save the vocabulary to a file so the model can be reloaded.
+
+    Parameters
+    ----------
+    count : a list of tuple and list
+        count[0] is a list : the number of rare words\n
+        count[1:] are tuples : the number of occurrence of each word\n
+        e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)]
+
+    Examples
+    ---------
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> vocabulary_size = 50000
+    >>> data, count, dictionary, reverse_dictionary = \
+    ...     tl.nlp.build_words_dataset(words, vocabulary_size, True)
+    >>> tl.nlp.save_vocab(count, name='vocab_text8.txt')
+    >>> vocab_text8.txt
+    ... UNK 418391
+    ... the 1061396
+    ... of 593677
+    ... and 416629
+    ... one 411764
+    ... in 372201
+    ... a 325873
+    ... to 316376
+    """
+    pwd = os.getcwd()
+    vocabulary_size = len(count)
+    with open(os.path.join(pwd, name), "w") as f:
+        for i in xrange(vocabulary_size):
+            f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1]))
+    print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd))
+
+## Functions for translation
+def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
+  """Very basic tokenizer: split the sentence into a list of tokens.
+
+  Parameters
+  -----------
+  sentence : tensorflow.python.platform.gfile.GFile Object
+  _WORD_SPLIT : regular expression for word spliting.
+
+
+  Examples
+  --------
+  >>> see create_vocabulary
+  >>> from tensorflow.python.platform import gfile
+  >>> train_path = "wmt/giga-fren.release2"
+  >>> with gfile.GFile(train_path + ".en", mode="rb") as f:
+  >>>    for line in f:
+  >>>       tokens = tl.nlp.basic_tokenizer(line)
+  >>>       print(tokens)
+  >>>       exit()
+  ... [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How',
+  ...   b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home',
+  ...   b'|', b'Concepts', b'|', b'Teachers', b'|', b'Search', b'|', b'Overview',
+  ...   b'|', b'Credits', b'|', b'HHCC', b'Web', b'|', b'Reference', b'|',
+  ...   b'Feedback', b'Virtual', b'Museum', b'of', b'Canada', b'Home', b'Page']
+
+  References
+  ----------
+  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
+  """
+  words = []
+  sentence = tf.compat.as_bytes(sentence)
+  for space_separated_fragment in sentence.strip().split():
+    words.extend(re.split(_WORD_SPLIT, space_separated_fragment))
+  return [w for w in words if w]
+
+def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size,
+                      tokenizer=None, normalize_digits=True,
+                      _DIGIT_RE=re.compile(br"\d"),
+                      _START_VOCAB=[b"_PAD", b"_GO", b"_EOS", b"_UNK"]):
+  """Create vocabulary file (if it does not exist yet) from data file.
+
+  Data file is assumed to contain one sentence per line. Each sentence is
+  tokenized and digits are normalized (if normalize_digits is set).
+  Vocabulary contains the most-frequent tokens up to max_vocabulary_size.
+  We write it to vocabulary_path in a one-token-per-line format, so that later
+  token in the first line gets id=0, second line gets id=1, and so on.
+
+  Parameters
+  -----------
+  vocabulary_path : path where the vocabulary will be created.
+  data_path : data file that will be used to create vocabulary.
+  max_vocabulary_size : limit on the size of the created vocabulary.
+  tokenizer : a function to use to tokenize each data sentence.
+        if None, basic_tokenizer will be used.
+  normalize_digits : Boolean
+        if true, all digits are replaced by 0s.
+
+  References
+  ----------
+  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
+  """
+  if not gfile.Exists(vocabulary_path):
+    print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path))
+    vocab = {}
+    with gfile.GFile(data_path, mode="rb") as f:
+      counter = 0
+      for line in f:
+        counter += 1
+        if counter % 100000 == 0:
+          print("  processing line %d" % counter)
+        tokens = tokenizer(line) if tokenizer else basic_tokenizer(line)
+        for w in tokens:
+          word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w
+          if word in vocab:
+            vocab[word] += 1
+          else:
+            vocab[word] = 1
+      vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True)
+      if len(vocab_list) > max_vocabulary_size:
+        vocab_list = vocab_list[:max_vocabulary_size]
+      with gfile.GFile(vocabulary_path, mode="wb") as vocab_file:
+        for w in vocab_list:
+          vocab_file.write(w + b"\n")
+  else:
+    print("Vocabulary %s from data %s exists" % (vocabulary_path, data_path))
+
+def initialize_vocabulary(vocabulary_path):
+  """Initialize vocabulary from file, return the word_to_id (dictionary)
+  and id_to_word (list).
+
+  We assume the vocabulary is stored one-item-per-line, so a file:\n
+    dog\n
+    cat\n
+  will result in a vocabulary {"dog": 0, "cat": 1}, and this function will
+  also return the reversed-vocabulary ["dog", "cat"].
+
+  Parameters
+  -----------
+  vocabulary_path : path to the file containing the vocabulary.
+
+  Returns
+  --------
+  vocab : a dictionary
+        Word to id. A dictionary mapping string to integers.
+  rev_vocab : a list
+        Id to word. The reversed vocabulary (a list, which reverses the vocabulary mapping).
+
+  Examples
+  ---------
+  >>> Assume 'test' contains
+  ... dog
+  ... cat
+  ... bird
+  >>> vocab, rev_vocab = tl.nlp.initialize_vocabulary("test")
+  >>> print(vocab)
+  >>> {b'cat': 1, b'dog': 0, b'bird': 2}
+  >>> print(rev_vocab)
+  >>> [b'dog', b'cat', b'bird']
+
+  Raises
+  -------
+  ValueError : if the provided vocabulary_path does not exist.
+  """
+  if gfile.Exists(vocabulary_path):
+    rev_vocab = []
+    with gfile.GFile(vocabulary_path, mode="rb") as f:
+      rev_vocab.extend(f.readlines())
+    rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab]
+    vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
+    return vocab, rev_vocab
+  else:
+    raise ValueError("Vocabulary file %s not found.", vocabulary_path)
+
+def sentence_to_token_ids(sentence, vocabulary,
+                          tokenizer=None, normalize_digits=True,
+                          UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
+  """Convert a string to list of integers representing token-ids.
+
+  For example, a sentence "I have a dog" may become tokenized into
+  ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2,
+  "a": 4, "dog": 7"} this function will return [1, 2, 4, 7].
+
+  Parameters
+  -----------
+  sentence :  tensorflow.python.platform.gfile.GFile Object
+        The sentence in bytes format to convert to token-ids.\n
+        see basic_tokenizer(), data_to_token_ids()
+  vocabulary : a dictionary mapping tokens to integers.
+  tokenizer : a function to use to tokenize each sentence;
+        If None, basic_tokenizer will be used.
+  normalize_digits : Boolean
+        If true, all digits are replaced by 0s.
+
+  Returns
+  --------
+  A list of integers, the token-ids for the sentence.
+  """
+
+  if tokenizer:
+    words = tokenizer(sentence)
+  else:
+    words = basic_tokenizer(sentence)
+  if not normalize_digits:
+    return [vocabulary.get(w, UNK_ID) for w in words]
+  # Normalize digits by 0 before looking words up in the vocabulary.
+  return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words]
+
+def data_to_token_ids(data_path, target_path, vocabulary_path,
+                      tokenizer=None, normalize_digits=True,
+                      UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
+  """Tokenize data file and turn into token-ids using given vocabulary file.
+
+  This function loads data line-by-line from data_path, calls the above
+  sentence_to_token_ids, and saves the result to target_path. See comment
+  for sentence_to_token_ids on the details of token-ids format.
+
+  Parameters
+  -----------
+  data_path : path to the data file in one-sentence-per-line format.
+  target_path : path where the file with token-ids will be created.
+  vocabulary_path : path to the vocabulary file.
+  tokenizer : a function to use to tokenize each sentence;
+      if None, basic_tokenizer will be used.
+  normalize_digits : Boolean; if true, all digits are replaced by 0s.
+
+  References
+  ----------
+  - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
+  """
+  if not gfile.Exists(target_path):
+    print("Tokenizing data in %s" % data_path)
+    vocab, _ = initialize_vocabulary(vocabulary_path)
+    with gfile.GFile(data_path, mode="rb") as data_file:
+      with gfile.GFile(target_path, mode="w") as tokens_file:
+        counter = 0
+        for line in data_file:
+          counter += 1
+          if counter % 100000 == 0:
+            print("  tokenizing line %d" % counter)
+          token_ids = sentence_to_token_ids(line, vocab, tokenizer,
+                                            normalize_digits, UNK_ID=UNK_ID,
+                                            _DIGIT_RE=_DIGIT_RE)
+          tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
+  else:
+    print("Target path %s exists" % target_path)
--- a/tensorlayer/ops.py
+++ b/tensorlayer/ops.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+
+import tensorflow as tf
+import os
+import sys
+from sys import platform as _platform
+
+
+def exit_tf(sess=None):
+    """Close tensorboard and nvidia-process if available
+
+    Parameters
+    ----------
+    sess : a session instance of TensorFlow
+        TensorFlow session
+    """
+    text = "[tl] Close tensorboard and nvidia-process if available"
+    sess.close()
+    # import time
+    # time.sleep(2)
+    if _platform == "linux" or _platform == "linux2":
+        print('linux: %s' % text)
+        os.system('nvidia-smi')
+        os.system('fuser 6006/tcp -k')  # kill tensorboard 6006
+        os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process
+    elif _platform == "darwin":
+        print('OS X: %s' % text)
+        os.system("lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill") # kill tensorboard 6006
+    elif _platform == "win32":
+        print('Windows: %s' % text)
+    else:
+        print(_platform)
+    exit()
+
+def clear_all(printable=True):
+    """Clears all the placeholder variables of keep prob,
+    including keeping probabilities of all dropout, denoising, dropconnect etc.
+
+    Parameters
+    ----------
+    printable : boolean
+        If True, print all deleted variables.
+    """
+    print('clear all .....................................')
+    gl = globals().copy()
+    for var in gl:
+        if var[0] == '_': continue
+        if 'func' in str(globals()[var]): continue
+        if 'module' in str(globals()[var]): continue
+        if 'class' in str(globals()[var]): continue
+
+        if printable:
+            print(" clear_all ------- %s" % str(globals()[var]))
+
+        del globals()[var]
+
+# def clear_all2(vars, printable=True):
+#     """
+#     The :function:`clear_all()` Clears all the placeholder variables of keep prob,
+#     including keeping probabilities of all dropout, denoising, dropconnect
+#     Parameters
+#     ----------
+#     printable : if True, print all deleted variables.
+#     """
+#     print('clear all .....................................')
+#     for var in vars:
+#         if var[0] == '_': continue
+#         if 'func' in str(var): continue
+#         if 'module' in str(var): continue
+#         if 'class' in str(var): continue
+#
+#         if printable:
+#             print(" clear_all ------- %s" % str(var))
+#
+#         del var
+
+def set_gpu_fraction(sess=None, gpu_fraction=0.3):
+    """Set the GPU memory fraction for the application.
+
+    Parameters
+    ----------
+    sess : a session instance of TensorFlow
+        TensorFlow session
+    gpu_fraction : a float
+        Fraction of GPU memory, (0 ~ 1]
+
+    References
+    ----------
+    - `TensorFlow using GPU <https://www.tensorflow.org/versions/r0.9/how_tos/using_gpu/index.html>`_
+    """
+    print("  tensorlayer: GPU MEM Fraction %f" % gpu_fraction)
+    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
+    sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
+    return sess
+
+
+
+
+
+def disable_print():
+    """Disable console output, ``suppress_stdout`` is recommended.
+
+    Examples
+    ---------
+    >>> print("You can see me")
+    >>> tl.ops.disable_print()
+    >>> print(" You can't see me")
+    >>> tl.ops.enable_print()
+    >>> print("You can see me")
+    """
+    # sys.stdout = os.devnull   # this one kill the process
+    sys.stdout = None
+    sys.stderr = os.devnull
+
+def enable_print():
+    """Enable console output, ``suppress_stdout`` is recommended.
+
+    Examples
+    --------
+    - see tl.ops.disable_print()
+    """
+    sys.stdout = sys.__stdout__
+    sys.stderr = sys.__stderr__
+
+
+# class temporary_disable_print:
+#     """Temporarily disable console output.
+#
+#     Examples
+#     ---------
+#     >>> print("You can see me")
+#     >>> with tl.ops.temporary_disable_print() as t:
+#     >>>     print("You can't see me")
+#     >>> print("You can see me")
+#     """
+#     def __init__(self):
+#         pass
+#     def __enter__(self):
+#         sys.stdout = None
+#         sys.stderr = os.devnull
+#     def __exit__(self, type, value, traceback):
+#         sys.stdout = sys.__stdout__
+#         sys.stderr = sys.__stderr__
+#         return isinstance(value, TypeError)
+
+
+from contextlib import contextmanager
+@contextmanager
+def suppress_stdout():
+    """Temporarily disable console output.
+
+    Examples
+    ---------
+    >>> print("You can see me")
+    >>> with tl.ops.suppress_stdout():
+    >>>     print("You can't see me")
+    >>> print("You can see me")
+
+    References
+    -----------
+    - `stackoverflow <http://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python>`_
+    """
+    with open(os.devnull, "w") as devnull:
+        old_stdout = sys.stdout
+        sys.stdout = devnull
+        try:
+            yield
+        finally:
+            sys.stdout = old_stdout
+
+
+
+def get_site_packages_directory():
+    """Print and return the site-packages directory.
+
+    Examples
+    ---------
+    >>> loc = tl.ops.get_site_packages_directory()
+    """
+    import site
+    try:
+        loc = site.getsitepackages()
+        print("  tl.ops : site-packages in ", loc)
+        return loc
+    except:
+        print("  tl.ops : Cannot find package dir from virtual environment")
+        return False
+
+
+
+def empty_trash():
+    """Empty trash folder.
+
+    """
+    text = "[tl] Empty the trash"
+    if _platform == "linux" or _platform == "linux2":
+        print('linux: %s' % text)
+        os.system("rm -rf ~/.local/share/Trash/*")
+    elif _platform == "darwin":
+        print('OS X: %s' % text)
+        os.system("sudo rm -rf ~/.Trash/*")
+    elif _platform == "win32":
+        print('Windows: %s' % text)
+        try:
+            os.system("rd /s c:\$Recycle.Bin")  # Windows 7 or Server 2008
+        except:
+            pass
+        try:
+            os.system("rd /s c:\recycler")  #  Windows XP, Vista, or Server 2003
+        except:
+            pass
+    else:
+        print(_platform)
+
+#
--- a/tensorlayer/prepro.py
+++ b/tensorlayer/prepro.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+import tensorflow as tf
+import tensorlayer as tl
+import numpy as np
+
+import time
+import numbers
+import random
+import os
+import re
+import sys
+
+import threading
+# import Queue  # <-- donot work for py3
+is_py2 = sys.version[0] == '2'
+if is_py2:
+    import Queue as queue
+else:
+    import queue as queue
+
+from six.moves import range
+import scipy
+from scipy import linalg
+import scipy.ndimage as ndi
+
+from skimage import transform
+from skimage import exposure
+import skimage
+
+# linalg https://docs.scipy.org/doc/scipy/reference/linalg.html
+# ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html
+
+## Threading
+def threading_data(data=None, fn=None, **kwargs):
+    """Return a batch of result by given data.
+    Usually be used for data augmentation.
+
+    Parameters
+    -----------
+    data : numpy array or zip of numpy array, see Examples below.
+    fn : the function for data processing.
+    more args : the args for fn, see Examples below.
+
+    Examples
+    --------
+    - Single array
+    >>> X --> [batch_size, row, col, 1] greyscale
+    >>> results = threading_data(X, zoom, zoom_range=[0.5, 1], is_random=True)
+    ... results --> [batch_size, row, col, channel]
+    >>> tl.visualize.images2d(images=np.asarray(results), second=0.01, saveable=True, name='after', dtype=None)
+    >>> tl.visualize.images2d(images=np.asarray(X), second=0.01, saveable=True, name='before', dtype=None)
+
+    - List of array (e.g. functions with ``multi``)
+    >>> X, Y --> [batch_size, row, col, 1]  greyscale
+    >>> data = threading_data([_ for _ in zip(X, Y)], zoom_multi, zoom_range=[0.5, 1], is_random=True)
+    ... data --> [batch_size, 2, row, col, 1]
+    >>> X_, Y_ = data.transpose((1,0,2,3,4))
+    ... X_, Y_ --> [batch_size, row, col, 1]
+    >>> tl.visualize.images2d(images=np.asarray(X_), second=0.01, saveable=True, name='after', dtype=None)
+    >>> tl.visualize.images2d(images=np.asarray(Y_), second=0.01, saveable=True, name='before', dtype=None)
+
+    - Customized function for image segmentation
+    >>> def distort_img(data):
+    ...     x, y = data
+    ...     x, y = flip_axis_multi([x, y], axis=0, is_random=True)
+    ...     x, y = flip_axis_multi([x, y], axis=1, is_random=True)
+    ...     x, y = crop_multi([x, y], 100, 100, is_random=True)
+    ...     return x, y
+    >>> X, Y --> [batch_size, row, col, channel]
+    >>> data = threading_data([_ for _ in zip(X, Y)], distort_img)
+    >>> X_, Y_ = data.transpose((1,0,2,3,4))
+
+    References
+    ----------
+    - `python queue <https://pymotw.com/2/Queue/index.html#module-Queue>`_
+    - `run with limited queue <http://effbot.org/librarybook/queue.htm>`_
+    """
+    ## plot function info
+    # for name, value in kwargs.items():
+    #     print('{0} = {1}'.format(name, value))
+    # exit()
+    # define function for threading
+    def apply_fn(results, i, data, kwargs):
+        results[i] = fn(data, **kwargs)
+
+    ## start multi-threaded reading.
+    results = [None] * len(data) ## preallocate result list
+    threads = []
+    for i in range(len(data)):
+        t = threading.Thread(
+                        name='threading_and_return',
+                        target=apply_fn,
+                        args=(results, i, data[i], kwargs)
+                        )
+        t.start()
+        threads.append(t)
+
+    ## <Milo> wait for all threads to complete
+    for t in threads:
+        t.join()
+
+    return np.asarray(results)
+
+    ## old implementation
+    # define function for threading
+    # def function(q, i, data, kwargs):
+    #     result = fn(data, **kwargs)
+    #     q.put([i, result])
+    # ## start threading
+    # q = queue.Queue()
+    # threads = []
+    # for i in range(len(data)):
+    #     t = threading.Thread(
+    #                     name='threading_and_return',
+    #                     target=function,
+    #                     args=(q, i, data[i], kwargs)
+    #                     )
+    #     t.start()
+    #     threads.append(t)
+    #
+    # ## <Milo> wait for all threads to complete
+    # for t in threads:
+    #     t.join()
+    #
+    # ## get results
+    # results = []
+    # for i in range(len(data)):
+    #     result = q.get()
+    #     results.append(result)
+    # results = sorted(results)
+    # for i in range(len(results)):
+    #     results[i] = results[i][1]
+    # return np.asarray(results)
+
+
+## Image
+def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
+                    fill_mode='nearest', cval=0.):
+    """Rotate an image randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    rg : int or float
+        Degree to rotate, usually 0 ~ 180.
+    is_random : boolean, default False
+        If True, randomly rotate.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+
+    Examples
+    ---------
+    >>> x --> [row, col, 1] greyscale
+    >>> x = rotation(x, rg=40, is_random=False)
+    >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='temp',cmap='gray')
+    """
+    if is_random:
+        theta = np.pi / 180 * np.random.uniform(-rg, rg)
+    else:
+        theta = np.pi /180 * rg
+    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
+                                [np.sin(theta), np.cos(theta), 0],
+                                [0, 0, 1]])
+
+    h, w = x.shape[row_index], x.shape[col_index]
+    transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
+                    fill_mode='nearest', cval=0.):
+    """Rotate multiple images with the same arguments, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``rotation``.
+
+    Examples
+    --------
+    >>> x, y --> [row, col, 1]  greyscale
+    >>> x, y = rotation_multi([x, y], rg=90, is_random=False)
+    >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='x',cmap='gray')
+    >>> tl.visualize.frame(y[:,:,0], second=0.01, saveable=True, name='y',cmap='gray')
+    """
+    if is_random:
+        theta = np.pi / 180 * np.random.uniform(-rg, rg)
+    else:
+        theta = np.pi /180 * rg
+    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
+                                [np.sin(theta), np.cos(theta), 0],
+                                [0, 0, 1]])
+
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# crop
+def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
+    """Randomly or centrally crop an image.
+
+    Parameters
+    ----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    wrg : float
+        Size of weight.
+    hrg : float
+        Size of height.
+    is_random : boolean, default False
+        If True, randomly crop, else central crop.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    """
+    h, w = x.shape[row_index], x.shape[col_index]
+    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
+    if is_random:
+        h_offset = int(np.random.uniform(0, h-hrg) -1)
+        w_offset = int(np.random.uniform(0, w-wrg) -1)
+        # print(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape)
+        return x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset]
+    else:   # central crop
+        h_offset = int(np.floor((h - hrg)/2.))
+        w_offset = int(np.floor((w - wrg)/2.))
+        h_end = h_offset + hrg
+        w_end = w_offset + wrg
+        return x[h_offset: h_end, w_offset: w_end]
+        # old implementation
+        # h_offset = (h - hrg)/2
+        # w_offset = (w - wrg)/2
+        # # print(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape)
+        # return x[h_offset: h-h_offset ,w_offset: w-w_offset]
+        # central crop
+
+
+def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
+    """Randomly or centrally crop multiple images.
+
+    Parameters
+    ----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``crop``.
+    """
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
+    if is_random:
+        h_offset = int(np.random.uniform(0, h-hrg) -1)
+        w_offset = int(np.random.uniform(0, w-wrg) -1)
+        results = []
+        for data in x:
+            results.append( data[h_offset: hrg+h_offset ,w_offset: wrg+w_offset])
+        return np.asarray(results)
+    else:
+        # central crop
+        h_offset = (h - hrg)/2
+        w_offset = (w - wrg)/2
+        results = []
+        for data in x:
+            results.append( data[h_offset: h-h_offset ,w_offset: w-w_offset] )
+        return np.asarray(results)
+
+# flip
+def flip_axis(x, axis, is_random=False):
+    """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly,
+
+    Parameters
+    ----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    axis : int
+        - 0, flip up and down
+        - 1, flip left and right
+        - 2, flip channel
+    is_random : boolean, default False
+        If True, randomly flip.
+    """
+    if is_random:
+        factor = np.random.uniform(-1, 1)
+        if factor > 0:
+            x = np.asarray(x).swapaxes(axis, 0)
+            x = x[::-1, ...]
+            x = x.swapaxes(0, axis)
+            return x
+        else:
+            return x
+    else:
+        x = np.asarray(x).swapaxes(axis, 0)
+        x = x[::-1, ...]
+        x = x.swapaxes(0, axis)
+        return x
+
+def flip_axis_multi(x, axis, is_random=False):
+    """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly,
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``flip_axis``.
+    """
+    if is_random:
+        factor = np.random.uniform(-1, 1)
+        if factor > 0:
+            # x = np.asarray(x).swapaxes(axis, 0)
+            # x = x[::-1, ...]
+            # x = x.swapaxes(0, axis)
+            # return x
+            results = []
+            for data in x:
+                data = np.asarray(data).swapaxes(axis, 0)
+                data = data[::-1, ...]
+                data = data.swapaxes(0, axis)
+                results.append( data )
+            return np.asarray(results)
+        else:
+            return np.asarray(x)
+    else:
+        # x = np.asarray(x).swapaxes(axis, 0)
+        # x = x[::-1, ...]
+        # x = x.swapaxes(0, axis)
+        # return x
+        results = []
+        for data in x:
+            data = np.asarray(data).swapaxes(axis, 0)
+            data = data[::-1, ...]
+            data = data.swapaxes(0, axis)
+            results.append( data )
+        return np.asarray(results)
+
+# shift
+def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+                 fill_mode='nearest', cval=0.):
+    """Shift an image randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    wrg : float
+        Percentage of shift in axis x, usually -0.25 ~ 0.25.
+    hrg : float
+        Percentage of shift in axis y, usually -0.25 ~ 0.25.
+    is_random : boolean, default False
+        If True, randomly shift.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    """
+    h, w = x.shape[row_index], x.shape[col_index]
+    if is_random:
+        tx = np.random.uniform(-hrg, hrg) * h
+        ty = np.random.uniform(-wrg, wrg) * w
+    else:
+        tx, ty = hrg * h, wrg * w
+    translation_matrix = np.array([[1, 0, tx],
+                                   [0, 1, ty],
+                                   [0, 0, 1]])
+
+    transform_matrix = translation_matrix  # no need to do offset
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+                 fill_mode='nearest', cval=0.):
+    """Shift images with the same arguments, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``shift``.
+    """
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    if is_random:
+        tx = np.random.uniform(-hrg, hrg) * h
+        ty = np.random.uniform(-wrg, wrg) * w
+    else:
+        tx, ty = hrg * h, wrg * w
+    translation_matrix = np.array([[1, 0, tx],
+                                   [0, 1, ty],
+                                   [0, 0, 1]])
+
+    transform_matrix = translation_matrix  # no need to do offset
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# shear
+def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+                 fill_mode='nearest', cval=0.):
+    """Shear an image randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    intensity : float
+        Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False),
+        you can have a quick try by shear(X, 1).
+    is_random : boolean, default False
+        If True, randomly shear.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    """
+    if is_random:
+        shear = np.random.uniform(-intensity, intensity)
+    else:
+        shear = intensity
+    shear_matrix = np.array([[1, -np.sin(shear), 0],
+                             [0, np.cos(shear), 0],
+                             [0, 0, 1]])
+
+    h, w = x.shape[row_index], x.shape[col_index]
+    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+                 fill_mode='nearest', cval=0.):
+    """Shear images with the same arguments, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``shear``.
+    """
+    if is_random:
+        shear = np.random.uniform(-intensity, intensity)
+    else:
+        shear = intensity
+    shear_matrix = np.array([[1, -np.sin(shear), 0],
+                             [0, np.cos(shear), 0],
+                             [0, 0, 1]])
+
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# swirl
+def swirl(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False):
+    """Swirl an image randomly or non-randomly, see `scikit-image swirl API <http://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.swirl>`_
+    and `example <http://scikit-image.org/docs/dev/auto_examples/plot_swirl.html>`_.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    center : (row, column) tuple or (2,) ndarray, optional
+        Center coordinate of transformation.
+    strength : float, optional
+        The amount of swirling applied.
+    radius : float, optional
+        The extent of the swirl in pixels. The effect dies out rapidly beyond radius.
+    rotation : float, (degree) optional
+        Additional rotation applied to the image, usually [0, 360], relates to center.
+    output_shape : tuple (rows, cols), optional
+        Shape of the output image generated. By default the shape of the input image is preserved.
+    order : int, optional
+        The order of the spline interpolation, default is 1. The order has to be in the range 0-5. See skimage.transform.warp for detail.
+    mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional
+        Points outside the boundaries of the input are filled according to the given mode, with ‘constant’ used as the default. Modes match the behaviour of numpy.pad.
+    cval : float, optional
+        Used in conjunction with mode ‘constant’, the value outside the image boundaries.
+    clip : bool, optional
+        Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
+    preserve_range : bool, optional
+        Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
+    is_random : boolean, default False
+        If True, random swirl.
+            - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])]
+            - random strength = [0, strength]
+            - random radius = [1e-10, radius]
+            - random rotation = [-rotation, rotation]
+
+    Examples
+    ---------
+    >>> x --> [row, col, 1] greyscale
+    >>> x = swirl(x, strength=4, radius=100)
+    """
+    assert radius != 0, Exception("Invalid radius value")
+    rotation = np.pi / 180 * rotation
+    if is_random:
+        center_h = int(np.random.uniform(0, x.shape[0]))
+        center_w = int(np.random.uniform(0, x.shape[1]))
+        center = (center_h, center_w)
+        strength = np.random.uniform(0, strength)
+        radius = np.random.uniform(1e-10, radius)
+        rotation = np.random.uniform(-rotation, rotation)
+
+    max_v = np.max(x)
+    if max_v > 1:   # Note: the input of this fn should be [-1, 1], rescale is required.
+        x = x / max_v
+    swirled = skimage.transform.swirl(x, center=center, strength=strength, radius=radius, rotation=rotation,
+        output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
+    if max_v > 1:
+        swirled = swirled * max_v
+    return swirled
+
+def swirl_multi(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False):
+    """Swirl multiple images with the same arguments, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``swirl``.
+    """
+    assert radius != 0, Exception("Invalid radius value")
+    rotation = np.pi / 180 * rotation
+    if is_random:
+        center_h = int(np.random.uniform(0, x[0].shape[0]))
+        center_w = int(np.random.uniform(0, x[0].shape[1]))
+        center = (center_h, center_w)
+        strength = np.random.uniform(0, strength)
+        radius = np.random.uniform(1e-10, radius)
+        rotation = np.random.uniform(-rotation, rotation)
+
+    results = []
+    for data in x:
+        max_v = np.max(data)
+        if max_v > 1:   # Note: the input of this fn should be [-1, 1], rescale is required.
+            data = data / max_v
+        swirled = skimage.transform.swirl(data, center=center, strength=strength, radius=radius, rotation=rotation,
+            output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
+        if max_v > 1:
+            swirled = swirled * max_v
+        results.append( swirled )
+    return np.asarray(results)
+
+# elastic_transform
+
+from scipy.ndimage.interpolation import map_coordinates
+from scipy.ndimage.filters import gaussian_filter
+def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False):
+    """Elastically deform one greyscale image, see `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`_ .
+
+    Two uniform random fields in [-1, 1) are smoothed with a Gaussian filter and
+    scaled by ``alpha``; they are added to the row / column coordinate grids and
+    the image is resampled at those coordinates with linear interpolation.
+
+    Parameters
+    -----------
+    x : numpy array, a greyscale image of shape [row, col] or [row, col, 1].
+    alpha : scalar factor applied to the smoothed displacement fields.
+    sigma : scalar or sequence of scalars, the smaller the sigma, the more transformation.
+        Standard deviation for the Gaussian kernel, one value per axis or a single value used for all axes.
+    mode : default constant, forwarded to `scipy.ndimage.filters.gaussian_filter <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.filters.gaussian_filter.html>`_.
+    cval : float, optional. Forwarded to the Gaussian filter, used together with mode 'constant'.
+    is_random : boolean, default False
+        When False the generator is built with ``RandomState(None)``; otherwise
+        it is seeded with the current time truncated to whole seconds.
+
+    Returns
+    --------
+    numpy array with the same shape as the input image.
+
+    Raises
+    -------
+    Exception : if ``x`` is 3-D and its last dimension is not 1.
+    AssertionError : if ``x`` is not 2-D once the channel axis has been removed.
+
+    Examples
+    ---------
+    >>> x = elastic_transform(x, alpha = x.shape[1] * 3, sigma = x.shape[1] * 0.07)
+
+    References
+    ------------
+    - `Github <https://gist.github.com/chsasank/4d8f68caf01f041a6453e67fb30f8f5a>`_.
+    - `Kaggle <https://www.kaggle.com/pscion/ultrasound-nerve-segmentation/elastic-transform-for-data-augmentation-0878921a>`_
+    """
+    seed = None if is_random is False else int(time.time())
+    rng = np.random.RandomState(seed)
+
+    # Strip a trailing single-channel axis; it is restored on the way out.
+    has_channel_axis = False
+    if len(x.shape) == 3:
+        if x.shape[-1] != 1:
+            raise Exception("Only support greyscale image")
+        x = x[:, :, 0]
+        has_channel_axis = True
+    assert len(x.shape) == 2
+
+    n_rows, n_cols = x.shape
+
+    # Row displacement is drawn first, column displacement second.
+    row_shift = gaussian_filter(rng.rand(n_rows, n_cols) * 2 - 1, sigma, mode=mode, cval=cval) * alpha
+    col_shift = gaussian_filter(rng.rand(n_rows, n_cols) * 2 - 1, sigma, mode=mode, cval=cval) * alpha
+
+    row_grid, col_grid = np.meshgrid(np.arange(n_rows), np.arange(n_cols), indexing='ij')
+    sample_at = (np.reshape(row_grid + row_shift, (-1, 1)),
+                 np.reshape(col_grid + col_shift, (-1, 1)))
+
+    warped = map_coordinates(x, sample_at, order=1)
+    out_shape = (n_rows, n_cols, 1) if has_channel_axis else (n_rows, n_cols)
+    return warped.reshape(out_shape)
+
+def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False):
+    """Apply the same elastic deformation to several greyscale images, see `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`_.
+
+    The displacement fields are generated once from the shape of the first
+    image and reused for every image, so paired inputs (e.g. an image and its
+    segmentation mask) stay aligned.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        Greyscale images of shape [row, col] or [row, col, 1], all sharing the
+        row / col size of the first image.
+    others : see ``elastic_transform``.
+
+    Returns
+    --------
+    numpy array stacking the deformed images in input order.
+
+    Raises
+    -------
+    Exception : if an image is 3-D and its last dimension is not 1.
+    AssertionError : if an image is not 2-D once the channel axis has been removed.
+    """
+    if is_random is False:
+        random_state = np.random.RandomState(None)
+    else:
+        random_state = np.random.RandomState(int(time.time()))
+
+    shape = x[0].shape
+    if len(shape) == 3:
+        shape = (shape[0], shape[1])
+
+    # Row and column displacements must come from two independent random
+    # fields (as in ``elastic_transform``); reusing one field would force
+    # dx == dy and restrict the warp to the diagonal direction.
+    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
+    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
+
+    # The sampling coordinates do not depend on the image, so build them once.
+    x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
+    indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1))
+
+    results = []
+    for data in x:
+        is_3d = False
+        if len(data.shape) == 3 and data.shape[-1] == 1:
+            data = data[:, :, 0]
+            is_3d = True
+        elif len(data.shape) == 3 and data.shape[-1] != 1:
+            raise Exception("Only support greyscale image")
+        assert len(data.shape) == 2
+
+        warped = map_coordinates(data, indices, order=1)
+        if is_3d:
+            results.append(warped.reshape((shape[0], shape[1], 1)))
+        else:
+            results.append(warped.reshape(shape))
+    return np.asarray(results)
+
+# zoom
+def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2,
+                fill_mode='nearest', cval=0.):
+    """Zoom a single image in or out around its centre, with fixed or random factors.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    zoom_range : list or tuple of two numbers
+        - If is_random=False, the two values are used directly as the zoom factors of the transform matrix; a factor smaller than one zooms in.
+        - If is_random=True, they are the lower and upper bound from which two independent factors are drawn uniformly.
+        e.g (0.5, 1) zoom in 1~2 times.
+    is_random : boolean, default False
+        If True, randomly zoom.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default 'nearest', more options 'constant', 'reflect' or 'wrap'.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+
+    Returns
+    --------
+    The transformed image as returned by ``apply_transform``.
+
+    Raises
+    -------
+    Exception : if ``zoom_range`` does not contain exactly two values.
+    """
+    if len(zoom_range) != 2:
+        raise Exception('zoom_range should be a tuple or list of two floats. '
+                        'Received arg: ', zoom_range)
+
+    if not is_random:
+        zx, zy = zoom_range
+    elif zoom_range[0] == 1 and zoom_range[1] == 1:
+        # Degenerate random range: nothing to sample.
+        zx, zy = 1, 1
+        print(" random_zoom : not zoom in/out")
+    else:
+        zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
+
+    # Homogeneous scaling matrix; it is re-centred on the image below.
+    scale_matrix = np.diag([zx, zy, 1])
+
+    height = x.shape[row_index]
+    width = x.shape[col_index]
+    centred_matrix = transform_matrix_offset_center(scale_matrix, height, width)
+    return apply_transform(x, centred_matrix, channel_index, fill_mode, cval)
+
+def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False,
+        row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.):
+    """Zoom several images in or out with one shared transform, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    The zoom factors are chosen once and the resulting matrix, centred using
+    the row / col size of the first image, is applied to every image.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``zoom``.
+
+    Returns
+    --------
+    numpy array stacking the transformed images in input order.
+
+    Raises
+    -------
+    Exception : if ``zoom_range`` does not contain exactly two values.
+    """
+    if len(zoom_range) != 2:
+        raise Exception('zoom_range should be a tuple or list of two floats. '
+                        'Received arg: ', zoom_range)
+
+    if not is_random:
+        zx, zy = zoom_range
+    elif zoom_range[0] == 1 and zoom_range[1] == 1:
+        # Degenerate random range: nothing to sample.
+        zx, zy = 1, 1
+        print(" random_zoom : not zoom in/out")
+    else:
+        zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
+
+    scale_matrix = np.diag([zx, zy, 1])
+
+    first = x[0]
+    centred_matrix = transform_matrix_offset_center(
+        scale_matrix, first.shape[row_index], first.shape[col_index])
+
+    zoomed = [apply_transform(image, centred_matrix, channel_index, fill_mode, cval)
+              for image in x]
+    return np.asarray(zoomed)
+
+# image = tf.image.random_brightness(image, max_delta=32. / 255.)
+# image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
+# image = tf.image.random_hue(image, max_delta=0.032)
+# image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
+
+# brightness
+def brightness(x, gamma=1, gain=1, is_random=False):
+    """Adjust the brightness of a single image through gamma correction.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    gamma : float, small than 1 means brighter.
+        Non negative real number. Default value is 1.
+
+        - If is_random is True, the effective gamma is drawn uniformly from (1-gamma, 1+gamma).
+    gain : float
+        The constant multiplier. Default value is 1.
+    is_random : boolean, default False
+        - If True, randomly change brightness.
+
+    Returns
+    --------
+    The image returned by ``exposure.adjust_gamma``.
+
+    References
+    -----------
+    - `skimage.exposure.adjust_gamma <http://scikit-image.org/docs/dev/api/skimage.exposure.html>`_
+    - `chinese blog <http://www.cnblogs.com/denny402/p/5124402.html>`_
+    """
+    if is_random:
+        lower, upper = 1-gamma, 1+gamma
+        gamma = np.random.uniform(lower, upper)
+    return exposure.adjust_gamma(x, gamma, gain)
+
+def brightness_multi(x, gamma=1, gain=1, is_random=False):
+    """Adjust the brightness of multiple images with one shared gamma, randomly or non-randomly.
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    When ``is_random`` is True a single gamma is drawn from (1-gamma, 1+gamma)
+    and used for every image.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``brightness``.
+
+    Returns
+    --------
+    numpy array stacking the adjusted images in input order.
+    """
+    if is_random:
+        lower, upper = 1-gamma, 1+gamma
+        gamma = np.random.uniform(lower, upper)
+
+    adjusted = [exposure.adjust_gamma(image, gamma, gain) for image in x]
+    return np.asarray(adjusted)
+
+
+# contrast
+def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False):
+    """Apply ``exposure.adjust_sigmoid`` to an image (unfinished contrast helper).
+
+    Parameters
+    -----------
+    x : image passed to ``exposure.adjust_sigmoid``.
+    cutoff, gain, inv : forwarded unchanged to ``exposure.adjust_sigmoid``.
+    is_random : accepted but currently unused.
+
+    Returns
+    --------
+    The result of ``exposure.adjust_sigmoid``.
+    """
+    # TODO: random variant is not implemented yet, ``is_random`` has no effect.
+    return exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv)
+
+def constant_multi():
+    """Placeholder for the multi-image variant of ``constant``; does nothing and returns None."""
+    # TODO: not implemented yet.
+    return None
+
+# resize
+def imresize(x, size=[100, 100], interp='bilinear', mode=None):
+    """Resize an image to the requested size with ``scipy.misc.imresize``. Warning, this
+    function will rescale the value to [0, 255].
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default); the last axis must have size 1 or 3.
+    size : int, float or tuple (h, w)
+        - int, Percentage of current size.
+        - float, Fraction of current size.
+        - tuple, Size of the output image.
+    interp : str, optional
+        Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', 'bicubic' or 'cubic').
+    mode : str, optional
+        The PIL image mode ('P', 'L', etc.) to convert arr before resizing.
+
+    Returns
+    --------
+    imresize : ndarray
+    The resized array of image. Single-channel inputs keep their trailing channel axis.
+
+    Raises
+    -------
+    Exception : if the last axis of ``x`` has a size other than 1 or 3.
+
+    References
+    ------------
+    - `scipy.misc.imresize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.imresize.html>`_
+    """
+    n_channels = x.shape[-1]
+
+    if n_channels == 1:
+        # greyscale: resize the 2-D plane, then put the channel axis back
+        plane = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode)
+        return plane[:, :, np.newaxis]
+
+    if n_channels == 3:
+        # rgb, bgr ..
+        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
+
+    raise Exception("Unsupported channel %d" % n_channels)
+
+# normalization
+def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False,
+            channel_index=2, epsilon=1e-7):
+    """Normalize an image by rescale, samplewise centering and samplewise std normalization, in that order.
+
+    The input array is never modified; a new array is returned whenever any
+    step is applied.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    rescale : rescaling factor.
+            If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation)
+    samplewise_center : set each sample mean to 0.
+    samplewise_std_normalization : divide each input by its std.
+    channel_index : int, index of the channel axis, default 2.
+    epsilon : small positive value added to the standard deviation before dividing.
+
+    Returns
+    --------
+    The normalized image.
+
+    Raises
+    -------
+    Exception : if the channel axis has a size other than 1 or 3.
+
+    Examples
+    --------
+    >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True)
+    >>> print(x.shape, np.mean(x), np.std(x))
+    ... (160, 176, 1), 0.0, 1.0
+
+    Notes
+    ------
+    When samplewise_center and samplewise_std_normalization are True.
+
+    - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image.
+    - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1.
+    """
+    if rescale:
+        # Out-of-place multiply: keeps the caller's array intact and also works
+        # for integer images combined with a fractional factor.
+        x = x * rescale
+
+    n_channels = x.shape[channel_index]
+    if n_channels == 1:
+        # greyscale: statistics over the whole image
+        if samplewise_center:
+            x = x - np.mean(x)
+        if samplewise_std_normalization:
+            # epsilon avoids a division by zero on constant images,
+            # consistent with the RGB branch below
+            x = x / (np.std(x) + epsilon)
+        return x
+    elif n_channels == 3:
+        # rgb: statistics across the channel axis of each pixel
+        if samplewise_center:
+            x = x - np.mean(x, axis=channel_index, keepdims=True)
+        if samplewise_std_normalization:
+            x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon)
+        return x
+    else:
+        raise Exception("Unsupported channels %d" % n_channels)
+
+def featurewise_norm(x, mean=None, std=None, epsilon=1e-7):
+    """Normalize every pixels by the same given mean and std, which are usually
+    compute from all examples.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    mean : scalar or array broadcastable to ``x``, value for subtraction. Skipped when None.
+    std : scalar or array broadcastable to ``x``, value for division. Skipped when None.
+    epsilon : small positive value added to ``std`` before dividing.
+
+    Returns
+    --------
+    The normalized image (``x`` itself when neither ``mean`` nor ``std`` is given).
+    """
+    # Compare against None explicitly: truth-testing a numpy array of
+    # statistics raises "truth value of an array is ambiguous".
+    if mean is not None:
+        x = x - mean
+    if std is not None:
+        x = x / (std + epsilon)
+    return x
+
+# whitening
+def get_zca_whitening_principal_components_img(X):
+    """Compute the ZCA whitening matrix of a batch of images.
+
+    Each image is flattened to a row vector, the matrix ``flatX.T . flatX / n_example``
+    is decomposed with SVD, and ``U . diag(1 / sqrt(S + 10e-7)) . U.T`` is returned.
+    Progress messages are printed before each stage.
+
+    Parameters
+    -----------
+    X : numpy array
+        Batch of image with dimension of [n_example, row, col, channel] (default).
+
+    Returns
+    --------
+    Square numpy array of side ``row * col * channel``.
+    """
+    n_examples = X.shape[0]
+    n_features = X.shape[1] * X.shape[2] * X.shape[3]
+    flat = np.reshape(X, (n_examples, n_features))
+
+    print("zca : computing sigma ..")
+    sigma = np.dot(flat.T, flat) / flat.shape[0]
+
+    print("zca : computing U, S and V ..")
+    U, S, _ = linalg.svd(sigma)
+
+    print("zca : computing principal components ..")
+    inv_sqrt_s = np.diag(1. / np.sqrt(S + 10e-7))
+    return np.dot(np.dot(U, inv_sqrt_s), U.T)
+
+def zca_whitening(x, principal_components):
+    """Apply ZCA whitening on an image by given principal components matrix.
+
+    The image is flattened to a vector, multiplied by ``principal_components``
+    and reshaped back to the shape of the input.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    principal_components : matrix from ``get_zca_whitening_principal_components_img``,
+        of shape (x.size, x.size), e.g. (28160, 28160) for a (160, 176, 1) image.
+
+    Returns
+    --------
+    The whitened image, with the same shape as ``x``.
+    """
+    # Flatten to a vector so it lines up with the rows of the whitening matrix.
+    flatx = np.reshape(x, (x.size,))
+    whitex = np.dot(flatx, principal_components)
+    return np.reshape(whitex, x.shape)
+
+# developing
+# def barrel_transform(x, intensity):
+#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
+#     # TODO
+#     pass
+#
+# def barrel_transform_multi(x, intensity):
+#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
+#     # TODO
+#     pass
+
+# channel shift
+def channel_shift(x, intensity, is_random=False, channel_index=2):
+    """Add an offset to every channel of an image and clip to the original value range, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`_.
+
+    The same offset is used for all channels; results are clipped to the
+    global minimum and maximum of the input image.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    intensity : float
+        Intensity of shifting. Used directly as the offset, or as the bound of
+        the uniform range (-intensity, intensity) when ``is_random`` is True.
+    is_random : boolean, default False
+        If True, randomly shift.
+    channel_index : int
+        Index of channel, default 2.
+
+    Returns
+    --------
+    The shifted image, with the same axis layout as the input.
+    """
+    offset = np.random.uniform(-intensity, intensity) if is_random else intensity
+
+    # Bring the channel axis to the front so each channel can be iterated.
+    channels_first = np.rollaxis(x, channel_index, 0)
+    lowest, highest = np.min(channels_first), np.max(channels_first)
+
+    shifted = []
+    for plane in channels_first:
+        shifted.append(np.clip(plane + offset, lowest, highest))
+
+    stacked = np.stack(shifted, axis=0)
+    # Move the channel axis back to its original position.
+    return np.rollaxis(stacked, 0, channel_index+1)
+
+def channel_shift_multi(x, intensity, channel_index=2, is_random=False):
+    """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`_ .
+    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
+
+    One offset is chosen for the whole call and added to every channel of
+    every image; each image is clipped to its own minimum / maximum.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    intensity : float
+        Intensity of shifting. Used directly as the offset, or as the bound of
+        the uniform range (-intensity, intensity) when ``is_random`` is True.
+    channel_index : int
+        Index of channel within each image, default 2.
+    is_random : boolean, default False
+        If True, randomly shift. Placed last so existing positional calls keep
+        their meaning.
+
+    Returns
+    --------
+    numpy array stacking the shifted images in input order.
+    """
+    if is_random:
+        factor = np.random.uniform(-intensity, intensity)
+    else:
+        factor = intensity
+
+    results = []
+    for data in x:
+        # Channel axis first so the image can be iterated channel by channel.
+        data = np.rollaxis(data, channel_index, 0)
+        min_x, max_x = np.min(data), np.max(data)
+        channel_images = [np.clip(x_channel + factor, min_x, max_x)
+                          for x_channel in data]
+        data = np.stack(channel_images, axis=0)
+        # Restore the original axis layout of this image.
+        data = np.rollaxis(data, 0, channel_index+1)
+        results.append(data)
+    return np.asarray(results)
+
+# noise
+def drop(x, keep=0.5):
+    """Randomly set some pixels to zero by a given keeping probability.
+
+    For 3-channel images one mask is drawn per pixel and shared by the three
+    channels; for every other supported shape one value is drawn per element.
+    The input array is not modified.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] or [row, col], or a vector.
+    keep : float (0, 1)
+        The keeping probability, the lower more values will be set to zero.
+
+    Returns
+    --------
+    numpy array of the same shape as ``x`` with the dropped entries set to zero.
+
+    Raises
+    -------
+    Exception : if ``x`` is 3-D with a last axis other than 1 or 3, or has more than 3 dimensions.
+    """
+    n_dims = len(x.shape)
+    if n_dims == 3:
+        if x.shape[-1] == 3:  # color
+            mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1])
+            # Broadcast the per-pixel mask over the channels. Multiplying
+            # out-of-place leaves the caller's array untouched; casting the
+            # mask keeps the dtype of the image.
+            x = np.multiply(x, mask[:, :, np.newaxis].astype(x.dtype))
+        elif x.shape[-1] == 1:  # greyscale image
+            x = np.multiply(x, np.random.binomial(n=1, p=keep, size=x.shape))
+        else:
+            raise Exception("Unsupported shape {}".format(x.shape))
+    elif n_dims in (1, 2):  # greyscale matrix (image) or vector
+        x = np.multiply(x, np.random.binomial(n=1, p=keep, size=x.shape))
+    else:
+        raise Exception("Unsupported shape {}".format(x.shape))
+    return x
+
+# x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]])
+# x = np.asarray([x,x,x,x,x,x])
+# x.shape = 10, 4, 3
+# # print(x)
+# # exit()
+# print(x.shape)
+# # exit()
+# print(drop(x, keep=1.))
+# exit()
+
+# manual transform
+def transform_matrix_offset_center(matrix, x, y):
+    """Re-centre a 3x3 homogeneous transform on the point (x/2 + 0.5, y/2 + 0.5).
+
+    The result is ``offset . matrix . reset`` where ``offset`` translates by
+    that point and ``reset`` translates by its negative.
+
+    Parameters
+    ----------
+    matrix : numpy array
+        Transform matrix
+    x, y : int
+        Size of image.
+
+    Returns
+    --------
+    The re-centred 3x3 transform matrix.
+
+    Examples
+    --------
+    - See ``rotation``, ``shear``, ``zoom``.
+    """
+    centre = np.array([float(x) / 2 + 0.5, float(y) / 2 + 0.5])
+
+    to_centre = np.eye(3)
+    to_centre[:2, 2] = centre
+
+    from_centre = np.eye(3)
+    from_centre[:2, 2] = -centre
+
+    return to_centre.dot(matrix).dot(from_centre)
+
+
+def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0.):
+    """Apply an affine transform matrix to every channel of an image.
+
+    The upper-left 2x2 part of ``transform_matrix`` is used as the affine matrix
+    and the first two entries of its last column as the offset; each channel is
+    resampled with nearest-neighbour interpolation (``order=0``).
+
+    Parameters
+    ----------
+    x : numpy array
+        An image with 3 dimensions, e.g. [row, col, channel] (default).
+    transform_matrix : numpy array
+        Transform matrix (offset center), can be generated by ``transform_matrix_offset_center``
+    channel_index : int
+        Index of channel, default 2.
+    fill_mode : string
+        Method to fill missing pixel, default 'nearest', more options 'constant', 'reflect' or 'wrap'
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
+
+        - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`_
+
+    Returns
+    --------
+    The transformed image, with the same axis layout as the input.
+
+    Examples
+    --------
+    - See ``rotation``, ``shift``, ``shear``, ``zoom``.
+    """
+    affine_part = transform_matrix[:2, :2]
+    offset_part = transform_matrix[:2, 2]
+
+    # Channel axis first so the image can be processed one channel at a time.
+    channels_first = np.rollaxis(x, channel_index, 0)
+    warped = []
+    for plane in channels_first:
+        warped.append(ndi.interpolation.affine_transform(
+            plane, affine_part, offset_part, order=0, mode=fill_mode, cval=cval))
+
+    stacked = np.stack(warped, axis=0)
+    # Put the channel axis back where it was.
+    return np.rollaxis(stacked, 0, channel_index+1)
+
+
+def projective_transform_by_points(x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False):
+    """Projective transform by given coordinates, usually 4 coordinates. see `scikit-image <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`_.
+
+    A ``ProjectiveTransform`` is estimated from ``dst`` to ``src`` and used as
+    the inverse map of ``transform.warp``. Images whose maximum value is
+    greater than 1 are divided by 255 first.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    src : list or numpy
+        The original coordinates, usually 4 coordinates of (x, y).
+    dst : list or numpy
+        The coordinates after transformation, the number of coordinates is the same with src.
+    map_args : dict, optional
+        Keyword arguments passed to inverse_map. ``None`` (default) means no extra arguments.
+    output_shape : tuple (rows, cols), optional
+        Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified.
+    order : int, optional
+        The order of interpolation. The order has to be in the range 0-5:
+
+        - 0 Nearest-neighbor
+        - 1 Bi-linear (default)
+        - 2 Bi-quadratic
+        - 3 Bi-cubic
+        - 4 Bi-quartic
+        - 5 Bi-quintic
+    mode : {'constant', 'edge', 'symmetric', 'reflect', 'wrap'}, optional
+        Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad.
+    cval : float, optional
+        Used in conjunction with mode 'constant', the value outside the image boundaries.
+    clip : bool, optional
+        Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
+    preserve_range : bool, optional
+        Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
+
+    Returns
+    --------
+    The warped image returned by ``transform.warp``.
+
+    Examples
+    --------
+    >>> Assume X is an image from CIFAR 10, i.e. shape == (32, 32, 3)
+    >>> src = [[0,0],[0,32],[32,0],[32,32]]
+    >>> dst = [[10,10],[0,32],[32,0],[32,32]]
+    >>> x = projective_transform_by_points(X, src, dst)
+
+    References
+    -----------
+    - `scikit-image : geometric transformations <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`_
+    - `scikit-image : examples <http://scikit-image.org/docs/dev/auto_examples/index.html>`_
+    """
+    # A fresh dict per call avoids sharing one mutable default between calls.
+    if map_args is None:
+        map_args = {}
+    # Accept lists, tuples or arrays of coordinates alike.
+    src = np.asarray(src)
+    dst = np.asarray(dst)
+    if np.max(x) > 1:         # convert to [0, 1]
+        x = x/255
+
+    m = transform.ProjectiveTransform()
+    # Estimated from dst to src because warp expects the inverse mapping.
+    m.estimate(dst, src)
+    warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range)
+    return warped
+
+# Numpy and PIL
+def array_to_img(x, dim_ordering=(0,1,2), scale=True):
+    """Converts a numpy array to PIL image object (uint8 format).
+
+    The input array is left untouched; scaling is done on a private floating
+    point copy.
+
+    Parameters
+    ----------
+    x : numpy array
+        A image with dimension of 3 and channels of 1 or 3.
+    dim_ordering : list or tuple of 3 int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    scale : boolean, default is True
+        If True, converts image to [0, 255] from any range of value like [-1, 2].
+
+    Returns
+    --------
+    A PIL ``Image`` in mode 'RGB' (3 channels) or 'L' (1 channel).
+
+    Raises
+    -------
+    Exception : if the channel axis has a size other than 1 or 3.
+
+    References
+    -----------
+    - `PIL Image.fromarray <http://pillow.readthedocs.io/en/3.1.x/reference/Image.html?highlight=fromarray>`_
+    """
+    from PIL import Image
+    x = x.transpose(dim_ordering)
+    if scale:
+        # transpose() returns a view, so in-place arithmetic would write into
+        # the caller's array. Work on a float copy instead; this also avoids
+        # wrap-around when negating the minimum of an unsigned integer image.
+        if np.issubdtype(x.dtype, np.floating):
+            x = x.copy()
+        else:
+            x = x.astype('float64')
+        x += max(-np.min(x), 0)   # shift so the minimum is not negative
+        x_max = np.max(x)
+        if x_max != 0:
+            x /= x_max
+        x *= 255
+    if x.shape[2] == 3:
+        # RGB
+        return Image.fromarray(x.astype('uint8'), 'RGB')
+    elif x.shape[2] == 1:
+        # grayscale
+        return Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
+    else:
+        raise Exception('Unsupported channel number: ', x.shape[2])
+
+
+
+
+def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low'):
+    """ Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays
+    see `skimage.measure.find_contours <http://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.find_contours>`_ .
+
+    Parameters
+    ------------
+    x : 2D ndarray of double. Input data in which to find contours.
+    level : float. Value along which to find contours in the array.
+    fully_connected : str, {'low', 'high'}.  Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.)
+    positive_orientation : either 'low' or 'high'. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If 'low' then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour.
+
+    Returns
+    --------
+    The list of contours returned by ``skimage.measure.find_contours``.
+    """
+    # Forward the caller's choices instead of hard-coded 'low' values.
+    return skimage.measure.find_contours(x, level, fully_connected=fully_connected,
+                                         positive_orientation=positive_orientation)
+
+def pt2map(list_points=None, size=(100, 100), val=1):
+    """ Inputs groups of points, return a 2D image with those points set to ``val``.
+
+    Parameters
+    --------------
+    list_points : sequence of point groups, each group being a sequence of
+        two-element points (for instance a list of contours). The first
+        coordinate indexes the first axis of the output and the second
+        coordinate the second axis; both are rounded to the nearest integer.
+        ``None`` or an empty sequence yields an all-zero map.
+    size : tuple giving the shape of the output array.
+    val : float or int for the contour value.
+
+    Returns
+    --------
+    2D numpy array of shape ``size``.
+    """
+    i_m = np.zeros(size)
+    # len() works for lists and arrays alike, unlike comparing against [].
+    if list_points is None or len(list_points) == 0:
+        return i_m
+    for points in list_points:
+        for point in points:
+            i_m[int(np.round(point[0]))][int(np.round(point[1]))] = val
+    return i_m
+
+def binary_dilation(x, radius=3):
+    """ Return fast binary morphological dilation of an image.
+    see `skimage.morphology.binary_dilation <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.binary_dilation>`_.
+
+    Parameters
+    -----------
+    x : 2D array image.
+    radius : int for the radius of mask.
+
+    Returns
+    --------
+    The dilated image returned by ``skimage.morphology.binary_dilation``.
+    """
+    # Aliased so the library function is not confused with this wrapper.
+    from skimage.morphology import disk, binary_dilation as _binary_dilation
+    mask = disk(radius)
+    # The structuring element is passed positionally because its keyword name
+    # differs between scikit-image releases (selem / footprint).
+    return _binary_dilation(x, mask)
+
+def dilation(x, radius=3):
+    """ Return greyscale morphological dilation of an image,
+    see `skimage.morphology.dilation <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.dilation>`_.
+
+    Parameters
+    -----------
+    x : 2D array image.
+    radius : int for the radius of mask.
+
+    Returns
+    --------
+    The dilated image returned by ``skimage.morphology.dilation``.
+    """
+    # Aliased so the library function is not confused with this wrapper.
+    from skimage.morphology import disk, dilation as _dilation
+    mask = disk(radius)
+    # The structuring element is passed positionally because its keyword name
+    # differs between scikit-image releases (selem / footprint).
+    return _dilation(x, mask)
+
+
+
+
+## Sequence
+def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.):
+    """Pad (and optionally truncate) sequences so they all share one length.
+
+    Without ``maxlen`` every sequence is padded to the length of the longest
+    one. With ``maxlen`` longer sequences are cut down to ``maxlen``, from the
+    beginning (``truncating='pre'``, default) or from the end
+    (``truncating='post'``). Padding is appended (``padding='post'``, default)
+    or prepended (``padding='pre'``). Empty sequences become rows of ``value``.
+
+    Parameters
+    ----------
+    sequences : list of lists where each element is a sequence
+    maxlen : int, maximum length
+    dtype : type to cast the resulting sequence.
+    padding : 'pre' or 'post', pad either before or after each sequence.
+    truncating : 'pre' or 'post', remove values from sequences larger than
+        maxlen either in the beginning or in the end of the sequence
+    value : float, value to pad the sequences to the desired value.
+
+    Returns
+    ----------
+    x : numpy array with dimensions (number_of_sequences, maxlen)
+
+    Raises
+    -------
+    ValueError : on an unknown ``padding`` / ``truncating`` value, or when a
+        sequence's per-step shape differs from that of the first non-empty sequence.
+
+    Examples
+    ----------
+    >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]]
+    >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32',
+    ...                  padding='post', truncating='pre', value=0.)
+    ... [[1 1 1 1 1]
+    ...  [2 2 2 0 0]
+    ...  [3 3 0 0 0]]
+    """
+    seq_lengths = [len(seq) for seq in sequences]
+    n_sequences = len(sequences)
+    if maxlen is None:
+        maxlen = np.max(seq_lengths)
+
+    # The per-step shape is taken from the first non-empty sequence; every
+    # other sequence is checked against it in the loop below.
+    first_non_empty = next((seq for seq in sequences if len(seq) > 0), None)
+    if first_non_empty is None:
+        step_shape = tuple()
+    else:
+        step_shape = np.asarray(first_non_empty).shape[1:]
+
+    padded = (np.ones((n_sequences, maxlen) + step_shape) * value).astype(dtype)
+    for row, seq in enumerate(sequences):
+        if len(seq) == 0:
+            continue  # nothing to copy, the row stays fully padded
+
+        if truncating == 'pre':
+            kept = seq[-maxlen:]
+        elif truncating == 'post':
+            kept = seq[:maxlen]
+        else:
+            raise ValueError('Truncating type "%s" not understood' % truncating)
+
+        kept = np.asarray(kept, dtype=dtype)
+        if kept.shape[1:] != step_shape:
+            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
+                             (kept.shape[1:], row, step_shape))
+
+        n_kept = len(kept)
+        if padding == 'post':
+            padded[row, :n_kept] = kept
+        elif padding == 'pre':
+            padded[row, -n_kept:] = kept
+        else:
+            raise ValueError('Padding type "%s" not understood' % padding)
+    return padded
+
+def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False):
+    """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch.
+
+    The sequences are modified in place and the same container is returned.
+    A sequence that contains no END token is left untouched and counts with
+    its full length when the shortened length is computed, so none of its
+    tokens are cut off.
+
+    Parameters
+    -----------
+    sequences : numpy array or list of list with token IDs.
+        e.g. [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
+    end_id : int, the special token for END.
+    pad_val : int, replace the end_id and the ids after end_id to this value.
+    is_shorten : boolean, default True.
+        Shorten the sequences.
+    remain_end_id : boolean, default False.
+        Keep an end_id in the end.
+
+    Returns
+    --------
+    ``sequences``, after padding and optional shortening.
+
+    Examples
+    ---------
+    >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2],  <-- end_id is 2
+    ...                  [5, 3, 9, 4, 9, 2, 2, 3]]  <-- end_id is 2
+    >>> sentences_ids = process_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True)
+    ... [[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]]
+    """
+    max_length = 0
+    for seq in sequences:
+        end_pos = None
+        for i_w, n in enumerate(seq):
+            if end_pos is not None:
+                # everything after the first END token becomes padding
+                seq[i_w] = pad_val
+            elif n == end_id:  # 1st time to see end_id
+                end_pos = i_w
+                if remain_end_id is False:
+                    seq[i_w] = pad_val      # set end_id to pad_val
+
+        if end_pos is None:
+            # no END token: the whole sequence is content and must be kept
+            needed = len(seq)
+        elif remain_end_id is True:
+            needed = end_pos + 1            # keep the END token itself
+        else:
+            needed = end_pos
+        if max_length < needed:
+            max_length = needed
+
+    if is_shorten:
+        for i, seq in enumerate(sequences):
+            sequences[i] = seq[:max_length]
+    return sequences
+
+def sequences_add_start_id(sequences, start_id=0, remove_last=False):
+    """Prepend a start token(id) to each sequence, optionally dropping the last token.
+
+    New lists are built; the input sequences are not modified.
+
+    Parameters
+    -----------
+    sequences : list of list with token IDs.
+    start_id : the token placed at the beginning of every sequence.
+    remove_last : boolean, default False.
+        If True, the last element of every sequence is left out.
+
+    Returns
+    --------
+    list of list, one output sequence per input sequence.
+
+    Examples
+    ---------
+    >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
+    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2)
+    ... [[2, 4, 3, 5, 3, 2, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2, 3]]
+    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True)
+    ... [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]]
+
+    - For Seq2seq
+    >>> input = [a, b, c]
+    >>> target = [x, y, z]
+    >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True)
+    """
+    with_start = []
+    for position in range(len(sequences)):
+        tokens = sequences[position]
+        body = tokens[:-1] if remove_last else tokens
+        with_start.append([start_id] + body)
+    return with_start
+
+def sequences_get_mask(sequences, pad_val=0):
+    """Build a 0/1 mask that zeroes the trailing padding of every sequence.
+
+    Each sequence is scanned from its end; scanning stops at the first item
+    that differs from ``pad_val``. Only trailing padding is therefore masked:
+    an item equal to ``pad_val`` in the middle of a sequence keeps the value 1.
+
+    Parameters
+    ----------
+    sequences : list of list of int, or numpy array
+        The (padded) sequences. The mask is indexed in 2D, so the sequences
+        are presumably all of the same length.
+    pad_val : int
+        The padding value.
+
+    Returns
+    --------
+    A numpy array created by ``np.ones_like(sequences)`` with the trailing
+    padding positions set to 0.
+
+    Examples
+    ---------
+    >>> sentences_ids = [[4, 0, 5, 3, 0, 0],
+    ...                  [5, 3, 9, 4, 9, 0]]
+    >>> mask = sequences_get_mask(sentences_ids, pad_val=0)
+    ... [[1 1 1 1 0 0]
+    ...  [1 1 1 1 1 0]]
+    """
+    mask = np.ones_like(sequences)
+    for row, seq in enumerate(sequences):
+        col = len(seq) - 1
+        # Walk backwards while we are still inside the trailing padding.
+        while col >= 0 and seq[col] == pad_val:
+            mask[row, col] = 0
+            col -= 1
+    return mask
+
+
+## Text
+# see tensorlayer.nlp
+
+
+## Tensor Opt
+def distorted_images(images=None, height=24, width=24):
+    """Distort images for generating more training data (deprecated).
+
+    Features
+    ---------
+    They are cropped to height * width pixels randomly.
+
+    They are approximately whitened to make the model insensitive to dynamic range.
+
+    Randomly flip the image from left to right.
+
+    Randomly distort the image brightness.
+
+    Randomly distort the image contrast.
+
+    Whiten (Normalize) the images.
+
+    Parameters
+    ----------
+    images : 4D Tensor
+        The tensor or placeholder of images
+    height : int
+        The height for random crop.
+    width : int
+        The width for random crop.
+
+    Returns
+    -------
+    result : tuple of Tensor
+        (Tensor for distorted images, Tensor for while loop index)
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> sess = tf.InteractiveSession()
+    >>> batch_size = 128
+    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
+    >>> distorted_images_op = tl.preprocess.distorted_images(images=x, height=24, width=24)
+    >>> sess.run(tf.initialize_all_variables())
+    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
+    >>> distorted_images, idx = sess.run(distorted_images_op, feed_dict=feed_dict)
+    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    >>> tl.visualize.images2d(distorted_images[1:10,:,:,:], second=10, saveable=False, name='distorted_images', dtype=None, fig_idx=23012)
+
+    Notes
+    ------
+    - The first image in 'distorted_images' should be removed.
+    - In its current form the function prints a deprecation message and then
+      calls ``exit()``, so the graph-building code below that call never runs.
+
+    References
+    -----------
+    - `tensorflow.models.image.cifar10.cifar10_input <https://github.com/tensorflow/tensorflow/blob/r0.9/tensorflow/models/image/cifar10/cifar10_input.py>`_
+    """
+    # Deprecation notice: point the caller at the tf.map_fn based replacement.
+    print("This function is deprecated, please use tf.map_fn instead, e.g:\n   \
+            t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. / 255.), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)")
+    # exit() raises SystemExit: everything after this line is unreachable and
+    # is kept only as a record of the former implementation.
+    exit()
+    # print(" [Warning] distorted_images will be deprecated due to speed, see TFRecord tutorial for more info...")
+    # The loop bound is the static batch dimension of `images`.
+    try:
+        batch_size = int(images._shape[0])
+    except:
+        raise Exception('unknow batch_size of images')
+    # Placeholder first element of the output batch; every processed image is
+    # concatenated after it, which is why the first output image must be dropped.
+    distorted_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
+    i = tf.Variable(tf.constant(0))
+
+    # Loop condition: continue until every image index has been processed.
+    c = lambda distorted_x, i: tf.less(i, batch_size)
+
+    def body(distorted_x, i):
+        # 1. Randomly crop a [height, width] section of the image.
+        image = tf.random_crop(tf.gather(images, i), [height, width, 3])
+        # 2. Randomly flip the image horizontally.
+        image = tf.image.random_flip_left_right(image)
+        # 3. Randomly change brightness.
+        image = tf.image.random_brightness(image, max_delta=63)
+        # 4. Randomly change contrast.
+        image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
+        # 5. Subtract off the mean and divide by the variance of the pixels.
+        image = tf.image.per_image_whitening(image)
+        # 6. Append the image to a batch.
+        image = tf.expand_dims(image, 0)
+        # NOTE(review): axis-first tf.concat looks like the pre-1.0 TensorFlow
+        # signature - confirm before reviving this code path.
+        return tf.concat(0, [distorted_x, image]), tf.add(i, 1)
+
+    result = tf.while_loop(cond=c, body=body, loop_vars=(distorted_x, i), parallel_iterations=16)
+    return result
+
+
+def crop_central_whiten_images(images=None, height=24, width=24):
+    """Crop the central part of each image and normalize it, for test data (deprecated).
+
+    They are cropped to central of height * width pixels.
+
+    Whiten (Normalize) the images.
+
+    Parameters
+    ----------
+    images : 4D Tensor
+        The tensor or placeholder of images
+    height : int
+        The height for central crop.
+    width : int
+        The width for central crop.
+
+    Returns
+    -------
+    result : tuple Tensor
+        (Tensor for distorted images, Tensor for while loop index)
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> sess = tf.InteractiveSession()
+    >>> batch_size = 128
+    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
+    >>> central_images_op = tl.preprocess.crop_central_whiten_images(images=x, height=24, width=24)
+    >>> sess.run(tf.initialize_all_variables())
+    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
+    >>> central_images, idx = sess.run(central_images_op, feed_dict=feed_dict)
+    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    >>> tl.visualize.images2d(central_images[1:10,:,:,:], second=10, saveable=False, name='central_images', dtype=None, fig_idx=23012)
+
+    Notes
+    ------
+    The first image in 'central_images' should be removed.
+
+    In its current form the function prints a deprecation message and then
+    calls ``exit()``, so the graph-building code below that call never runs.
+
+    Code References
+    ----------------
+    - ``tensorflow.models.image.cifar10.cifar10_input``
+    """
+    # Deprecation notice. NOTE(review): the suggested snippet lists random
+    # brightness/contrast/saturation/hue ops, not a central crop - it looks
+    # copied from distorted_images; confirm the intended replacement.
+    print("This function is deprecated, please use tf.map_fn instead, e.g:\n   \
+            t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. / 255.), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \
+            t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)")
+    # exit() raises SystemExit: everything after this line is unreachable and
+    # is kept only as a record of the former implementation.
+    exit()
+    # print(" [Warning] crop_central_whiten_images will be deprecated due to speed, see TFRecord tutorial for more info...")
+    # The loop bound is the static batch dimension of `images`.
+    try:
+        batch_size = int(images._shape[0])
+    except:
+        raise Exception('unknow batch_size of images')
+    # Placeholder first element of the output batch; every processed image is
+    # concatenated after it, which is why the first output image must be dropped.
+    central_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
+    i = tf.Variable(tf.constant(0))
+
+    # Loop condition: continue until every image index has been processed.
+    c = lambda central_x, i: tf.less(i, batch_size)
+
+    def body(central_x, i):
+        # 1. Crop the central [height, width] of the image.
+        image = tf.image.resize_image_with_crop_or_pad(tf.gather(images, i), height, width)
+        # 2. Subtract off the mean and divide by the variance of the pixels.
+        image = tf.image.per_image_whitening(image)
+        # 3. Append the image to a batch.
+        image = tf.expand_dims(image, 0)
+        # NOTE(review): axis-first tf.concat looks like the pre-1.0 TensorFlow
+        # signature - confirm before reviving this code path.
+        return tf.concat(0, [central_x, image]), tf.add(i, 1)
+
+    result = tf.while_loop(cond=c, body=body, loop_vars=(central_x, i), parallel_iterations=16)
+    return result
+
+
+
+
+
+
+
+
+
+
+
+
+#
--- a/tensorlayer/rein.py
+++ b/tensorlayer/rein.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import tensorflow as tf
+import numpy as np
+from six.moves import xrange
+
+def discount_episode_rewards(rewards=[], gamma=0.99, mode=0):
+    """Take a 1D float array of rewards and compute the discounted rewards for
+    an episode. In mode 0, a non-zero reward is treated as the end of an episode.
+
+    Parameters
+    ----------
+    rewards : numpy array or list
+        A 1D sequence of rewards. Plain lists and tuples are converted with
+        ``np.asarray``; the argument itself is never modified.
+    gamma : float
+        The discount factor.
+    mode : int
+        if mode == 0, reset the discount process when a non-zero reward is encountered (Ping-pong game).
+        if mode == 1, do not reset the discount process.
+
+    Returns
+    --------
+    A ``float32`` numpy array with the same shape as ``rewards``.
+
+    Examples
+    ----------
+    >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
+    >>> gamma = 0.9
+    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma)
+    >>> print(discount_rewards)
+    ... [ 0.72899997  0.81        0.89999998  1.          0.72899997  0.81
+    ... 0.89999998  1.          0.72899997  0.81        0.89999998  1.        ]
+    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma, mode=1)
+    >>> print(discount_rewards)
+    ... [ 1.52110755  1.69011939  1.87791049  2.08656716  1.20729685  1.34144104
+    ... 1.49048996  1.65610003  0.72899997  0.81        0.89999998  1.        ]
+    """
+    # The loop below needs ``.size``; coercing here makes the default ``[]``
+    # and ordinary Python lists work instead of raising AttributeError.
+    rewards = np.asarray(rewards)
+    discounted_r = np.zeros_like(rewards, dtype=np.float32)
+    running_add = 0
+    # Walk backwards so each step only needs the already-discounted future sum.
+    for t in reversed(range(rewards.size)):
+        if mode == 0 and rewards[t] != 0:
+            # Episode boundary: do not carry rewards across it.
+            running_add = 0
+
+        running_add = running_add * gamma + rewards[t]
+        discounted_r[t] = running_add
+    return discounted_r
+
+
+def cross_entropy_reward_loss(logits, actions, rewards, name=None):
+    """Calculate the loss for a Policy Gradient Network.
+
+    The loss is the sum over all elements of the sparse softmax cross entropy
+    between ``logits`` and ``actions``, multiplied element-wise by ``rewards``.
+
+    Parameters
+    ----------
+    logits : tensor
+        The network outputs without softmax. This function implements softmax
+        inside.
+    actions : tensor/ placeholder
+        The agent actions.
+    rewards : tensor/ placeholder
+        The rewards.
+    name : string or None
+        Optional name passed to the cross-entropy op. It is only forwarded by
+        the first (TF 1.0 style) call; the fallback call does not use it.
+
+    Returns
+    --------
+    The result of ``tf.reduce_sum`` over the reward-weighted cross entropy.
+
+    Examples
+    ----------
+    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
+    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
+    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
+    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
+    >>> probs = network.outputs
+    >>> sampling_prob = tf.nn.softmax(probs)
+    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
+    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
+    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
+    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
+    """
+
+    # Try the TF 1.0 keyword signature first; on any failure fall back to the
+    # older keyword names. NOTE(review): the bare except also hides unrelated
+    # errors raised by the first call - confirm this is intended.
+    try: # TF 1.0
+        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
+    except:
+        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions)
+        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)
+
+    # Same version fallback for the element-wise product (tf.multiply vs tf.mul).
+    try: ## TF1.0
+        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
+    except: ## TF0.12
+        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))   # element-wise mul
+    return loss
--- a/tensorlayer/utils.py
+++ b/tensorlayer/utils.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+import tensorflow as tf
+import tensorlayer as tl
+from . import iterate
+import numpy as np
+import time
+import math
+import random
+
+
+def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100,
+        n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True,
+        tensorboard=False, tensorboard_epoch_freq=5, tensorboard_weight_histograms=True, tensorboard_graph_vis=True):
+    """Train a given non time-series network by the given cost function, training data, batch_size, n_epoch etc.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network will be trained
+    train_op : a TensorFlow optimizer
+        like tf.train.AdamOptimizer
+    cost : the TensorFlow expression of cost
+        evaluated together with ``train_op`` on every mini-batch; its per-epoch mean is the reported loss
+    X_train : numpy array
+        the input of training data
+    y_train : numpy array
+        the target of training data
+    x : placeholder
+        for inputs
+    y_ : placeholder
+        for targets
+    acc : the TensorFlow expression of accuracy (or other metric) or None
+        if None, would not display the metric
+    batch_size : int
+        batch size for training and evaluating
+    n_epoch : int
+        the number of training epochs
+    print_freq : int
+        display the training information every ``print_freq`` epochs
+    X_val : numpy array or None
+        the input of validation data
+    y_val : numpy array or None
+        the target of validation data
+    eval_train : boolean
+        if X_val and y_val are not None, it reflects whether to evaluate the training data
+    tensorboard : boolean
+        if True summary data will be stored to the log/ directory for visualization with tensorboard.
+        See also detailed tensorboard_X settings for specific configurations of features. (default False)
+        Also runs tl.layers.initialize_global_variables(sess) internally in fit() to setup the summary nodes, see Note:
+    tensorboard_epoch_freq : int
+        how many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5)
+    tensorboard_weight_histograms : boolean
+        if True updates tensorboard data in the logs/ directory for visualization
+        of the weight histograms every tensorboard_epoch_freq epoch (default True)
+    tensorboard_graph_vis : boolean
+        if True stores the graph in the tensorboard summaries saved to log/ (default True)
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
+    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
+    ...            X_val=X_val, y_val=y_val, eval_train=False)
+    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
+    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
+    ...            X_val=X_val, y_val=y_val, eval_train=False,
+    ...            tensorboard=True, tensorboard_weight_histograms=True, tensorboard_graph_vis=True)
+
+    Note
+    --------
+        If tensorboard=True, the global_variables_initializer will be run inside the fit function
+        in order to initialize the automatically generated summary nodes used for tensorboard visualization,
+        thus tf.global_variables_initializer().run() before the fit() call will be undefined.
+    """
+    assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size"
+
+    if(tensorboard):
+        print("Setting up tensorboard ...")
+        #Set up tensorboard summaries and saver
+        tl.files.exists_or_mkdir('logs/')
+
+        #Only write summaries for more recent TensorFlow versions
+        # NOTE(review): train_writer/val_writer are only bound inside this
+        # hasattr check, but are used unconditionally in the epoch loop below.
+        if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
+            if tensorboard_graph_vis:
+                train_writer = tf.summary.FileWriter('logs/train',sess.graph)
+                val_writer = tf.summary.FileWriter('logs/validation',sess.graph)
+            else:
+                train_writer = tf.summary.FileWriter('logs/train')
+                val_writer = tf.summary.FileWriter('logs/validation')
+
+        #Set up summary nodes
+        if(tensorboard_weight_histograms):
+            for param in network.all_params:
+                if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
+                    print('Param name ', param.name)
+                    tf.summary.histogram(param.name, param)
+
+        if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
+            tf.summary.scalar('cost', cost)
+
+        # Must come after all summary nodes above have been created.
+        merged = tf.summary.merge_all()
+
+        #Initialize all variables and summaries
+        tl.layers.initialize_global_variables(sess)
+        print("Finished! use $tensorboard --logdir=logs/ to start server")
+
+    print("Start training the network ...")
+    start_time_begin = time.time()
+    # Global step counters for the train / validation summary writers.
+    tensorboard_train_index, tensorboard_val_index = 0, 0
+    for epoch in range(n_epoch):
+        start_time = time.time()
+        loss_ep = 0; n_step = 0
+        # ---- one training pass over the shuffled mini-batches
+        for X_train_a, y_train_a in iterate.minibatches(X_train, y_train,
+                                                    batch_size, shuffle=True):
+            feed_dict = {x: X_train_a, y_: y_train_a}
+            feed_dict.update( network.all_drop )    # enable noise layers
+            loss, _ = sess.run([cost, train_op], feed_dict=feed_dict)
+            loss_ep += loss
+            n_step += 1
+        # Mean training loss of this epoch (measured with noise layers enabled).
+        loss_ep = loss_ep/ n_step
+
+        # ---- tensorboard summaries: first epoch and every tensorboard_epoch_freq epochs
+        if tensorboard and hasattr(tf, 'summary'):
+            if epoch+1 == 1 or (epoch+1) % tensorboard_epoch_freq == 0:
+                for X_train_a, y_train_a in iterate.minibatches(
+                                        X_train, y_train, batch_size, shuffle=True):
+                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                    feed_dict = {x: X_train_a, y_: y_train_a}
+                    feed_dict.update(dp_dict)
+                    result = sess.run(merged, feed_dict=feed_dict)
+                    train_writer.add_summary(result, tensorboard_train_index)
+                    tensorboard_train_index += 1
+                if (X_val is not None) and (y_val is not None):                      
+                        for X_val_a, y_val_a in iterate.minibatches(
+                                        X_val, y_val, batch_size, shuffle=True):
+                                dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                                feed_dict = {x: X_val_a, y_: y_val_a}
+                                feed_dict.update(dp_dict)
+                                result = sess.run(merged, feed_dict=feed_dict)
+                                val_writer.add_summary(result, tensorboard_val_index)
+                                tensorboard_val_index += 1
+
+        # ---- console report: first epoch and every print_freq epochs
+        if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
+            if (X_val is not None) and (y_val is not None):
+                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
+                if eval_train is True:
+                    # Re-evaluate the training set with noise layers disabled.
+                    train_loss, train_acc, n_batch = 0, 0, 0
+                    for X_train_a, y_train_a in iterate.minibatches(
+                                            X_train, y_train, batch_size, shuffle=True):
+                        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                        feed_dict = {x: X_train_a, y_: y_train_a}
+                        feed_dict.update(dp_dict)
+                        if acc is not None:
+                            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                            train_acc += ac
+                        else:
+                            err = sess.run(cost, feed_dict=feed_dict)
+                        train_loss += err;  n_batch += 1
+                    print("   train loss: %f" % (train_loss/ n_batch))
+                    if acc is not None:
+                        print("   train acc: %f" % (train_acc/ n_batch))
+                # Evaluate the validation set with noise layers disabled.
+                val_loss, val_acc, n_batch = 0, 0, 0
+                for X_val_a, y_val_a in iterate.minibatches(
+                                            X_val, y_val, batch_size, shuffle=True):
+                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                    feed_dict = {x: X_val_a, y_: y_val_a}
+                    feed_dict.update(dp_dict)
+                    if acc is not None:
+                        err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                        val_acc += ac
+                    else:
+                        err = sess.run(cost, feed_dict=feed_dict)
+                    val_loss += err; n_batch += 1
+                print("   val loss: %f" % (val_loss/ n_batch))
+                if acc is not None:
+                    print("   val acc: %f" % (val_acc/ n_batch))
+            else:
+                # No validation data: report the mean training loss of the epoch.
+                print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep))
+    print("Total training time: %fs" % (time.time() - start_time_begin))
+
+
+def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None):
+    """
+    Test a given non time-series network by the given test data and metric.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network to be tested
+    acc : the TensorFlow expression of accuracy (or other metric) or None
+        if None, would not display the metric
+    X_test : numpy array
+        the input of test data
+    y_test : numpy array
+        the target of test data
+    x : placeholder
+        for inputs
+    y_ : placeholder
+        for targets
+    batch_size : int or None
+        batch size for testing, when dataset is large, we should use minibatches for testing.
+        when dataset is small, we can set it to None.
+    cost : the TensorFlow expression of cost or None
+        if None, would not display the cost
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
+    """
+    print('Start testing the network ...')
+    if batch_size is None:
+        # Evaluate the whole test set in a single run.
+        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+        feed_dict = {x: X_test, y_: y_test}
+        feed_dict.update(dp_dict)
+        if cost is not None:
+            print("   test loss: %f" % sess.run(cost, feed_dict=feed_dict))
+        # The docstring allows acc=None; skip the metric instead of running None.
+        if acc is not None:
+            print("   test acc: %f" % sess.run(acc, feed_dict=feed_dict))
+    else:
+        # Average the per-batch loss / metric over all mini-batches.
+        test_loss, test_acc, n_batch = 0, 0, 0
+        for X_test_a, y_test_a in iterate.minibatches(
+                                    X_test, y_test, batch_size, shuffle=True):
+            dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+            feed_dict = {x: X_test_a, y_: y_test_a}
+            feed_dict.update(dp_dict)
+            if cost is not None and acc is not None:
+                # Fetch both in one run so the batch is evaluated only once.
+                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                test_loss += err
+                test_acc += ac
+            elif cost is not None:
+                test_loss += sess.run(cost, feed_dict=feed_dict)
+            elif acc is not None:
+                test_acc += sess.run(acc, feed_dict=feed_dict)
+            n_batch += 1
+        if cost is not None:
+            print("   test loss: %f" % (test_loss/ n_batch))
+        if acc is not None:
+            print("   test acc: %f" % (test_acc/ n_batch))
+
+
+def predict(sess, network, X, x, y_op, batch_size=None):
+    """
+    Return the predict results of given non time-series network.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network used for prediction
+    X : numpy array
+        the input
+    x : placeholder
+        for inputs
+    y_op : placeholder
+        the argmax expression of softmax outputs
+    batch_size : int or None
+        batch size for prediction, when dataset is large, we should use minibatches for prediction.
+        when dataset is small, we can set it to None.
+
+    Returns
+    --------
+    The value of ``y_op``. With ``batch_size`` set, the per-batch results are
+    joined along the first (batch) axis; ``None`` is returned if the batch
+    iterator yields nothing.
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> y = network.outputs
+    >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
+    >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
+    """
+    if batch_size is None:
+        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+        feed_dict = {x: X,}
+        feed_dict.update(dp_dict)
+        return sess.run(y_op, feed_dict=feed_dict)
+
+    batch_results = []
+    for X_a, _ in iterate.minibatches(
+            X, X, batch_size, shuffle=False):
+        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+        feed_dict = {x: X_a, }
+        feed_dict.update(dp_dict)
+        # atleast_1d keeps scalar per-batch outputs joinable, as hstack did.
+        batch_results.append(np.atleast_1d(sess.run(y_op, feed_dict=feed_dict)))
+    if not batch_results:
+        return None
+    # Join once, along the batch axis. np.hstack would join 2D+ outputs along
+    # axis 1 (side by side) and re-copied the accumulated result every batch.
+    return np.concatenate(batch_results, axis=0)
+
+
+## Evaluation
+def evaluation(y_test=None, y_predict=None, n_classes=None):
+    """
+    Compute and print classification metrics for the given predictions:
+    the confusion matrix, the F1-score of each class, the accuracy and the
+    macro F1-score.
+
+    Parameters
+    ----------
+    y_test : numpy.array or list
+        target results
+    y_predict : numpy.array or list
+        predicted results
+    n_classes : int
+        number of classes; labels ``0 .. n_classes-1`` are used for the
+        confusion matrix and the per-class F1-score
+
+    Returns
+    --------
+    The tuple ``(c_mat, f1, acc, f1_macro)``.
+
+    Examples
+    --------
+    >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes)
+    """
+    from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
+    class_ids = list(range(n_classes))
+    c_mat = confusion_matrix(y_test, y_predict, labels=class_ids)
+    f1 = f1_score(y_test, y_predict, average=None, labels=class_ids)
+    # The macro score is computed without an explicit label list.
+    f1_macro = f1_score(y_test, y_predict, average='macro')
+    acc = accuracy_score(y_test, y_predict)
+    # Report only after every metric has been computed successfully.
+    report = (
+        ('confusion matrix: \n', c_mat),
+        ('f1-score:', f1),
+        ('f1-score(macro):', f1_macro),
+        ('accuracy-score:', acc),
+    )
+    for caption, value in report:
+        print(caption, value)
+    return c_mat, f1, acc, f1_macro
+
+def dict_to_one(dp_dict={}):
+    """
+    Return a new dictionary with the same keys as the input and every value
+    set to one; used to disable dropout, dropconnect layers and so on.
+
+    Parameters
+    ----------
+    dp_dict : dictionary
+        keeping probabilities; it is not modified
+
+    Examples
+    --------
+    >>> dp_dict = dict_to_one( network.all_drop )
+    >>> feed_dict.update(dp_dict)
+    """
+    return dict.fromkeys(dp_dict, 1)
+
+def flatten_list(list_of_list=[[],[]]):
+    """
+    Input a list of lists, return one flat list holding all of their items
+    in order (one level of nesting is removed).
+
+    Parameters
+    ----------
+    list_of_list : a list of list
+        The inner containers may be any iterables (lists, tuples, ...).
+
+    Returns
+    --------
+    A new list; the input is not modified.
+
+    Examples
+    --------
+    >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]])
+    ... [1, 2, 3, 4, 5, 6]
+    """
+    # A single pass is linear; ``sum(list_of_list, [])`` copied the growing
+    # result for every sub-list and only accepted real ``list`` objects.
+    return [item for sub_list in list_of_list for item in sub_list]
+
+
+def class_balancing_oversample(X_train=None, y_train=None, printable=True):
+    """Input the features and labels, return the features and labels after oversampling.
+
+    Every class is repeated (whole copies of its examples stacked, then
+    truncated) until it has as many examples as the most frequent class.
+
+    Parameters
+    ----------
+    X_train : numpy.array
+        Features, each row is an example
+    y_train : numpy.array
+        Labels
+    printable : boolean
+        If True, print progress information; if False, nothing is printed.
+
+    Returns
+    --------
+    X_train : numpy.array
+        The balanced features, grouped class by class.
+    y_train : list
+        The matching labels.
+
+    Examples
+    --------
+    - One X
+    >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True)
+
+    - Two X
+    >>> X, y = tl.utils.class_balancing_oversample(X_train=np.hstack((X1, X2)), y_train=y, printable=False)
+    >>> X1 = X[:, 0:5]
+    >>> X2 = X[:, 5:]
+    """
+    # ======== Classes balancing
+    from collections import Counter
+    if printable:
+        print("Classes balancing for training examples...")
+    c = Counter(y_train)
+    if printable:
+        print('the occurrence number of each stage: %s' % c.most_common())
+        print('the least stage is Label %s have %s instances' % c.most_common()[-1])
+        print('the most stage is  Label %s have %s instances' % c.most_common(1)[0])
+    most_num = c.most_common(1)[0][1]
+    if printable:
+        print('most num is %d, all classes tend to be this num' % most_num)
+
+    # Row indices of every label, most frequent label first.
+    labels = np.array(y_train)
+    locations = {lab: np.where(labels == lab)[0] for lab, _ in c.most_common()}
+    if printable:
+        print('convert list(np.array) to dict format')
+    X = {lab: X_train[rows] for lab, rows in locations.items()}  # label -> its examples
+
+    # oversampling
+    if printable:
+        print('start oversampling')
+    for key in X:
+        temp = X[key]
+        # Stack whole copies of the class until it is at least as large as
+        # the biggest class; the surplus is trimmed below.
+        while len(X[key]) < most_num:
+            X[key] = np.vstack((X[key], temp))
+    if printable:
+        # Label 0 is not guaranteed to exist, so only report it when present.
+        if 0 in X:
+            print('first features of label 0 >', len(X[0][0]))
+        print('the occurrence num of each stage after oversampling')
+        for key in X:
+            print(key, len(X[key]))
+        print('make each stage have same num of instances')
+    for key in X:
+        X[key] = X[key][0:most_num,:]
+        if printable:
+            print(key, len(X[key]))
+
+    # convert dict to list
+    if printable:
+        print('convert from dict to list format')
+    # Feature count taken from whichever class comes first, not from label 0.
+    n_features = len(next(iter(X.values()))[0])
+    y_train = []
+    X_train = np.empty(shape=(0, n_features))
+    for key in X:
+        X_train = np.vstack( (X_train, X[key] ) )
+        y_train.extend([key] * len(X[key]))
+    c = Counter(y_train)
+    if printable:
+        print('the occurrence number of each stage after oversampling: %s' % c.most_common())
+    # ================ End of Classes balancing
+    return X_train, y_train
+
+## Random
+def get_random_int(min=0, max=10, number=5, seed=None):
+    """Return a list of random integers by the given range and quantity.
+
+    Parameters
+    ----------
+    min : int
+        The smallest value that can be returned (inclusive).
+    max : int
+        The largest value that can be returned (inclusive).
+    number : int
+        How many integers to return.
+    seed : int or None
+        Seed of the private random generator. Any seed other than None,
+        including 0, gives a reproducible result; None seeds the generator
+        from the system.
+
+    Examples
+    ---------
+    >>> r = get_random_int(min=0, max=10, number=5)
+    ... [10, 2, 3, 3, 7]
+    """
+    # random.Random(None) behaves like random.Random(), so the seed can be
+    # passed straight through; the former ``if seed:`` test ignored seed=0.
+    rnd = random.Random(seed)
+    return [rnd.randint(min, max) for _ in range(number)]
+
+#
+# def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True):
+#     ''' Both the inputs and the outputs are in sequence format
+#         oversampling or downsampling
+#     '''
+#     n_features = X_train.shape[2]
+#     # ======== Classes balancing for sequence
+#     if printable:
+#         print("Classes balancing for 4D sequence training examples...")
+#     from collections import Counter
+#     c = Counter(y_train)    # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48})
+#     if printable:
+#         print('the occurrence number of each stage: %s' % c.most_common())
+#         print('the least Label %s have %s instances' % c.most_common()[-1])
+#         print('the most  Label %s have %s instances' % c.most_common(1)[0])
+#     # print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)]
+#     most_num = c.most_common(1)[0][1]
+#     less_num = c.most_common()[-1][1]
+#
+#     locations = {}
+#     number = {}
+#     for lab, num in c.most_common():
+#         number[lab] = num
+#         locations[lab] = np.where(np.array(y_train)==lab)[0]
+#     # print(locations)
+#     # print(number)
+#     if printable:
+#         print('  convert list to dict')
+#     X = {}  # convert list to dict
+#     ### a sequence
+#     for lab, _ in number.items():
+#         X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D
+#     for lab, _ in number.items():
+#         #X[lab] = X_train[locations[lab]
+#         for l in locations[lab]:
+#             X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)]))
+#         # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)]    # a sequence
+#     # print(X)
+#
+#     if model=='oversampling':
+#         if printable:
+#             print('  oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num)
+#         for key in X:
+#             temp = X[key]
+#             while True:
+#                 if len(X[key]) >= most_num * sequence_length:   # sequence
+#                     break
+#                 X[key] = np.vstack((X[key], temp))
+#             # print(key, len(X[key]))
+#         if printable:
+#             print('  make each stage have same num of instances')
+#         for key in X:
+#             X[key] = X[key][0:most_num*sequence_length,:]   # sequence
+#             if printable:
+#                 print(key, len(X[key]))
+#     elif model=='downsampling':
+#         import random
+#         if printable:
+#             print('  downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle applied' % less_num)
+#         for key in X:
+#             # print(key, len(X[key]))#, len(X[key])/sequence_length)
+#             s_idx = [ i for i in range(int(len(X[key])/sequence_length))]
+#             s_idx = np.asarray(s_idx)*sequence_length   # start index of sequence in X[key]
+#             # print('s_idx',s_idx)
+#             r_idx = np.random.choice(s_idx, less_num, replace=False)    # random choice less_num of s_idx
+#             # print('r_idx',r_idx)
+#             temp = X[key]
+#             X[key] = np.empty(shape=(0,1,n_features,1)) # 4D
+#             for idx in r_idx:
+#                 X[key] = np.vstack((X[key], temp[idx:idx+sequence_length]))
+#             # print(key, X[key])
+#             # np.random.choice(l, len(l), replace=False)
+#     else:
+#         raise Exception('  model should be oversampling or downsampling')
+#
+#     # convert dict to list
+#     if printable:
+#         print('  convert dict to list')
+#     y_train = []
+#     # X_train = np.empty(shape=(0,len(X[0][0])))
+#     # X_train = np.empty(shape=(0,len(X[1][0])))    # 2D
+#     X_train = np.empty(shape=(0,1,n_features,1))    # 4D
+#     l_key = list(X.keys())  # shuffle
+#     random.shuffle(l_key)   # shuffle
+#     # for key in X:     # no shuffle
+#     for key in l_key:   # shuffle
+#         X_train = np.vstack( (X_train, X[key] ) )
+#         # print(len(X[key]))
+#         y_train.extend([key for i in range(int(len(X[key])/sequence_length))])
+#     # print(X_train,y_train, type(X_train), type(y_train))
+#     # ================ End of Classes balancing for sequence
+#     # print(X_train.shape, len(y_train))
+#     return X_train, np.asarray(y_train)
--- a/tensorlayer/visualize.py
+++ b/tensorlayer/visualize.py
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+# import matplotlib.pyplot as plt
+import numpy as np
+import os
+
+
+## Save images
+import scipy.misc
+
+def save_image(image, image_path):
+    """Write a single image to disk.
+
+    Parameters
+    -----------
+    image : numpy array
+        The image to save (the original documentation gives the layout
+        as [w, h, c]).
+    image_path : string.
+        Destination path, passed to ``scipy.misc.imsave`` unchanged.
+    """
+    write_image = scipy.misc.imsave
+    write_image(image_path, image)
+
+def save_images(images, size, image_path):
+    """Save multiple images into one single image.
+
+    The images are tiled onto a grid of ``size[0]`` rows and ``size[1]``
+    columns, filled row by row; grid cells without an image stay zero.
+
+    Parameters
+    -----------
+    images : numpy array [batch, h, w, c]
+        A single-channel batch (c == 1) is broadcast onto a 3-channel
+        canvas; any other channel count (e.g. 3 or 4) is kept as-is.
+    size : list of two int, row and column number.
+        number of images should be equal or less than size[0] * size[1]
+    image_path : string.
+        Output path, passed to ``scipy.misc.imsave``.
+
+    Returns
+    --------
+    Whatever ``scipy.misc.imsave`` returns.
+
+    Raises
+    -------
+    AssertionError
+        If there are more images than grid cells.
+
+    Examples
+    ---------
+    >>> images = np.random.rand(64, 100, 100, 3)
+    >>> tl.visualize.save_images(images, [8, 8], 'temp.png')
+    """
+    n_rows, n_cols = size[0], size[1]
+    # Kept as an assert so the exception type seen by callers is unchanged.
+    assert len(images) <= n_rows * n_cols, "number of images should be equal or less than size[0] * size[1] {}".format(len(images))
+
+    h, w = images.shape[1], images.shape[2]
+    # The canvas used to be hard-coded to 3 channels, which made e.g. RGBA
+    # batches fail on assignment. Single-channel input keeps the historical
+    # 3-channel canvas (the image is broadcast across the channels).
+    n_channels = 3
+    if images.ndim == 4 and images.shape[3] != 1:
+        n_channels = images.shape[3]
+
+    canvas = np.zeros((h * n_rows, w * n_cols, n_channels))
+    for idx, image in enumerate(images):
+        r, c = divmod(idx, n_cols)      # grid cell of the idx-th image
+        canvas[r*h:(r+1)*h, c*w:(c+1)*w, :] = image
+    return scipy.misc.imsave(image_path, canvas)
+
+def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512):
+    """Visualize every column of the weight matrix as a group of greyscale images.
+
+    Each column is divided by its L2 norm, reshaped to ``shape`` and drawn
+    in its own subplot of a roughly square grid.
+
+    Parameters
+    ----------
+    W : numpy.array
+        The weight matrix; one feature image per column.
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    shape : a list with 2 int
+        The shape of feature image, MNIST is [28, 28].
+    name : a string
+        A name to save the image, if saveable is True ('.pdf' is appended).
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
+    """
+    if saveable is False:
+        plt.ion()
+    fig = plt.figure(fig_idx)      # show all feature images
+    n_units = W.shape[1]
+
+    # Grid side length, e.g. 25 hidden units -> 5 images per row.
+    num_r = int(np.sqrt(n_units))
+    # float() keeps this a true division under Python 2 as well.
+    num_c = int(np.ceil(n_units / float(num_r)))
+
+    # num_r * num_c >= n_units, so a single pass over the units fills the
+    # grid in the same order as the former nested row/column loops.
+    for unit in range(n_units):
+        fig.add_subplot(num_r, num_c, unit + 1)
+        column = W[:, unit]
+        norm = np.sqrt((column ** 2).sum())
+        # An all-zero column would otherwise be turned into NaNs.
+        feature = column / norm if norm > 0 else column
+        plt.imshow(np.reshape(feature, (shape[0], shape[1])),
+                cmap='gray', interpolation="nearest")
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836):
+    """Display a frame(image). Make sure OpenAI Gym render() is disable before using it.
+
+    Parameters
+    ----------
+    I : numpy.array
+        The image. A 3-D array with a single trailing channel, e.g.
+        (10, 10, 1), is shown as the 2-D image (10, 10).
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        Used as the plot title and, if saveable is True, as the file name
+        ('.pdf' is appended).
+    cmap : None or string
+        'gray' for greyscale, None for default, etc.
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> env = gym.make("Pong-v0")
+    >>> observation = env.reset()
+    >>> tl.visualize.frame(observation)
+    """
+    if saveable is False:
+        plt.ion()
+    plt.figure(fig_idx)
+
+    # Only drop the channel axis of a rank-3 image. The previous check
+    # (`len(I.shape) and ...`) also matched 2-D arrays of shape (h, 1) and
+    # then failed on the three-index slice below.
+    if len(I.shape) == 3 and I.shape[-1] == 1:     # (10,10,1) --> (10,10)
+        I = I[:, :, 0]
+
+    plt.imshow(I, cmap)
+    plt.title(name)
+
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362):
+    """Display a group of RGB or Greyscale CNN masks.
+
+    Every mask ``CNN[:, :, :, k]`` is drawn in its own subplot. The subplot
+    grid has ``int(sqrt(n))`` columns and ``ceil(n / int(sqrt(n)))`` rows,
+    where n is the number of masks.
+
+    Parameters
+    ----------
+    CNN : numpy.array
+        The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64).
+        The third axis must have size 1 (greyscale) or 3 (RGB).
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True ('.pdf' is appended).
+    fig_idx : int
+        matplotlib figure index.
+
+    Raises
+    ------
+    Exception
+        If the channel axis is neither 1 nor 3.
+
+    Examples
+    --------
+    >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012)
+    """
+    n_row = CNN.shape[0]
+    n_col = CNN.shape[1]
+    n_color = CNN.shape[2]
+    n_mask = CNN.shape[3]
+    row = int(np.sqrt(n_mask))
+    col = int(np.ceil(n_mask / float(row)))   # true division on Python 2 too
+
+    # Checked once up front instead of on every loop iteration.
+    if n_color not in (1, 3):
+        raise Exception("Unknown n_color")
+
+    # Interactive mode is only needed when the figure is shown, matching
+    # the behaviour of W() and frame().
+    if saveable is False:
+        plt.ion()
+    fig = plt.figure(fig_idx)
+
+    # row * col >= n_mask, so one pass over the masks fills the grid in the
+    # same order as the former nested loops.
+    for idx in range(n_mask):
+        fig.add_subplot(col, row, idx + 1)
+        mask = CNN[:, :, :, idx]
+        if n_color == 1:
+            mask = np.reshape(mask, (n_row, n_col))    # drop the channel axis
+        plt.imshow(mask, cmap='gray', interpolation="nearest")
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+
+def images2d(images=None, second=10, saveable=True, name='images', dtype=None,
+                                                            fig_idx=3119362):
+    """Display a group of RGB or Greyscale images.
+
+    Every image ``images[k]`` is drawn in its own subplot. The subplot grid
+    has ``int(sqrt(n))`` columns and ``ceil(n / int(sqrt(n)))`` rows, where
+    n is the number of images.
+
+    Parameters
+    ----------
+    images : numpy.array
+        The images, indexed as [n, row, col, channel]; the channel axis
+        must have size 1 (greyscale) or 3 (RGB).
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True ('.pdf' is appended).
+    dtype : None or numpy data type
+        The data type for displaying the images.
+    fig_idx : int
+        matplotlib figure index.
+
+    Raises
+    ------
+    Exception
+        If the channel axis is neither 1 nor 3.
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    """
+    # Explicit None check: truthiness is not a reliable test for dtype
+    # objects, so a caller-supplied dtype could be silently ignored.
+    if dtype is not None:
+        images = np.asarray(images, dtype=dtype)
+    n_mask = images.shape[0]
+    n_row = images.shape[1]
+    n_col = images.shape[2]
+    n_color = images.shape[3]
+    row = int(np.sqrt(n_mask))
+    col = int(np.ceil(n_mask / float(row)))   # true division on Python 2 too
+
+    # Checked once up front instead of on every loop iteration.
+    if n_color not in (1, 3):
+        raise Exception("Unknown n_color")
+
+    # Interactive mode is only needed when the figure is shown, matching
+    # the behaviour of W() and frame().
+    if saveable is False:
+        plt.ion()
+    fig = plt.figure(fig_idx)
+
+    # row * col >= n_mask, so one pass over the images fills the grid in
+    # the same order as the former nested loops.
+    for idx in range(n_mask):
+        fig.add_subplot(col, row, idx + 1)
+        picture = images[idx, :, :]
+        if n_color == 1:
+            picture = np.reshape(picture, (n_row, n_col))   # drop the channel axis
+        plt.imshow(picture, cmap='gray', interpolation="nearest")
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def tsne_embedding(embeddings, reverse_dictionary, plot_only=500,
+                        second=5, saveable=False, name='tsne', fig_idx=9862):
+    """Visualize the embeddings by using t-SNE.
+
+    The first ``plot_only`` rows of ``embeddings`` are projected to 2-D
+    with scikit-learn's TSNE and drawn as a scatter plot; row ``i`` is
+    annotated with ``reverse_dictionary[i]``.
+
+    Parameters
+    ----------
+    embeddings : a matrix
+        The embedding matrix, one row per id.
+    reverse_dictionary : a dictionary
+        id_to_word, mapping id to unique word.
+    plot_only : int
+        The number of examples to plot, choice the most common words.
+        Values larger than the number of rows are clamped to it.
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True ('.pdf' is appended).
+    fig_idx : int
+        matplotlib figure index, used when the figure is displayed.
+
+    Examples
+    --------
+    >>> see 'tutorial_word2vec_basic.py'
+    >>> final_embeddings = normalized_embeddings.eval()
+    >>> tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary,
+    ...                   plot_only=500, second=5, saveable=False, name='tsne')
+    """
+    def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5,
+                                    saveable=True, name='tsne', fig_idx=9862):
+        """Scatter the 2-D points and annotate each with its label."""
+        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
+        if saveable is False:
+            plt.ion()
+            # Draw on the figure the caller asked for. Previously an empty
+            # figure `fig_idx` was opened and the plot went to a second,
+            # auto-numbered one.
+            fig = plt.figure(fig_idx, figsize=figsize)  #in inches
+            fig.clf()   # start clean if this index was already used
+        else:
+            plt.figure(figsize=figsize)  #in inches
+        for i, label in enumerate(labels):
+            x, y = low_dim_embs[i,:]
+            plt.scatter(x, y)
+            plt.annotate(label,
+                     xy=(x, y),
+                     xytext=(5, 2),
+                     textcoords='offset points',
+                     ha='right',
+                     va='bottom')
+        if saveable:
+            plt.savefig(name+'.pdf',format='pdf')
+        else:
+            plt.draw()
+            plt.pause(second)
+
+    # Only the optional dependency is guarded, so an ImportError raised
+    # deeper inside the computation is no longer mistaken for a missing
+    # package.
+    try:
+        from sklearn.manifold import TSNE
+    except ImportError:
+        print("Please install sklearn and matplotlib to visualize embeddings.")
+        return
+
+    # Never ask for more labels than there are embedding rows; otherwise
+    # the slice below is shorter than the label list and the assert fires.
+    n_points = min(plot_only, len(embeddings))
+
+    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
+    low_dim_embs = tsne.fit_transform(embeddings[:n_points,:])
+    labels = [reverse_dictionary[i] for i in range(n_points)]
+    plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \
+                                                name=name, fig_idx=fig_idx)
+
+
+#
--- a/utils.py
+++ b/utils.py
+import scipy
+import numpy as np
+
+def get_imgs_fn(file_name):
+    """Read an image file and return it as an RGB array.
+
+    Parameters
+    ----------
+    file_name : string
+        Path of the image, passed to ``scipy.misc.imread`` with mode 'RGB'.
+    """
+    # A bare `import scipy` does not guarantee that the `misc` submodule is
+    # loaded; import it explicitly so the attribute lookup cannot fail.
+    import scipy.misc
+    return scipy.misc.imread(file_name, mode='RGB')
+
+def augment_imgs_fn(x, add_noise=True):
+    """Optionally perturb an image array with uniform random noise.
+
+    Parameters
+    ----------
+    x : numpy array
+        The image(s).
+    add_noise : boolean
+        If True (default), add noise drawn uniformly from
+        [0, 0.1 * x.std()) to every element. If False, return ``x``
+        unchanged. The flag used to be ignored.
+
+    Returns
+    -------
+    The noisy array, or ``x`` itself when add_noise is False.
+    """
+    if not add_noise:
+        return x
+    return x + 0.1 * x.std() * np.random.random(x.shape)
+
+def normalize_imgs_fn(x):
+    """Linearly rescale pixel values so that 0 maps to -1 and 255 maps to 1."""
+    scale = 2. / 255.
+    return x * scale - 1.
+
+def truncate_imgs_fn(x):
+    """Limit every element of x to the range [-1, 1].
+
+    Elements that are not greater than -1 become -1, then elements that
+    are not less than 1 become 1.
+    """
+    floored = np.where(x > -1., x, -1.)
+    return np.where(floored < 1., floored, 1.)
\ No newline at end of file