Commit cb8dde1c authored by hepj

Add transformer-xl model code

parent a22e7ca7
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 1536 -n 11532 -k 512 --alpha 1 --lda 1536 --ldb 512 --beta 0 --ldc 1536 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 1536 -n 12288 -k 512 --alpha 1 --lda 1536 --ldb 512 --beta 0 --ldc 1536 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 2048 -n 5388 -k 512 --alpha 1 --lda 2048 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 2048 -n 6144 -k 512 --alpha 1 --lda 2048 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 1024 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 5388 -k 2048 --alpha 1 --lda 512 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 5388 -k 204 --alpha 1 --lda 512 --ldb 204 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 5388 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 6144 -k 2048 --alpha 1 --lda 512 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 6144 -k 204 --alpha 1 --lda 512 --ldb 204 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 6144 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 961 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 1536 -n 512 -k 11532 --alpha 1 --lda 1536 --ldb 512 --beta 0 --ldc 1536 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 1536 -n 512 -k 12288 --alpha 1 --lda 1536 --ldb 512 --beta 0 --ldc 1536 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 2048 -n 512 -k 5388 --alpha 1 --lda 2048 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 2048 -n 512 -k 6144 --alpha 1 --lda 2048 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 204 -n 512 -k 5388 --alpha 1 --lda 204 --ldb 512 --beta 0 --ldc 204 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 204 -n 512 -k 6144 --alpha 1 --lda 204 --ldb 512 --beta 0 --ldc 204 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 2048 -k 5388 --alpha 1 --lda 512 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 2048 -k 6144 --alpha 1 --lda 512 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 512 -k 1024 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 512 -k 5388 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 512 -k 6144 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 512 -k 961 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 2048 -n 5388 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 2048 -n 6144 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 2048 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 204 -n 5388 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 204 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 204 -n 6144 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 204 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 11532 -k 1536 --alpha 1 --lda 1536 --ldb 1536 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 12288 -k 1536 --alpha 1 --lda 1536 --ldb 1536 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 5388 -k 2048 --alpha 1 --lda 2048 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 5388 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 6144 -k 2048 --alpha 1 --lda 2048 --ldb 2048 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 6144 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 0 --ldc 512 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 1024 -n 512 -k 64 --alpha 1 --lda 1024 --stride_a 65536 --ldb 64 --stride_b 32768 --beta 0 --ldc 1024 --stride_c 524288 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 1024 -n 6144 -k 64 --alpha 1 --lda 1024 --stride_a 65536 --ldb 64 --stride_b 393216 --beta 0 --ldc 1024 --stride_c 6291456 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 64 -n 449 -k 961 --alpha 1 --lda 64 --stride_a 61504 --ldb 961 --stride_b 431489 --beta 0 --ldc 64 --stride_c 28736 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 64 -n 512 -k 1024 --alpha 1 --lda 64 --stride_a 65536 --ldb 1024 --stride_b 524288 --beta 0 --ldc 64 --stride_c 32768 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 961 -n 449 -k 64 --alpha 1 --lda 961 --stride_a 61504 --ldb 64 --stride_b 28736 --beta 0 --ldc 961 --stride_c 431489 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB N -m 961 -n 5388 -k 64 --alpha 1 --lda 961 --stride_a 61504 --ldb 64 --stride_b 344832 --beta 0 --ldc 961 --stride_c 5177868 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 1024 -n 64 -k 512 --alpha 1 --lda 1024 --stride_a 524288 --ldb 64 --stride_b 32768 --beta 0 --ldc 1024 --stride_c 65536 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 1024 -n 64 -k 6144 --alpha 1 --lda 1024 --stride_a 6291456 --ldb 64 --stride_b 393216 --beta 0 --ldc 1024 --stride_c 65536 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 64 -n 1024 -k 512 --alpha 1 --lda 64 --stride_a 32768 --ldb 1024 --stride_b 524288 --beta 0 --ldc 64 --stride_c 65536 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 64 -n 961 -k 449 --alpha 1 --lda 64 --stride_a 28736 --ldb 961 --stride_b 431489 --beta 0 --ldc 64 --stride_c 61504 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 961 -n 64 -k 449 --alpha 1 --lda 961 --stride_a 431489 --ldb 64 --stride_b 28736 --beta 0 --ldc 961 --stride_c 61504 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA N --transposeB T -m 961 -n 64 -k 5388 --alpha 1 --lda 961 --stride_a 5177868 --ldb 64 --stride_b 344832 --beta 0 --ldc 961 --stride_c 61504 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 1024 -n 512 -k 64 --alpha 1 --lda 64 --stride_a 65536 --ldb 64 --stride_b 32768 --beta 0 --ldc 1024 --stride_c 524288 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 64 -n 449 -k 961 --alpha 1 --lda 961 --stride_a 61504 --ldb 961 --stride_b 431489 --beta 0 --ldc 64 --stride_c 28736 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 64 -n 512 -k 1024 --alpha 1 --lda 1024 --stride_a 65536 --ldb 1024 --stride_b 524288 --beta 0 --ldc 64 --stride_c 32768 --batch_count 96 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 64 -n 5388 -k 961 --alpha 1 --lda 961 --stride_a 61504 --ldb 961 --stride_b 5177868 --beta 0 --ldc 64 --stride_c 344832 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 64 -n 6144 -k 1024 --alpha 1 --lda 1024 --stride_a 65536 --ldb 1024 --stride_b 6291456 --beta 0 --ldc 64 --stride_c 393216 --batch_count 8 --atomics_not_allowed
./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 961 -n 449 -k 64 --alpha 1 --lda 64 --stride_a 61504 --ldb 64 --stride_b 28736 --beta 0 --ldc 961 --stride_c 431489 --batch_count 96 --atomics_not_allowed
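The GEMM shapes benchmarked above appear to correspond to the matrix multiplies of the Transformer-XL configuration trained in the log below; the mapping sketched here is an assumption inferred from the shapes and from the log (n_token 204, ~41M parameters), not something stated in the commit.

# Hypothetical mapping of the benchmarked GEMM m/k dims to model layers.
# All hyperparameter values below are assumptions for illustration only.
d_model, n_head, d_head, d_inner, n_token = 512, 8, 64, 2048, 204

candidate_shapes = {
    'qkv projection': (3 * n_head * d_head, d_model),  # 1536 x 512
    'ffn layer_1':    (d_inner, d_model),              # 2048 x 512
    'ffn layer_2':    (d_model, d_inner),               # 512 x 2048
    'softmax logits': (n_token, d_model),               # 204 x 512
}
for name, (m, k) in candidate_shapes.items():
    # The GEMM n dimension would then be batch_size * sequence_length tokens.
    print('{:16s} m={:4d} k={:4d}'.format(name, m, k))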
nohup: ignoring input
Run training...
WARNING:tensorflow:From train_gpu.py:475: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead.
WARNING:tensorflow:From train_gpu.py:460: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
W0411 15:09:43.619874 46979636578560 module_wrapper.py:139] From train_gpu.py:460: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
WARNING:tensorflow:From train_gpu.py:460: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
W0411 15:09:43.620179 46979636578560 module_wrapper.py:139] From train_gpu.py:460: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
WARNING:tensorflow:From train_gpu.py:466: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
W0411 15:09:43.620661 46979636578560 module_wrapper.py:139] From train_gpu.py:466: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
INFO:tensorflow:n_token 204
I0411 15:09:43.620860 46979636578560 train_gpu.py:466] n_token 204
INFO:tensorflow:[train] File names ['train.bsz-24.tlen-512.tfrecords']
I0411 15:09:43.621257 46979636578560 data_utils.py:430] [train] File names ['train.bsz-24.tlen-512.tfrecords']
INFO:tensorflow:num of batches 7242
I0411 15:09:43.621433 46979636578560 train_gpu.py:234] num of batches 7242
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.sparse_tensor_to_dense is deprecated. Please use tf.sparse.to_dense instead.
W0411 15:09:44.940819 46979636578560 module_wrapper.py:139] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.sparse_tensor_to_dense is deprecated. Please use tf.sparse.to_dense instead.
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.
W0411 15:09:44.942387 46979636578560 module_wrapper.py:139] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.
W0411 15:09:44.943188 46979636578560 module_wrapper.py:139] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.parse_single_example is deprecated. Please use tf.io.parse_single_example instead.
W0411 15:09:44.946408 46979636578560 module_wrapper.py:139] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.parse_single_example is deprecated. Please use tf.io.parse_single_example instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/data_utils.py:502: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
W0411 15:09:45.984954 46979636578560 deprecation.py:323] From /public/home/hepj/SothisAI/transformer-xl-master/tf/data_utils.py:502: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
WARNING:tensorflow:From train_gpu.py:241: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
W0411 15:09:46.000672 46979636578560 deprecation.py:323] From train_gpu.py:241: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
WARNING:tensorflow:From train_gpu.py:253: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
W0411 15:09:46.023218 46979636578560 module_wrapper.py:139] From train_gpu.py:253: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
WARNING:tensorflow:From train_gpu.py:253: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.
W0411 15:09:46.023566 46979636578560 module_wrapper.py:139] From train_gpu.py:253: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.
WARNING:tensorflow:From train_gpu.py:257: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0411 15:09:46.023932 46979636578560 module_wrapper.py:139] From train_gpu.py:257: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/gpu_utils.py:6: The name tf.NodeDef is deprecated. Please use tf.compat.v1.NodeDef instead.
W0411 15:09:46.024810 46979636578560 module_wrapper.py:139] From /public/home/hepj/SothisAI/transformer-xl-master/tf/gpu_utils.py:6: The name tf.NodeDef is deprecated. Please use tf.compat.v1.NodeDef instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:460: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
W0411 15:09:46.036548 46979636578560 module_wrapper.py:139] From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:460: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:416: The name tf.matrix_band_part is deprecated. Please use tf.linalg.band_part instead.
W0411 15:09:46.078661 46979636578560 module_wrapper.py:139] From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:416: The name tf.matrix_band_part is deprecated. Please use tf.linalg.band_part instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:493: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dropout instead.
W0411 15:09:46.113715 46979636578560 deprecation.py:323] From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:493: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dropout instead.
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:271: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
W0411 15:09:46.114592 46979636578560 deprecation.py:323] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:271: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
WARNING:tensorflow:From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:54: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
W0411 15:09:46.141592 46979636578560 deprecation.py:323] From /public/home/hepj/SothisAI/transformer-xl-master/tf/model.py:54: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
W0411 15:09:46.438683 46979636578560 lazy_loader.py:50]
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
WARNING:tensorflow:From train_gpu.py:189: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.
W0411 15:09:51.204890 46979636578560 module_wrapper.py:139] From train_gpu.py:189: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.
INFO:tensorflow:#params: 41055436
I0411 15:09:51.216222 46979636578560 train_gpu.py:190] #params: 41055436
INFO:tensorflow:#params: 41055436
I0411 15:10:00.890560 46979636578560 train_gpu.py:190] #params: 41055436
INFO:tensorflow:#params: 41055436
I0411 15:10:10.730956 46979636578560 train_gpu.py:190] #params: 41055436
INFO:tensorflow:#params: 41055436
I0411 15:10:20.506927 46979636578560 train_gpu.py:190] #params: 41055436
WARNING:tensorflow:From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/ops/clip_ops.py:301: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0411 15:10:26.637341 46979636578560 deprecation.py:323] From /public/home/hepj/job_env/venv_1/lib/python3.6/site-packages/tensorflow_core/python/ops/clip_ops.py:301: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:From train_gpu.py:286: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.
W0411 15:10:26.832451 46979636578560 module_wrapper.py:139] From train_gpu.py:286: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.
WARNING:tensorflow:From train_gpu.py:296: The name tf.train.cosine_decay is deprecated. Please use tf.compat.v1.train.cosine_decay instead.
W0411 15:10:26.837068 46979636578560 module_wrapper.py:139] From train_gpu.py:296: The name tf.train.cosine_decay is deprecated. Please use tf.compat.v1.train.cosine_decay instead.
WARNING:tensorflow:From train_gpu.py:307: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
W0411 15:10:26.851632 46979636578560 module_wrapper.py:139] From train_gpu.py:307: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
WARNING:tensorflow:From train_gpu.py:317: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.
W0411 15:10:28.594886 46979636578560 module_wrapper.py:139] From train_gpu.py:317: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.
WARNING:tensorflow:From train_gpu.py:319: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
W0411 15:10:29.020682 46979636578560 module_wrapper.py:139] From train_gpu.py:319: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
WARNING:tensorflow:From train_gpu.py:319: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
W0411 15:10:29.021113 46979636578560 module_wrapper.py:139] From train_gpu.py:319: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
2022-04-11 15:10:29.035479: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1999885000 Hz
2022-04-11 15:10:29.037594: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x13b302a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-04-11 15:10:29.037722: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2022-04-11 15:10:29.041016: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libamdhip64.so
2022-04-11 15:10:33.083881: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1d6c54d0 initialized for platform ROCM (this does not guarantee that XLA will be used). Devices:
2022-04-11 15:10:33.084019: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Device 66a1, AMDGPU ISA version: gfx906
2022-04-11 15:10:33.084060: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (1): Device 66a1, AMDGPU ISA version: gfx906
2022-04-11 15:10:33.084096: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (2): Device 66a1, AMDGPU ISA version: gfx906
2022-04-11 15:10:33.084142: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (3): Device 66a1, AMDGPU ISA version: gfx906
2022-04-11 15:10:33.092406: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 0 with properties:
name: Device 66a1
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.7
pciBusID 0000:04:00.0
2022-04-11 15:10:33.092565: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 1 with properties:
name: Device 66a1
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.7
pciBusID 0000:26:00.0
2022-04-11 15:10:33.092653: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 2 with properties:
name: Device 66a1
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.7
pciBusID 0000:43:00.0
2022-04-11 15:10:33.092738: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 3 with properties:
name: Device 66a1
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.7
pciBusID 0000:63:00.0
2022-04-11 15:10:35.914914: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
2022-04-11 15:10:35.925268: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
2022-04-11 15:11:04.234699: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
2022-04-11 15:11:04.353950: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
2022-04-11 15:11:04.354631: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0, 1, 2, 3
2022-04-11 15:11:04.354846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-04-11 15:11:04.354919: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186] 0 1 2 3
2022-04-11 15:11:04.355005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0: N Y Y Y
2022-04-11 15:11:04.355062: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 1: Y N Y Y
2022-04-11 15:11:04.355110: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 2: Y Y N Y
2022-04-11 15:11:04.355160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 3: Y Y Y N
2022-04-11 15:11:04.355927: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14923 MB memory) -> physical GPU (device: 0, name: Device 66a1, pci bus id: 0000:04:00.0)
2022-04-11 15:11:04.358874: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 14922 MB memory) -> physical GPU (device: 1, name: Device 66a1, pci bus id: 0000:26:00.0)
2022-04-11 15:11:04.364894: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 14923 MB memory) -> physical GPU (device: 2, name: Device 66a1, pci bus id: 0000:43:00.0)
2022-04-11 15:11:04.374173: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 14923 MB memory) -> physical GPU (device: 3, name: Device 66a1, pci bus id: 0000:63:00.0)
WARNING:tensorflow:From train_gpu.py:320: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.
W0411 15:11:04.391065 46979636578560 module_wrapper.py:139] From train_gpu.py:320: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.
2022-04-11 15:11:07.726957: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-04-11 15:11:32.594987: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-04-11 15:11:32.984313: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
2022-04-11 15:11:34.189588: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-04-11 15:11:34.193113: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
INFO:tensorflow:[200] | gnorm 0.56 lr 0.000250 | loss 2.55 | pplx 12.86, bpc 3.6843
I0411 15:13:59.103161 46979636578560 train_gpu.py:345] [200] | gnorm 0.56 lr 0.000250 | loss 2.55 | pplx 12.86, bpc 3.6843
INFO:tensorflow:[400] | gnorm 0.46 lr 0.000249 | loss 1.74 | pplx 5.67, bpc 2.5034
I0411 15:16:13.616544 46979636578560 train_gpu.py:345] [400] | gnorm 0.46 lr 0.000249 | loss 1.74 | pplx 5.67, bpc 2.5034
INFO:tensorflow:[600] | gnorm 0.46 lr 0.000248 | loss 1.58 | pplx 4.84, bpc 2.2762
I0411 15:18:28.268989 46979636578560 train_gpu.py:345] [600] | gnorm 0.46 lr 0.000248 | loss 1.58 | pplx 4.84, bpc 2.2762
INFO:tensorflow:[800] | gnorm 0.44 lr 0.000246 | loss 1.41 | pplx 4.11, bpc 2.0408
I0411 15:20:42.953228 46979636578560 train_gpu.py:345] [800] | gnorm 0.44 lr 0.000246 | loss 1.41 | pplx 4.11, bpc 2.0408
INFO:tensorflow:[1000] | gnorm 0.42 lr 0.000244 | loss 1.31 | pplx 3.71, bpc 1.8905
I0411 15:22:57.681006 46979636578560 train_gpu.py:345] [1000] | gnorm 0.42 lr 0.000244 | loss 1.31 | pplx 3.71, bpc 1.8905
INFO:tensorflow:[1200] | gnorm 0.41 lr 0.000241 | loss 1.24 | pplx 3.47, bpc 1.7957
I0411 15:25:12.388444 46979636578560 train_gpu.py:345] [1200] | gnorm 0.41 lr 0.000241 | loss 1.24 | pplx 3.47, bpc 1.7957
INFO:tensorflow:[1400] | gnorm 0.41 lr 0.000238 | loss 1.20 | pplx 3.32, bpc 1.7314
I0411 15:27:27.104644 46979636578560 train_gpu.py:345] [1400] | gnorm 0.41 lr 0.000238 | loss 1.20 | pplx 3.32, bpc 1.7314
INFO:tensorflow:[1600] | gnorm 0.38 lr 0.000235 | loss 1.18 | pplx 3.27, bpc 1.7074
I0411 15:29:42.007892 46979636578560 train_gpu.py:345] [1600] | gnorm 0.38 lr 0.000235 | loss 1.18 | pplx 3.27, bpc 1.7074
INFO:tensorflow:[1800] | gnorm 0.37 lr 0.000231 | loss 1.15 | pplx 3.17, bpc 1.6638
I0411 15:31:56.785848 46979636578560 train_gpu.py:345] [1800] | gnorm 0.37 lr 0.000231 | loss 1.15 | pplx 3.17, bpc 1.6638
INFO:tensorflow:[2000] | gnorm 0.35 lr 0.000226 | loss 1.17 | pplx 3.22, bpc 1.6877
I0411 15:34:11.768712 46979636578560 train_gpu.py:345] [2000] | gnorm 0.35 lr 0.000226 | loss 1.17 | pplx 3.22, bpc 1.6877
INFO:tensorflow:[2200] | gnorm 0.37 lr 0.000221 | loss 1.13 | pplx 3.11, bpc 1.6366
I0411 15:36:26.584105 46979636578560 train_gpu.py:345] [2200] | gnorm 0.37 lr 0.000221 | loss 1.13 | pplx 3.11, bpc 1.6366
INFO:tensorflow:[2400] | gnorm 0.35 lr 0.000216 | loss 1.12 | pplx 3.06, bpc 1.6146
I0411 15:38:41.714276 46979636578560 train_gpu.py:345] [2400] | gnorm 0.35 lr 0.000216 | loss 1.12 | pplx 3.06, bpc 1.6146
INFO:tensorflow:[2600] | gnorm 0.39 lr 0.000211 | loss 1.12 | pplx 3.07, bpc 1.6197
I0411 15:40:56.903222 46979636578560 train_gpu.py:345] [2600] | gnorm 0.39 lr 0.000211 | loss 1.12 | pplx 3.07, bpc 1.6197
INFO:tensorflow:[2800] | gnorm 0.38 lr 0.000205 | loss 1.10 | pplx 2.99, bpc 1.5815
I0411 15:43:12.326100 46979636578560 train_gpu.py:345] [2800] | gnorm 0.38 lr 0.000205 | loss 1.10 | pplx 2.99, bpc 1.5815
INFO:tensorflow:[3000] | gnorm 0.35 lr 0.000199 | loss 1.09 | pplx 2.97, bpc 1.5693
I0411 15:45:27.849482 46979636578560 train_gpu.py:345] [3000] | gnorm 0.35 lr 0.000199 | loss 1.09 | pplx 2.97, bpc 1.5693
INFO:tensorflow:[3200] | gnorm 0.34 lr 0.000192 | loss 1.08 | pplx 2.96, bpc 1.5641
I0411 15:47:43.269138 46979636578560 train_gpu.py:345] [3200] | gnorm 0.34 lr 0.000192 | loss 1.08 | pplx 2.96, bpc 1.5641
INFO:tensorflow:[3400] | gnorm 0.36 lr 0.000185 | loss 1.06 | pplx 2.90, bpc 1.5352
I0411 15:49:58.617992 46979636578560 train_gpu.py:345] [3400] | gnorm 0.36 lr 0.000185 | loss 1.06 | pplx 2.90, bpc 1.5352
INFO:tensorflow:[3600] | gnorm 0.35 lr 0.000179 | loss 1.07 | pplx 2.90, bpc 1.5370
I0411 15:52:14.135429 46979636578560 train_gpu.py:345] [3600] | gnorm 0.35 lr 0.000179 | loss 1.07 | pplx 2.90, bpc 1.5370
INFO:tensorflow:[3800] | gnorm 0.33 lr 0.000171 | loss 1.05 | pplx 2.86, bpc 1.5170
I0411 15:54:29.591644 46979636578560 train_gpu.py:345] [3800] | gnorm 0.33 lr 0.000171 | loss 1.05 | pplx 2.86, bpc 1.5170
INFO:tensorflow:[4000] | gnorm 0.33 lr 0.000164 | loss 1.06 | pplx 2.89, bpc 1.5298
I0411 15:56:44.983796 46979636578560 train_gpu.py:345] [4000] | gnorm 0.33 lr 0.000164 | loss 1.06 | pplx 2.89, bpc 1.5298
2022-04-11 15:56:46.113523: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
INFO:tensorflow:Model saved in path: EXP-enwik8/model.ckpt
I0411 15:56:51.852108 46979636578560 train_gpu.py:351] Model saved in path: EXP-enwik8/model.ckpt
INFO:tensorflow:[4200] | gnorm 0.33 lr 0.000156 | loss 1.06 | pplx 2.88, bpc 1.5241
I0411 15:59:07.272919 46979636578560 train_gpu.py:345] [4200] | gnorm 0.33 lr 0.000156 | loss 1.06 | pplx 2.88, bpc 1.5241
INFO:tensorflow:[4400] | gnorm 0.33 lr 0.000149 | loss 1.03 | pplx 2.81, bpc 1.4889
I0411 16:01:22.739627 46979636578560 train_gpu.py:345] [4400] | gnorm 0.33 lr 0.000149 | loss 1.03 | pplx 2.81, bpc 1.4889
INFO:tensorflow:[4600] | gnorm 0.34 lr 0.000141 | loss 1.05 | pplx 2.87, bpc 1.5207
I0411 16:03:38.228260 46979636578560 train_gpu.py:345] [4600] | gnorm 0.34 lr 0.000141 | loss 1.05 | pplx 2.87, bpc 1.5207
INFO:tensorflow:[4800] | gnorm 0.32 lr 0.000133 | loss 1.02 | pplx 2.78, bpc 1.4776
I0411 16:05:53.647228 46979636578560 train_gpu.py:345] [4800] | gnorm 0.32 lr 0.000133 | loss 1.02 | pplx 2.78, bpc 1.4776
INFO:tensorflow:[5000] | gnorm 0.32 lr 0.000126 | loss 1.02 | pplx 2.78, bpc 1.4747
I0411 16:08:09.163792 46979636578560 train_gpu.py:345] [5000] | gnorm 0.32 lr 0.000126 | loss 1.02 | pplx 2.78, bpc 1.4747
INFO:tensorflow:[5200] | gnorm 0.32 lr 0.000118 | loss 1.04 | pplx 2.83, bpc 1.4988
I0411 16:10:24.546232 46979636578560 train_gpu.py:345] [5200] | gnorm 0.32 lr 0.000118 | loss 1.04 | pplx 2.83, bpc 1.4988
INFO:tensorflow:[5400] | gnorm 0.34 lr 0.000110 | loss 1.04 | pplx 2.82, bpc 1.4976
I0411 16:12:39.994851 46979636578560 train_gpu.py:345] [5400] | gnorm 0.34 lr 0.000110 | loss 1.04 | pplx 2.82, bpc 1.4976
INFO:tensorflow:[5600] | gnorm 0.33 lr 0.000102 | loss 1.03 | pplx 2.79, bpc 1.4825
I0411 16:14:55.336359 46979636578560 train_gpu.py:345] [5600] | gnorm 0.33 lr 0.000102 | loss 1.03 | pplx 2.79, bpc 1.4825
INFO:tensorflow:[5800] | gnorm 0.34 lr 0.000095 | loss 1.00 | pplx 2.71, bpc 1.4406
I0411 16:17:10.817455 46979636578560 train_gpu.py:345] [5800] | gnorm 0.34 lr 0.000095 | loss 1.00 | pplx 2.71, bpc 1.4406
INFO:tensorflow:[6000] | gnorm 0.34 lr 0.000087 | loss 1.00 | pplx 2.72, bpc 1.4411
I0411 16:19:26.204326 46979636578560 train_gpu.py:345] [6000] | gnorm 0.34 lr 0.000087 | loss 1.00 | pplx 2.72, bpc 1.4411
INFO:tensorflow:[6200] | gnorm 0.33 lr 0.000080 | loss 1.02 | pplx 2.77, bpc 1.4704
I0411 16:21:41.674479 46979636578560 train_gpu.py:345] [6200] | gnorm 0.33 lr 0.000080 | loss 1.02 | pplx 2.77, bpc 1.4704
INFO:tensorflow:[6400] | gnorm 0.32 lr 0.000072 | loss 1.01 | pplx 2.74, bpc 1.4556
I0411 16:23:57.228107 46979636578560 train_gpu.py:345] [6400] | gnorm 0.32 lr 0.000072 | loss 1.01 | pplx 2.74, bpc 1.4556
INFO:tensorflow:[6600] | gnorm 0.35 lr 0.000066 | loss 1.03 | pplx 2.80, bpc 1.4847
I0411 16:26:12.733853 46979636578560 train_gpu.py:345] [6600] | gnorm 0.35 lr 0.000066 | loss 1.03 | pplx 2.80, bpc 1.4847
INFO:tensorflow:[6800] | gnorm 0.32 lr 0.000059 | loss 1.02 | pplx 2.78, bpc 1.4754
I0411 16:28:28.142014 46979636578560 train_gpu.py:345] [6800] | gnorm 0.32 lr 0.000059 | loss 1.02 | pplx 2.78, bpc 1.4754
INFO:tensorflow:[7000] | gnorm 0.35 lr 0.000052 | loss 0.99 | pplx 2.70, bpc 1.4311
I0411 16:30:43.719517 46979636578560 train_gpu.py:345] [7000] | gnorm 0.35 lr 0.000052 | loss 0.99 | pplx 2.70, bpc 1.4311
INFO:tensorflow:[7200] | gnorm 0.33 lr 0.000046 | loss 0.97 | pplx 2.64, bpc 1.4001
I0411 16:32:59.260841 46979636578560 train_gpu.py:345] [7200] | gnorm 0.33 lr 0.000046 | loss 0.97 | pplx 2.64, bpc 1.4001
INFO:tensorflow:[7400] | gnorm 0.34 lr 0.000040 | loss 1.01 | pplx 2.73, bpc 1.4512
I0411 16:35:14.672811 46979636578560 train_gpu.py:345] [7400] | gnorm 0.34 lr 0.000040 | loss 1.01 | pplx 2.73, bpc 1.4512
INFO:tensorflow:[7600] | gnorm 0.33 lr 0.000035 | loss 0.96 | pplx 2.61, bpc 1.3820
I0411 16:37:30.178255 46979636578560 train_gpu.py:345] [7600] | gnorm 0.33 lr 0.000035 | loss 0.96 | pplx 2.61, bpc 1.3820
INFO:tensorflow:[7800] | gnorm 0.33 lr 0.000030 | loss 0.98 | pplx 2.68, bpc 1.4198
I0411 16:39:45.574867 46979636578560 train_gpu.py:345] [7800] | gnorm 0.33 lr 0.000030 | loss 0.98 | pplx 2.68, bpc 1.4198
INFO:tensorflow:[8000] | gnorm 0.31 lr 0.000025 | loss 0.96 | pplx 2.61, bpc 1.3850
I0411 16:42:00.981717 46979636578560 train_gpu.py:345] [8000] | gnorm 0.31 lr 0.000025 | loss 0.96 | pplx 2.61, bpc 1.3850
INFO:tensorflow:Model saved in path: EXP-enwik8/model.ckpt
I0411 16:42:05.911054 46979636578560 train_gpu.py:351] Model saved in path: EXP-enwik8/model.ckpt
INFO:tensorflow:[8200] | gnorm 0.31 lr 0.000020 | loss 0.98 | pplx 2.65, bpc 1.4076
I0411 16:44:21.411516 46979636578560 train_gpu.py:345] [8200] | gnorm 0.31 lr 0.000020 | loss 0.98 | pplx 2.65, bpc 1.4076
INFO:tensorflow:[8400] | gnorm 0.30 lr 0.000016 | loss 0.97 | pplx 2.63, bpc 1.3931
I0411 16:46:36.940109 46979636578560 train_gpu.py:345] [8400] | gnorm 0.30 lr 0.000016 | loss 0.97 | pplx 2.63, bpc 1.3931
INFO:tensorflow:[8600] | gnorm 0.32 lr 0.000013 | loss 0.95 | pplx 2.59, bpc 1.3718
I0411 16:48:52.560289 46979636578560 train_gpu.py:345] [8600] | gnorm 0.32 lr 0.000013 | loss 0.95 | pplx 2.59, bpc 1.3718
INFO:tensorflow:[8800] | gnorm 0.33 lr 0.000010 | loss 0.95 | pplx 2.58, bpc 1.3666
I0411 16:51:08.070581 46979636578560 train_gpu.py:345] [8800] | gnorm 0.33 lr 0.000010 | loss 0.95 | pplx 2.58, bpc 1.3666
INFO:tensorflow:[9000] | gnorm 0.31 lr 0.000007 | loss 0.96 | pplx 2.62, bpc 1.3921
I0411 16:53:23.609839 46979636578560 train_gpu.py:345] [9000] | gnorm 0.31 lr 0.000007 | loss 0.96 | pplx 2.62, bpc 1.3921
INFO:tensorflow:[9200] | gnorm 0.31 lr 0.000005 | loss 0.98 | pplx 2.66, bpc 1.4139
I0411 16:55:38.975215 46979636578560 train_gpu.py:345] [9200] | gnorm 0.31 lr 0.000005 | loss 0.98 | pplx 2.66, bpc 1.4139
INFO:tensorflow:[9400] | gnorm 0.32 lr 0.000003 | loss 0.97 | pplx 2.65, bpc 1.4033
I0411 16:57:54.425925 46979636578560 train_gpu.py:345] [9400] | gnorm 0.32 lr 0.000003 | loss 0.97 | pplx 2.65, bpc 1.4033
INFO:tensorflow:[9600] | gnorm 0.34 lr 0.000002 | loss 0.97 | pplx 2.63, bpc 1.3954
I0411 17:00:09.927987 46979636578560 train_gpu.py:345] [9600] | gnorm 0.34 lr 0.000002 | loss 0.97 | pplx 2.63, bpc 1.3954
INFO:tensorflow:[9800] | gnorm 0.31 lr 0.000001 | loss 0.97 | pplx 2.65, bpc 1.4051
I0411 17:02:25.449383 46979636578560 train_gpu.py:345] [9800] | gnorm 0.31 lr 0.000001 | loss 0.97 | pplx 2.65, bpc 1.4051
INFO:tensorflow:[10000] | gnorm 0.33 lr 0.000001 | loss 0.96 | pplx 2.62, bpc 1.3900
I0411 17:04:40.909836 46979636578560 train_gpu.py:345] [10000] | gnorm 0.33 lr 0.000001 | loss 0.96 | pplx 2.62, bpc 1.3900
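For reference when reading the log above, the pplx and bpc columns appear to be simple transforms of the reported loss (perplexity = exp(loss), bits per character = loss / ln 2); the step-200 line checks out:

import math

# Sanity check against the step-200 log line:
#   "loss 2.55 | pplx 12.86, bpc 3.6843"
# loss is printed with only two decimals, so recover it from bpc instead.
loss = 3.6843 * math.log(2)            # ~2.5538
print(round(math.exp(loss), 2))        # 12.86  -> matches pplx
print(round(loss / math.log(2), 4))    # 3.6843 -> matches bpc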
import tensorflow as tf
def positional_embedding(pos_seq, inv_freq, bsz=None):
  sinusoid_inp = tf.einsum('i,j->ij', pos_seq, inv_freq)
  pos_emb = tf.concat([tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)], -1)
  if bsz is not None:
    return tf.tile(pos_emb[:, None, :], [1, bsz, 1])
  else:
    return pos_emb[:, None, :]

def positionwise_FF(inp, d_model, d_inner, dropout, kernel_initializer,
                    scope='ff', is_training=True):
  output = inp
  with tf.variable_scope(scope):
    output = tf.layers.dense(inp, d_inner, activation=tf.nn.relu,
                             kernel_initializer=kernel_initializer,
                             name='layer_1')
    output = tf.layers.dropout(output, dropout, training=is_training,
                               name='drop_1')
    output = tf.layers.dense(output, d_model,
                             kernel_initializer=kernel_initializer,
                             name='layer_2')
    output = tf.layers.dropout(output, dropout, training=is_training,
                               name='drop_2')
    output = tf.contrib.layers.layer_norm(output + inp, begin_norm_axis=-1)
  return output

def rel_shift(x):
  # Relative-shift trick: pad, reshape and slice so that entry [i, j]
  # lines up with the relative distance between query i and key j.
  x_size = tf.shape(x)

  x = tf.pad(x, [[0, 0], [1, 0], [0, 0], [0, 0]])
  x = tf.reshape(x, [x_size[1] + 1, x_size[0], x_size[2], x_size[3]])
  x = tf.slice(x, [1, 0, 0, 0], [-1, -1, -1, -1])
  x = tf.reshape(x, x_size)

  return x

def rel_multihead_attn(w, r, r_w_bias, r_r_bias, attn_mask, mems, d_model,
                       n_head, d_head, dropout, dropatt, is_training,
                       kernel_initializer, scope='rel_attn'):
  scale = 1 / (d_head ** 0.5)
  with tf.variable_scope(scope):
    qlen = tf.shape(w)[0]
    rlen = tf.shape(r)[0]
    bsz = tf.shape(w)[1]

    cat = tf.concat([mems, w],
                    0) if mems is not None and mems.shape.ndims > 1 else w
    w_heads = tf.layers.dense(cat, 3 * n_head * d_head, use_bias=False,
                              kernel_initializer=kernel_initializer, name='qkv')
    r_head_k = tf.layers.dense(r, n_head * d_head, use_bias=False,
                               kernel_initializer=kernel_initializer, name='r')

    w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, -1)
    w_head_q = w_head_q[-qlen:]

    klen = tf.shape(w_head_k)[0]

    w_head_q = tf.reshape(w_head_q, [qlen, bsz, n_head, d_head])
    w_head_k = tf.reshape(w_head_k, [klen, bsz, n_head, d_head])
    w_head_v = tf.reshape(w_head_v, [klen, bsz, n_head, d_head])

    r_head_k = tf.reshape(r_head_k, [rlen, n_head, d_head])

    rw_head_q = w_head_q + r_w_bias
    rr_head_q = w_head_q + r_r_bias

    # AC: content-based term (query plus content bias r_w_bias);
    # BD: relative-position term (query plus position bias r_r_bias),
    # realigned by rel_shift.
    AC = tf.einsum('ibnd,jbnd->ijbn', rw_head_q, w_head_k)
    BD = tf.einsum('ibnd,jnd->ijbn', rr_head_q, r_head_k)
    BD = rel_shift(BD)

    attn_score = (AC + BD) * scale
    # attn_mask is 1 at disallowed (future) positions: zero those scores
    # and push them to a large negative value before the softmax.
    attn_mask_t = attn_mask[:, :, None, None]
    attn_score = attn_score * (1 - attn_mask_t) - 1e30 * attn_mask_t

    attn_prob = tf.nn.softmax(attn_score, 1)
    attn_prob = tf.layers.dropout(attn_prob, dropatt, training=is_training)

    attn_vec = tf.einsum('ijbn,jbnd->ibnd', attn_prob, w_head_v)
    size_t = tf.shape(attn_vec)
    attn_vec = tf.reshape(attn_vec, [size_t[0], size_t[1], n_head * d_head])

    attn_out = tf.layers.dense(attn_vec, d_model, use_bias=False,
                               kernel_initializer=kernel_initializer, name='o')
    attn_out = tf.layers.dropout(attn_out, dropout, training=is_training)

    output = tf.contrib.layers.layer_norm(attn_out + w, begin_norm_axis=-1)
  return output

def embedding_lookup(lookup_table, x, use_tpu=True):
  if use_tpu:
    n_token = tf.shape(lookup_table)[0]
    one_hot_idx = tf.one_hot(x, n_token)
    if one_hot_idx.shape.ndims == 2:
      return tf.einsum('nd,in->id', lookup_table, one_hot_idx)
    else:
      return tf.einsum('nd,ibn->ibd', lookup_table, one_hot_idx)
  else:
    return tf.nn.embedding_lookup(lookup_table, x)

def mask_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer,
                                   proj_initializer, div_val=1,
                                   proj_same_dim=True,
                                   scope='adaptive_embed', **kwargs):
  emb_scale = d_proj ** 0.5
  with tf.variable_scope(scope):
    if div_val == 1:
      lookup_table = tf.get_variable('lookup_table', [n_token, d_embed],
                                     initializer=initializer)
      y = embedding_lookup(lookup_table, x, use_tpu=False)
      if d_proj != d_embed:
        proj_W = tf.get_variable('proj_W', [d_embed, d_proj],
                                 initializer=proj_initializer)
        y = tf.einsum('ibe,ed->ibd', y, proj_W)
      else:
        proj_W = None
      ret_params = [lookup_table, proj_W]
    else:
      tables, projs = [], []
      cutoff_ends = [0] + cutoffs + [n_token]
      x_size = tf.shape(x)
      y = tf.zeros([x_size[0], x_size[1], d_proj])
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
          mask = (x >= l_idx) & (x < r_idx)
          cur_x = tf.boolean_mask(x, mask) - l_idx
          cur_d_embed = d_embed // (div_val ** i)
          lookup_table = tf.get_variable('lookup_table',
                                         [r_idx - l_idx, cur_d_embed],
                                         initializer=initializer)
          cur_y = embedding_lookup(lookup_table, cur_x, use_tpu=False)
          if d_proj == cur_d_embed and not proj_same_dim:
            proj_W = None
          else:
            proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj],
                                     initializer=proj_initializer)
            cur_y = tf.einsum('id,de->ie', cur_y, proj_W)
          mask_idx = tf.to_int64(tf.where(mask))
          y += tf.scatter_nd(mask_idx, cur_y, tf.to_int64(tf.shape(y)))
          tables.append(lookup_table)
          projs.append(proj_W)
      ret_params = [tables, projs]

  y *= emb_scale
  return y, ret_params

def mul_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer,
                                  proj_initializer, div_val=1, perms=None,
                                  proj_same_dim=True,
                                  scope='adaptive_embed'):
  """
  perms: If None, first compute W = W1 x W2 (projection for each bin),
      and then compute X x W (embedding lookup). If not None,
      use bin-based embedding lookup with max_bin_size defined by
      the shape of perms.
  """
  emb_scale = d_proj ** 0.5
  with tf.variable_scope(scope):
    if div_val == 1:
      lookup_table = tf.get_variable('lookup_table', [n_token, d_embed],
                                     initializer=initializer)
      y = embedding_lookup(lookup_table, x)
      if d_proj != d_embed:
        proj_W = tf.get_variable('proj_W', [d_embed, d_proj],
                                 initializer=proj_initializer)
        y = tf.einsum('ibe,ed->ibd', y, proj_W)
      else:
        proj_W = None
      ret_params = [lookup_table, proj_W]
    else:
      tables, projs = [], []
      cutoff_ends = [0] + cutoffs + [n_token]
      x_size = tf.shape(x)
      if perms is None:
        cat_lookup = []
      else:
        cat_lookup = tf.zeros([x_size[0], x_size[1], d_proj])
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
          cur_d_embed = d_embed // (div_val ** i)
          lookup_table = tf.get_variable('lookup_table',
                                         [r_idx - l_idx, cur_d_embed],
                                         initializer=initializer)
          if cur_d_embed == d_proj and not proj_same_dim:
            proj_W = None
          else:
            proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj],
                                     initializer=proj_initializer)
          if perms is None:
            cat_lookup.append(tf.einsum('ie,ed->id', lookup_table, proj_W))
          else:
            # speed up the computation of the first bin
            # also save some memory
            if i == 0:
              cur_y = embedding_lookup(lookup_table, tf.minimum(x, r_idx - 1))
              if proj_W is not None:
                cur_y = tf.einsum('ibe,ed->ibd', cur_y, proj_W)
              cur_y *= perms[i][:, :, None]
              cat_lookup += cur_y
            else:
              cur_x = tf.einsum('ib,ibk->k', tf.to_float(x - l_idx), perms[i])
              cur_x = tf.to_int32(cur_x)
              cur_y = embedding_lookup(lookup_table, cur_x)
              if proj_W is not None:
                cur_y = tf.einsum('ke,ed->kd', cur_y, proj_W)
              cat_lookup += tf.einsum('kd,ibk->ibd', cur_y, perms[i])
          tables.append(lookup_table)
          projs.append(proj_W)
      if perms is None:
        cat_lookup = tf.concat(cat_lookup, 0)
        y = embedding_lookup(cat_lookup, x)
      else:
        y = cat_lookup
      ret_params = [tables, projs]

  y *= emb_scale
  return y, ret_params

def mask_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs,
                             params, tie_projs,
                             initializer=None, proj_initializer=None,
                             div_val=1, scope='adaptive_softmax',
                             proj_same_dim=True,
                             return_mean=True, **kwargs):
  def _logit(x, W, b, proj):
    y = x
    if proj is not None:
      y = tf.einsum('ibd,ed->ibe', y, proj)
    return tf.einsum('ibd,nd->ibn', y, W) + b

  params_W, params_projs = params[0], params[1]

  def _gather_logprob(logprob, target):
    lp_size = tf.shape(logprob)
    r = tf.range(lp_size[0])
    idx = tf.stack([r, target], 1)
    return tf.gather_nd(logprob, idx)

  with tf.variable_scope(scope):
    if len(cutoffs) == 0:
      softmax_b = tf.get_variable('bias', [n_token],
                                  initializer=tf.zeros_initializer())
      output = _logit(hidden, params_W, softmax_b, params_projs)
      nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target,
                                                           logits=output)
    else:
      cutoff_ends = [0] + cutoffs + [n_token]
      nll = tf.zeros_like(target, dtype=tf.float32)
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
          mask = (target >= l_idx) & (target < r_idx)
          mask_idx = tf.where(mask)
          cur_target = tf.boolean_mask(target, mask) - l_idx
          cur_d_embed = d_embed // (div_val ** i)

          if div_val == 1:
            cur_W = params_W[l_idx: r_idx]
          else:
            cur_W = params_W[i]
          cur_b = tf.get_variable('b', [r_idx - l_idx],
                                  initializer=tf.zeros_initializer())
          if tie_projs[i]:
            if div_val == 1:
              cur_proj = params_projs
            else:
              cur_proj = params_projs[i]
          else:
            if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
              cur_proj = None
            else:
              cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj],
                                         initializer=proj_initializer)
          if i == 0:
            cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed],
                                        initializer=tf.zeros_initializer())
            cluster_b = tf.get_variable('cluster_b', [len(cutoffs)],
                                        initializer=tf.zeros_initializer())
            cur_W = tf.concat([cur_W, cluster_W], 0)
            cur_b = tf.concat([cur_b, cluster_b], 0)

            head_logit = _logit(hidden, cur_W, cur_b, cur_proj)
            head_logprob = tf.nn.log_softmax(head_logit)
            cur_head_logprob = tf.boolean_mask(head_logprob, mask)
            cur_logprob = _gather_logprob(cur_head_logprob, cur_target)
          else:
            cur_head_logprob = tf.boolean_mask(head_logprob, mask)
            cur_hidden = tf.boolean_mask(hidden, mask)
            tail_logit = tf.squeeze(_logit(
                cur_hidden[None], cur_W, cur_b, cur_proj), 0)
            tail_logprob = tf.nn.log_softmax(tail_logit)
            cur_logprob = (cur_head_logprob[:, cutoff_ends[1] + i - 1] +
                           _gather_logprob(tail_logprob, cur_target))
          nll += tf.scatter_nd(mask_idx, -cur_logprob,
                               tf.to_int64(tf.shape(nll)))
  if return_mean:
    nll = tf.reduce_mean(nll)
  return nll

def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs,
                            params, tie_projs,
                            initializer=None, proj_initializer=None,
                            div_val=1, perms=None, proj_same_dim=True,
                            scope='adaptive_softmax',
                            **kwargs):
  def _logit(x, W, b, proj):
    y = x
    if x.shape.ndims == 3:
      if proj is not None:
        y = tf.einsum('ibd,ed->ibe', y, proj)
      return tf.einsum('ibd,nd->ibn', y, W) + b
    else:
      if proj is not None:
        y = tf.einsum('id,ed->ie', y, proj)
      return tf.einsum('id,nd->in', y, W) + b

  params_W, params_projs = params[0], params[1]

  with tf.variable_scope(scope):
    if len(cutoffs) == 0:
      softmax_b = tf.get_variable('bias', [n_token],
                                  initializer=tf.zeros_initializer())
      output = _logit(hidden, params_W, softmax_b, params_projs)
      nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target,
                                                           logits=output)
      nll = tf.reduce_mean(nll)
    else:
      total_loss, total_cnt = 0, 0
      cutoff_ends = [0] + cutoffs + [n_token]
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
          cur_d_embed = d_embed // (div_val ** i)

          if div_val == 1:
            cur_W = params_W[l_idx: r_idx]
          else:
            cur_W = params_W[i]
          cur_b = tf.get_variable('b', [r_idx - l_idx],
                                  initializer=tf.zeros_initializer())
          if tie_projs[i]:
            if div_val == 1:
              cur_proj = params_projs
            else:
              cur_proj = params_projs[i]
          else:
            if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
              cur_proj = None
            else:
              cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj],
                                         initializer=proj_initializer)
          if i == 0:
            cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed],
                                        initializer=tf.zeros_initializer())
            cluster_b = tf.get_variable('cluster_b', [len(cutoffs)],
                                        initializer=tf.zeros_initializer())
            cur_W = tf.concat([cur_W, cluster_W], 0)
            cur_b = tf.concat([cur_b, cluster_b], 0)

            head_logit = _logit(hidden, cur_W, cur_b, cur_proj)

            head_target = kwargs.get("head_target")
            head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=head_target,
                logits=head_logit)

            masked_loss = head_nll * perms[i]
            total_loss += tf.reduce_sum(masked_loss)
            total_cnt += tf.reduce_sum(perms[i])

            # head_logprob = tf.nn.log_softmax(head_logit)
            # final_logprob = head_logprob * perms[i][:, :, None]
            # final_target = tf.one_hot(target, tf.shape(head_logprob)[2])
            # total_loss -= tf.einsum('ibn,ibn->', final_logprob, final_target)
            # total_cnt += tf.reduce_sum(perms[i])
          else:
            cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i])

            cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i])
            tail_logit = _logit(cur_hidden, cur_W, cur_b, cur_proj)

            tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx),
                                    perms[i])
            tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.to_int32(tail_target),
                logits=tail_logit)

            sum_nll = cur_head_nll + tail_nll
            mask = tf.reduce_sum(perms[i], [0, 1])

            masked_loss = sum_nll * mask
            total_loss += tf.reduce_sum(masked_loss)
            total_cnt += tf.reduce_sum(mask)

      nll = total_loss / total_cnt

  return nll

def _create_mask(qlen, mlen, same_length=False):
  attn_mask = tf.ones([qlen, qlen])
  mask_u = tf.matrix_band_part(attn_mask, 0, -1)
  mask_dia = tf.matrix_band_part(attn_mask, 0, 0)
  attn_mask_pad = tf.zeros([qlen, mlen])
  ret = tf.concat([attn_mask_pad, mask_u - mask_dia], 1)
  if same_length:
    mask_l = tf.matrix_band_part(attn_mask, -1, 0)
    ret = tf.concat([ret[:, :qlen] + mask_l - mask_dia, ret[:, qlen:]], 1)
  return ret

def _cache_mem(curr_out, prev_mem, mem_len=None):
  if mem_len is None or prev_mem is None:
    new_mem = curr_out
  elif mem_len == 0:
    return prev_mem
  else:
    new_mem = tf.concat([prev_mem, curr_out], 0)[- mem_len:]

  return tf.stop_gradient(new_mem)

def transformer(dec_inp, target, mems, n_token, n_layer, d_model, d_embed,
                n_head, d_head, d_inner, dropout, dropatt,
                initializer, is_training, proj_initializer=None,
                mem_len=None, cutoffs=[], div_val=1, tie_projs=[],
                same_length=False, clamp_len=-1, use_tpu=True,
                input_perms=None, target_perms=None, head_target=None,
                untie_r=False, proj_same_dim=True,
                scope='transformer'):
  """
  cutoffs: a list of python int. Cutoffs for adaptive softmax.
  tie_projs: a list of python bools. Whether to tie the projections.
  use_tpu: if True, use one_hot in embedding lookup and bin-based implementation
      of adaptive softmax.
  perms: a list of tensors. Each tensor should be of size [len, bsz, bin_size].
      Only used in the adaptive setting.
  """
  new_mems = []
  with tf.variable_scope(scope):
    if untie_r:
      r_w_bias = tf.get_variable('r_w_bias', [n_layer, n_head, d_head],
                                 initializer=initializer)
      r_r_bias = tf.get_variable('r_r_bias', [n_layer, n_head, d_head],
                                 initializer=initializer)
    else:
      r_w_bias = tf.get_variable('r_w_bias', [n_head, d_head],
                                 initializer=initializer)
      r_r_bias = tf.get_variable('r_r_bias', [n_head, d_head],
                                 initializer=initializer)

    qlen = tf.shape(dec_inp)[0]
    mlen = tf.shape(mems[0])[0] if mems is not None else 0
    klen = mlen + qlen

    if proj_initializer is None:
      proj_initializer = initializer
    lookup_fn = (mul_adaptive_embedding_lookup if use_tpu else
                 mask_adaptive_embedding_lookup)
    embeddings, shared_params = lookup_fn(
        x=dec_inp,
        n_token=n_token,
        d_embed=d_embed,
        d_proj=d_model,
        cutoffs=cutoffs,
        initializer=initializer,
        proj_initializer=proj_initializer,
        div_val=div_val,
        perms=input_perms,
        proj_same_dim=proj_same_dim)

    attn_mask = _create_mask(qlen, mlen, same_length)

    pos_seq = tf.range(klen - 1, -1, -1.0)
    if clamp_len > 0:
      pos_seq = tf.minimum(pos_seq, clamp_len)
    inv_freq = 1 / (10000 ** (tf.range(0, d_model, 2.0) / d_model))
    pos_emb = positional_embedding(pos_seq, inv_freq)

    output = tf.layers.dropout(embeddings, dropout, training=is_training)
    pos_emb = tf.layers.dropout(pos_emb, dropout, training=is_training)

    if mems is None:
      mems = [None] * n_layer

    for i in range(n_layer):
      # cache new mems
      new_mems.append(_cache_mem(output, mems[i], mem_len))

      with tf.variable_scope('layer_{}'.format(i)):
        output = rel_multihead_attn(
            w=output,
            r=pos_emb,
            r_w_bias=r_w_bias if not untie_r else r_w_bias[i],
            r_r_bias=r_r_bias if not untie_r else r_r_bias[i],
            attn_mask=attn_mask,
            mems=mems[i],
            d_model=d_model,
            n_head=n_head,
            d_head=d_head,
            dropout=dropout,
            dropatt=dropatt,
            is_training=is_training,
            kernel_initializer=initializer)
        output = positionwise_FF(
            inp=output,
            d_model=d_model,
            d_inner=d_inner,
            dropout=dropout,
            kernel_initializer=initializer,
            is_training=is_training)

    output = tf.layers.dropout(output, dropout, training=is_training)

    logsoftmax_fn = (mul_adaptive_logsoftmax if use_tpu else
                     mask_adaptive_logsoftmax)
    loss = logsoftmax_fn(
        hidden=output,
        target=target,
        n_token=n_token,
        d_embed=d_embed,
        d_proj=d_model,
        cutoffs=cutoffs,
        params=shared_params,
        tie_projs=tie_projs,
        initializer=initializer,
        proj_initializer=proj_initializer,
        div_val=div_val,
        perms=target_perms,
        head_target=head_target,
        proj_same_dim=proj_same_dim)
    return loss, new_mems
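
For orientation, below is a minimal sketch of how the transformer() function above might be wired into a TF1 graph; the hyperparameters are illustrative assumptions, not the settings used in the runs logged in this commit.

# Minimal, illustrative wiring of the model code above (TF1 graph mode).
# Every hyperparameter value here is an assumption for demonstration only.
n_layer, d_model, n_head, d_head, d_inner = 4, 256, 4, 64, 1024
n_token, tgt_len, mem_len, bsz = 204, 32, 32, 2

initializer = tf.random_normal_initializer(stddev=0.02)

inp = tf.placeholder(tf.int32, [tgt_len, bsz])
tgt = tf.placeholder(tf.int32, [tgt_len, bsz])
mems = [tf.placeholder(tf.float32, [mem_len, bsz, d_model])
        for _ in range(n_layer)]

# cutoffs=[] disables the adaptive softmax; use_tpu=False selects the
# mask_* (non-TPU) embedding and softmax paths, so no perms are needed.
loss, new_mems = transformer(
    dec_inp=inp, target=tgt, mems=mems,
    n_token=n_token, n_layer=n_layer, d_model=d_model, d_embed=d_model,
    n_head=n_head, d_head=d_head, d_inner=d_inner,
    dropout=0.1, dropatt=0.0,
    initializer=initializer, is_training=True,
    mem_len=mem_len, cutoffs=[], div_val=1, tie_projs=[],
    same_length=False, clamp_len=-1, use_tpu=False)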
Run training...
WARNING:root:Limited tf.compat.v2.summary API due to missing TensorBoard installation.
WARNING:tensorflow:From train_gpu_test.py:23: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
WARNING:tensorflow:From train_gpu_test.py:23: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
WARNING:tensorflow:From train_gpu_test.py:23: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
WARNING:tensorflow:From train_gpu_test.py:23: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
WARNING:tensorflow:From train_gpu_test.py:492: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead.
WARNING:tensorflow:From train_gpu_test.py:492: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead.
WARNING:tensorflow:From train_gpu_test.py:482: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
W0623 14:46:20.521119 47187556010368 module_wrapper.py:139] From train_gpu_test.py:482: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
INFO:tensorflow:n_token 204
I0623 14:46:20.521361 47187556010368 train_gpu_test.py:482] n_token 204
INFO:tensorflow:[train] File names ['train.bsz-12.tlen-512.tfrecords']
I0623 14:46:20.531896 47187556010368 data_utils.py:434] [train] File names ['train.bsz-12.tlen-512.tfrecords']
INFO:tensorflow:num of batches 14483
I0623 14:46:20.532083 47187556010368 train_gpu_test.py:240] num of batches 14483
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.sparse_tensor_to_dense is deprecated. Please use tf.sparse.to_dense instead.
W0623 14:46:34.696085 47187556010368 module_wrapper.py:139] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.sparse_tensor_to_dense is deprecated. Please use tf.sparse.to_dense instead.
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.
W0623 14:46:34.697554 47187556010368 module_wrapper.py:139] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.
W0623 14:46:34.698294 47187556010368 module_wrapper.py:139] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.parse_single_example is deprecated. Please use tf.io.parse_single_example instead.
W0623 14:46:34.701079 47187556010368 module_wrapper.py:139] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.parse_single_example is deprecated. Please use tf.io.parse_single_example instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/data_utils.py:506: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
W0623 14:46:35.660339 47187556010368 deprecation.py:323] From /work/home/hepj/tf1/transformer-xl-master/tf/data_utils.py:506: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
WARNING:tensorflow:From train_gpu_test.py:247: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
W0623 14:46:35.673759 47187556010368 deprecation.py:323] From train_gpu_test.py:247: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
WARNING:tensorflow:From train_gpu_test.py:259: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
W0623 14:46:35.692192 47187556010368 module_wrapper.py:139] From train_gpu_test.py:259: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
WARNING:tensorflow:From train_gpu_test.py:259: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.
W0623 14:46:35.692516 47187556010368 module_wrapper.py:139] From train_gpu_test.py:259: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.
WARNING:tensorflow:From train_gpu_test.py:263: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0623 14:46:35.692863 47187556010368 module_wrapper.py:139] From train_gpu_test.py:263: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/gpu_utils.py:6: The name tf.NodeDef is deprecated. Please use tf.compat.v1.NodeDef instead.
W0623 14:46:35.693674 47187556010368 module_wrapper.py:139] From /work/home/hepj/tf1/transformer-xl-master/tf/gpu_utils.py:6: The name tf.NodeDef is deprecated. Please use tf.compat.v1.NodeDef instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:460: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
W0623 14:46:35.704414 47187556010368 module_wrapper.py:139] From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:460: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:416: The name tf.matrix_band_part is deprecated. Please use tf.linalg.band_part instead.
W0623 14:46:35.742786 47187556010368 module_wrapper.py:139] From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:416: The name tf.matrix_band_part is deprecated. Please use tf.linalg.band_part instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:493: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dropout instead.
W0623 14:46:35.775503 47187556010368 deprecation.py:323] From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:493: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dropout instead.
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/layers/core.py:271: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
W0623 14:46:35.776226 47187556010368 deprecation.py:323] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/layers/core.py:271: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
WARNING:tensorflow:From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:54: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
W0623 14:46:35.801217 47187556010368 deprecation.py:323] From /work/home/hepj/tf1/transformer-xl-master/tf/model.py:54: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
W0623 14:46:36.060614 47187556010368 lazy_loader.py:50]
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
WARNING:tensorflow:From train_gpu_test.py:194: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.
W0623 14:46:40.507537 47187556010368 module_wrapper.py:139] From train_gpu_test.py:194: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.
INFO:tensorflow:#params: 41055436
I0623 14:46:40.517497 47187556010368 train_gpu_test.py:195] #params: 41055436
INFO:tensorflow:#params: 41055436
I0623 14:46:49.611661 47187556010368 train_gpu_test.py:195] #params: 41055436
INFO:tensorflow:#params: 41055436
I0623 14:46:58.740740 47187556010368 train_gpu_test.py:195] #params: 41055436
INFO:tensorflow:#params: 41055436
I0623 14:47:08.116391 47187556010368 train_gpu_test.py:195] #params: 41055436
WARNING:tensorflow:From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/clip_ops.py:301: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0623 14:47:13.709527 47187556010368 deprecation.py:323] From /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/clip_ops.py:301: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:From train_gpu_test.py:292: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.
W0623 14:47:13.892564 47187556010368 module_wrapper.py:139] From train_gpu_test.py:292: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.
WARNING:tensorflow:From train_gpu_test.py:302: The name tf.train.cosine_decay is deprecated. Please use tf.compat.v1.train.cosine_decay instead.
W0623 14:47:13.896909 47187556010368 module_wrapper.py:139] From train_gpu_test.py:302: The name tf.train.cosine_decay is deprecated. Please use tf.compat.v1.train.cosine_decay instead.
WARNING:tensorflow:From train_gpu_test.py:313: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
W0623 14:47:13.910406 47187556010368 module_wrapper.py:139] From train_gpu_test.py:313: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
WARNING:tensorflow:From train_gpu_test.py:323: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.
W0623 14:47:15.554019 47187556010368 module_wrapper.py:139] From train_gpu_test.py:323: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.
WARNING:tensorflow:From train_gpu_test.py:325: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
W0623 14:47:15.950821 47187556010368 module_wrapper.py:139] From train_gpu_test.py:325: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
WARNING:tensorflow:From train_gpu_test.py:325: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
W0623 14:47:15.951256 47187556010368 module_wrapper.py:139] From train_gpu_test.py:325: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
2022-06-23 14:47:15.951746: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2022-06-23 14:47:16.345128: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1999880000 Hz
2022-06-23 14:47:16.347222: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1399f370 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-06-23 14:47:16.347347: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2022-06-23 14:47:16.376028: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libamdhip64.so
2022-06-23 14:47:20.574412: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1d549690 initialized for platform ROCM (this does not guarantee that XLA will be used). Devices:
2022-06-23 14:47:20.574543: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): C878180, AMDGPU ISA version: gfx906
2022-06-23 14:47:20.574586: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (1): C878180, AMDGPU ISA version: gfx906
2022-06-23 14:47:20.574625: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (2): C878180, AMDGPU ISA version: gfx906
2022-06-23 14:47:20.574663: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (3): C878180, AMDGPU ISA version: gfx906
2022-06-23 14:47:20.581876: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 0 with properties:
name: C878180
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.319
pciBusID 0000:04:00.0
2022-06-23 14:47:20.582075: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 1 with properties:
name: C878180
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.319
pciBusID 0000:26:00.0
2022-06-23 14:47:20.582181: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 2 with properties:
name: C878180
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.319
pciBusID 0000:43:00.0
2022-06-23 14:47:20.582264: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1650] Found device 3 with properties:
name: C878180
AMDGPU ISA: gfx906
memoryClockRate (GHz) 1.319
pciBusID 0000:63:00.0
2022-06-23 14:47:23.159813: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
2022-06-23 14:47:23.222323: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
2022-06-23 14:48:25.788779: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
2022-06-23 14:48:25.890072: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
2022-06-23 14:48:25.890632: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0, 1, 2, 3
2022-06-23 14:48:25.890804: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-06-23 14:48:25.890868: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186] 0 1 2 3
2022-06-23 14:48:25.890934: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0: N Y Y Y
2022-06-23 14:48:25.890975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 1: Y N Y Y
2022-06-23 14:48:25.891013: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 2: Y Y N Y
2022-06-23 14:48:25.891050: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 3: Y Y Y N
2022-06-23 14:48:25.891650: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14001 MB memory) -> physical GPU (device: 0, name: C878180, pci bus id: 0000:04:00.0)
2022-06-23 14:48:25.899617: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 14001 MB memory) -> physical GPU (device: 1, name: C878180, pci bus id: 0000:26:00.0)
2022-06-23 14:48:25.913932: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 14001 MB memory) -> physical GPU (device: 2, name: C878180, pci bus id: 0000:43:00.0)
2022-06-23 14:48:25.922425: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 14001 MB memory) -> physical GPU (device: 3, name: C878180, pci bus id: 0000:63:00.0)
WARNING:tensorflow:From train_gpu_test.py:326: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.
W0623 14:48:25.955481 47187556010368 module_wrapper.py:139] From train_gpu_test.py:326: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.
2022-06-23 14:48:29.635470: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-06-23 14:48:29.853604: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 13.67G (14682108416 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.853823: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 12.31G (13213896704 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.853928: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 11.08G (11892506624 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854018: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 9.97G (10703255552 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854108: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.97G (9632929792 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854214: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.07G (8669636608 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854311: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 7.27G (7802672640 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854409: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 6.54G (7022405120 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854502: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.89G (6320164352 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854589: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.30G (5688147968 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854702: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.77G (5119332864 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854792: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.29G (4607399424 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.854922: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.86G (4146659328 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855037: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.48G (3731993344 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855140: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.13G (3358793984 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855247: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.81G (3022914560 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855350: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.53G (2720623104 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855448: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.28G (2448560640 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855561: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.05G (2203704576 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855672: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.85G (1983334144 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855761: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.66G (1785000704 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855880: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.50G (1606500608 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.855996: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.35G (1445850624 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.856110: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.21G (1301265664 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.856201: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.09G (1171139072 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:29.856310: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1005.20M (1054025216 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.325078: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-06-23 14:48:54.519871: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 13.67G (14682124032 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520098: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 12.31G (13213911040 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520185: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 11.08G (11892519936 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520269: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 9.97G (10703267840 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520352: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.97G (9632941056 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520434: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.07G (8669646848 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520515: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 7.27G (7802681856 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520596: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 6.54G (7022413312 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520677: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.89G (6320172032 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520757: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.30G (5688154624 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520838: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.77G (5119339008 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.520931: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.29G (4607405056 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521011: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.86G (4146664448 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521100: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.48G (3731997952 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521181: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.13G (3358798080 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521262: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.81G (3022918144 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521355: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.53G (2720626176 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521436: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.28G (2448563456 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521516: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.05G (2203707136 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521597: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.85G (1983336448 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521677: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.66G (1785002752 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521756: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.50G (1606502400 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521836: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.35G (1445852160 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.521925: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.21G (1301266944 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.522006: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.09G (1171140352 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.522086: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1005.20M (1054026496 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.550346: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
2022-06-23 14:48:54.751225: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 13.67G (14682116096 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751450: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 12.31G (13213903872 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751537: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 11.08G (11892512768 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751620: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 9.97G (10703261696 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751702: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.97G (9632934912 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751797: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.07G (8669640704 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751887: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 7.27G (7802676224 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.751969: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 6.54G (7022408192 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752049: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.89G (6320167424 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752129: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.30G (5688150528 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752209: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.77G (5119335424 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752299: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.29G (4607401984 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752379: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.86G (4146661632 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752460: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.48G (3731995392 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752540: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.13G (3358795776 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752621: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.81G (3022916096 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752701: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.53G (2720624384 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752780: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.28G (2448561920 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752868: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.05G (2203705600 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.752950: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.85G (1983335168 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753033: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.66G (1785001728 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753114: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.50G (1606501632 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753194: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.35G (1445851392 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753274: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.21G (1301266176 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753353: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.09G (1171139584 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.753433: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1005.20M (1054025728 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.979658: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 13.67G (14682108416 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.979903: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 12.31G (13213896704 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.979991: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 11.08G (11892506624 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980075: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 9.97G (10703255552 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980159: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.97G (9632929792 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980239: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 8.07G (8669636608 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980320: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 7.27G (7802672640 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980401: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 6.54G (7022405120 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980482: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.89G (6320164352 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980562: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 5.30G (5688147968 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980656: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.77G (5119332864 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980750: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 4.29G (4607399424 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980831: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.86G (4146659328 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.980920: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.48G (3731993344 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981001: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 3.13G (3358793984 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981081: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.81G (3022914560 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981161: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.53G (2720623104 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981241: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.28G (2448560640 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981320: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 2.05G (2203704576 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981400: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.85G (1983334144 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981479: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.66G (1785000704 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981559: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.50G (1606500608 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981639: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.35G (1445850624 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981719: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.21G (1301265664 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981800: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1.09G (1171139072 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:54.981887: E tensorflow/stream_executor/rocm/rocm_driver.cc:645] failed to allocate 1005.20M (1054025216 bytes) from device: HIP_ERROR_OutOfMemory
2022-06-23 14:48:55.422345: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
2022-06-23 14:48:55.425431: I tensorflow/core/graph/gpu_fusion_pass.cc:505] ROCm Fusion is enabled.
Error 218(hipErrorInvalidKernelFile) /data/jenkins_workspace/workspace/rocBLAS_release/rocblas/build/release/virtualenv/lib64/python3.6/site-packages/Tensile/Source/lib/source/hip/HipSolutionAdapter.cpp:84:
error
hipErrorInvalidKernelFile
/work/home/hepj/app/dtk-22.04.1/rocblas/lib/library_dcu2/TensileLibrary_gfx906.co
Error 218(hipErrorInvalidKernelFile) /data/jenkins_workspace/workspace/rocBLAS_release/rocblas/build/release/virtualenv/lib64/python3.6/site-packages/Tensile/Source/lib/source/hip/HipSolutionAdapter.cpp:84:
error
hipErrorInvalidKernelFile
/work/home/hepj/app/dtk-22.04.1/rocblas/lib/library_dcu2/TensileLibrary_gfx906.co
Error 218(hipErrorInvalidKernelFile) /data/jenkins_workspace/workspace/rocBLAS_release/rocblas/build/release/virtualenv/lib64/python3.6/site-packages/Tensile/Source/lib/source/hip/HipSolutionAdapter.cpp:84:
error
hipErrorInvalidKernelFile
/work/home/hepj/app/dtk-22.04.1/rocblas/lib/library_dcu2/TensileLibrary_gfx906.co
Error 218(hipErrorInvalidKernelFile) /data/jenkins_workspace/workspace/rocBLAS_release/rocblas/build/release/virtualenv/lib64/python3.6/site-packages/Tensile/Source/lib/source/hip/HipSolutionAdapter.cpp:84:
error
hipErrorInvalidKernelFile
/work/home/hepj/app/dtk-22.04.1/rocblas/lib/library_dcu2/TensileLibrary_gfx906.co
rocBLAS error: Tensile solution found, but exception thrown for { a_type: "f32_r", b_type: "f32_r", c_type: "f32_r", d_type: "f32_r", compute_type: "f32_r", transA: 'N', transB: 'N', M: 1536, N: 3072, K: 512, alpha: 1, row_stride_a: 1, col_stride_a: 1536, row_stride_b: 1, col_stride_b: 512, row_stride_c: 1, col_stride_c: 1536, row_stride_d: 1, col_stride_d: 1536, beta: 0, batch_count: 1, strided_batch: true, stride_a: 0, stride_b: 0, stride_c: 0, stride_d: 0, atomics_mode: atomics_not_allowed }
Kernel Cijk_Ailk_Bljk_SB_MT128x64x16_SN_APM1_AF0EM1_AF1EM1_AMAS3_ASAE01_ASCE01_ASEM1_BL1_DTL0_ETSP_EPS0_FL0_GRVW4_GSU1_ISA906_IU1_K1_KLA_LPA0_LPB0_LDL1_LRVW4_MAC_MDA2_NLCA1_NLCB1_ONLL1_PK0_PGR0_PLR1_RK0_SU32_SUM0_SUS256_SVW4_SNLL0_TT8_4_USFGRO0_VAW1_VS1_VW4_WG16_16_1_WGM1 not found in any loaded module.
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
2022-06-23 14:49:05.193083: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193084: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193098: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193602: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193683: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193756: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.193945: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.194235: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.194547: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.194827: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195128: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.194451: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195339: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195407: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195598: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195656: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.195961: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196166: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196230: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196526: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196709: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196821: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.196884: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.197241: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.197292: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.197748: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.198108: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.198415: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.198601: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.198852: E tensorflow/stream_executor/rocm/rocm_blas.cc:416] failed to run ROCBLAS routine rocblas_sgemm: rocblas_status_internal_error
2022-06-23 14:49:05.906378: W tensorflow/core/kernels/data/cache_dataset_ops.cc:824] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
==================================================
/work/home/hepj/tf1/transformer-xl-master/data/enwik8//tfrecords/record_info-train.bsz-12.tlen-512.json
==================================================
Traceback (most recent call last):
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1365, in _do_call
return fn(*args)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1350, in _run_fn
target_list, run_metadata)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InternalError: 2 root error(s) found.
(0) Internal: Blas GEMM launch failed : a.shape=(1024, 512), b.shape=(512, 512), m=1024, n=512, k=512
[[{{node transformer_1/layer_2/rel_attn/r/Tensordot/MatMul}}]]
(1) Internal: Blas GEMM launch failed : a.shape=(1024, 512), b.shape=(512, 512), m=1024, n=512, k=512
[[{{node transformer/layer_2/rel_attn/r/Tensordot/MatMul}}]]
0 successful operations.
3 derived errors ignored.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train_gpu_test.py", line 492, in <module>
tf.app.run()
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/absl/app.py", line 312, in run
_run_main(main, args)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/absl/app.py", line 258, in _run_main
sys.exit(main(argv))
File "train_gpu_test.py", line 486, in main
train(n_token, cutoffs, "/gpu:0")
File "train_gpu_test.py", line 341, in train
fetched = sess.run(fetches, feed_dict=feed_dict)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 956, in run
run_metadata_ptr)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run
run_metadata)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: 2 root error(s) found.
(0) Internal: Blas GEMM launch failed : a.shape=(1024, 512), b.shape=(512, 512), m=1024, n=512, k=512
[[node transformer_1/layer_2/rel_attn/r/Tensordot/MatMul (defined at /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
(1) Internal: Blas GEMM launch failed : a.shape=(1024, 512), b.shape=(512, 512), m=1024, n=512, k=512
[[node transformer/layer_2/rel_attn/r/Tensordot/MatMul (defined at /work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
3 derived errors ignored.
Original stack trace for 'transformer_1/layer_2/rel_attn/r/Tensordot/MatMul':
File "train_gpu_test.py", line 492, in <module>
tf.app.run()
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/absl/app.py", line 312, in run
_run_main(main, args)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/absl/app.py", line 258, in _run_main
sys.exit(main(argv))
File "train_gpu_test.py", line 486, in main
train(n_token, cutoffs, "/gpu:0")
File "train_gpu_test.py", line 271, in train
mems=mems_i)
File "train_gpu_test.py", line 223, in single_core_graph
is_training=is_training)
File "train_gpu_test.py", line 191, in model_fn
proj_same_dim=FLAGS.proj_same_dim)
File "/work/home/hepj/tf1/transformer-xl-master/tf/model.py", line 517, in transformer
kernel_initializer=initializer)
File "/work/home/hepj/tf1/transformer-xl-master/tf/model.py", line 56, in rel_multihead_attn
kernel_initializer=kernel_initializer, name='r')
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 324, in new_func
return func(*args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/layers/core.py", line 187, in dense
return layer.apply(inputs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 324, in new_func
return func(*args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 1700, in apply
return self.__call__(inputs, *args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/layers/base.py", line 548, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py", line 234, in wrapper
return converted_call(f, options, args, kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py", line 439, in converted_call
return _call_unconverted(f, args, kwargs, options)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py", line 330, in _call_unconverted
return f(*args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/core.py", line 1039, in call
outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/math_ops.py", line 4096, in tensordot
ab_matmul = matmul(a_reshape, b_reshape)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/util/dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/math_ops.py", line 2754, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_math_ops.py", line 6236, in mat_mul
name=name)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/work/home/hepj/.pyenv/versions/tf1/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
#!/usr/bin/env python
# coding=utf-8
import os
import sys
import zipfile
if os.path.exists('train.txt'):
    print('Tokenized enwik8 already exists - skipping processing')
    sys.exit()

data = zipfile.ZipFile('enwik8.zip').read('enwik8')
print('Length of enwik8: {}'.format(len(data)))

num_test_chars = 5000000

train_data = data[: -2 * num_test_chars]
valid_data = data[-2 * num_test_chars: -num_test_chars]
test_data = data[-num_test_chars:]

for fn, part in [('train.txt', train_data), ('valid.txt', valid_data), ('test.txt', test_data)]:
    print('{} will have {} bytes'.format(fn, len(part)))
    print('- Tokenizing...')
    part_str = ' '.join([str(c) if c != ord('\n') else '\n' for c in part])
    print('- Writing...')
    f = open(fn, 'w').write(part_str)
    f = open(fn + '.raw', 'wb').write(part)
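A quick sanity check on the byte-level vocabulary this split implies (a minimal sketch, not part of the script above; assumes enwik8.zip is in the working directory). The count should be close to the n_token 204 reported in the training log above.
import zipfile
# count the distinct byte values in the raw enwik8 data
raw = zipfile.ZipFile('enwik8.zip').read('enwik8')
print('distinct byte values: {}'.format(len(set(raw))))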
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import time
from absl import flags
import absl.logging as _logging # pylint: disable=unused-import
import tensorflow as tf
import model
import data_utils
from gpu_utils import assign_to_gpu, average_grads_and_vars
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder
import os
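# enable TensorFlow's automatic mixed precision graph rewrite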
os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'
tf.logging.set_verbosity(tf.logging.INFO)
# GPU config
flags.DEFINE_integer("num_hosts", default=1,
help="Number of TPU hosts")
flags.DEFINE_integer("num_core_per_host", default=8,
help="Number of cores per host")
# Experiment (data/checkpoint/directory) config
flags.DEFINE_string("data_dir", default="",
help="Path to tf-records directory.")
flags.DEFINE_string("record_info_dir", default="",
help="Path to local directory containing filenames.txt.")
flags.DEFINE_string("corpus_info_path", default="",
help="Path to corpus-info.json file.")
flags.DEFINE_string("model_dir", default=None,
help="Estimator model_dir.")
flags.DEFINE_bool("do_train", default=True,
help="Whether to run training.")
flags.DEFINE_bool("do_eval", default=False,
help="Whether to run eval on the dev set.")
flags.DEFINE_string("eval_ckpt_path", None,
help="Checkpoint path for do_test evaluation."
"If set, model_dir will be ignored."
"If unset, will use the latest ckpt in model_dir.")
flags.DEFINE_string("warm_start_path", None,
help="Checkpoint path for warm start."
"If set, will clear Adam states."
"Note that the new model_dir should be different"
" from warm_start_path.")
# Optimization config
flags.DEFINE_float("learning_rate", default=2.5e-4,
help="Maximum learning rate.")
flags.DEFINE_float("clip", default=0.25,
help="Gradient clipping value.")
# for cosine decay
flags.DEFINE_float("min_lr_ratio", default=0.004,
help="Minimum ratio learning rate.")
flags.DEFINE_integer("warmup_steps", default=0,
help="Number of steps for linear lr warmup.")
# Training config
flags.DEFINE_integer("train_batch_size", default=60,
help="Size of train batch.")
flags.DEFINE_integer("eval_batch_size", default=60,
help="Size of valid batch.")
flags.DEFINE_integer("train_steps", default=100000,
help="Total number of training steps.")
flags.DEFINE_integer("iterations", default=500,
help="Number of iterations per repeat loop.")
flags.DEFINE_integer("save_steps", default=10000,
help="number of steps for model checkpointing.")
# Evaluation config
flags.DEFINE_bool("do_test", default=False,
help="Run on the test set.")
flags.DEFINE_integer("max_eval_batch", default=-1,
help="Set -1 to turn off. Only used in test mode.")
flags.DEFINE_bool("do_eval_only", default=False,
help="Run evaluation only.")
flags.DEFINE_integer("start_eval_steps", default=10000,
help="Which checkpoint to start with in `do_eval_only` mode.")
flags.DEFINE_string("eval_split", "valid",
help="Which data split to evaluate.")
# Model config
flags.DEFINE_integer("tgt_len", default=70,
help="Number of steps to predict")
flags.DEFINE_integer("mem_len", default=70,
help="Number of steps to cache")
flags.DEFINE_bool("same_length", default=False,
help="Same length attention")
flags.DEFINE_integer("clamp_len", default=-1,
help="Clamp length")
flags.DEFINE_integer("n_layer", default=6,
help="Number of layers.")
flags.DEFINE_integer("d_model", default=500,
help="Dimension of the model.")
flags.DEFINE_integer("d_embed", default=500,
help="Dimension of the embeddings.")
flags.DEFINE_integer("n_head", default=10,
help="Number of attention heads.")
flags.DEFINE_integer("d_head", default=50,
help="Dimension of each attention head.")
flags.DEFINE_integer("d_inner", default=1000,
help="Dimension of inner hidden size in positionwise feed-forward.")
flags.DEFINE_float("dropout", default=0.1,
help="Dropout rate.")
flags.DEFINE_float("dropatt", default=0.1,
help="Attention dropout rate.")
flags.DEFINE_bool("untie_r", default=False,
help="untie r_w_bias and r_r_bias")
# Adaptive Softmax / Embedding
flags.DEFINE_bool("tie_weight", default=True,
help="Tie embedding and softmax weight.")
flags.DEFINE_integer("div_val", default=1,
help="Divide the embedding size by this val for each bin")
flags.DEFINE_bool("proj_share_all_but_first", default=False,
help="True to share all but first projs, False not to share.")
flags.DEFINE_bool("proj_same_dim", default=True,
help="Project the bin with the same dimension.")
# Parameter initialization
flags.DEFINE_enum("init", default="normal",
enum_values=["normal", "uniform"],
help="Initialization method.")
flags.DEFINE_float("init_std", default=0.02,
help="Initialization std when init is normal.")
flags.DEFINE_float("proj_init_std", default=0.01,
help="Initialization std for embedding projection.")
flags.DEFINE_float("init_range", default=0.1,
help="Initialization std when init is uniform.")
FLAGS = flags.FLAGS
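# model_fn builds one tower's graph: transformer forward pass, loss, and (when training) gradients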
def get_model_fn(n_token, cutoffs):
def model_fn(inp, tgt, mems, is_training):
inp = tf.transpose(inp, [1, 0])
tgt = tf.transpose(tgt, [1, 0])
if FLAGS.init == "uniform":
initializer = tf.initializers.random_uniform(
minval=-FLAGS.init_range,
maxval=FLAGS.init_range,
seed=None)
elif FLAGS.init == "normal":
initializer = tf.initializers.random_normal(
stddev=FLAGS.init_std,
seed=None)
proj_initializer = tf.initializers.random_normal(
stddev=FLAGS.proj_init_std,
seed=None)
tie_projs = [False for _ in range(len(cutoffs) + 1)]
if FLAGS.proj_share_all_but_first:
for i in range(1, len(tie_projs)):
tie_projs[i] = True
loss, new_mems = model.transformer(
dec_inp=inp,
target=tgt,
mems=mems,
n_token=n_token,
n_layer=FLAGS.n_layer,
d_model=FLAGS.d_model,
d_embed=FLAGS.d_embed,
n_head=FLAGS.n_head,
d_head=FLAGS.d_head,
d_inner=FLAGS.d_inner,
dropout=FLAGS.dropout,
dropatt=FLAGS.dropatt,
initializer=initializer,
proj_initializer=proj_initializer,
is_training=is_training,
mem_len=FLAGS.mem_len,
cutoffs=cutoffs,
div_val=FLAGS.div_val,
tie_projs=tie_projs,
input_perms=None,
target_perms=None,
head_target=None,
same_length=FLAGS.same_length,
clamp_len=FLAGS.clamp_len,
use_tpu=False,
untie_r=FLAGS.untie_r,
proj_same_dim=FLAGS.proj_same_dim)
# number of parameters
num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
tf.logging.info('#params: {}'.format(num_params))
# format_str = '{{:<{0}s}}\t{{}}'.format(
# max([len(v.name) for v in tf.trainable_variables()]))
# for v in tf.trainable_variables():
# tf.logging.info(format_str.format(v.name, v.get_shape()))
if is_training:
all_vars = tf.trainable_variables()
grads = tf.gradients(loss, all_vars)
grads_and_vars = list(zip(grads, all_vars))
return loss, new_mems, grads_and_vars
else:
return loss, new_mems
return model_fn
def single_core_graph(n_token, cutoffs, is_training, inp, tgt, mems):
model_fn = get_model_fn(
n_token=n_token,
cutoffs=cutoffs)
model_ret = model_fn(
inp=inp,
tgt=tgt,
mems=mems,
is_training=is_training)
return model_ret
def train(n_token, cutoffs, ps_device):
##### Get input function and model function
tf.logging.set_verbosity(tf.logging.INFO)
train_input_fn, train_record_info = data_utils.get_input_fn(
record_info_dir=FLAGS.record_info_dir,
split="train",
per_host_bsz=FLAGS.train_batch_size,
tgt_len=FLAGS.tgt_len,
num_core_per_host=FLAGS.num_core_per_host,
num_hosts=1,
use_tpu=False)
tf.logging.info("num of batches {}".format(train_record_info["num_batch"]))
##### Create computational graph
train_set = train_input_fn({
"batch_size": FLAGS.train_batch_size,
"data_dir": FLAGS.data_dir})
input_feed, label_feed = train_set.make_one_shot_iterator().get_next()
inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)
per_core_bsz = FLAGS.train_batch_size // FLAGS.num_core_per_host
tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []
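# Build one tower (model replica) per GPU; variables are shared across towers via variable_scope reuse.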
for i in range(FLAGS.num_core_per_host):
reuse = True if i > 0 else None
with tf.device(assign_to_gpu(i, ps_device)), \
tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
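# One memory placeholder per layer, shaped [mem_len, batch, d_model], carrying the previous segment's hidden states.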
mems_i = [tf.placeholder(tf.float32,
[FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
for _ in range(FLAGS.n_layer)]
loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
n_token=n_token,
cutoffs=cutoffs,
is_training=True,
inp=inputs[i],
tgt=labels[i],
mems=mems_i)
tower_mems.append(mems_i)
tower_losses.append(loss_i)
tower_new_mems.append(new_mems_i)
tower_grads_and_vars.append(grads_and_vars_i)
## average losses and gradients across towers
if len(tower_losses) > 1:
loss = tf.add_n(tower_losses) / len(tower_losses)
grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
else:
loss = tower_losses[0]
grads_and_vars = tower_grads_and_vars[0]
grads, all_vars = zip(*grads_and_vars)
## clip gradient
clipped, gnorm = tf.clip_by_global_norm(grads, FLAGS.clip)
grads_and_vars = list(zip(clipped, all_vars))
## configure the optimizer
global_step = tf.train.get_or_create_global_step()
# warmup stage: increase the learning rate linearly
if FLAGS.warmup_steps > 0:
warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
* FLAGS.learning_rate
else:
warmup_lr = 0.0
# decay stage: decay the learning rate using the cosine schedule
decay_lr = tf.train.cosine_decay(
FLAGS.learning_rate,
global_step=global_step-FLAGS.warmup_steps,
decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
alpha=FLAGS.min_lr_ratio)
# choose warmup or decay
learning_rate = tf.where(global_step < FLAGS.warmup_steps,
warmup_lr, decay_lr)
# get the train op
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.apply_gradients(grads_and_vars, global_step)
##### Training loop
tower_mems_np = [
[np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model], dtype=np.float32)
for layer in range(FLAGS.n_layer)]
for core in range(FLAGS.num_core_per_host)
]
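# Memories start as zeros; each sess.run returns updated memories (tower_new_mems) that are fed back on the next step.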
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
# modified: create a tf.profiler Profiler and full-trace run options
profiler = model_analyzer.Profiler(graph=sess.graph)
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
# end modification
sess.run(tf.global_variables_initializer())
if FLAGS.warm_start_path is not None:
tf.logging.info("warm start from {}".format(FLAGS.warm_start_path))
saver.restore(sess, FLAGS.warm_start_path)
fetches = [loss, tower_new_mems, global_step, gnorm, learning_rate, train_op]
total_loss, prev_step = 0., -1
while True:
feed_dict = {}
for i in range(FLAGS.num_core_per_host):
for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
feed_dict[m] = m_np
# modified: pass the profiler run options and collect run metadata for this step
fetched = sess.run(fetches, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
loss_np, tower_mems_np, curr_step = fetched[:3]
total_loss += loss_np
# modified: register this step's run metadata with the profiler
profiler.add_step(step=curr_step, run_meta=run_metadata)
# end modification
# modified: time each step and log global_step/sec
if curr_step==0:
start_time=time.time()
if curr_step > 0:
end_time=time.time()
global_step_s=1/(end_time-start_time)
start_time=end_time
tf.logging.info("global_step/sec: {:.6f} , step= {}".format(global_step_s,curr_step))
#tf.logging.info("examples/sec : {}".format(global_step_s * FLAGS.train_batch_size))
# end modification
if curr_step > 0 and curr_step % FLAGS.iterations == 0:
curr_loss = total_loss / (curr_step - prev_step)
tf.logging.info("[{}] | gnorm {:.2f} lr {:8.6f} "
"| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
curr_step, fetched[-3], fetched[-2],
curr_loss, math.exp(curr_loss), curr_loss / math.log(2)))
total_loss, prev_step = 0., curr_step
if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
saver.save(sess, save_path)
tf.logging.info("Model saved in path: {}".format(save_path))
if curr_step == FLAGS.train_steps:
break
# modified: build the profiler report options after training finishes
profile_op_opt_builder = option_builder.ProfileOptionBuilder()
profile_op_opt_builder.select(['micros', 'occurrence'])
profile_op_opt_builder.order_by('occurrence')
profile_op_opt_builder.with_max_depth(10)
# only report the statistics recorded at step 5
profile_op_opt_builder.with_step(5)
# write the profiling results to a file
profile_op_opt_builder.with_file_output("./prof.txt")
# also dump a timeline trace to prof.json
profile_op_opt_builder.with_timeline_output("./prof.json")
# display the results as an op view
profiler.profile_operations(profile_op_opt_builder.build())
def evaluate(n_token, cutoffs, ps_device):
##### Get input function and model function
eval_input_fn, eval_record_info = data_utils.get_input_fn(
record_info_dir=FLAGS.record_info_dir,
split=FLAGS.eval_split,
per_host_bsz=FLAGS.eval_batch_size,
tgt_len=FLAGS.tgt_len,
num_core_per_host=FLAGS.num_core_per_host,
num_hosts=1,
use_tpu=False)
num_batch = eval_record_info["num_batch"]
if FLAGS.max_eval_batch > 0:
num_batch = FLAGS.max_eval_batch
tf.logging.info("num of batches {}".format(num_batch))
##### Create computational graph
eval_set = eval_input_fn({
"batch_size": FLAGS.eval_batch_size,
"data_dir": FLAGS.data_dir})
input_feed, label_feed = eval_set.make_one_shot_iterator().get_next()
inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)
per_core_bsz = FLAGS.eval_batch_size // FLAGS.num_core_per_host
tower_mems, tower_losses, tower_new_mems = [], [], []
for i in range(FLAGS.num_core_per_host):
with tf.device(assign_to_gpu(i, ps_device)), \
tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
mems_i = [tf.placeholder(tf.float32,
[FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
for _ in range(FLAGS.n_layer)]
loss_i, new_mems_i = single_core_graph(
n_token=n_token,
cutoffs=cutoffs,
is_training=False,
inp=inputs[i],
tgt=labels[i],
mems=mems_i)
tower_mems.append(mems_i)
tower_losses.append(loss_i)
tower_new_mems.append(new_mems_i)
## average losses across towers
if len(tower_losses) > 1:
loss = tf.add_n(tower_losses) / len(tower_losses)
else:
loss = tower_losses[0]
##### Evaluation loop
tower_mems_np = [
[np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model], dtype=np.float32)
for layer in range(FLAGS.n_layer)]
for core in range(FLAGS.num_core_per_host)
]
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
sess.run(tf.global_variables_initializer())
if FLAGS.eval_ckpt_path is None:
eval_ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
else:
eval_ckpt_path = FLAGS.eval_ckpt_path
tf.logging.info("Evaluate {}".format(eval_ckpt_path))
saver.restore(sess, eval_ckpt_path)
fetches = [loss, tower_new_mems, tf.size(label_feed)]
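# tf.size(label_feed) is the number of target tokens in the batch; it weights the per-batch loss in the running average.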
format_str = " >> processing batch {{:{0}d}}/{{:{0}d}} ..".format(
len(str(num_batch)))
total_loss, total_cnt = 0, 0
for step in range(num_batch):
if step % (num_batch // 10) == 0:
tf.logging.info(format_str.format(step, num_batch))
feed_dict = {}
for i in range(FLAGS.num_core_per_host):
for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
feed_dict[m] = m_np
fetched = sess.run(fetches, feed_dict=feed_dict)
loss_np, tower_mems_np, cnt_np = fetched[:3]
total_loss += loss_np * cnt_np
total_cnt += cnt_np
avg_loss = total_loss / total_cnt
tf.logging.info("| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
avg_loss, math.exp(avg_loss), avg_loss / math.log(2)))
def main(unused_argv):
del unused_argv # Unused
tf.logging.set_verbosity(tf.logging.INFO)
# Get corpus info
corpus_info = data_utils.get_corpus_info(FLAGS.corpus_info_path)
n_token = corpus_info["vocab_size"]
cutoffs = corpus_info["cutoffs"][1:-1]
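# When cutoffs are present (e.g. lm1b), they include the 0 and vocab_size endpoints; keep only the interior boundaries for the adaptive softmax.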
tf.logging.info("n_token {}".format(n_token))
if FLAGS.do_train:
tf.logging.set_verbosity(tf.logging.INFO)
train(n_token, cutoffs, "/gpu:0")
if FLAGS.do_eval:
evaluate(n_token, cutoffs, "/gpu:0")
if __name__ == "__main__":
tf.app.run()
#!/bin/bash
# export HSA_FORCE_FINE_GRAIN_PCIE=1
# export MIOPEN_FIND_MODE=3
# export MIOPEN_ENABLE_LOGGING_CMD=1
# export ROCBLAS_LAYER=3
# module unload compiler/rocm/2.9
# echo "MIOPEN_FIND_MODE=$MIOPEN_FIND_MODE"
# lrank=$OMPI_COMM_WORLD_LOCAL_RANK
# comm_rank=$OMPI_COMM_WORLD_RANK
# comm_size=$OMPI_COMM_WORLD_SIZE
# NCCL_DEBUG=INFO
# Data
#DATA_ROOT=../data/enwik8/
DATA_ROOT=/work/home/hepj/tf1/transformer-xl-master/data/enwik8/
MODEL_DIR=./EXP-enwik8_1_test
# Model
N_LAYER=12
D_MODEL=512
D_EMBED=512
N_HEAD=8
D_HEAD=64
D_INNER=2048
# Training
TGT_LEN=512
MEM_LEN=512
TRAIN_STEPS=14483
BSZ=12 #12
NUM_CORE=1
# Testing
TEST_TGT_LEN=80
TEST_MEM_LEN=2100
TEST_CLAMP_LEN=820
TEST_BSZ=10
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
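# "${@:2}" forwards any additional command-line flags to the Python script.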
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train_gpu_test.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=${MODEL_DIR} \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=${TRAIN_STEPS} \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-enwik8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Data
#DATA_ROOT=../data/enwik8/
DATA_ROOT=/work/home/hepj/tf1/transformer-xl-master/data/enwik8/
# Model
N_LAYER=12
D_MODEL=512
D_EMBED=512
N_HEAD=8
D_HEAD=64
D_INNER=2048
# Training
TGT_LEN=512
MEM_LEN=512
TRAIN_STEPS=14483 # 7242 # the record info shows the data has this many batches, so run the same number of steps
# use 12 for testing
BSZ=12
NUM_CORE=4
# Testing
TEST_TGT_LEN=80
TEST_MEM_LEN=2100
TEST_CLAMP_LEN=820
TEST_BSZ=10
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
# modified: run the profiling-instrumented script instead of train_gpu.py
#python train_gpu.py \
python train_gpu_test.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-enwik8_4_new_bs12 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=${TRAIN_STEPS} \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-enwik8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# export HSA_FORCE_FINE_GRAIN_PCIE=1
# export MIOPEN_FIND_MODE=3
# export MIOPEN_ENABLE_LOGGING_CMD=1
# export ROCBLAS_LAYER=3
# module unload compiler/rocm/2.9
# echo "MIOPEN_FIND_MODE=$MIOPEN_FIND_MODE"
# lrank=$OMPI_COMM_WORLD_LOCAL_RANK
# comm_rank=$OMPI_COMM_WORLD_RANK
# comm_size=$OMPI_COMM_WORLD_SIZE
# NCCL_DEBUG=INFO
export HIP_VISIBLE_DEVICES=0
# Data
#DATA_ROOT=../data/enwik8/
DATA_ROOT=/public/home/hepj/SothisAI/transformer-xl-master/data/text8
# Model
N_LAYER=12
D_MODEL=512
D_EMBED=512
N_HEAD=8
D_HEAD=64
D_INNER=2048
# Training
TGT_LEN=512
MEM_LEN=512
TRAIN_STEPS=14483
BSZ=12
NUM_CORE=1
# Testing
TEST_TGT_LEN=80
TEST_MEM_LEN=2100
TEST_CLAMP_LEN=820
TEST_BSZ=10
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=enwik8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train_gpu_test.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-enwik8_test \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=${TRAIN_STEPS} \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-enwik8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Path
LOCAL_DIR=../data/enwik8/
GSDATA=
GSEXP=
# TPU setting
NUM_HOST=2
NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16
TEST_NUM_HOST=1
TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16
# Model
N_LAYER=24
D_MODEL=1024
D_EMBED=1024
N_HEAD=8
D_HEAD=128
D_INNER=3072
# Training
TGT_LEN=768
MEM_LEN=768
TRAIN_BSZ=64
VALID_BSZ=64
# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=3800
TEST_CLAMP_LEN=1000
TEST_BSZ=16
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=enwik8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${TRAIN_BSZ} \
--per_host_valid_bsz=${VALID_BSZ} \
--num_core_per_host=${NUM_CORE} \
--num_passes=10 \
--use_tpu=True \
${@:2}
SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/
SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=enwik8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--num_passes=1 \
--use_tpu=True \
${@:2}
SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train.py \
--data_dir=${GSDATA}/enwik8-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/enwik8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.15 \
--dropatt=0.15 \
--learning_rate=0.00025 \
--warmup_steps=4000 \
--train_steps=400000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${TRAIN_BSZ} \
--use_tpu=True \
--num_host=${NUM_HOST} \
--num_core_per_host=${NUM_CORE} \
--iterations=1000 \
--save_steps=10000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train.py \
--data_dir=${GSDATA}/enwik8-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/enwik8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--eval_batch_size=${TEST_BSZ} \
--num_host=${TEST_NUM_HOST} \
--num_core_per_host=${TEST_NUM_CORE} \
--use_tpu=True \
--do_train=False \
--do_eval_only=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Data
DATA_ROOT=../data/one-billion-words/
# Model
DIV_VAL=4
N_LAYER=18
D_MODEL=1024
D_EMBED=1024
N_HEAD=8
D_HEAD=128
D_INNER=4096
# Training
TGT_LEN=256
MEM_LEN=256
BSZ=256
NUM_CORE=4
# Testing
TEST_TGT_LEN=32
TEST_MEM_LEN=128
TEST_CLAMP_LEN=-1
TEST_BSZ=16
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=lm1b \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=lm1b \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-lm1b \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=False \
--proj_same_dim=False \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=400000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-lm1b \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=False \
--proj_same_dim=False \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Path
LOCAL_DIR=../data/one-billion-words/
GSDATA=
GSEXP=
# TPU setting
NUM_HOST=32
NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16
TEST_NUM_HOST=1
TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16
# Model
DIV_VAL=4
N_LAYER=24
D_MODEL=1280
D_EMBED=1280
N_HEAD=16
D_HEAD=80
D_INNER=8192
# Training
TGT_LEN=32
MEM_LEN=32
TRAIN_BSZ=512
VALID_BSZ=512
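# Per-host batch sizes: the global train/valid batch is split evenly across the NUM_HOST TPU hosts.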
TRAIN_BSZ_PER_HOST=$((TRAIN_BSZ / NUM_HOST))
VALID_BSZ_PER_HOST=$((VALID_BSZ / NUM_HOST))
# Testing
TEST_TGT_LEN=32
TEST_MEM_LEN=128
TEST_CLAMP_LEN=-1
TEST_BSZ=8
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=lm1b \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${TRAIN_BSZ_PER_HOST} \
--per_host_valid_bsz=${VALID_BSZ_PER_HOST} \
--num_core_per_host=${NUM_CORE} \
--num_passes=10 \
--use_tpu=True \
${@:2}
SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/
SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=lm1b \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--num_passes=1 \
--use_tpu=True \
${@:2}
SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train.py \
--data_dir=${GSDATA}/lm1b-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/lm1b \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=False \
--proj_same_dim=False \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.05 \
--dropatt=0.05 \
--init_std=0.005 \
--learning_rate=0.0001 \
--warmup_steps=30000 \
--train_steps=1200000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${TRAIN_BSZ} \
--num_hosts=${NUM_HOST} \
--num_core_per_host=${NUM_CORE} \
--iterations=1000 \
--save_steps=10000 \
--use_tpu=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train.py \
--data_dir=${GSDATA}/lm1b-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/lm1b \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=False \
--proj_same_dim=False \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_host=${TEST_NUM_HOST} \
--num_core_per_host=${TEST_NUM_CORE} \
--use_tpu=True \
--do_train=False \
--do_eval_only=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Data
DATA_ROOT=../data/text8/
# Model
N_LAYER=12
D_MODEL=512
D_EMBED=512
N_HEAD=8
D_HEAD=64
D_INNER=2048
# Training
TGT_LEN=512
MEM_LEN=512
BSZ=24
NUM_CORE=4
# Testing
TEST_TGT_LEN=80
TEST_MEM_LEN=2100
TEST_CLAMP_LEN=820
TEST_BSZ=10
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=text8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=text8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-text8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=400000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-text8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Path
LOCAL_DIR=../data/text8/
GSDATA=
GSEXP=
# TPU setting
NUM_HOST=2
NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16
TEST_NUM_HOST=1
TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16
# Model
N_LAYER=24
D_MODEL=1024
D_EMBED=1024
N_HEAD=8
D_HEAD=128
D_INNER=3072
# Training
TGT_LEN=768
MEM_LEN=768
TRAIN_BSZ=64
VALID_BSZ=64
# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=3800
TEST_CLAMP_LEN=1000
TEST_BSZ=16
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=text8 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${TRAIN_BSZ} \
--per_host_valid_bsz=${VALID_BSZ} \
--num_core_per_host=${NUM_CORE} \
--num_passes=10 \
--use_tpu=True \
${@:2}
SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/
SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=text8 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--num_passes=1 \
--use_tpu=True \
${@:2}
SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train.py \
--data_dir=${GSDATA}/text8-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/text8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.15 \
--dropatt=0.15 \
--learning_rate=0.00025 \
--warmup_steps=4000 \
--train_steps=400000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${TRAIN_BSZ} \
--use_tpu=True \
--num_host=${NUM_HOST} \
--num_core_per_host=${NUM_CORE} \
--iterations=1000 \
--save_steps=10000 \
--do_train=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train.py \
--data_dir=${GSDATA}/text8-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/text8 \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--eval_batch_size=${TEST_BSZ} \
--num_host=${TEST_NUM_HOST} \
--num_core_per_host=${TEST_NUM_CORE} \
--use_tpu=True \
--do_train=False \
--do_eval_only=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Data
DATA_ROOT=../data/wikitext-103/
# Model
DIV_VAL=1
N_LAYER=16
D_MODEL=410
D_EMBED=410
N_HEAD=10
D_HEAD=41
D_INNER=2100
# Training
TGT_LEN=150
MEM_LEN=150
BSZ=60
NUM_CORE=4
# Testing
TEST_TGT_LEN=64
TEST_MEM_LEN=640
TEST_CLAMP_LEN=400
TEST_BSZ=10
TEST_NUM_CORE=1
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=wt103 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${BSZ} \
--per_host_valid_bsz=${BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${DATA_ROOT}/ \
--dataset=wt103 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_passes=1 \
--use_tpu=False \
${@:2}
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-wt103 \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=True \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.1 \
--dropatt=0.0 \
--learning_rate=0.00025 \
--warmup_steps=0 \
--train_steps=400000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${BSZ} \
--num_core_per_host=${NUM_CORE} \
--iterations=200 \
--save_steps=4000 \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train_gpu.py \
--data_dir=${DATA_ROOT}/tfrecords \
--record_info_dir=${DATA_ROOT}/tfrecords/ \
--corpus_info_path=${DATA_ROOT}/corpus-info.json \
--model_dir=EXP-wt103 \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=True \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.0 \
--dropatt=0.0 \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--do_train=False \
--do_eval=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
# Path
LOCAL_DIR=../data/wikitext-103/
GSDATA=
GSEXP=
# TPU setting
NUM_HOST=4
NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16
TEST_NUM_HOST=1
TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16
# Model
DIV_VAL=4
N_LAYER=18
D_MODEL=1024
D_EMBED=1024
N_HEAD=16
D_HEAD=64
D_INNER=4096
# Training
TGT_LEN=384
MEM_LEN=384
TRAIN_BSZ=128
VALID_BSZ=128
# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=1600
TEST_CLAMP_LEN=1000
TEST_BSZ=8
if [[ $1 == 'train_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=wt103 \
--tgt_len=${TGT_LEN} \
--per_host_train_bsz=${TRAIN_BSZ} \
--per_host_valid_bsz=${VALID_BSZ} \
--num_core_per_host=${NUM_CORE} \
--num_passes=10 \
--use_tpu=True \
${@:2}
SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/
SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/
elif [[ $1 == 'test_data' ]]; then
python data_utils.py \
--data_dir=${LOCAL_DIR}/ \
--dataset=wt103 \
--tgt_len=${TEST_TGT_LEN} \
--per_host_test_bsz=${TEST_BSZ} \
--num_core_per_host=${TEST_NUM_CORE} \
--num_passes=1 \
--use_tpu=True \
${@:2}
SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}*
gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/
elif [[ $1 == 'train' ]]; then
echo 'Run training...'
python train.py \
--data_dir=${GSDATA}/wt103-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/wt103 \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=True \
--proj_same_dim=True \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--dropout=0.2 \
--dropatt=0.2 \
--init_std=0.005 \
--learning_rate=0.00025 \
--warmup_steps=16000 \
--train_steps=4000000 \
--tgt_len=${TGT_LEN} \
--mem_len=${MEM_LEN} \
--train_batch_size=${TRAIN_BSZ} \
--num_hosts=${NUM_HOST} \
--num_core_per_host=${NUM_CORE} \
--iterations=1000 \
--save_steps=10000 \
--use_tpu=True \
--do_eval=False \
${@:2}
elif [[ $1 == 'eval' ]]; then
echo 'Run evaluation...'
python train.py \
--data_dir=${GSDATA}/wt103-tfrecords \
--record_info_dir=${LOCAL_DIR}/tfrecords/ \
--corpus_info_path=${LOCAL_DIR}/corpus-info.json \
--model_dir=${GSEXP}/wt103 \
--div_val=${DIV_VAL} \
--untie_r=True \
--proj_share_all_but_first=True \
--proj_same_dim=True \
--n_layer=${N_LAYER} \
--d_model=${D_MODEL} \
--d_embed=${D_EMBED} \
--n_head=${N_HEAD} \
--d_head=${D_HEAD} \
--d_inner=${D_INNER} \
--tgt_len=${TEST_TGT_LEN} \
--mem_len=${TEST_MEM_LEN} \
--clamp_len=${TEST_CLAMP_LEN} \
--same_length=True \
--eval_batch_size=${TEST_BSZ} \
--num_host=${TEST_NUM_HOST} \
--num_core_per_host=${TEST_NUM_CORE} \
--use_tpu=True \
--do_train=False \
--do_eval_only=True \
--eval_split=test \
${@:2}
else
echo "unknown argument: $1"
fi
#!/bin/bash
URL=http://curtis.ml.cmu.edu/datasets/pretrained_xl
DATA_ROOT=./
function download () {
fileurl=${1}
filename=${fileurl##*/}
if [ ! -f ${filename} ]; then
echo ">>> Download '${filename}' from '${fileurl}'."
wget --quiet ${fileurl}
else
echo "*** File '${filename}' exists. Skip."
fi
}
cd $DATA_ROOT
mkdir -p pretrained_xl && cd pretrained_xl
# enwik8
mkdir -p tf_enwik8 && cd tf_enwik8
mkdir -p data && cd data
download ${URL}/tf_enwiki8/data/cache.pkl
download ${URL}/tf_enwiki8/data/corpus-info.json
cd ..
mkdir -p model && cd model
download ${URL}/tf_enwiki8/model/checkpoint
download ${URL}/tf_enwiki8/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_enwiki8/model/model.ckpt-0.index
download ${URL}/tf_enwiki8/model/model.ckpt-0.meta
cd ..
cd ..
# text8
mkdir -p tf_text8 && cd tf_text8
mkdir -p data && cd data
download ${URL}/tf_text8/data/cache.pkl
download ${URL}/tf_text8/data/corpus-info.json
cd ..
mkdir -p model && cd model
download ${URL}/tf_text8/model/checkpoint
download ${URL}/tf_text8/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_text8/model/model.ckpt-0.index
download ${URL}/tf_text8/model/model.ckpt-0.meta
cd ..
cd ..
# wt103
mkdir -p tf_wt103 && cd tf_wt103
mkdir -p data && cd data
download ${URL}/tf_wt103/data/cache.pkl
download ${URL}/tf_wt103/data/corpus-info.json
cd ..
mkdir -p model && cd model
download ${URL}/tf_wt103/model/checkpoint
download ${URL}/tf_wt103/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_wt103/model/model.ckpt-0.index
download ${URL}/tf_wt103/model/model.ckpt-0.meta
cd ..
cd ..
# lm1b
mkdir -p tf_lm1b && cd tf_lm1b
mkdir -p data && cd data
download ${URL}/tf_lm1b/data/cache.pkl
download ${URL}/tf_lm1b/data/corpus-info.json
cd ..
mkdir -p model && cd model
download ${URL}/tf_lm1b/model/checkpoint
download ${URL}/tf_lm1b/model/model.ckpt-1191000.data-00000-of-00001
download ${URL}/tf_lm1b/model/model.ckpt-1191000.index
download ${URL}/tf_lm1b/model/model.ckpt-1191000.meta
cd ..
cd ..