Commit da3f0934 authored by zhuwenwen's avatar zhuwenwen
Browse files

delete unused files

parent c4dd1fd4
.. Colossal-AI documentation master file, created by
   sphinx-quickstart on Mon Oct 11 17:05:05 2021.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Colossal-AI API documentation
======================================
.. toctree::
   :maxdepth: 2
   :caption: API REFERENCE

   colossalai/colossalai

Indices and tables
==================
* :ref:`genindex`
\ No newline at end of file
@ECHO OFF

REM Command file for Sphinx documentation.
REM
REM Usage: make.bat [target]
REM   target  Forwarded to "sphinx-build -M" as the builder name. When it is
REM           omitted, "sphinx-build -M help" is run instead.
REM
REM Variables read from the environment: SPHINXBUILD (defaults to
REM "sphinx-build"), SPHINXOPTS and O (both passed through unchanged).

REM Run from the directory that holds this script; every exit path below
REM must popd so the caller's working directory is restored.
pushd %~dp0

if "%SPHINXBUILD%" == "" (
    set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=.build

REM No target given: skip the availability probe and show the help listing.
if "%1" == "" goto help

REM Probe the executable with all output discarded. cmd reports errorlevel
REM 9009 when a command cannot be found, and "if errorlevel N" matches any
REM value greater than or equal to N.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
    echo.
    echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
    echo.installed, then set the SPHINXBUILD environment variable to point
    echo.to the full path of the 'sphinx-build' executable. Alternatively you
    echo.may add the Sphinx directory to PATH.
    echo.
    echo.If you don't have Sphinx installed, grab it from
    echo.https://www.sphinx-doc.org/
    REM Undo the pushd above before leaving; without this the early exit
    REM left the calling shell in the script directory.
    popd
    exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
tensorboard
deepspeed
apex
sphinx
sphinx-rtd-theme
myst-parser
\ No newline at end of file
Compiling cuda extensions with
HIP version: 4.3.22313-cccb3896
clang version 14.0.0 (http://10.8.150.239/dcutoolkit/driverruntime/llvm-project.git 458573e609dd35aac1fa72e6136853de2b7651c8)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/dtk-22.04.2/llvm/bin
from /opt/dtk-22.04.2/bin
torch.__version__ = 1.10.0a0+gitc7f69d6-dtk22042
nvcc was not found. CUDA extension will not be installed. If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.
/public/home/huchen/colossalAI/ColossalAI/MANIFEST.in -> /public/home/huchen/colossalAI/ColossalAI/MANIFEST.in ok
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/compat.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/type_shim.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax_hip.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_sgd_kernel.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_scale_kernel.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_adam.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_adam.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax_hip.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.cpp -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_lamb.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/type_shim.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/layer_norm_cuda_kernel.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip_kernel.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_l2norm_kernel.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/compat.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/cross_entropy.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/normalize_kernels.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/cuda_util.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/hip_util.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/dropout_kernels.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/cublas_wrappers.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/cublas_wrappers.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/transform_kernels.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/transform_kernels.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/softmax_kernels.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/general_kernels.hip skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/strided_batch_gemm.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/kernels.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/normalize_layer.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/context.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/ls_cub.cuh -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/cuda_native/csrc/kernels/include/kernels.h -> /public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h skipped
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 139
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_sgd_kernel.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_scale_kernel.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_adam.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_l2norm_kernel.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_lamb.hip -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 0
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax_hip.hip -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 0
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax_hip.hip -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 0
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip_kernel.hip -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 0
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/type_shim.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/compat.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_apply.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.cpp -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/strided_batch_gemm.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/feed_forward.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cublas_wrappers.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/dropout.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/normalize_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/block_reduce.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/context.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/ls_cub.cuh -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/cross_entropy_layer.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/hip_util.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/softmax.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/include/kernels.h -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/cublas_wrappers.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/transform_kernels.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/dropout_kernels.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/normalize_kernels.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/softmax_kernels.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/general_kernels.hip -> None ignored
/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/hip_util.hip -> None ignored
Total number of unsupported CUDA function calls: 0
Total number of replaced kernel launches: 0
running bdist_wheel
running build
running build_py
not copying model_zoo/helper.py (output up-to-date)
not copying model_zoo/__init__.py (output up-to-date)
not copying colossalai/global_variables.py (output up-to-date)
not copying colossalai/initialize.py (output up-to-date)
not copying colossalai/constants.py (output up-to-date)
not copying colossalai/__init__.py (output up-to-date)
not copying colossalai/core.py (output up-to-date)
not copying model_zoo/bert/__init__.py (output up-to-date)
not copying model_zoo/gpt/gpt.py (output up-to-date)
not copying model_zoo/gpt/__init__.py (output up-to-date)
not copying model_zoo/mlp_mixer/__init__.py (output up-to-date)
not copying model_zoo/moe/models.py (output up-to-date)
not copying model_zoo/moe/util.py (output up-to-date)
not copying model_zoo/moe/__init__.py (output up-to-date)
not copying model_zoo/vit/vit.py (output up-to-date)
not copying model_zoo/vit/vision_transformer_from_config.py (output up-to-date)
not copying model_zoo/vit/__init__.py (output up-to-date)
not copying model_zoo/mlp_mixer/parallel_3d/__init__.py (output up-to-date)
not copying model_zoo/mlp_mixer/parallel_3d/mlp_mixer.py (output up-to-date)
not copying colossalai/logging/logging.py (output up-to-date)
not copying colossalai/logging/__init__.py (output up-to-date)
not copying colossalai/amp/__init__.py (output up-to-date)
not copying colossalai/amp/amp_type.py (output up-to-date)
not copying colossalai/zero/zero_redundancy_optimizer_level_3.py (output up-to-date)
not copying colossalai/zero/zero_redundancy_optimizer_level_2.py (output up-to-date)
not copying colossalai/zero/__init__.py (output up-to-date)
not copying colossalai/zero/loss_scaler.py (output up-to-date)
not copying colossalai/engine/__init__.py (output up-to-date)
not copying colossalai/engine/_base_engine.py (output up-to-date)
not copying colossalai/trainer/_trainer.py (output up-to-date)
not copying colossalai/trainer/__init__.py (output up-to-date)
not copying colossalai/registry/__init__.py (output up-to-date)
not copying colossalai/registry/registry.py (output up-to-date)
not copying colossalai/utils/checkpointing.py (output up-to-date)
not copying colossalai/utils/activation_checkpoint.py (output up-to-date)
not copying colossalai/utils/timer.py (output up-to-date)
not copying colossalai/utils/common.py (output up-to-date)
not copying colossalai/utils/memory.py (output up-to-date)
not copying colossalai/utils/__init__.py (output up-to-date)
not copying colossalai/utils/cuda.py (output up-to-date)
not copying colossalai/nn/init.py (output up-to-date)
not copying colossalai/nn/__init__.py (output up-to-date)
not copying colossalai/builder/builder.py (output up-to-date)
not copying colossalai/builder/__init__.py (output up-to-date)
not copying colossalai/builder/pipeline.py (output up-to-date)
not copying colossalai/context/config.py (output up-to-date)
not copying colossalai/context/parallel_context.py (output up-to-date)
not copying colossalai/context/parallel_mode.py (output up-to-date)
not copying colossalai/context/__init__.py (output up-to-date)
not copying colossalai/communication/p2p.py (output up-to-date)
not copying colossalai/communication/utils.py (output up-to-date)
not copying colossalai/communication/collective.py (output up-to-date)
not copying colossalai/communication/__init__.py (output up-to-date)
not copying colossalai/communication/ring.py (output up-to-date)
not copying colossalai/kernel/__init__.py (output up-to-date)
not copying colossalai/amp/apex_amp/apex_amp.py (output up-to-date)
not copying colossalai/amp/apex_amp/__init__.py (output up-to-date)
not copying colossalai/amp/naive_amp/__init__.py (output up-to-date)
not copying colossalai/amp/naive_amp/_fp16_optimizer.py (output up-to-date)
not copying colossalai/amp/naive_amp/naive_amp.py (output up-to-date)
not copying colossalai/amp/torch_amp/torch_amp.py (output up-to-date)
not copying colossalai/amp/torch_amp/_grad_scaler.py (output up-to-date)
not copying colossalai/amp/torch_amp/__init__.py (output up-to-date)
not copying colossalai/engine/ophooks/_memtracer_ophook.py (output up-to-date)
not copying colossalai/engine/ophooks/_base_ophook.py (output up-to-date)
not copying colossalai/engine/ophooks/__init__.py (output up-to-date)
not copying colossalai/engine/schedule/_base_schedule.py (output up-to-date)
not copying colossalai/engine/schedule/_pipeline_schedule.py (output up-to-date)
not copying colossalai/engine/schedule/_non_pipeline_schedule.py (output up-to-date)
not copying colossalai/engine/schedule/__init__.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_base_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_moe_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_zero_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py (output up-to-date)
not copying colossalai/engine/gradient_handler/__init__.py (output up-to-date)
not copying colossalai/trainer/hooks/_log_hook.py (output up-to-date)
not copying colossalai/trainer/hooks/_checkpoint_hook.py (output up-to-date)
not copying colossalai/trainer/hooks/__init__.py (output up-to-date)
not copying colossalai/trainer/hooks/_base_hook.py (output up-to-date)
not copying colossalai/trainer/hooks/_metric_hook.py (output up-to-date)
not copying colossalai/trainer/hooks/_lr_scheduler_hook.py (output up-to-date)
not copying colossalai/utils/data_sampler/data_parallel_sampler.py (output up-to-date)
not copying colossalai/utils/data_sampler/base_sampler.py (output up-to-date)
not copying colossalai/utils/data_sampler/__init__.py (output up-to-date)
not copying colossalai/utils/multi_tensor_apply/multi_tensor_apply.py (output up-to-date)
not copying colossalai/utils/multi_tensor_apply/__init__.py (output up-to-date)
not copying colossalai/utils/gradient_accumulation/__init__.py (output up-to-date)
not copying colossalai/utils/gradient_accumulation/_gradient_accumulation.py (output up-to-date)
not copying colossalai/nn/optimizer/fused_adam.py (output up-to-date)
not copying colossalai/nn/optimizer/lars.py (output up-to-date)
not copying colossalai/nn/optimizer/fused_sgd.py (output up-to-date)
not copying colossalai/nn/optimizer/fused_lamb.py (output up-to-date)
not copying colossalai/nn/optimizer/__init__.py (output up-to-date)
not copying colossalai/nn/optimizer/lamb.py (output up-to-date)
not copying colossalai/nn/optimizer/colossalai_optimizer.py (output up-to-date)
not copying colossalai/nn/layer/base_layer.py (output up-to-date)
not copying colossalai/nn/layer/__init__.py (output up-to-date)
not copying colossalai/nn/model/model_from_config.py (output up-to-date)
not copying colossalai/nn/model/__init__.py (output up-to-date)
not copying colossalai/nn/metric/_utils.py (output up-to-date)
not copying colossalai/nn/metric/accuracy_3d.py (output up-to-date)
not copying colossalai/nn/metric/accuracy_2p5d.py (output up-to-date)
not copying colossalai/nn/metric/accuracy_2d.py (output up-to-date)
not copying colossalai/nn/metric/__init__.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/delayed.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/torch.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/cosine.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/multistep.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/onecycle.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/linear.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/__init__.py (output up-to-date)
not copying colossalai/nn/lr_scheduler/poly.py (output up-to-date)
not copying colossalai/nn/loss/loss_3d.py (output up-to-date)
not copying colossalai/nn/loss/loss_2d.py (output up-to-date)
not copying colossalai/nn/loss/__init__.py (output up-to-date)
not copying colossalai/nn/loss/loss_2p5d.py (output up-to-date)
not copying colossalai/nn/loss/loss_moe.py (output up-to-date)
not copying colossalai/nn/loss/loss_1d.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2d/_utils.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2d/layers.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2d/_operation.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2d/__init__.py (output up-to-date)
not copying colossalai/nn/layer/parallel_1d/_utils.py (output up-to-date)
not copying colossalai/nn/layer/parallel_1d/layers.py (output up-to-date)
not copying colossalai/nn/layer/parallel_1d/_operation.py (output up-to-date)
not copying colossalai/nn/layer/parallel_1d/__init__.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/_utils.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/embedding.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/linear.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/normalization.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/dropout.py (output up-to-date)
not copying colossalai/nn/layer/colossalai_layer/__init__.py (output up-to-date)
not copying colossalai/nn/layer/parallel_sequence/_utils.py (output up-to-date)
not copying colossalai/nn/layer/parallel_sequence/layers.py (output up-to-date)
not copying colossalai/nn/layer/parallel_sequence/_operation.py (output up-to-date)
not copying colossalai/nn/layer/parallel_sequence/__init__.py (output up-to-date)
not copying colossalai/nn/layer/vanilla/layers.py (output up-to-date)
not copying colossalai/nn/layer/vanilla/__init__.py (output up-to-date)
not copying colossalai/nn/layer/moe/layers.py (output up-to-date)
not copying colossalai/nn/layer/moe/_operation.py (output up-to-date)
not copying colossalai/nn/layer/moe/__init__.py (output up-to-date)
not copying colossalai/nn/layer/utils/common.py (output up-to-date)
not copying colossalai/nn/layer/utils/__init__.py (output up-to-date)
not copying colossalai/nn/layer/parallel_3d/_utils.py (output up-to-date)
not copying colossalai/nn/layer/parallel_3d/layers.py (output up-to-date)
not copying colossalai/nn/layer/parallel_3d/_operation.py (output up-to-date)
not copying colossalai/nn/layer/parallel_3d/__init__.py (output up-to-date)
not copying colossalai/nn/layer/wrapper/lambda_wrapper.py (output up-to-date)
not copying colossalai/nn/layer/wrapper/pipeline_wrapper.py (output up-to-date)
not copying colossalai/nn/layer/wrapper/__init__.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2p5d/_utils.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2p5d/layers.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2p5d/_operation.py (output up-to-date)
not copying colossalai/nn/layer/parallel_2p5d/__init__.py (output up-to-date)
not copying colossalai/context/random/_helper.py (output up-to-date)
not copying colossalai/context/random/seed_manager.py (output up-to-date)
not copying colossalai/context/random/__init__.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_2p5d.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_tensor.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_data.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_3d.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_sequence.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_moe.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_model.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_2d.py (output up-to-date)
not copying colossalai/context/process_group_initializer/__init__.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_1d.py (output up-to-date)
not copying colossalai/context/process_group_initializer/initializer_pipeline.py (output up-to-date)
not copying colossalai/context/process_group_initializer/process_group_initializer.py (output up-to-date)
not copying colossalai/kernel/cuda_native/scaled_softmax.py (output up-to-date)
not copying colossalai/kernel/cuda_native/layer_norm.py (output up-to-date)
not copying colossalai/kernel/cuda_native/__init__.py (output up-to-date)
not copying colossalai/kernel/cuda_native/multihead_attention.py (output up-to-date)
not copying colossalai/kernel/jit/bias_gelu.py (output up-to-date)
not copying colossalai/kernel/jit/option.py (output up-to-date)
not copying colossalai/kernel/jit/bias_dropout_add.py (output up-to-date)
not copying colossalai/kernel/jit/__init__.py (output up-to-date)
running build_ext
building 'colossal_C' extension
Emitting ninja build file /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/build.ninja...
Compiling objects...
Using envvar MAX_JOBS (32) as the number of workers...
Successfully preprocessed all matching files.
Successfully preprocessed all matching files.
Successfully preprocessed all matching files.
Successfully preprocessed all matching files.
Successfully preprocessed all matching files.
Successfully preprocessed all matching files.
ninja: no work to do.
g++ -pthread -shared /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_l2norm_kernel.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_sgd_kernel.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_scale_kernel.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/colossal_C_frontend.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_adam.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multi_tensor_lamb.o -L/usr/local/lib/python3.7/site-packages/torch/lib -L/opt/dtk-22.04.2/lib -L/usr/local/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip -lpython3.7m -o build/lib.linux-x86_64-3.7/colossal_C.cpython-37m-x86_64-linux-gnu.so
building 'colossal_scaled_upper_triang_masked_softmax' extension
Emitting ninja build file /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/build.ninja...
Compiling objects...
Using envvar MAX_JOBS (32) as the number of workers...
ninja: no work to do.
g++ -pthread -shared /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_upper_triang_masked_softmax_hip.o -L/usr/local/lib/python3.7/site-packages/torch/lib -L/opt/dtk-22.04.2/lib -L/usr/local/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip -lpython3.7m -o build/lib.linux-x86_64-3.7/colossal_scaled_upper_triang_masked_softmax.cpython-37m-x86_64-linux-gnu.so
building 'colossal_scaled_masked_softmax' extension
Emitting ninja build file /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/build.ninja...
Compiling objects...
Using envvar MAX_JOBS (32) as the number of workers...
ninja: no work to do.
g++ -pthread -shared /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/scaled_masked_softmax_hip.o -L/usr/local/lib/python3.7/site-packages/torch/lib -L/opt/dtk-22.04.2/lib -L/usr/local/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip -lpython3.7m -o build/lib.linux-x86_64-3.7/colossal_scaled_masked_softmax.cpython-37m-x86_64-linux-gnu.so
building 'colossal_layer_norm_cuda' extension
Emitting ninja build file /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/build.ninja...
Compiling objects...
Using envvar MAX_JOBS (32) as the number of workers...
ninja: no work to do.
g++ -pthread -shared /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/layer_norm_hip_kernel.o -L/usr/local/lib/python3.7/site-packages/torch/lib -L/opt/dtk-22.04.2/lib -L/usr/local/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip -lpython3.7m -o build/lib.linux-x86_64-3.7/colossal_layer_norm_cuda.cpython-37m-x86_64-linux-gnu.so
building 'colossal_multihead_attention' extension
Emitting ninja build file /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/build.ninja...
Compiling objects...
Using envvar MAX_JOBS (32) as the number of workers...
ninja: no work to do.
g++ -pthread -shared /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/softmax_kernels.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/cublas_wrappers.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/multihead_attention_1d.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/normalize_kernels.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/general_kernels.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/hip_util.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/transform_kernels.o /public/home/huchen/colossalAI/ColossalAI/build/temp.linux-x86_64-3.7/public/home/huchen/colossalAI/ColossalAI/colossalai/kernel/hip_native/csrc/kernels/dropout_kernels.o -L/usr/local/lib/python3.7/site-packages/torch/lib -L/opt/dtk-22.04.2/lib -L/usr/local/lib -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip -lpython3.7m -o build/lib.linux-x86_64-3.7/colossal_multihead_attention.cpython-37m-x86_64-linux-gnu.so
installing to build/bdist.linux-x86_64/wheel
running install
running install_lib
creating build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.7/colossal_multihead_attention.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.7/colossal_layer_norm_cuda.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.7/colossal_scaled_masked_softmax.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.7/colossal_scaled_upper_triang_masked_softmax.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.7/colossal_C.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/wheel
creating build/bdist.linux-x86_64/wheel/model_zoo
copying build/lib.linux-x86_64-3.7/model_zoo/helper.py -> build/bdist.linux-x86_64/wheel/model_zoo
creating build/bdist.linux-x86_64/wheel/model_zoo/bert
copying build/lib.linux-x86_64-3.7/model_zoo/bert/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/bert
creating build/bdist.linux-x86_64/wheel/model_zoo/gpt
copying build/lib.linux-x86_64-3.7/model_zoo/gpt/gpt.py -> build/bdist.linux-x86_64/wheel/model_zoo/gpt
copying build/lib.linux-x86_64-3.7/model_zoo/gpt/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/gpt
creating build/bdist.linux-x86_64/wheel/model_zoo/mlp_mixer
copying build/lib.linux-x86_64-3.7/model_zoo/mlp_mixer/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/mlp_mixer
creating build/bdist.linux-x86_64/wheel/model_zoo/mlp_mixer/parallel_3d
copying build/lib.linux-x86_64-3.7/model_zoo/mlp_mixer/parallel_3d/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/mlp_mixer/parallel_3d
copying build/lib.linux-x86_64-3.7/model_zoo/mlp_mixer/parallel_3d/mlp_mixer.py -> build/bdist.linux-x86_64/wheel/model_zoo/mlp_mixer/parallel_3d
creating build/bdist.linux-x86_64/wheel/model_zoo/moe
copying build/lib.linux-x86_64-3.7/model_zoo/moe/models.py -> build/bdist.linux-x86_64/wheel/model_zoo/moe
copying build/lib.linux-x86_64-3.7/model_zoo/moe/util.py -> build/bdist.linux-x86_64/wheel/model_zoo/moe
copying build/lib.linux-x86_64-3.7/model_zoo/moe/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/moe
copying build/lib.linux-x86_64-3.7/model_zoo/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo
creating build/bdist.linux-x86_64/wheel/model_zoo/vit
copying build/lib.linux-x86_64-3.7/model_zoo/vit/vit.py -> build/bdist.linux-x86_64/wheel/model_zoo/vit
copying build/lib.linux-x86_64-3.7/model_zoo/vit/vision_transformer_from_config.py -> build/bdist.linux-x86_64/wheel/model_zoo/vit
copying build/lib.linux-x86_64-3.7/model_zoo/vit/__init__.py -> build/bdist.linux-x86_64/wheel/model_zoo/vit
creating build/bdist.linux-x86_64/wheel/colossalai
creating build/bdist.linux-x86_64/wheel/colossalai/logging
copying build/lib.linux-x86_64-3.7/colossalai/logging/logging.py -> build/bdist.linux-x86_64/wheel/colossalai/logging
copying build/lib.linux-x86_64-3.7/colossalai/logging/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/logging
creating build/bdist.linux-x86_64/wheel/colossalai/amp
creating build/bdist.linux-x86_64/wheel/colossalai/amp/apex_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/apex_amp/apex_amp.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/apex_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/apex_amp/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/apex_amp
creating build/bdist.linux-x86_64/wheel/colossalai/amp/naive_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/naive_amp/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/naive_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/naive_amp/_fp16_optimizer.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/naive_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/naive_amp/naive_amp.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/naive_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/amp_type.py -> build/bdist.linux-x86_64/wheel/colossalai/amp
creating build/bdist.linux-x86_64/wheel/colossalai/amp/torch_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/torch_amp/torch_amp.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/torch_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/torch_amp/_grad_scaler.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/torch_amp
copying build/lib.linux-x86_64-3.7/colossalai/amp/torch_amp/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/amp/torch_amp
creating build/bdist.linux-x86_64/wheel/colossalai/zero
copying build/lib.linux-x86_64-3.7/colossalai/zero/zero_redundancy_optimizer_level_3.py -> build/bdist.linux-x86_64/wheel/colossalai/zero
copying build/lib.linux-x86_64-3.7/colossalai/zero/zero_redundancy_optimizer_level_2.py -> build/bdist.linux-x86_64/wheel/colossalai/zero
copying build/lib.linux-x86_64-3.7/colossalai/zero/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/zero
copying build/lib.linux-x86_64-3.7/colossalai/zero/loss_scaler.py -> build/bdist.linux-x86_64/wheel/colossalai/zero
copying build/lib.linux-x86_64-3.7/colossalai/global_variables.py -> build/bdist.linux-x86_64/wheel/colossalai
creating build/bdist.linux-x86_64/wheel/colossalai/engine
copying build/lib.linux-x86_64-3.7/colossalai/engine/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/engine
creating build/bdist.linux-x86_64/wheel/colossalai/engine/ophooks
copying build/lib.linux-x86_64-3.7/colossalai/engine/ophooks/_memtracer_ophook.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/ophooks
copying build/lib.linux-x86_64-3.7/colossalai/engine/ophooks/_base_ophook.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/ophooks
copying build/lib.linux-x86_64-3.7/colossalai/engine/ophooks/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/ophooks
creating build/bdist.linux-x86_64/wheel/colossalai/engine/schedule
copying build/lib.linux-x86_64-3.7/colossalai/engine/schedule/_base_schedule.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/schedule
copying build/lib.linux-x86_64-3.7/colossalai/engine/schedule/_pipeline_schedule.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/schedule
copying build/lib.linux-x86_64-3.7/colossalai/engine/schedule/_non_pipeline_schedule.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/schedule
copying build/lib.linux-x86_64-3.7/colossalai/engine/schedule/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/schedule
copying build/lib.linux-x86_64-3.7/colossalai/engine/_base_engine.py -> build/bdist.linux-x86_64/wheel/colossalai/engine
creating build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_base_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_moe_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_zero_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
copying build/lib.linux-x86_64-3.7/colossalai/engine/gradient_handler/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/engine/gradient_handler
creating build/bdist.linux-x86_64/wheel/colossalai/trainer
copying build/lib.linux-x86_64-3.7/colossalai/trainer/_trainer.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer
creating build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/_log_hook.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/_checkpoint_hook.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/_base_hook.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/_metric_hook.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/hooks/_lr_scheduler_hook.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer/hooks
copying build/lib.linux-x86_64-3.7/colossalai/trainer/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/trainer
creating build/bdist.linux-x86_64/wheel/colossalai/registry
copying build/lib.linux-x86_64-3.7/colossalai/registry/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/registry
copying build/lib.linux-x86_64-3.7/colossalai/registry/registry.py -> build/bdist.linux-x86_64/wheel/colossalai/registry
copying build/lib.linux-x86_64-3.7/colossalai/initialize.py -> build/bdist.linux-x86_64/wheel/colossalai
copying build/lib.linux-x86_64-3.7/colossalai/constants.py -> build/bdist.linux-x86_64/wheel/colossalai
copying build/lib.linux-x86_64-3.7/colossalai/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai
creating build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/checkpointing.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
creating build/bdist.linux-x86_64/wheel/colossalai/utils/data_sampler
copying build/lib.linux-x86_64-3.7/colossalai/utils/data_sampler/data_parallel_sampler.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/data_sampler
copying build/lib.linux-x86_64-3.7/colossalai/utils/data_sampler/base_sampler.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/data_sampler
copying build/lib.linux-x86_64-3.7/colossalai/utils/data_sampler/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/data_sampler
creating build/bdist.linux-x86_64/wheel/colossalai/utils/multi_tensor_apply
copying build/lib.linux-x86_64-3.7/colossalai/utils/multi_tensor_apply/multi_tensor_apply.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/multi_tensor_apply
copying build/lib.linux-x86_64-3.7/colossalai/utils/multi_tensor_apply/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/multi_tensor_apply
creating build/bdist.linux-x86_64/wheel/colossalai/utils/gradient_accumulation
copying build/lib.linux-x86_64-3.7/colossalai/utils/gradient_accumulation/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/gradient_accumulation
copying build/lib.linux-x86_64-3.7/colossalai/utils/gradient_accumulation/_gradient_accumulation.py -> build/bdist.linux-x86_64/wheel/colossalai/utils/gradient_accumulation
copying build/lib.linux-x86_64-3.7/colossalai/utils/activation_checkpoint.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/timer.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/common.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/memory.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
copying build/lib.linux-x86_64-3.7/colossalai/utils/cuda.py -> build/bdist.linux-x86_64/wheel/colossalai/utils
creating build/bdist.linux-x86_64/wheel/colossalai/nn
creating build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/fused_adam.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/lars.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/fused_sgd.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/fused_lamb.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/lamb.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
copying build/lib.linux-x86_64-3.7/colossalai/nn/optimizer/colossalai_optimizer.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/optimizer
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/base_layer.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2d/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2d/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2d/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2d/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2d
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_1d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_1d/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_1d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_1d/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_1d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_1d/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_1d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_1d/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_1d
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/embedding.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/linear.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/normalization.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/dropout.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/colossalai_layer/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/colossalai_layer
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_sequence
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_sequence/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_sequence
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_sequence/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_sequence
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_sequence/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_sequence
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_sequence/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_sequence
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/vanilla
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/vanilla/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/vanilla
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/vanilla/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/vanilla
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/moe
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/moe/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/moe
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/moe/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/moe
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/moe/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/moe
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/utils
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/utils/common.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/utils
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/utils/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/utils
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_3d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_3d/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_3d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_3d/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_3d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_3d/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_3d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_3d/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_3d
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/wrapper
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/wrapper/lambda_wrapper.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/wrapper
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/wrapper/pipeline_wrapper.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/wrapper
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/wrapper/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/wrapper
creating build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2p5d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2p5d/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2p5d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2p5d/layers.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2p5d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2p5d/_operation.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2p5d
copying build/lib.linux-x86_64-3.7/colossalai/nn/layer/parallel_2p5d/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/layer/parallel_2p5d
creating build/bdist.linux-x86_64/wheel/colossalai/nn/model
copying build/lib.linux-x86_64-3.7/colossalai/nn/model/model_from_config.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/model
copying build/lib.linux-x86_64-3.7/colossalai/nn/model/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/model
copying build/lib.linux-x86_64-3.7/colossalai/nn/init.py -> build/bdist.linux-x86_64/wheel/colossalai/nn
copying build/lib.linux-x86_64-3.7/colossalai/nn/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn
creating build/bdist.linux-x86_64/wheel/colossalai/nn/metric
copying build/lib.linux-x86_64-3.7/colossalai/nn/metric/_utils.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/metric
copying build/lib.linux-x86_64-3.7/colossalai/nn/metric/accuracy_3d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/metric
copying build/lib.linux-x86_64-3.7/colossalai/nn/metric/accuracy_2p5d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/metric
copying build/lib.linux-x86_64-3.7/colossalai/nn/metric/accuracy_2d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/metric
copying build/lib.linux-x86_64-3.7/colossalai/nn/metric/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/metric
creating build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/delayed.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/torch.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/cosine.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/multistep.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/onecycle.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/linear.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
copying build/lib.linux-x86_64-3.7/colossalai/nn/lr_scheduler/poly.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/lr_scheduler
creating build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/loss_3d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/loss_2d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/loss_2p5d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/loss_moe.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
copying build/lib.linux-x86_64-3.7/colossalai/nn/loss/loss_1d.py -> build/bdist.linux-x86_64/wheel/colossalai/nn/loss
creating build/bdist.linux-x86_64/wheel/colossalai/builder
copying build/lib.linux-x86_64-3.7/colossalai/builder/builder.py -> build/bdist.linux-x86_64/wheel/colossalai/builder
copying build/lib.linux-x86_64-3.7/colossalai/builder/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/builder
copying build/lib.linux-x86_64-3.7/colossalai/builder/pipeline.py -> build/bdist.linux-x86_64/wheel/colossalai/builder
creating build/bdist.linux-x86_64/wheel/colossalai/context
copying build/lib.linux-x86_64-3.7/colossalai/context/config.py -> build/bdist.linux-x86_64/wheel/colossalai/context
copying build/lib.linux-x86_64-3.7/colossalai/context/parallel_context.py -> build/bdist.linux-x86_64/wheel/colossalai/context
copying build/lib.linux-x86_64-3.7/colossalai/context/parallel_mode.py -> build/bdist.linux-x86_64/wheel/colossalai/context
copying build/lib.linux-x86_64-3.7/colossalai/context/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/context
creating build/bdist.linux-x86_64/wheel/colossalai/context/random
copying build/lib.linux-x86_64-3.7/colossalai/context/random/_helper.py -> build/bdist.linux-x86_64/wheel/colossalai/context/random
copying build/lib.linux-x86_64-3.7/colossalai/context/random/seed_manager.py -> build/bdist.linux-x86_64/wheel/colossalai/context/random
copying build/lib.linux-x86_64-3.7/colossalai/context/random/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/context/random
creating build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_2p5d.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_tensor.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_data.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_3d.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_sequence.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_moe.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_model.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_2d.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_1d.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/initializer_pipeline.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
copying build/lib.linux-x86_64-3.7/colossalai/context/process_group_initializer/process_group_initializer.py -> build/bdist.linux-x86_64/wheel/colossalai/context/process_group_initializer
creating build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/communication/p2p.py -> build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/communication/utils.py -> build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/communication/collective.py -> build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/communication/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/communication/ring.py -> build/bdist.linux-x86_64/wheel/colossalai/communication
copying build/lib.linux-x86_64-3.7/colossalai/core.py -> build/bdist.linux-x86_64/wheel/colossalai
creating build/bdist.linux-x86_64/wheel/colossalai/kernel
creating build/bdist.linux-x86_64/wheel/colossalai/kernel/cuda_native
copying build/lib.linux-x86_64-3.7/colossalai/kernel/cuda_native/scaled_softmax.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/cuda_native
copying build/lib.linux-x86_64-3.7/colossalai/kernel/cuda_native/layer_norm.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/cuda_native
copying build/lib.linux-x86_64-3.7/colossalai/kernel/cuda_native/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/cuda_native
copying build/lib.linux-x86_64-3.7/colossalai/kernel/cuda_native/multihead_attention.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/cuda_native
creating build/bdist.linux-x86_64/wheel/colossalai/kernel/jit
copying build/lib.linux-x86_64-3.7/colossalai/kernel/jit/bias_gelu.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/jit
copying build/lib.linux-x86_64-3.7/colossalai/kernel/jit/option.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/jit
copying build/lib.linux-x86_64-3.7/colossalai/kernel/jit/bias_dropout_add.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/jit
copying build/lib.linux-x86_64-3.7/colossalai/kernel/jit/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel/jit
copying build/lib.linux-x86_64-3.7/colossalai/kernel/__init__.py -> build/bdist.linux-x86_64/wheel/colossalai/kernel
running install_egg_info
running egg_info
writing colossalai.egg-info/PKG-INFO
writing dependency_links to colossalai.egg-info/dependency_links.txt
writing requirements to colossalai.egg-info/requires.txt
writing top-level names to colossalai.egg-info/top_level.txt
'license_file' option was not specified
reading manifest file 'colossalai.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no files found matching '*.txt'
warning: no files found matching '*.tr' under directory 'colossalai'
warning: no files found matching '*.cc' under directory 'colossalai'
writing manifest file 'colossalai.egg-info/SOURCES.txt'
Copying colossalai.egg-info to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info
Copying top_level.txt to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info/top_level.txt
Copying PKG-INFO to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info/PKG-INFO
Copying dependency_links.txt to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info/dependency_links.txt
Copying requires.txt to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info/requires.txt
Copying SOURCES.txt to build/bdist.linux-x86_64/wheel/colossalai-0.0.2-py3.7.egg-info/SOURCES.txt
running install_scripts
adding license file "LICENSE" (matched pattern "LICEN[CS]E*")
creating build/bdist.linux-x86_64/wheel/colossalai-0.0.2.dist-info/WHEEL
creating 'dist/colossalai-0.0.2-cp37-cp37m-linux_x86_64.whl' and adding 'build/bdist.linux-x86_64/wheel' to it
adding 'colossal_C.cpython-37m-x86_64-linux-gnu.so'
adding 'colossal_layer_norm_cuda.cpython-37m-x86_64-linux-gnu.so'
adding 'colossal_multihead_attention.cpython-37m-x86_64-linux-gnu.so'
adding 'colossal_scaled_masked_softmax.cpython-37m-x86_64-linux-gnu.so'
adding 'colossal_scaled_upper_triang_masked_softmax.cpython-37m-x86_64-linux-gnu.so'
adding 'colossalai/__init__.py'
adding 'colossalai/constants.py'
adding 'colossalai/core.py'
adding 'colossalai/global_variables.py'
adding 'colossalai/initialize.py'
adding 'colossalai/amp/__init__.py'
adding 'colossalai/amp/amp_type.py'
adding 'colossalai/amp/apex_amp/__init__.py'
adding 'colossalai/amp/apex_amp/apex_amp.py'
adding 'colossalai/amp/naive_amp/__init__.py'
adding 'colossalai/amp/naive_amp/_fp16_optimizer.py'
adding 'colossalai/amp/naive_amp/naive_amp.py'
adding 'colossalai/amp/torch_amp/__init__.py'
adding 'colossalai/amp/torch_amp/_grad_scaler.py'
adding 'colossalai/amp/torch_amp/torch_amp.py'
adding 'colossalai/builder/__init__.py'
adding 'colossalai/builder/builder.py'
adding 'colossalai/builder/pipeline.py'
adding 'colossalai/communication/__init__.py'
adding 'colossalai/communication/collective.py'
adding 'colossalai/communication/p2p.py'
adding 'colossalai/communication/ring.py'
adding 'colossalai/communication/utils.py'
adding 'colossalai/context/__init__.py'
adding 'colossalai/context/config.py'
adding 'colossalai/context/parallel_context.py'
adding 'colossalai/context/parallel_mode.py'
adding 'colossalai/context/process_group_initializer/__init__.py'
adding 'colossalai/context/process_group_initializer/initializer_1d.py'
adding 'colossalai/context/process_group_initializer/initializer_2d.py'
adding 'colossalai/context/process_group_initializer/initializer_2p5d.py'
adding 'colossalai/context/process_group_initializer/initializer_3d.py'
adding 'colossalai/context/process_group_initializer/initializer_data.py'
adding 'colossalai/context/process_group_initializer/initializer_model.py'
adding 'colossalai/context/process_group_initializer/initializer_moe.py'
adding 'colossalai/context/process_group_initializer/initializer_pipeline.py'
adding 'colossalai/context/process_group_initializer/initializer_sequence.py'
adding 'colossalai/context/process_group_initializer/initializer_tensor.py'
adding 'colossalai/context/process_group_initializer/process_group_initializer.py'
adding 'colossalai/context/random/__init__.py'
adding 'colossalai/context/random/_helper.py'
adding 'colossalai/context/random/seed_manager.py'
adding 'colossalai/engine/__init__.py'
adding 'colossalai/engine/_base_engine.py'
adding 'colossalai/engine/gradient_handler/__init__.py'
adding 'colossalai/engine/gradient_handler/_base_gradient_handler.py'
adding 'colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py'
adding 'colossalai/engine/gradient_handler/_moe_gradient_handler.py'
adding 'colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py'
adding 'colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py'
adding 'colossalai/engine/gradient_handler/_zero_gradient_handler.py'
adding 'colossalai/engine/ophooks/__init__.py'
adding 'colossalai/engine/ophooks/_base_ophook.py'
adding 'colossalai/engine/ophooks/_memtracer_ophook.py'
adding 'colossalai/engine/schedule/__init__.py'
adding 'colossalai/engine/schedule/_base_schedule.py'
adding 'colossalai/engine/schedule/_non_pipeline_schedule.py'
adding 'colossalai/engine/schedule/_pipeline_schedule.py'
adding 'colossalai/kernel/__init__.py'
adding 'colossalai/kernel/cuda_native/__init__.py'
adding 'colossalai/kernel/cuda_native/layer_norm.py'
adding 'colossalai/kernel/cuda_native/multihead_attention.py'
adding 'colossalai/kernel/cuda_native/scaled_softmax.py'
adding 'colossalai/kernel/jit/__init__.py'
adding 'colossalai/kernel/jit/bias_dropout_add.py'
adding 'colossalai/kernel/jit/bias_gelu.py'
adding 'colossalai/kernel/jit/option.py'
adding 'colossalai/logging/__init__.py'
adding 'colossalai/logging/logging.py'
adding 'colossalai/nn/__init__.py'
adding 'colossalai/nn/init.py'
adding 'colossalai/nn/layer/__init__.py'
adding 'colossalai/nn/layer/base_layer.py'
adding 'colossalai/nn/layer/colossalai_layer/__init__.py'
adding 'colossalai/nn/layer/colossalai_layer/_utils.py'
adding 'colossalai/nn/layer/colossalai_layer/dropout.py'
adding 'colossalai/nn/layer/colossalai_layer/embedding.py'
adding 'colossalai/nn/layer/colossalai_layer/linear.py'
adding 'colossalai/nn/layer/colossalai_layer/normalization.py'
adding 'colossalai/nn/layer/moe/__init__.py'
adding 'colossalai/nn/layer/moe/_operation.py'
adding 'colossalai/nn/layer/moe/layers.py'
adding 'colossalai/nn/layer/parallel_1d/__init__.py'
adding 'colossalai/nn/layer/parallel_1d/_operation.py'
adding 'colossalai/nn/layer/parallel_1d/_utils.py'
adding 'colossalai/nn/layer/parallel_1d/layers.py'
adding 'colossalai/nn/layer/parallel_2d/__init__.py'
adding 'colossalai/nn/layer/parallel_2d/_operation.py'
adding 'colossalai/nn/layer/parallel_2d/_utils.py'
adding 'colossalai/nn/layer/parallel_2d/layers.py'
adding 'colossalai/nn/layer/parallel_2p5d/__init__.py'
adding 'colossalai/nn/layer/parallel_2p5d/_operation.py'
adding 'colossalai/nn/layer/parallel_2p5d/_utils.py'
adding 'colossalai/nn/layer/parallel_2p5d/layers.py'
adding 'colossalai/nn/layer/parallel_3d/__init__.py'
adding 'colossalai/nn/layer/parallel_3d/_operation.py'
adding 'colossalai/nn/layer/parallel_3d/_utils.py'
adding 'colossalai/nn/layer/parallel_3d/layers.py'
adding 'colossalai/nn/layer/parallel_sequence/__init__.py'
adding 'colossalai/nn/layer/parallel_sequence/_operation.py'
adding 'colossalai/nn/layer/parallel_sequence/_utils.py'
adding 'colossalai/nn/layer/parallel_sequence/layers.py'
adding 'colossalai/nn/layer/utils/__init__.py'
adding 'colossalai/nn/layer/utils/common.py'
adding 'colossalai/nn/layer/vanilla/__init__.py'
adding 'colossalai/nn/layer/vanilla/layers.py'
adding 'colossalai/nn/layer/wrapper/__init__.py'
adding 'colossalai/nn/layer/wrapper/lambda_wrapper.py'
adding 'colossalai/nn/layer/wrapper/pipeline_wrapper.py'
adding 'colossalai/nn/loss/__init__.py'
adding 'colossalai/nn/loss/loss_1d.py'
adding 'colossalai/nn/loss/loss_2d.py'
adding 'colossalai/nn/loss/loss_2p5d.py'
adding 'colossalai/nn/loss/loss_3d.py'
adding 'colossalai/nn/loss/loss_moe.py'
adding 'colossalai/nn/lr_scheduler/__init__.py'
adding 'colossalai/nn/lr_scheduler/cosine.py'
adding 'colossalai/nn/lr_scheduler/delayed.py'
adding 'colossalai/nn/lr_scheduler/linear.py'
adding 'colossalai/nn/lr_scheduler/multistep.py'
adding 'colossalai/nn/lr_scheduler/onecycle.py'
adding 'colossalai/nn/lr_scheduler/poly.py'
adding 'colossalai/nn/lr_scheduler/torch.py'
adding 'colossalai/nn/metric/__init__.py'
adding 'colossalai/nn/metric/_utils.py'
adding 'colossalai/nn/metric/accuracy_2d.py'
adding 'colossalai/nn/metric/accuracy_2p5d.py'
adding 'colossalai/nn/metric/accuracy_3d.py'
adding 'colossalai/nn/model/__init__.py'
adding 'colossalai/nn/model/model_from_config.py'
adding 'colossalai/nn/optimizer/__init__.py'
adding 'colossalai/nn/optimizer/colossalai_optimizer.py'
adding 'colossalai/nn/optimizer/fused_adam.py'
adding 'colossalai/nn/optimizer/fused_lamb.py'
adding 'colossalai/nn/optimizer/fused_sgd.py'
adding 'colossalai/nn/optimizer/lamb.py'
adding 'colossalai/nn/optimizer/lars.py'
adding 'colossalai/registry/__init__.py'
adding 'colossalai/registry/registry.py'
adding 'colossalai/trainer/__init__.py'
adding 'colossalai/trainer/_trainer.py'
adding 'colossalai/trainer/hooks/__init__.py'
adding 'colossalai/trainer/hooks/_base_hook.py'
adding 'colossalai/trainer/hooks/_checkpoint_hook.py'
adding 'colossalai/trainer/hooks/_log_hook.py'
adding 'colossalai/trainer/hooks/_lr_scheduler_hook.py'
adding 'colossalai/trainer/hooks/_metric_hook.py'
adding 'colossalai/utils/__init__.py'
adding 'colossalai/utils/activation_checkpoint.py'
adding 'colossalai/utils/checkpointing.py'
adding 'colossalai/utils/common.py'
adding 'colossalai/utils/cuda.py'
adding 'colossalai/utils/memory.py'
adding 'colossalai/utils/timer.py'
adding 'colossalai/utils/data_sampler/__init__.py'
adding 'colossalai/utils/data_sampler/base_sampler.py'
adding 'colossalai/utils/data_sampler/data_parallel_sampler.py'
adding 'colossalai/utils/gradient_accumulation/__init__.py'
adding 'colossalai/utils/gradient_accumulation/_gradient_accumulation.py'
adding 'colossalai/utils/multi_tensor_apply/__init__.py'
adding 'colossalai/utils/multi_tensor_apply/multi_tensor_apply.py'
adding 'colossalai/zero/__init__.py'
adding 'colossalai/zero/loss_scaler.py'
adding 'colossalai/zero/zero_redundancy_optimizer_level_2.py'
adding 'colossalai/zero/zero_redundancy_optimizer_level_3.py'
adding 'model_zoo/__init__.py'
adding 'model_zoo/helper.py'
adding 'model_zoo/bert/__init__.py'
adding 'model_zoo/gpt/__init__.py'
adding 'model_zoo/gpt/gpt.py'
adding 'model_zoo/mlp_mixer/__init__.py'
adding 'model_zoo/mlp_mixer/parallel_3d/__init__.py'
adding 'model_zoo/mlp_mixer/parallel_3d/mlp_mixer.py'
adding 'model_zoo/moe/__init__.py'
adding 'model_zoo/moe/models.py'
adding 'model_zoo/moe/util.py'
adding 'model_zoo/vit/__init__.py'
adding 'model_zoo/vit/vision_transformer_from_config.py'
adding 'model_zoo/vit/vit.py'
adding 'colossalai-0.0.2.dist-info/LICENSE'
adding 'colossalai-0.0.2.dist-info/METADATA'
adding 'colossalai-0.0.2.dist-info/WHEEL'
adding 'colossalai-0.0.2.dist-info/top_level.txt'
adding 'colossalai-0.0.2.dist-info/RECORD'
removing build/bdist.linux-x86_64/wheel
from .gpt import *
\ No newline at end of file
import math
from typing import Callable
import torch
from colossalai import nn as col_nn
from colossalai.builder.pipeline import partition_uniform
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.logging import get_dist_logger
from colossalai.nn.layer.utils import CheckpointModule, divide
from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper
from colossalai.registry import LAYERS, LOSSES, MODELS
from colossalai.utils import get_current_device
from torch import dtype, nn
# Names exported when this module is star-imported (e.g. `from .gpt import *`).
__all__ = [
'GPT', 'GPTLMLoss', 'gpt2_small', 'gpt2_medium', 'gpt2_large', 'gpt2_xl', 'gpt2_8B', 'gpt2_xl_pipeline',
'gpt2_8B_pipeline', 'gpt3', 'gpt3_pipeline'
]
@LAYERS.register_module
class GPTEmbedding(nn.Module):
    """Input embedding for GPT: word + position (+ optional token-type) embeddings.

    The summed embeddings go through dropout. When an attention mask is
    supplied it is also converted into an additive mask that can be added to
    attention scores.

    Args:
        embedding_dim: Size of every embedding vector.
        vocab_size: Number of rows in the word-embedding table.
        max_position_embeddings: Number of rows in the position-embedding table.
        num_tokentypes: Number of token types; a token-type table is only
            created when this is greater than zero.
        padding_idx: Forwarded to the word-embedding layer.
        dropout: Dropout probability applied to the summed embeddings.
        dtype: Forwarded to every embedding layer.
    """

    def __init__(self,
                 embedding_dim: int,
                 vocab_size: int,
                 max_position_embeddings: int,
                 num_tokentypes: int = 0,
                 padding_idx: int = None,
                 dropout: float = 0.,
                 dtype: dtype = None) -> None:
        super().__init__()
        self.word_embeddings = col_nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx, dtype=dtype)
        self.position_embeddings = col_nn.Embedding(max_position_embeddings, embedding_dim, dtype=dtype)
        # The token-type table is optional; None marks "not configured".
        self.tokentype_embeddings = (col_nn.Embedding(num_tokentypes, embedding_dim, dtype=dtype)
                                     if num_tokentypes > 0 else None)
        self.dropout = col_nn.Dropout(dropout)

    @property
    def word_embedding_weight(self):
        """Weight of the word-embedding layer."""
        return self.word_embeddings.weight

    def forward(self, input_ids, attention_mask=None, position_ids=None, tokentype_ids=None):
        """Embed ``input_ids`` and build the additive attention mask.

        Returns:
            A tuple ``(embeddings, attention_mask)``. The mask is returned
            unchanged (``None``) when none was given; otherwise it is reshaped
            to ``[batch_size, 1, 1, N]`` and mapped through
            ``(1 - mask) * -10000``.
        """
        seq_length = input_ids.size(1)
        if position_ids is None:
            # Default positions are 0..seq_length-1, with a leading axis of size 1.
            position_ids = torch.arange(seq_length, dtype=torch.long, device=get_current_device())
            position_ids = position_ids.unsqueeze(0)

        hidden = self.word_embeddings(input_ids) + self.position_embeddings(position_ids)
        has_tokentypes = self.tokentype_embeddings is not None and tokentype_ids is not None
        if has_tokentypes:
            hidden = hidden + self.tokentype_embeddings(tokentype_ids)
        hidden = self.dropout(hidden)

        if attention_mask is None:
            return hidden, attention_mask

        # Turn the mask into a 4D tensor [batch_size, 1, 1, to_seq_length] so it
        # can broadcast against [batch_size, num_heads, from_seq_length, to_seq_length]
        # attention scores. Adapted from huggingface.
        batch_size = input_ids.shape[0]
        mask = attention_mask.view(batch_size, -1)
        # NOTE(review): presumably keeps only this rank's slice of the batch - confirm
        # against col_nn.partition_batch.
        mask = col_nn.partition_batch(mask)
        mask = mask.unsqueeze(1).unsqueeze(2)
        mask = mask.to(dtype=hidden.dtype)    # fp16 compatibility
        # Entries equal to 1 become 0; entries equal to 0 become -10000.
        mask = (1.0 - mask) * -10000.0
        return hidden, mask
@LAYERS.register_module
class GPTSelfAttention(nn.Module):
    """Multi-head causal self-attention with a fused QKV projection.

    Args:
        dim: Hidden size of the input and the output.
        num_heads: Number of attention heads; ``dim`` is split across them
            with ``divide``.
        attention_dropout: Dropout probability applied to the attention weights.
        dropout: Dropout probability applied to the output projection.
        bias: Whether the QKV projection has a bias.
        fuse_scale_mask_softmax: Use ``FusedScaleMaskSoftmax`` for scaling,
            masking and softmax instead of the plain PyTorch path.
        dtype: Forwarded to the linear layers.
    """

    def __init__(self,
                 dim: int,
                 num_heads: int,
                 attention_dropout: float,
                 dropout: float,
                 bias: bool = True,
                 fuse_scale_mask_softmax: bool = False,
                 dtype: dtype = None) -> None:
        super().__init__()
        self.fuse_scale_mask_softmax = fuse_scale_mask_softmax
        self.attention_head_size = divide(dim, num_heads)
        self.query_key_value = col_nn.Linear(dim, 3 * dim, dtype=dtype, bias=bias)

        if not fuse_scale_mask_softmax:
            self.softmax = nn.Softmax(dim=-1)
        else:
            # Imported lazily so the fused kernels are only needed when requested.
            from colossalai.kernel import FusedScaleMaskSoftmax
            from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType

            # NOTE(review): the input is declared fp16 unconditionally, and `scale`
            # is sqrt(head_size) while the unfused path divides by sqrt(head_size) -
            # confirm how FusedScaleMaskSoftmax applies `scale`.
            self.softmax = FusedScaleMaskSoftmax(input_in_fp16=True,
                                                 input_in_bf16=False,
                                                 attn_mask_type=AttnMaskType.causal,
                                                 scaled_masked_softmax_fusion=True,
                                                 mask_func=None,
                                                 softmax_in_fp32=True,
                                                 scale=math.sqrt(self.attention_head_size))

        self.attention_dropout = col_nn.Dropout(attention_dropout)
        # NOTE(review): the output projection always has a bias, regardless of `bias`.
        self.dense = col_nn.Linear(dim, dim, dtype=dtype, bias=True)
        self.dropout = col_nn.Dropout(dropout)

    def forward(self, x, attention_mask=None):
        """Apply causal self-attention to ``x``.

        ``attention_mask``, when given, is added to the scores (unfused path) or
        handed to the fused softmax. The result has the same leading dimensions
        as the QKV projection, with the heads merged back into the last axis.
        """
        qkv = self.query_key_value(x)
        all_head_size = qkv.shape[-1] // 3
        num_attention_heads = divide(all_head_size, self.attention_head_size)

        # Split the last axis into (heads, 3 * head_size), move the heads in front
        # of the sequence axis, then cut each head's slice into q, k and v.
        qkv = qkv.view(*qkv.shape[:-1], num_attention_heads, 3 * self.attention_head_size)
        qkv = qkv.permute((0, 2, 1, 3))
        q, k, v = qkv.chunk(3, dim=-1)

        scores = torch.matmul(q, k.transpose(-1, -2))

        if self.fuse_scale_mask_softmax:
            probs = self.softmax(scores, attention_mask)
        else:
            scores = scores / math.sqrt(self.attention_head_size)
            # Causal mask: position i may only attend to positions <= i.
            q_len, k_len = q.size(-2), k.size(-2)
            lower_triangle = torch.tril(torch.ones((q_len, k_len), dtype=torch.uint8, device=get_current_device()))
            causal_mask = lower_triangle.view(1, 1, q_len, k_len).bool()
            masked_value = torch.tensor(-1e4, dtype=scores.dtype, device=get_current_device())
            scores = torch.where(causal_mask, scores, masked_value)
            if attention_mask is not None:
                scores = scores + attention_mask
            probs = self.softmax(scores)

        probs = self.attention_dropout(probs)

        # Weighted sum of the values, then merge the heads back into one axis.
        context = torch.matmul(probs, v).transpose(1, 2)
        context = context.reshape(*context.size()[:-2], all_head_size)

        return self.dropout(self.dense(context))
@LAYERS.register_module
class GPTMLP(nn.Module):
    """Two-layer feed-forward network: linear -> activation -> linear -> dropout.

    Args:
        dim: Input and output size.
        mlp_ratio: The hidden size is ``int(dim * mlp_ratio)``.
        activation: Callable applied between the two linear layers.
        dropout: Dropout probability applied to the output.
        dtype: Forwarded to both linear layers.
        bias: Whether both linear layers have a bias.
    """

    def __init__(self,
                 dim: int,
                 mlp_ratio: float,
                 activation: Callable,
                 dropout: float,
                 dtype: dtype = None,
                 bias: bool = True):
        super().__init__()
        hidden_dim = int(dim * mlp_ratio)
        self.dense_1 = col_nn.Linear(dim, hidden_dim, dtype=dtype, bias=bias)
        self.activation = activation
        self.dense_2 = col_nn.Linear(hidden_dim, dim, dtype=dtype, bias=bias)
        self.dropout = col_nn.Dropout(dropout)

    def forward(self, x):
        """Expand, activate, project back and apply dropout."""
        expanded = self.activation(self.dense_1(x))
        projected = self.dense_2(expanded)
        return self.dropout(projected)
@LAYERS.register_module
class GPTBlock(CheckpointModule):
    """One transformer block: self-attention and MLP, each with a residual connection.

    Args:
        dim: Hidden size.
        num_heads: Number of attention heads.
        mlp_ratio: Ratio of MLP hidden size to ``dim``.
        activation: Activation callable used inside the MLP.
        attention_dropout: Dropout probability on attention weights.
        dropout: Dropout probability on the attention and MLP outputs.
        layernorm_epsilon: ``eps`` of both layer norms.
        dtype: Forwarded to all sub-layers.
        bias: Forwarded to the attention and MLP layers.
        apply_post_layernorm: When True the residual branch starts from the
            layer-norm output; when False it starts from the layer-norm input.
        fuse_scale_mask_softmax: Forwarded to the attention layer.
        checkpoint: Forwarded to ``CheckpointModule``.
    """

    def __init__(self,
                 dim: int,
                 num_heads: int,
                 mlp_ratio: float,
                 activation: Callable,
                 attention_dropout: float = 0.,
                 dropout: float = 0.,
                 layernorm_epsilon: float = 1e-5,
                 dtype: dtype = None,
                 bias: bool = True,
                 apply_post_layernorm: bool = False,
                 fuse_scale_mask_softmax: bool = False,
                 checkpoint: bool = False):
        super().__init__(checkpoint)
        self.apply_post_layernorm = apply_post_layernorm

        self.norm1 = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
        self.attn = GPTSelfAttention(
            dim=dim,
            num_heads=num_heads,
            attention_dropout=attention_dropout,
            dropout=dropout,
            bias=bias,
            fuse_scale_mask_softmax=fuse_scale_mask_softmax,
            dtype=dtype,
        )

        self.norm2 = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
        self.mlp = GPTMLP(
            dim=dim,
            mlp_ratio=mlp_ratio,
            activation=activation,
            dropout=dropout,
            dtype=dtype,
            bias=bias,
        )

    def _forward(self, x, attention_mask=None):
        """Run attention then MLP; return ``(x, attention_mask)`` with the mask unchanged.

        NOTE(review): presumably invoked by ``CheckpointModule.forward`` - confirm.
        """
        # Attention sub-layer. The residual is the normalized tensor in
        # post-layernorm mode and the raw input otherwise.
        normed = self.norm1(x)
        shortcut = normed if self.apply_post_layernorm else x
        x = shortcut + self.attn(normed, attention_mask)

        # MLP sub-layer, same residual rule.
        normed = self.norm2(x)
        shortcut = normed if self.apply_post_layernorm else x
        x = shortcut + self.mlp(normed)

        return x, attention_mask
@LAYERS.register_module
class GPTLMHead(nn.Module):
    """Language-model head: a ``col_nn.Classifier`` from ``dim`` to ``vocab_size``.

    Args:
        dim: Input feature size.
        vocab_size: Number of output classes.
        word_embeeding_weight: Optional parameter handed to the classifier as
            its third positional argument (presumably so the head can reuse
            the embedding weight; confirm against ``col_nn.Classifier``).
            The spelling is kept because it is part of the public signature.
        bias: Forwarded to the classifier.
        dtype: Forwarded to the classifier.
    """

    def __init__(self,
                 dim: int,
                 vocab_size: int,
                 word_embeeding_weight: nn.Parameter = None,
                 bias: bool = False,
                 dtype: dtype = None) -> None:
        super().__init__()
        classifier = col_nn.Classifier(dim, vocab_size, word_embeeding_weight, bias=bias, dtype=dtype)
        self.dense = classifier

    @property
    def weight(self):
        """The weight of the underlying classifier."""
        return self.dense.weight

    def forward(self, x):
        """Apply the classifier to ``x`` and return its output."""
        return self.dense(x)
@LOSSES.register_module
class GPTLMLoss(nn.Module):
    """Next-token loss: ``col_nn.CrossEntropyLoss`` on shifted logits and labels."""

    def __init__(self):
        super().__init__()
        self.loss = col_nn.CrossEntropyLoss()

    def forward(self, logits, labels):
        """Return the loss of ``logits`` at position ``t`` against ``labels`` at ``t + 1``.

        The last position of ``logits`` (second-to-last axis) and the first
        position of ``labels`` (last axis) are dropped before flattening.
        """
        predictions = logits[..., :-1, :].contiguous()
        targets = labels[..., 1:].contiguous()
        # Collapse every leading axis so the loss sees one row per token.
        flat_predictions = predictions.view(-1, predictions.size(-1))
        flat_targets = targets.view(-1)
        return self.loss(flat_predictions, flat_targets)
@MODELS.register_module
class GPT(nn.Module):
    """GPT model: embedding -> ``depth`` x ``GPTBlock`` -> layer norm -> LM head.

    The LM head is constructed with the embedding module's
    ``word_embedding_weight``.

    Args:
        vocab_size: Forwarded to the embedding and the head.
        max_position_embeddings: Forwarded to the embedding.
        dim: Feature size used throughout the model.
        num_heads: Forwarded to every block.
        depth: Number of ``GPTBlock`` layers.
        mlp_ratio: Forwarded to every block.
        dropout: Forwarded to every block.
        embedding_dropout: Dropout rate of the embedding module.
        attention_dropout: Forwarded to every block.
        layernorm_epsilon: ``eps`` of the block norms and the final norm.
        activation: Forwarded to every block.
        padding_idx: Forwarded to the embedding.
        dtype: Forwarded to every sub-module.
        bias: Forwarded to every block.
        apply_post_layernorm: Forwarded to every block.
        fuse_scale_mask_softmax: Forwarded to every block.
        checkpoint: Forwarded to every block.
    """

    def __init__(self,
                 vocab_size: int = 50304,
                 max_position_embeddings: int = 1024,
                 dim: int = 768,
                 num_heads: int = 12,
                 depth: int = 12,
                 mlp_ratio: float = 4.0,
                 dropout: float = 0.1,
                 embedding_dropout: float = 0.1,
                 attention_dropout: float = 0.1,
                 layernorm_epsilon: float = 1e-5,
                 activation: Callable = nn.functional.gelu,
                 padding_idx: int = None,
                 dtype: dtype = None,
                 bias: bool = True,
                 apply_post_layernorm: bool = False,
                 fuse_scale_mask_softmax: bool = False,
                 checkpoint: bool = False) -> None:
        super().__init__()
        self.embed = GPTEmbedding(
            embedding_dim=dim,
            vocab_size=vocab_size,
            max_position_embeddings=max_position_embeddings,
            padding_idx=padding_idx,
            dropout=embedding_dropout,
            dtype=dtype,
        )

        # Every block is built from the same configuration.
        block_config = dict(
            dim=dim,
            num_heads=num_heads,
            mlp_ratio=mlp_ratio,
            activation=activation,
            attention_dropout=attention_dropout,
            dropout=dropout,
            layernorm_epsilon=layernorm_epsilon,
            dtype=dtype,
            bias=bias,
            apply_post_layernorm=apply_post_layernorm,
            fuse_scale_mask_softmax=fuse_scale_mask_softmax,
            checkpoint=checkpoint,
        )
        self.blocks = nn.ModuleList([GPTBlock(**block_config) for _ in range(depth)])

        self.norm = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
        self.head = GPTLMHead(
            dim=dim,
            vocab_size=vocab_size,
            word_embeeding_weight=self.embed.word_embedding_weight,
            dtype=dtype,
        )

    def forward(self, input_ids, attention_mask=None):
        """Return the head output for ``input_ids``.

        The embedding returns both hidden states and a (possibly transformed)
        attention mask; both are threaded through every block.
        """
        hidden, attention_mask = self.embed(input_ids, attention_mask)
        for layer in self.blocks:
            hidden, attention_mask = layer(hidden, attention_mask)
        return self.head(self.norm(hidden))
class PipelineGPT(nn.Module):
    """One pipeline stage of a GPT model.

    A stage always owns ``depth`` blocks. It additionally owns the embedding
    when ``first`` is true, and the final layer norm plus LM head when
    ``last`` is true. Unlike ``GPT``, the head here is built without an
    embedding weight.

    Args:
        vocab_size: Forwarded to the embedding and the head.
        max_position_embeddings: Forwarded to the embedding.
        dim: Feature size used throughout the stage.
        num_heads: Forwarded to every block.
        depth: Number of blocks held by this stage.
        mlp_ratio: Forwarded to every block.
        dropout: Forwarded to every block.
        embedding_dropout: Dropout rate of the embedding module.
        attention_dropout: Forwarded to every block.
        layernorm_epsilon: ``eps`` of the block norms and the final norm.
        activation: Forwarded to every block.
        padding_idx: Forwarded to the embedding.
        dtype: Forwarded to every sub-module.
        bias: Forwarded to every block.
        apply_post_layernorm: Forwarded to every block.
        fuse_scale_mask_softmax: Forwarded to every block.
        checkpoint: Forwarded to every block and stored on the stage.
        first: Whether this stage holds the embedding.
        last: Whether this stage holds the final norm and head.
    """

    def __init__(self,
                 vocab_size: int = 50304,
                 max_position_embeddings: int = 1024,
                 dim: int = 768,
                 num_heads: int = 12,
                 depth: int = 12,
                 mlp_ratio: float = 4.0,
                 dropout: float = 0.1,
                 embedding_dropout: float = 0.1,
                 attention_dropout: float = 0.1,
                 layernorm_epsilon: float = 1e-5,
                 activation: Callable = nn.functional.gelu,
                 padding_idx: int = None,
                 dtype: dtype = None,
                 bias: bool = True,
                 apply_post_layernorm: bool = False,
                 fuse_scale_mask_softmax: bool = False,
                 checkpoint: bool = False,
                 first: bool = False,
                 last: bool = False):
        super().__init__()
        self.checkpoint = checkpoint
        self.first = first
        self.last = last

        if first:
            self.embed = GPTEmbedding(
                embedding_dim=dim,
                vocab_size=vocab_size,
                max_position_embeddings=max_position_embeddings,
                padding_idx=padding_idx,
                dropout=embedding_dropout,
                dtype=dtype,
            )

        # Every block of the stage is built from the same configuration.
        block_config = dict(
            dim=dim,
            num_heads=num_heads,
            mlp_ratio=mlp_ratio,
            activation=activation,
            attention_dropout=attention_dropout,
            dropout=dropout,
            layernorm_epsilon=layernorm_epsilon,
            dtype=dtype,
            bias=bias,
            apply_post_layernorm=apply_post_layernorm,
            fuse_scale_mask_softmax=fuse_scale_mask_softmax,
            checkpoint=checkpoint,
        )
        self.blocks = nn.ModuleList([GPTBlock(**block_config) for _ in range(depth)])

        if self.last:
            self.norm = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
            self.head = GPTLMHead(dim=dim, vocab_size=vocab_size, dtype=dtype)

    def forward(self, x=None, input_ids=None, attention_mask=None):
        """Run this stage and return its output tensor.

        On the first stage ``x`` is ignored and recomputed from ``input_ids``;
        on the last stage the result passes through the final norm and head.
        """
        if self.first:
            x, attention_mask = self.embed(input_ids, attention_mask)

        for layer in self.blocks:
            x, attention_mask = layer(x, attention_mask)

        if not self.last:
            return x
        return self.head(self.norm(x))
def _create_gpt_model(**model_kwargs):
    """Instantiate ``GPT`` with the given keyword arguments and return it."""
    return GPT(**model_kwargs)
def _create_gpt_pipeline_model(depth=48, num_chunks=1, layer_partitions=None, **model_kwargs):
    """Build the ``PipelineGPT`` chunk(s) belonging to the current pipeline rank.

    Args:
        depth: Total number of layers of the full model.
        num_chunks: Forwarded to ``partition_uniform`` when no explicit
            partition is given.
        layer_partitions: Optional iterable of ``(start, end)`` layer ranges
            for this rank; when ``None`` the ranges come from
            ``partition_uniform(depth, pipeline_size, num_chunks)[pipeline_rank]``.
        **model_kwargs: Forwarded to ``PipelineGPT``; ``first``, ``last`` and
            ``depth`` are overwritten per chunk.

    Returns:
        The single chunk when exactly one was built, otherwise an
        ``nn.ModuleList`` of all chunks.
    """
    logger = get_dist_logger()
    pipeline_size = gpc.get_world_size(ParallelMode.PIPELINE)
    pipeline_rank = gpc.get_local_rank(ParallelMode.PIPELINE)
    rank = gpc.get_global_rank()
    wrapper = PipelineSharedModuleWrapper([0, pipeline_size - 1])

    if layer_partitions is None:
        parts = partition_uniform(depth, pipeline_size, num_chunks)[pipeline_rank]
    else:
        parts = layer_partitions

    chunks = []
    for start, end in parts:
        holds_embedding = start == 0
        holds_head = end == depth
        model_kwargs.update(first=holds_embedding, last=holds_head, depth=end - start)
        chunk = PipelineGPT(**model_kwargs).to(get_current_device())

        # Register the embedding weight on the first chunk, or the head weight
        # on the last one, with the shared-module wrapper. A chunk that is both
        # first and last registers only the embedding weight.
        if holds_embedding:
            wrapper.register_parameter(chunk.embed.word_embedding_weight)
        elif holds_head:
            wrapper.register_parameter(chunk.head.weight)

        chunks.append(chunk)
        logger.info(f'==> Rank {rank} built layer {start}-{end} / total {depth}')

    return chunks[0] if len(chunks) == 1 else nn.ModuleList(chunks)
@MODELS.register_module
def gpt2_small(**kwargs):
    """Build a ``GPT`` with dim=768, depth=12, num_heads=12; extra kwargs are forwarded."""
    return _create_gpt_model(dim=768, depth=12, num_heads=12, **kwargs)
@MODELS.register_module
def gpt2_medium(**kwargs):
    """Build a ``GPT`` with dim=1024, depth=24, num_heads=8; extra kwargs are forwarded."""
    return _create_gpt_model(dim=1024, depth=24, num_heads=8, **kwargs)
@MODELS.register_module
def gpt2_large(**kwargs):
    """Build a ``GPT`` with dim=1536, depth=36, num_heads=12; extra kwargs are forwarded."""
    return _create_gpt_model(dim=1536, depth=36, num_heads=12, **kwargs)
@MODELS.register_module
def gpt2_xl(**kwargs):
    """Build a ``GPT`` with dim=1600, depth=48, num_heads=16; extra kwargs are forwarded."""
    # NOTE(review): gpt2_xl_pipeline uses num_heads=20 for the same dim/depth; confirm which is intended.
    return _create_gpt_model(dim=1600, depth=48, num_heads=16, **kwargs)
@MODELS.register_module
def gpt2_8B(**kwargs):
    """Build a ``GPT`` with dim=3072, depth=72, num_heads=24; extra kwargs are forwarded."""
    return _create_gpt_model(dim=3072, depth=72, num_heads=24, **kwargs)
@MODELS.register_module
def gpt2_xl_pipeline(**kwargs):
    """Build the pipeline model with dim=1600, depth=48, num_heads=20; extra kwargs are forwarded."""
    # NOTE(review): the non-pipeline gpt2_xl uses num_heads=16 for the same dim/depth; confirm which is intended.
    return _create_gpt_pipeline_model(dim=1600, depth=48, num_heads=20, **kwargs)
@MODELS.register_module
def gpt2_8B_pipeline(**kwargs):
    """Build the pipeline model with dim=3072, depth=72, num_heads=24; extra kwargs are forwarded."""
    return _create_gpt_pipeline_model(dim=3072, depth=72, num_heads=24, **kwargs)
@MODELS.register_module
def gpt3(**kwargs):
    """Build a ``GPT`` with dim=12288, depth=96, num_heads=96; extra kwargs are forwarded."""
    return _create_gpt_model(dim=12288, depth=96, num_heads=96, **kwargs)
@MODELS.register_module
def gpt3_pipeline(**kwargs):
    """Build the pipeline model with dim=12288, depth=96, num_heads=96; extra kwargs are forwarded."""
    return _create_gpt_pipeline_model(dim=12288, depth=96, num_heads=96, **kwargs)
import torch
import torch.nn as nn
from colossalai.nn.layer import WrappedDropPath as DropPath
class TransformerLayer(nn.Module):
    """Generic pre-norm transformer layer assembled from caller-supplied parts.

    Args:
        att: Attention module applied to ``norm1(x)``.
        ffn: Feed-forward module applied to ``norm2(x)``.
        norm1: Normalization applied before ``att``.
        norm2: Normalization applied before ``ffn``.
        droppath: Module applied to each branch output before the residual
            add. When ``None``, ``DropPath(droppath_rate)`` is created.
        droppath_rate: Rate used only when ``droppath`` is ``None``.
    """

    def __init__(self,
                 att: nn.Module,
                 ffn: nn.Module,
                 norm1: nn.Module,
                 norm2: nn.Module,
                 droppath=None,
                 droppath_rate: float = 0):
        super().__init__()
        self.att = att
        self.ffn = ffn
        self.norm1 = norm1
        self.norm2 = norm2
        if droppath is None:
            droppath = DropPath(droppath_rate)
        self.droppath = droppath

    def forward(self, x):
        """Return ``x`` after the attention and feed-forward residual branches."""
        attended = self.att(self.norm1(x))
        x = x + self.droppath(attended)
        transformed = self.ffn(self.norm2(x))
        return x + self.droppath(transformed)
# modified from https://github.com/lucidrains/mlp-mixer-pytorch/blob/main/mlp_mixer_pytorch/mlp_mixer_pytorch.py
from functools import partial
from colossalai.context import ParallelMode
from colossalai.registry import MODELS
from torch import nn
from colossalai import nn as col_nn
from colossalai.nn.layer.parallel_3d._utils import get_depth_from_env
from einops.layers.torch import Rearrange, Reduce
__all__ = [
'MLPMixer',
]
class PreNormResidual(nn.Module):
    """Apply ``fn`` to a 3D-parallel layer-normed input and add the input back.

    Args:
        dim: First argument handed to ``col_nn.LayerNorm3D``.
        fn: Callable/module applied to the normalized input.
        depth_3d: Depth argument handed to ``col_nn.LayerNorm3D``.
    """

    def __init__(self, dim, fn, depth_3d):
        super().__init__()
        self.fn = fn
        self.norm = col_nn.LayerNorm3D(
            dim,
            depth_3d,
            ParallelMode.PARALLEL_3D_INPUT,
            ParallelMode.PARALLEL_3D_WEIGHT,
        )

    def forward(self, x):
        """Return ``fn(norm(x)) + x``."""
        normed = self.norm(x)
        return self.fn(normed) + x
def FeedForward(dim, depth_3d, expansion_factor=4, dropout=0., dense=None):
    """Build a two-layer feed-forward stack: dense -> GELU -> dropout -> dense -> dropout.

    Args:
        dim: Input and output size of the stack.
        depth_3d: Depth used for the default ``col_nn.Linear3D`` factory;
            ignored when ``dense`` is supplied.
        expansion_factor: Multiplier for the hidden width. May be fractional;
            the hidden width is ``int(dim * expansion_factor)``.
        dropout: Rate of both ``nn.Dropout`` layers.
        dense: Factory called as ``dense(in_size, out_size)`` to create each
            projection. Defaults to a 3D-parallel linear layer.

    Returns:
        An ``nn.Sequential`` of five modules.
    """
    if dense is None:
        dense = partial(col_nn.Linear3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
                        weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
    # Layer sizes must be integers; a fractional expansion factor would
    # otherwise hand a float to the layer factory.
    hidden_dim = int(dim * expansion_factor)
    return nn.Sequential(
        dense(dim, hidden_dim),
        nn.GELU(),
        nn.Dropout(dropout),
        dense(hidden_dim, dim),
        nn.Dropout(dropout),
    )
@MODELS.register_module
def MLPMixer(image_size, channels, patch_size, dim, depth, num_classes, expansion_factor=4, dropout=0.):
    """Build an MLP-Mixer as an ``nn.Sequential`` using 3D-parallel layers.

    The stack is: patch rearrangement -> linear patch projection -> ``depth``
    mixer blocks -> layer norm -> mean over the patch axis -> linear classifier.
    Each mixer block holds two ``PreNormResidual`` feed-forwards: one sized
    by the number of patches using a kernel-size-1 ``nn.Conv1d``, and one
    sized by ``dim`` using the 3D-parallel linear layer.

    Args:
        image_size: Side length of the (square) input image.
        channels: Number of input channels.
        patch_size: Side length of each patch; must divide ``image_size``.
        dim: Feature size after patch projection.
        depth: Number of mixer blocks.
        num_classes: Output size of the final linear layer.
        expansion_factor: Hidden-width multiplier of each feed-forward.
        dropout: Dropout rate of each feed-forward.

    Raises:
        AssertionError: If ``image_size`` is not divisible by ``patch_size``.
    """
    assert (image_size % patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size) ** 2
    depth_3d = get_depth_from_env()
    linear = partial(col_nn.Linear3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
                     weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
    norm_layer = partial(col_nn.LayerNorm3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
                         weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), linear

    def _mixer_block():
        # depth_3d is passed explicitly: FeedForward takes it as its second
        # parameter and PreNormResidual requires it as its third. Omitting it
        # shifted every later argument by one and made PreNormResidual raise.
        token_mixing = FeedForward(num_patches, depth_3d, expansion_factor=expansion_factor,
                                   dropout=dropout, dense=chan_first)
        channel_mixing = FeedForward(dim, depth_3d, expansion_factor=expansion_factor,
                                     dropout=dropout, dense=chan_last)
        return nn.Sequential(
            PreNormResidual(dim, token_mixing, depth_3d),
            PreNormResidual(dim, channel_mixing, depth_3d),
        )

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                  p1=patch_size, p2=patch_size),
        linear((patch_size ** 2) * channels, dim),
        *[_mixer_block() for _ in range(depth)],
        norm_layer(dim),
        Reduce('b n c -> b c', 'mean'),
        linear(dim, num_classes)
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment