OpenDAS / ColossalAI / Commits

Unverified commit f68eddfb, authored Jan 13, 2022 by ver217, committed by GitHub on Jan 13, 2022.
refactor kernel (#142)
parent 4a3d3446

Changes: 24 files in this commit; this page shows 20 changed files, with 255 additions and 164 deletions (+255 / -164).
MANIFEST.in (+1, -2)
colossalai/kernel/__init__.py (+0, -3)
colossalai/kernel/cuda_native/__init__.py (+1, -15)
colossalai/kernel/cuda_native/builder.py (+0, -114)
colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp (+0, -0)
colossalai/kernel/cuda_native/csrc/compat.h (+1, -4)
colossalai/kernel/cuda_native/csrc/multi_tensor_adam.cu (+0, -0)
colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh (+0, -0)
colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu (+0, -0)
colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu (+0, -0)
colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu (+0, -0)
colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu (+0, -0)
colossalai/kernel/cuda_native/csrc/type_shim.h (+199, -0)
colossalai/kernel/cuda_native/layer_norm.py (+9, -5)
colossalai/kernel/cuda_native/multihead_attention.py (+15, -11)
colossalai/kernel/cuda_native/scaled_softmax.py (+20, -5)
colossalai/kernel/jit/__init__.py (+6, -1)
colossalai/kernel/jit/option.py (+1, -0)
colossalai/nn/optimizer/fused_adam.py (+1, -2)
colossalai/nn/optimizer/fused_lamb.py (+1, -2)
MANIFEST.in

 include *.txt README.md
 recursive-include requirements *.txt
-recursive-include csrc *.cpp *.h *.cu *.tr *.cuh *.cc
\ No newline at end of file
+recursive-include colossalai *.cpp *.h *.cu *.tr *.cuh *.cc
\ No newline at end of file
colossalai/kernel/__init__.py

from .jit.bias_dropout_add import bias_dropout_add_fused_train, bias_dropout_add_fused_inference
from .jit.bias_gelu import bias_gelu_impl
from .cuda_native import LayerNorm, FusedScaleMaskSoftmax, MultiHeadAttention

__all__ = [
    "bias_dropout_add_fused_train", "bias_dropout_add_fused_inference", "bias_gelu_impl",
    "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"
]
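For orientation, a minimal usage sketch of the kernels re-exported above (not part of this commit; the tensor shapes, dtype handling, and the LayerNorm constructor signature are assumptions):

import torch
from colossalai.kernel import LayerNorm  # MixedFusedLayerNorm, re-exported above

# Hypothetical example: fused layer norm over the last dimension of a half-precision
# CUDA tensor; requires the colossal_layer_norm_cuda extension to be available.
x = torch.randn(8, 512, 1024, device='cuda', dtype=torch.half)
norm = LayerNorm(1024).cuda().half()
y = norm(x)  # same shape as x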
colossalai/kernel/cuda_native/__init__.py

-from .builder import _build_cuda_native_kernel
-
-CUDA_NATIVE_KERNEL_BUILD = False
-
-
-def build_cuda_native_kernel():
-    global CUDA_NATIVE_KERNEL_BUILD
-    if CUDA_NATIVE_KERNEL_BUILD == False:
-        _build_cuda_native_kernel()
-        CUDA_NATIVE_KERNEL_BUILD = True
-
-
-build_cuda_native_kernel()
-
 from .layer_norm import MixedFusedLayerNorm as LayerNorm
 from .scaled_softmax import FusedScaleMaskSoftmax
 from .multihead_attention import MultiHeadAttention
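With the build-on-import machinery removed, the compiled extensions are expected to exist as importable modules before these wrappers are used. A small diagnostic sketch (an assumption, not part of the commit) that reports which of them are present:

import importlib.util

# Hypothetical helper: check which compiled Colossal-AI extensions are importable.
# After this commit they are built ahead of time rather than compiled on import.
for ext in ("colossal_layer_norm_cuda",
            "colossal_scaled_masked_softmax",
            "colossal_scaled_upper_triang_masked_softmax",
            "colossal_multihead_attention",
            "colossal_C"):
    status = "available" if importlib.util.find_spec(ext) is not None else "missing"
    print(f"{ext}: {status}")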
colossalai/kernel/cuda_native/builder.py
deleted 100644 → 0

import os
import pathlib
import subprocess

from torch.utils import cpp_extension

# Setting this param to a list has a problem of generating different
# compilation commands (with different order of architectures) and
# leading to recompilation of fused kernels. Set it to empty string
# to avoid recompilation and assign arch flags explicitly in
# extra_cuda_cflags below
os.environ["TORCH_CUDA_ARCH_LIST"] = ""


def _build_cuda_native_kernel():

    # Check if cuda 11 is installed for compute capability 8.0
    cc_flag = []
    _, bare_metal_major, _ = _get_cuda_bare_metal_version(cpp_extension.CUDA_HOME)
    if int(bare_metal_major) >= 11:
        cc_flag.append('-gencode')
        cc_flag.append('arch=compute_80,code=sm_80')

    # Build path
    basepath = pathlib.Path(__file__).parent.absolute()
    srcpath = basepath / 'csrc'
    buildpath = basepath / 'build'
    _create_build_dir(buildpath)

    # Helper function to build the kernels.
    def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
        return cpp_extension.load(
            name=name,
            sources=sources,
            build_directory=buildpath,
            extra_cflags=['-O3', ],
            extra_include_paths=[str(srcpath / 'kernels' / 'include')],
            extra_cuda_cflags=['-O3', '-gencode', 'arch=compute_70,code=sm_70', '--use_fast_math'] +
            extra_cuda_flags + cc_flag,
            verbose=False)

    # ==============
    # Fused softmax.
    # ==============

    extra_cuda_flags = ['-U__CUDA_NO_HALF_OPERATORS__',
                        '-U__CUDA_NO_HALF_CONVERSIONS__',
                        '--expt-relaxed-constexpr',
                        '--expt-extended-lambda']

    # Upper triangular softmax.
    sources = [srcpath / 'scaled_upper_triang_masked_softmax.cpp',
               srcpath / 'scaled_upper_triang_masked_softmax_cuda.cu']
    colossal_scaled_upper_triang_masked_softmax = _cpp_extention_load_helper(
        "colossal_scaled_upper_triang_masked_softmax", sources, extra_cuda_flags)

    # Masked softmax.
    sources = [srcpath / 'scaled_masked_softmax.cpp',
               srcpath / 'scaled_masked_softmax_cuda.cu']
    colossal_scaled_masked_softmax = _cpp_extention_load_helper(
        "colossal_scaled_masked_softmax", sources, extra_cuda_flags)

    # =================================
    # Mixed precision fused layer norm.
    # =================================

    extra_cuda_flags = ['-maxrregcount=50']
    sources = [srcpath / 'layer_norm_cuda.cpp',
               srcpath / 'layer_norm_cuda_kernel.cu']
    colossal_layer_norm_cuda = _cpp_extention_load_helper(
        "colossal_layer_norm_cuda", sources, extra_cuda_flags)

    # ==========================================
    # Mixed precision Transformer Encoder Layer.
    # ==========================================

    extra_cuda_flags = ['-std=c++14',
                        '-U__CUDA_NO_HALF_OPERATORS__',
                        '-U__CUDA_NO_HALF_CONVERSIONS__',
                        '-U__CUDA_NO_HALF2_OPERATORS__',
                        '-DTHRUST_IGNORE_CUB_VERSION_CHECK']

    sources = [srcpath / 'multihead_attention_1d.cpp']
    kernel_sources = ["cublas_wrappers.cu",
                      "transform_kernels.cu",
                      "dropout_kernels.cu",
                      "normalize_kernels.cu",
                      "softmax_kernels.cu",
                      "general_kernels.cu",
                      "cuda_util.cu"]
    sources += [(srcpath / 'kernels' / cu_file) for cu_file in kernel_sources]
    colossal_multihead_attention = _cpp_extention_load_helper(
        "colossal_multihead_attention", sources, extra_cuda_flags)


def _get_cuda_bare_metal_version(cuda_dir):
    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
    output = raw_output.split()
    release_idx = output.index("release") + 1
    release = output[release_idx].split(".")
    bare_metal_major = release[0]
    bare_metal_minor = release[1][0]

    return raw_output, bare_metal_major, bare_metal_minor


def _create_build_dir(buildpath):
    try:
        os.mkdir(buildpath)
    except OSError:
        if not os.path.isdir(buildpath):
            print(f"Creation of the build directory {buildpath} failed")
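The deleted builder compiled each extension on first import with torch.utils.cpp_extension.load(). For contrast, a minimal sketch of the ahead-of-time route this refactor moves toward, building one of the same extensions with setuptools; this setup.py is illustrative only and not the repository's actual build script:

# setup.py (illustrative sketch only; names and paths follow the layout above)
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='colossalai-kernels-example',
    ext_modules=[
        CUDAExtension(
            name='colossal_layer_norm_cuda',
            sources=['colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp',
                     'colossalai/kernel/cuda_native/csrc/layer_norm_cuda_kernel.cu'],
            extra_compile_args={'cxx': ['-O3'],
                                'nvcc': ['-O3', '-maxrregcount=50', '--use_fast_math']},
        ),
    ],
    cmdclass={'build_ext': BuildExtension},
)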
csrc/colossal_C_frontend.cpp → colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp (file moved)
colossalai/kernel/cuda_native/csrc/compat.h

-/*This code from NVIDIA apex:
- * https://github.com/NVIDIA/apex
- * with minor changes. */
+// modified from https://github.com/NVIDIA/apex/blob/master/csrc/compat.h
 #ifndef TORCH_CHECK
 #define TORCH_CHECK AT_CHECK
 #endif
...
csrc/multi_tensor_adam.cu → colossalai/kernel/cuda_native/csrc/multi_tensor_adam.cu (file moved)
csrc/multi_tensor_apply.cuh → colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh (file moved)
csrc/multi_tensor_l2norm_kernel.cu → colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu (file moved)
csrc/multi_tensor_lamb.cu → colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu (file moved)
csrc/multi_tensor_scale_kernel.cu → colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu (file moved)
csrc/multi_tensor_sgd_kernel.cu → colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu (file moved)
colossalai/kernel/cuda_native/csrc/type_shim.h

...
@@ -71,3 +71,202 @@
default: \
AT_ERROR(#NAME, " not implemented for '", toString(TYPEIN), "'"); \
}
// Forward/backward compatibility hack around
// https://github.com/pytorch/pytorch/commit/3aeb78079bcd68282fe9117088e138b77318e288
// pending more future-proof guidance from upstream.
// struct TypeShim
// {
// const at::Type& payload;
// TypeShim(const at::Type& type) : payload(type) {}
// // Enable trivial conversion to a const at::Type& for pre-3aeb78
// operator const at::Type&(){ return payload; };
// // Enable dispatch switch statements to take *this directly for post-3aeb78
// //operator at::ScalarType(){ return payload.; };
// };
#define DISPATCH_FLOAT_AND_HALF(TYPE, LEVEL, NAME, ...) \
switch (TYPE) \
{ \
case at::ScalarType::Float: \
{ \
using scalar_t_##LEVEL = float; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Half: \
{ \
using scalar_t_##LEVEL = at::Half; \
__VA_ARGS__; \
break; \
} \
default: \
AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'"); \
}
#define DISPATCH_FLOAT_HALF_AND_BYTE(TYPE, LEVEL, NAME, ...) \
switch (TYPE) \
{ \
case at::ScalarType::Float: \
{ \
using scalar_t_##LEVEL = float; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Half: \
{ \
using scalar_t_##LEVEL = at::Half; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Byte: \
{ \
using scalar_t_##LEVEL = uint8_t; \
__VA_ARGS__; \
break; \
} \
default: \
AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'"); \
}
#define DISPATCH_DOUBLE_FLOAT_AND_HALF(TYPE, LEVEL, NAME, ...) \
switch (TYPE) \
{ \
case at::ScalarType::Double: \
{ \
using scalar_t_##LEVEL = double; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Float: \
{ \
using scalar_t_##LEVEL = float; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Half: \
{ \
using scalar_t_##LEVEL = at::Half; \
__VA_ARGS__; \
break; \
} \
default: \
AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'"); \
}
#define DISPATCH_DOUBLE_AND_FLOAT(TYPE, LEVEL, NAME, ...) \
switch (TYPE) \
{ \
case at::ScalarType::Double: \
{ \
using scalar_t_##LEVEL = double; \
__VA_ARGS__; \
break; \
} \
case at::ScalarType::Float: \
{ \
using scalar_t_##LEVEL = float; \
__VA_ARGS__; \
break; \
} \
default: \
AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'"); \
}
template <typename T>
__device__ __forceinline__ T reduce_block_into_lanes(
    T *x, T val, int lanes = 1,
    bool share_result = false)  // lanes is intended to be <= 32.
{
  int tid = threadIdx.x + threadIdx.y * blockDim.x;
  int blockSize = blockDim.x * blockDim.y;  // blockSize is intended to be a multiple of 32.

  if (blockSize >= 64)
  {
    x[tid] = val;
    __syncthreads();
  }

#pragma unroll
  for (int i = (blockSize >> 1); i >= 64; i >>= 1)
  {
    if (tid < i)
      x[tid] = x[tid] + x[tid + i];
    __syncthreads();
  }

  T final;

  if (tid < 32)
  {
    if (blockSize >= 64)
      final = x[tid] + x[tid + 32];
    else
      final = val;
    // __SYNCWARP();

#pragma unroll
    for (int i = 16; i >= lanes; i >>= 1)
      final = final + __shfl_down_sync(0xffffffff, final, i);
  }

  if (share_result)
  {
    if (tid < lanes)
      x[tid] = final;  // EpilogueOp
    // Make sure the smem result is visible to all warps.
    __syncthreads();
  }

  return final;
}

template <typename T>
__device__ __forceinline__ T reduce_block_into_lanes_max_op(
    T *x, T val, int lanes = 1,
    bool share_result = false)  // lanes is intended to be <= 32.
{
  int tid = threadIdx.x + threadIdx.y * blockDim.x;
  int blockSize = blockDim.x * blockDim.y;  // blockSize is intended to be a multiple of 32.

  if (blockSize >= 64)
  {
    x[tid] = val;
    __syncthreads();
  }

#pragma unroll
  for (int i = (blockSize >> 1); i >= 64; i >>= 1)
  {
    if (tid < i)
      x[tid] = fmaxf(fabsf(x[tid]), fabsf(x[tid + i]));
    __syncthreads();
  }

  T final;

  if (tid < 32)
  {
    if (blockSize >= 64)
      final = fmaxf(fabsf(x[tid]), fabsf(x[tid + 32]));
    else
      final = val;
    // __SYNCWARP();

#pragma unroll
    for (int i = 16; i >= lanes; i >>= 1)
      final = fmaxf(fabsf(final), fabsf(__shfl_down_sync(0xffffffff, final, i)));
  }

  if (share_result)
  {
    if (tid < lanes)
      x[tid] = final;  // EpilogueOp
    // Make sure the smem result is visible to all warps.
    __syncthreads();
  }

  return final;
}
\ No newline at end of file
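For intuition only (not part of the header), a host-side Python model of the reduction pattern in reduce_block_into_lanes: a shared-memory tree reduction down to two warps, a fold into one warp, then a shuffle-style reduction until `lanes` partial values remain. The sketch checks the sum variant against a plain Python sum:

# Host-side sketch of the reduction above (sum variant), for intuition only.
def reduce_block_into_lanes_model(vals, lanes=1):
    # vals: one value per thread; len(vals) should be a multiple of 32, lanes <= 32.
    x = list(vals)
    n = len(x)
    i = n >> 1
    while i >= 64:                      # shared-memory tree reduction
        for tid in range(i):
            x[tid] += x[tid + i]
        i >>= 1
    if n >= 64:                         # fold the last two warps into one
        warp = [x[t] + x[t + 32] for t in range(32)]
    else:
        warp = x[:32]
    i = 16
    while i >= lanes:                   # warp-level shuffle-style reduction
        for t in range(32 - i):
            warp[t] += warp[t + i]
        i >>= 1
    return warp[:lanes]

vals = list(range(128))
assert sum(reduce_block_into_lanes_model(vals, lanes=1)) == sum(vals)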
colossalai/kernel/cuda_native/layer_norm.py

...

@@ -48,7 +48,11 @@ class MixedFusedLayerNorm(torch.nn.Module):
        super(MixedFusedLayerNorm, self).__init__()

        global colossal_layer_norm_cuda
        if colossal_layer_norm_cuda is None:
            try:
                colossal_layer_norm_cuda = importlib.import_module("colossal_layer_norm_cuda")
            except ImportError:
                raise RuntimeError('MixedFusedLayerNorm requires cuda extensions')

        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
...
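layer_norm.py, multihead_attention.py, and scaled_softmax.py now all follow the same lazy-loading pattern: the compiled module is imported only when first needed, and a clear RuntimeError is raised if it was never built. A hypothetical helper (not in the diff) condensing that pattern:

import importlib

# Illustrative only: defer importing a compiled extension until it is actually needed,
# and fail with a descriptive message if it was never built.
def _load_extension(module_name, feature_name):
    try:
        return importlib.import_module(module_name)
    except ImportError:
        raise RuntimeError(f'{feature_name} requires cuda extensions')

# e.g. colossal_layer_norm_cuda = _load_extension("colossal_layer_norm_cuda", "MixedFusedLayerNorm")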
colossalai/kernel/cuda_native/multihead_attention.py

...

@@ -34,6 +34,7 @@ def calc_offset(sizes):
colossal_multihead_attention = None


@dataclass
class Config:
    max_batch_tokens: int  # max batch token numbers
...

@@ -94,7 +95,7 @@ class MultiHeadAttention1DFunc(Function):
            input_mask = input_mask.to(torch.half)

        grad_input, grad_in_proj_weight, grad_in_proj_bias, grad_out_proj_weight, \
            grad_out_proj_bias, grad_norm_weight, grad_norm_bias = backward_func(
-               ctx.config.layer_id, grad_output, output, input, input_mask, in_proj_weight, \
+               ctx.config.layer_id, grad_output, output, input, input_mask, in_proj_weight,
                in_proj_bias, out_proj_weight, out_proj_bias, norm_weight, norm_bias)

        return (grad_input, None, grad_in_proj_weight, grad_in_proj_bias, grad_out_proj_weight,
...

@@ -142,7 +143,10 @@ class MultiHeadAttention(nn.Module):
        # Load cuda modules if needed
        global colossal_multihead_attention
        if colossal_multihead_attention is None:
            try:
                colossal_multihead_attention = importlib.import_module("colossal_multihead_attention")
            except ImportError:
                raise RuntimeError('MultiHeadAttention requires cuda extensions')

        # create the layer in cuda kernels.
        cuda_module = colossal_multihead_attention
...
colossalai/kernel/cuda_native/scaled_softmax.py

...

@@ -21,7 +21,10 @@ class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
    @staticmethod
    def forward(ctx, inputs, scale):
        try:
            import colossal_scaled_upper_triang_masked_softmax
        except ImportError:
            raise RuntimeError('ScaledUpperTriangMaskedSoftmax requires cuda extensions')

        scale_t = torch.tensor([scale])
        softmax_results = colossal_scaled_upper_triang_masked_softmax.forward(
...

@@ -33,7 +36,10 @@ class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
    @staticmethod
    def backward(ctx, output_grads):
        try:
            import colossal_scaled_upper_triang_masked_softmax
        except ImportError:
            raise RuntimeError('ScaledUpperTriangMaskedSoftmax requires cuda extensions')

        softmax_results, scale_t = ctx.saved_tensors
        input_grads = colossal_scaled_upper_triang_masked_softmax.backward(
...

@@ -53,7 +59,10 @@ class ScaledMaskedSoftmax(torch.autograd.Function):
    @staticmethod
    def forward(ctx, inputs, mask, scale):
        try:
            import colossal_scaled_masked_softmax
        except ImportError:
            raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')

        scale_t = torch.tensor([scale])
...

@@ -63,7 +72,10 @@ class ScaledMaskedSoftmax(torch.autograd.Function):
    @staticmethod
    def backward(ctx, output_grads):
        try:
            import colossal_scaled_masked_softmax
        except ImportError:
            raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')

        softmax_results, scale_t = ctx.saved_tensors
...

@@ -179,6 +191,9 @@ class FusedScaleMaskSoftmax(nn.Module):
    @staticmethod
    def get_batch_per_block(sq, sk, b, np):
        try:
            import colossal_scaled_masked_softmax
        except ImportError:
            raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')

        return colossal_scaled_masked_softmax.get_batch_per_block(sq, sk, b, np)
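The fused softmax classes wrap a compiled forward/backward pair in torch.autograd.Function. A self-contained sketch of that wrapping pattern, with a plain PyTorch softmax standing in for the colossal_scaled_masked_softmax extension (illustrative only; the real kernels also fuse masking into the same pass):

import torch

# Illustrative stand-in for the compiled extension: scale, softmax, and the matching backward.
class ScaledSoftmaxSketch(torch.autograd.Function):

    @staticmethod
    def forward(ctx, inputs, scale):
        out = torch.softmax(inputs * scale, dim=-1)
        ctx.save_for_backward(out, torch.tensor([scale]))
        return out

    @staticmethod
    def backward(ctx, output_grads):
        out, scale_t = ctx.saved_tensors
        # softmax backward: y * (dy - sum(dy * y)), then the chain rule for the scale factor.
        grad = (output_grads - (output_grads * out).sum(dim=-1, keepdim=True)) * out
        return grad * scale_t.item(), None

# usage: y = ScaledSoftmaxSketch.apply(x, 2.0)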
colossalai/kernel/jit/__init__.py

from .option import _set_jit_fusion_options
from .bias_dropout_add import bias_dropout_add_fused_train, bias_dropout_add_fused_inference
from .bias_gelu import bias_gelu_impl

_set_jit_fusion_options()

__all__ = [
    "bias_dropout_add_fused_train", "bias_dropout_add_fused_inference", "bias_gelu_impl",
]
colossalai/kernel/jit/option.py

...

@@ -2,6 +2,7 @@ import torch
JIT_OPTIONS_SET = False


def _set_jit_fusion_options():
    """Set PyTorch JIT layer fusion options."""
    global JIT_OPTIONS_SET
...
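The body of _set_jit_fusion_options is not shown in this hunk. For orientation only, the kind of switches such a function typically flips (following the Megatron-LM convention; this is an assumption about typical contents, not the actual file):

import torch

JIT_OPTIONS_SET = False

def _set_jit_fusion_options_sketch():
    """Illustrative sketch only: typical PyTorch JIT fusion switches, set at most once."""
    global JIT_OPTIONS_SET
    if not JIT_OPTIONS_SET:
        # Route fusion decisions through the profiling executor and disable the
        # legacy/TensorExpr fusers (the usual Megatron-LM style configuration).
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(True)
        torch._C._jit_override_can_fuse_on_cpu(False)
        torch._C._jit_override_can_fuse_on_gpu(False)
        torch._C._jit_set_texpr_fuser_enabled(False)
        JIT_OPTIONS_SET = True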
colossalai/nn/optimizer/fused_adam.py

...

@@ -65,8 +65,7 @@ class FusedAdam(torch.optim.Optimizer):
            self._dummy_overflow_buf = torch.cuda.IntTensor([0])
            self.multi_tensor_adam = colossal_C.multi_tensor_adam
        else:
-            raise RuntimeError('apex.optimizers.FusedAdam requires cuda extensions')
+            raise RuntimeError('FusedAdam requires cuda extensions')

    def zero_grad(self):
        if self.set_grad_none:
...
colossalai/nn/optimizer/fused_lamb.py

...

@@ -73,8 +73,7 @@ class FusedLAMB(torch.optim.Optimizer):
                                                    [0],
                                                    dtype=torch.int,
                                                    device=self.param_groups[0]["params"][0].device)
            self.multi_tensor_lamb = colossal_C.multi_tensor_lamb
        else:
-            raise RuntimeError('apex.optimizers.FusedLAMB requires cuda extensions')
+            raise RuntimeError('FusedLAMB requires cuda extensions')

        self.adam_w_mode = 1 if adam_w_mode else 0
        self.set_grad_none = set_grad_none
...
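Both optimizers drive the fused multi-tensor kernels exposed by the colossal_C extension, whose sources this commit moves under colossalai/kernel/cuda_native/csrc, and they raise the RuntimeError shown above when that extension is missing. A hypothetical usage sketch (arguments other than lr and weight_decay, and the re-export path, are assumptions):

import torch
from colossalai.nn.optimizer.fused_adam import FusedAdam

# Illustrative only: a single training step with the fused Adam optimizer.
model = torch.nn.Linear(1024, 1024).cuda()
optimizer = FusedAdam(model.parameters(), lr=1e-3, weight_decay=0.01)

loss = model(torch.randn(16, 1024, device='cuda')).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()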