Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
60821f53
"src/fastertransformer/vscode:/vscode.git/clone" did not exist on "fe46dac2c2ea1a988929fba05e9d3d3c9b11dfd7"
Unverified
Commit
60821f53
authored
Oct 16, 2021
by
Masaki Kozuki
Committed by
GitHub
Oct 15, 2021
Browse files
replace (#1191)
parent
1d5f7e55
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
18 additions
and
18 deletions
+18
-18
apex/contrib/csrc/multihead_attn/dropout.h
apex/contrib/csrc/multihead_attn/dropout.h
+4
-4
apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
+1
-1
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
+7
-7
apex/contrib/csrc/optimizers/multi_tensor_distopt_adam_kernel.cu
...ntrib/csrc/optimizers/multi_tensor_distopt_adam_kernel.cu
+1
-1
apex/contrib/csrc/transducer/transducer_joint_kernel.cu
apex/contrib/csrc/transducer/transducer_joint_kernel.cu
+1
-1
apex/contrib/csrc/transducer/transducer_loss_kernel.cu
apex/contrib/csrc/transducer/transducer_loss_kernel.cu
+2
-2
apex/contrib/csrc/xentropy/xentropy_kernel.cu
apex/contrib/csrc/xentropy/xentropy_kernel.cu
+2
-2
No files found.
apex/contrib/csrc/multihead_attn/dropout.h
View file @
60821f53
...
@@ -222,7 +222,7 @@ void apex_fused_dropout_cuda(scalar_t const *inputs,
...
@@ -222,7 +222,7 @@ void apex_fused_dropout_cuda(scalar_t const *inputs,
}
}
apex_fused_dropout_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
outputs
,
mask
,
totalElements
,
p
,
rng_engine_inputs
);
apex_fused_dropout_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
outputs
,
mask
,
totalElements
,
p
,
rng_engine_inputs
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
template
<
template
<
...
@@ -260,7 +260,7 @@ void apex_dropout_add_cuda(scalar_t const *inputs,
...
@@ -260,7 +260,7 @@ void apex_dropout_add_cuda(scalar_t const *inputs,
}
}
apex_dropout_add_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
add_inputs
,
outputs
,
mask
,
totalElements
,
p
,
rng_engine_inputs
);
apex_dropout_add_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
add_inputs
,
outputs
,
mask
,
totalElements
,
p
,
rng_engine_inputs
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
template
<
template
<
...
@@ -281,7 +281,7 @@ void apex_add_cuda(scalar_t const *inputs,
...
@@ -281,7 +281,7 @@ void apex_add_cuda(scalar_t const *inputs,
grid
.
x
=
std
::
min
((
unsigned
int
)
at
::
cuda
::
getCurrentDeviceProperties
()
->
multiProcessorCount
*
blocks_per_sm
,
grid
.
x
);
grid
.
x
=
std
::
min
((
unsigned
int
)
at
::
cuda
::
getCurrentDeviceProperties
()
->
multiProcessorCount
*
blocks_per_sm
,
grid
.
x
);
apex_add_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
add_inputs
,
outputs
,
totalElements
);
apex_add_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
add_inputs
,
outputs
,
totalElements
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
template
<
typename
scalar_t
,
template
<
typename
scalar_t
,
...
@@ -302,7 +302,7 @@ void apex_masked_scale_cuda(scalar_t const *inputs,
...
@@ -302,7 +302,7 @@ void apex_masked_scale_cuda(scalar_t const *inputs,
grid
.
x
=
std
::
min
((
unsigned
int
)
at
::
cuda
::
getCurrentDeviceProperties
()
->
multiProcessorCount
*
blocks_per_sm
,
grid
.
x
);
grid
.
x
=
std
::
min
((
unsigned
int
)
at
::
cuda
::
getCurrentDeviceProperties
()
->
multiProcessorCount
*
blocks_per_sm
,
grid
.
x
);
apex_masked_scale_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
outputs
,
mask
,
totalElements
,
scale
);
apex_masked_scale_kernel
<
scalar_t
,
accscalar_t
,
IndexType
><<<
grid
,
dim_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
inputs
,
outputs
,
mask
,
totalElements
,
scale
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
View file @
60821f53
...
@@ -133,7 +133,7 @@ void CutlassGemm_FP32Accum(cudaStream_t stream, long m, long n, long k,
...
@@ -133,7 +133,7 @@ void CutlassGemm_FP32Accum(cudaStream_t stream, long m, long n, long k,
AT_ASSERTM
(
result
==
0
,
"Failed to initialize CUTLASS Gemm::Params object."
);
AT_ASSERTM
(
result
==
0
,
"Failed to initialize CUTLASS Gemm::Params object."
);
// Launch the CUTLASS GEMM kernel.
// Launch the CUTLASS GEMM kernel.
THCudaCheck
(
Gemm
::
launch
(
params
,
stream
));
C10_CUDA_CHECK
(
Gemm
::
launch
(
params
,
stream
));
// Update batched GEMM params based on completed work
// Update batched GEMM params based on completed work
batchesLeft
=
batchesLeft
-
iterBatchCount
;
batchesLeft
=
batchesLeft
-
iterBatchCount
;
...
...
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
View file @
60821f53
...
@@ -275,7 +275,7 @@ void fused_adam_cuda(
...
@@ -275,7 +275,7 @@ void fused_adam_cuda(
decay
);
decay
);
);
);
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
...
@@ -382,7 +382,7 @@ void fused_adam_cuda_mt(
...
@@ -382,7 +382,7 @@ void fused_adam_cuda_mt(
);
);
}
}
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
template
<
typename
FROM_T
,
typename
TO_T
>
template
<
typename
FROM_T
,
typename
TO_T
>
...
@@ -807,7 +807,7 @@ void fused_strided_check_finite(
...
@@ -807,7 +807,7 @@ void fused_strided_check_finite(
stride
,
stride
,
clear_overflow_first
);
clear_overflow_first
);
);
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
void
fused_reversible_adam_cuda
(
void
fused_reversible_adam_cuda
(
...
@@ -908,7 +908,7 @@ void fused_reversible_adam_cuda(
...
@@ -908,7 +908,7 @@ void fused_reversible_adam_cuda(
decay
);
decay
);
);
);
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
void
maybe_cast_cuda
(
void
maybe_cast_cuda
(
...
@@ -932,7 +932,7 @@ void maybe_cast_cuda(
...
@@ -932,7 +932,7 @@ void maybe_cast_cuda(
p_in
.
DATA_PTR
<
scalar_t_0
>
(),
p_in
.
DATA_PTR
<
scalar_t_0
>
(),
p_out
.
DATA_PTR
<
scalar_t_1
>
(),
p_out
.
DATA_PTR
<
scalar_t_1
>
(),
tsize
);
))
tsize
);
))
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
void
maybe_cast_cuda_mt
(
void
maybe_cast_cuda_mt
(
...
@@ -954,7 +954,7 @@ void maybe_cast_cuda_mt(
...
@@ -954,7 +954,7 @@ void maybe_cast_cuda_mt(
overflow_flag
,
overflow_flag
,
tensor_lists
,
tensor_lists
,
MaybeCastFunctor
<
2
,
scalar_t_0
,
scalar_t_1
>
());
))
MaybeCastFunctor
<
2
,
scalar_t_0
,
scalar_t_1
>
());
))
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
void
fused_maybe_adam_undo_cuda
(
void
fused_maybe_adam_undo_cuda
(
...
@@ -1032,5 +1032,5 @@ void fused_maybe_adam_undo_cuda(
...
@@ -1032,5 +1032,5 @@ void fused_maybe_adam_undo_cuda(
decay
);
decay
);
);
);
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
apex/contrib/csrc/optimizers/multi_tensor_distopt_adam_kernel.cu
View file @
60821f53
...
@@ -225,5 +225,5 @@ void multi_tensor_fused_adam_cuda(
...
@@ -225,5 +225,5 @@ void multi_tensor_fused_adam_cuda(
(
adamMode_t
)
mode
);
(
adamMode_t
)
mode
);
);
);
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
}
}
apex/contrib/csrc/transducer/transducer_joint_kernel.cu
View file @
60821f53
...
@@ -822,7 +822,7 @@ std::vector<torch::Tensor> transducer_joint_cuda_forward(
...
@@ -822,7 +822,7 @@ std::vector<torch::Tensor> transducer_joint_cuda_forward(
}));
}));
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
if
(
masked
)
if
(
masked
)
return
{
sum
,
mask
};
return
{
sum
,
mask
};
else
else
...
...
apex/contrib/csrc/transducer/transducer_loss_kernel.cu
View file @
60821f53
...
@@ -639,7 +639,7 @@ std::vector<torch::Tensor> transducer_loss_cuda_forward(
...
@@ -639,7 +639,7 @@ std::vector<torch::Tensor> transducer_loss_cuda_forward(
loss
.
data_ptr
<
scalar_t
>
());
loss
.
data_ptr
<
scalar_t
>
());
}));
}));
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
return
{
alpha
,
beta
,
loss
};
return
{
alpha
,
beta
,
loss
};
}
}
...
@@ -760,7 +760,7 @@ torch::Tensor transducer_loss_cuda_backward(
...
@@ -760,7 +760,7 @@ torch::Tensor transducer_loss_cuda_backward(
xGrad
.
data_ptr
<
scalar_t
>
());
xGrad
.
data_ptr
<
scalar_t
>
());
}));
}));
}
}
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
return
xGrad
;
return
xGrad
;
}
}
apex/contrib/csrc/xentropy/xentropy_kernel.cu
View file @
60821f53
...
@@ -629,7 +629,7 @@ std::vector<Tensor> host_softmax_xentropy(
...
@@ -629,7 +629,7 @@ std::vector<Tensor> host_softmax_xentropy(
}
}
);
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
std
::
vector
<
at
::
Tensor
>
ret
=
{
losses
,
max_log_sum_exp
};
std
::
vector
<
at
::
Tensor
>
ret
=
{
losses
,
max_log_sum_exp
};
return
ret
;
return
ret
;
...
@@ -699,7 +699,7 @@ Tensor host_softmax_xentropy_backward(
...
@@ -699,7 +699,7 @@ Tensor host_softmax_xentropy_backward(
}
}
);
);
THCudaCheck
(
cudaGetLastError
());
C10_CUDA_CHECK
(
cudaGetLastError
());
return
gI
;
return
gI
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment