"src/fastertransformer/vscode:/vscode.git/clone" did not exist on "fe46dac2c2ea1a988929fba05e9d3d3c9b11dfd7"
Unverified Commit 60821f53 authored by Masaki Kozuki's avatar Masaki Kozuki Committed by GitHub
Browse files

replace (#1191)

parent 1d5f7e55
...@@ -222,7 +222,7 @@ void apex_fused_dropout_cuda(scalar_t const *inputs, ...@@ -222,7 +222,7 @@ void apex_fused_dropout_cuda(scalar_t const *inputs,
} }
apex_fused_dropout_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, p, rng_engine_inputs); apex_fused_dropout_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, p, rng_engine_inputs);
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
template < template <
...@@ -260,7 +260,7 @@ void apex_dropout_add_cuda(scalar_t const *inputs, ...@@ -260,7 +260,7 @@ void apex_dropout_add_cuda(scalar_t const *inputs,
} }
apex_dropout_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, mask, totalElements, p, rng_engine_inputs); apex_dropout_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, mask, totalElements, p, rng_engine_inputs);
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
template < template <
...@@ -281,7 +281,7 @@ void apex_add_cuda(scalar_t const *inputs, ...@@ -281,7 +281,7 @@ void apex_add_cuda(scalar_t const *inputs,
grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x); grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
apex_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, totalElements); apex_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, totalElements);
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
template<typename scalar_t, template<typename scalar_t,
...@@ -302,7 +302,7 @@ void apex_masked_scale_cuda(scalar_t const *inputs, ...@@ -302,7 +302,7 @@ void apex_masked_scale_cuda(scalar_t const *inputs,
grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x); grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
apex_masked_scale_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, scale); apex_masked_scale_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, scale);
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
...@@ -133,7 +133,7 @@ void CutlassGemm_FP32Accum(cudaStream_t stream, long m, long n, long k, ...@@ -133,7 +133,7 @@ void CutlassGemm_FP32Accum(cudaStream_t stream, long m, long n, long k,
AT_ASSERTM(result == 0, "Failed to initialize CUTLASS Gemm::Params object."); AT_ASSERTM(result == 0, "Failed to initialize CUTLASS Gemm::Params object.");
// Launch the CUTLASS GEMM kernel. // Launch the CUTLASS GEMM kernel.
THCudaCheck(Gemm::launch(params, stream)); C10_CUDA_CHECK(Gemm::launch(params, stream));
// Update batched GEMM params based on completed work // Update batched GEMM params based on completed work
batchesLeft = batchesLeft - iterBatchCount; batchesLeft = batchesLeft - iterBatchCount;
......
...@@ -275,7 +275,7 @@ void fused_adam_cuda( ...@@ -275,7 +275,7 @@ void fused_adam_cuda(
decay); decay);
); );
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
...@@ -382,7 +382,7 @@ void fused_adam_cuda_mt( ...@@ -382,7 +382,7 @@ void fused_adam_cuda_mt(
); );
} }
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
template <typename FROM_T, typename TO_T> template <typename FROM_T, typename TO_T>
...@@ -807,7 +807,7 @@ void fused_strided_check_finite( ...@@ -807,7 +807,7 @@ void fused_strided_check_finite(
stride, stride,
clear_overflow_first); clear_overflow_first);
); );
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
void fused_reversible_adam_cuda( void fused_reversible_adam_cuda(
...@@ -908,7 +908,7 @@ void fused_reversible_adam_cuda( ...@@ -908,7 +908,7 @@ void fused_reversible_adam_cuda(
decay); decay);
); );
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
void maybe_cast_cuda( void maybe_cast_cuda(
...@@ -932,7 +932,7 @@ void maybe_cast_cuda( ...@@ -932,7 +932,7 @@ void maybe_cast_cuda(
p_in.DATA_PTR<scalar_t_0>(), p_in.DATA_PTR<scalar_t_0>(),
p_out.DATA_PTR<scalar_t_1>(), p_out.DATA_PTR<scalar_t_1>(),
tsize); )) tsize); ))
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
void maybe_cast_cuda_mt( void maybe_cast_cuda_mt(
...@@ -954,7 +954,7 @@ void maybe_cast_cuda_mt( ...@@ -954,7 +954,7 @@ void maybe_cast_cuda_mt(
overflow_flag, overflow_flag,
tensor_lists, tensor_lists,
MaybeCastFunctor<2, scalar_t_0, scalar_t_1>()); )) MaybeCastFunctor<2, scalar_t_0, scalar_t_1>()); ))
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
void fused_maybe_adam_undo_cuda( void fused_maybe_adam_undo_cuda(
...@@ -1032,5 +1032,5 @@ void fused_maybe_adam_undo_cuda( ...@@ -1032,5 +1032,5 @@ void fused_maybe_adam_undo_cuda(
decay); decay);
); );
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
...@@ -225,5 +225,5 @@ void multi_tensor_fused_adam_cuda( ...@@ -225,5 +225,5 @@ void multi_tensor_fused_adam_cuda(
(adamMode_t) mode); (adamMode_t) mode);
); );
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
} }
...@@ -822,7 +822,7 @@ std::vector<torch::Tensor> transducer_joint_cuda_forward( ...@@ -822,7 +822,7 @@ std::vector<torch::Tensor> transducer_joint_cuda_forward(
})); }));
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
if (masked) if (masked)
return {sum, mask}; return {sum, mask};
else else
......
...@@ -639,7 +639,7 @@ std::vector<torch::Tensor> transducer_loss_cuda_forward( ...@@ -639,7 +639,7 @@ std::vector<torch::Tensor> transducer_loss_cuda_forward(
loss.data_ptr<scalar_t>()); loss.data_ptr<scalar_t>());
})); }));
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
return {alpha, beta, loss}; return {alpha, beta, loss};
} }
...@@ -760,7 +760,7 @@ torch::Tensor transducer_loss_cuda_backward( ...@@ -760,7 +760,7 @@ torch::Tensor transducer_loss_cuda_backward(
xGrad.data_ptr<scalar_t>()); xGrad.data_ptr<scalar_t>());
})); }));
} }
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
return xGrad; return xGrad;
} }
...@@ -629,7 +629,7 @@ std::vector<Tensor> host_softmax_xentropy( ...@@ -629,7 +629,7 @@ std::vector<Tensor> host_softmax_xentropy(
} }
); );
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
std::vector<at::Tensor> ret = {losses, max_log_sum_exp}; std::vector<at::Tensor> ret = {losses, max_log_sum_exp};
return ret; return ret;
...@@ -699,7 +699,7 @@ Tensor host_softmax_xentropy_backward( ...@@ -699,7 +699,7 @@ Tensor host_softmax_xentropy_backward(
} }
); );
THCudaCheck(cudaGetLastError()); C10_CUDA_CHECK(cudaGetLastError());
return gI; return gI;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment