Commit fec3141c authored by Hubert Lu

Replace THCudaCheck with C10_CUDA_CHECK

parent 2155dabf
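For context, the pattern this commit converges on is: launch a kernel on the current ATen CUDA stream, then check the launch status with PyTorch's `C10_CUDA_CHECK` instead of the removed `THCudaCheck`. The sketch below is illustrative only, not code from this repository; `scale_kernel`, `scale_cuda`, and their parameters are hypothetical, and it assumes a PyTorch build where `C10_CUDA_CHECK` is provided by `c10/cuda/CUDAException.h`.

```cpp
// Minimal sketch of the error-check pattern used throughout the diff below.
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAException.h>  // assumed location of C10_CUDA_CHECK

// Hypothetical kernel for illustration: out[i] = in[i] * s
__global__ void scale_kernel(const float* in, float* out, float s, int64_t n) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = in[i] * s;
}

void scale_cuda(const float* in, float* out, float s, int64_t n) {
  dim3 block(256);
  dim3 grid((n + block.x - 1) / block.x);
  // Launch on the stream ATen considers current for this device.
  scale_kernel<<<grid, block, 0, at::cuda::getCurrentCUDAStream()>>>(in, out, s, n);
  // Same one-line replacement applied in every hunk below:
  //   THCudaCheck(cudaGetLastError());  ->  C10_CUDA_CHECK(cudaGetLastError());
  C10_CUDA_CHECK(cudaGetLastError());
}
```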
@@ -5,14 +5,9 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_fp16.h>
-<<<<<<< HEAD
 //#include <cuda_profiler_api.h>
-#include "THC/THC.h"
-=======
-#include <cuda_profiler_api.h>
 #include <ATen/ATen.h>
->>>>>>> 0c7d8e3 (remove THC headers/functions (#1192))
 #include <ATen/cuda/CUDAContext.h>
 #include <torch/extension.h>
...
@@ -220,7 +220,7 @@ void apex_fused_dropout_cuda(scalar_t const *inputs,
 }
 apex_fused_dropout_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, p, rng_engine_inputs);
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 template <
@@ -258,7 +258,7 @@ void apex_dropout_add_cuda(scalar_t const *inputs,
 }
 apex_dropout_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, mask, totalElements, p, rng_engine_inputs);
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 template <
@@ -279,7 +279,7 @@ void apex_add_cuda(scalar_t const *inputs,
 grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
 apex_add_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, add_inputs, outputs, totalElements);
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 template<typename scalar_t,
@@ -300,7 +300,7 @@ void apex_masked_scale_cuda(scalar_t const *inputs,
 grid.x = std::min((unsigned int)at::cuda::getCurrentDeviceProperties()->multiProcessorCount * blocks_per_sm, grid.x);
 apex_masked_scale_kernel<scalar_t, accscalar_t, IndexType><<<grid, dim_block, 0, at::cuda::getCurrentCUDAStream()>>>(inputs, outputs, mask, totalElements, scale);
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
@@ -502,7 +502,7 @@ std::vector<torch::Tensor> bwd_cuda(
 algo,
 solution_index,
 flags));
-TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
+//TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
 return {
 input_grads,
...
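The `bwd_cuda` hunk above also comments out the math-mode reset after the cuBLAS call. For reference, the checked-call pattern involved there is ATen's `TORCH_CUDABLAS_CHECK`; the sketch below is a rough illustration under assumed headers, with the handle retrieval and function name chosen for the example rather than taken from this extension's code.

```cpp
// Illustrative only: wrap a status-returning cuBLAS call with ATen's
// TORCH_CUDABLAS_CHECK so a non-success status throws a C++ exception.
#include <ATen/cuda/CUDAContext.h>   // at::cuda::getCurrentCUDABlasHandle
#include <ATen/cuda/Exceptions.h>    // assumed location of TORCH_CUDABLAS_CHECK
#include <cublas_v2.h>

void reset_math_mode() {  // hypothetical helper name
  cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
  // The diff above disables this reset by commenting the call out.
  TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
}
```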
@@ -276,7 +276,7 @@ void fused_adam_cuda(
 decay);
 );
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
@@ -383,7 +383,7 @@ void fused_adam_cuda_mt(
 );
 }
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 template <typename FROM_T, typename TO_T>
@@ -808,7 +808,7 @@ void fused_strided_check_finite(
 stride,
 clear_overflow_first);
 );
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 void fused_reversible_adam_cuda(
@@ -909,7 +909,7 @@ void fused_reversible_adam_cuda(
 decay);
 );
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 void maybe_cast_cuda(
@@ -933,7 +933,7 @@ void maybe_cast_cuda(
 p_in.DATA_PTR<scalar_t_0>(),
 p_out.DATA_PTR<scalar_t_1>(),
 tsize); ))
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 void maybe_cast_cuda_mt(
@@ -955,7 +955,7 @@ void maybe_cast_cuda_mt(
 overflow_flag,
 tensor_lists,
 MaybeCastFunctor<2, scalar_t_0, scalar_t_1>()); ))
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
 void fused_maybe_adam_undo_cuda(
@@ -1033,5 +1033,5 @@ void fused_maybe_adam_undo_cuda(
 decay);
 );
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
@@ -224,5 +224,5 @@ void multi_tensor_fused_adam_cuda(
 (adamMode_t) mode);
 );
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 }
@@ -823,7 +823,7 @@ std::vector<torch::Tensor> transducer_joint_cuda_forward(
 }));
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 if (masked)
 return {sum, mask};
 else
...
@@ -640,7 +640,7 @@ std::vector<torch::Tensor> transducer_loss_cuda_forward(
 loss.data_ptr<scalar_t>());
 }));
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 return {alpha, beta, loss};
 }
@@ -761,7 +761,7 @@ torch::Tensor transducer_loss_cuda_backward(
 xGrad.data_ptr<scalar_t>());
 }));
 }
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 return xGrad;
 }
@@ -634,7 +634,7 @@ std::vector<Tensor> host_softmax_xentropy(
 }
 );
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 std::vector<at::Tensor> ret = {losses, max_log_sum_exp};
 return ret;
@@ -704,7 +704,7 @@ Tensor host_softmax_xentropy_backward(
 }
 );
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
 return gI;
 }
...