Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
FastMoE
Commits
7d695acb
"src/libtorchaudio/sox/io.cpp" did not exist on "2c8665decd52df033a371964127b7e8fbfe4bec0"
Commit
7d695acb
authored
Mar 04, 2021
by
Rick Ho
Browse files
remove dependency on cuda sample repo
parent
7f784c84
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
584 additions
and
10 deletions
+584
-10
cuda/cuda_stream_manager.cpp
cuda/cuda_stream_manager.cpp
+0
-1
cuda/cuda_stream_manager.h
cuda/cuda_stream_manager.h
+1
-2
cuda/helper_cuda.h
cuda/helper_cuda.h
+579
-0
cuda/moe_comm_kernel.cu
cuda/moe_comm_kernel.cu
+0
-1
cuda/moe_compute_kernel.cu
cuda/moe_compute_kernel.cu
+0
-1
cuda/moe_cuda_kernel.h
cuda/moe_cuda_kernel.h
+1
-0
cuda/moe_fused_kernel.cu
cuda/moe_fused_kernel.cu
+0
-1
requirements.txt
requirements.txt
+1
-0
setup.py
setup.py
+2
-4
No files found.
cuda/cuda_stream_manager.cpp
View file @
7d695acb
...
...
@@ -5,7 +5,6 @@
#include <iostream>
#include "cuda_stream_manager.h"
#include <helper_cuda.h>
#define SMGR_N_STREAMS 16
...
...
cuda/cuda_stream_manager.h
View file @
7d695acb
#ifndef CUDA_STREAM_MANAGER_H
#define CUDA_STREAM_MANAGER_H
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "helper_cuda.h"
#ifdef MOE_USE_NCCL
#include <nccl.h>
...
...
cuda/helper_cuda.h
0 → 100644
View file @
7d695acb
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions for initialization and error checking
// This file is clipped from the original header file by laekov
#include <cuda_runtime.h>
#include <cublas_v2.h>
#ifndef HELPER_CUDA_H
#define HELPER_CUDA_H
static
const
char
*
_cudaGetErrorEnum
(
cudaError_t
error
)
{
return
cudaGetErrorName
(
error
);
}
#ifdef CUDA_DRIVER_API
// CUDA Driver API errors
static
const
char
*
_cudaGetErrorEnum
(
CUresult
error
)
{
static
char
unknown
[]
=
"<unknown>"
;
const
char
*
ret
=
NULL
;
cuGetErrorName
(
error
,
&
ret
);
return
ret
?
ret
:
unknown
;
}
#endif
// cuBLAS API errors
static
const
char
*
_cudaGetErrorEnum
(
cublasStatus_t
error
)
{
switch
(
error
)
{
case
CUBLAS_STATUS_SUCCESS
:
return
"CUBLAS_STATUS_SUCCESS"
;
case
CUBLAS_STATUS_NOT_INITIALIZED
:
return
"CUBLAS_STATUS_NOT_INITIALIZED"
;
case
CUBLAS_STATUS_ALLOC_FAILED
:
return
"CUBLAS_STATUS_ALLOC_FAILED"
;
case
CUBLAS_STATUS_INVALID_VALUE
:
return
"CUBLAS_STATUS_INVALID_VALUE"
;
case
CUBLAS_STATUS_ARCH_MISMATCH
:
return
"CUBLAS_STATUS_ARCH_MISMATCH"
;
case
CUBLAS_STATUS_MAPPING_ERROR
:
return
"CUBLAS_STATUS_MAPPING_ERROR"
;
case
CUBLAS_STATUS_EXECUTION_FAILED
:
return
"CUBLAS_STATUS_EXECUTION_FAILED"
;
case
CUBLAS_STATUS_INTERNAL_ERROR
:
return
"CUBLAS_STATUS_INTERNAL_ERROR"
;
case
CUBLAS_STATUS_NOT_SUPPORTED
:
return
"CUBLAS_STATUS_NOT_SUPPORTED"
;
case
CUBLAS_STATUS_LICENSE_ERROR
:
return
"CUBLAS_STATUS_LICENSE_ERROR"
;
}
return
"<unknown>"
;
}
#ifdef _CUFFT_H_
// cuFFT API errors
static
const
char
*
_cudaGetErrorEnum
(
cufftResult
error
)
{
switch
(
error
)
{
case
CUFFT_SUCCESS
:
return
"CUFFT_SUCCESS"
;
case
CUFFT_INVALID_PLAN
:
return
"CUFFT_INVALID_PLAN"
;
case
CUFFT_ALLOC_FAILED
:
return
"CUFFT_ALLOC_FAILED"
;
case
CUFFT_INVALID_TYPE
:
return
"CUFFT_INVALID_TYPE"
;
case
CUFFT_INVALID_VALUE
:
return
"CUFFT_INVALID_VALUE"
;
case
CUFFT_INTERNAL_ERROR
:
return
"CUFFT_INTERNAL_ERROR"
;
case
CUFFT_EXEC_FAILED
:
return
"CUFFT_EXEC_FAILED"
;
case
CUFFT_SETUP_FAILED
:
return
"CUFFT_SETUP_FAILED"
;
case
CUFFT_INVALID_SIZE
:
return
"CUFFT_INVALID_SIZE"
;
case
CUFFT_UNALIGNED_DATA
:
return
"CUFFT_UNALIGNED_DATA"
;
case
CUFFT_INCOMPLETE_PARAMETER_LIST
:
return
"CUFFT_INCOMPLETE_PARAMETER_LIST"
;
case
CUFFT_INVALID_DEVICE
:
return
"CUFFT_INVALID_DEVICE"
;
case
CUFFT_PARSE_ERROR
:
return
"CUFFT_PARSE_ERROR"
;
case
CUFFT_NO_WORKSPACE
:
return
"CUFFT_NO_WORKSPACE"
;
case
CUFFT_NOT_IMPLEMENTED
:
return
"CUFFT_NOT_IMPLEMENTED"
;
case
CUFFT_LICENSE_ERROR
:
return
"CUFFT_LICENSE_ERROR"
;
case
CUFFT_NOT_SUPPORTED
:
return
"CUFFT_NOT_SUPPORTED"
;
}
return
"<unknown>"
;
}
#endif
#ifdef CUSPARSEAPI
// cuSPARSE API errors
static
const
char
*
_cudaGetErrorEnum
(
cusparseStatus_t
error
)
{
switch
(
error
)
{
case
CUSPARSE_STATUS_SUCCESS
:
return
"CUSPARSE_STATUS_SUCCESS"
;
case
CUSPARSE_STATUS_NOT_INITIALIZED
:
return
"CUSPARSE_STATUS_NOT_INITIALIZED"
;
case
CUSPARSE_STATUS_ALLOC_FAILED
:
return
"CUSPARSE_STATUS_ALLOC_FAILED"
;
case
CUSPARSE_STATUS_INVALID_VALUE
:
return
"CUSPARSE_STATUS_INVALID_VALUE"
;
case
CUSPARSE_STATUS_ARCH_MISMATCH
:
return
"CUSPARSE_STATUS_ARCH_MISMATCH"
;
case
CUSPARSE_STATUS_MAPPING_ERROR
:
return
"CUSPARSE_STATUS_MAPPING_ERROR"
;
case
CUSPARSE_STATUS_EXECUTION_FAILED
:
return
"CUSPARSE_STATUS_EXECUTION_FAILED"
;
case
CUSPARSE_STATUS_INTERNAL_ERROR
:
return
"CUSPARSE_STATUS_INTERNAL_ERROR"
;
case
CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED
:
return
"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"
;
}
return
"<unknown>"
;
}
#endif
#ifdef CUSOLVER_COMMON_H_
// cuSOLVER API errors
static
const
char
*
_cudaGetErrorEnum
(
cusolverStatus_t
error
)
{
switch
(
error
)
{
case
CUSOLVER_STATUS_SUCCESS
:
return
"CUSOLVER_STATUS_SUCCESS"
;
case
CUSOLVER_STATUS_NOT_INITIALIZED
:
return
"CUSOLVER_STATUS_NOT_INITIALIZED"
;
case
CUSOLVER_STATUS_ALLOC_FAILED
:
return
"CUSOLVER_STATUS_ALLOC_FAILED"
;
case
CUSOLVER_STATUS_INVALID_VALUE
:
return
"CUSOLVER_STATUS_INVALID_VALUE"
;
case
CUSOLVER_STATUS_ARCH_MISMATCH
:
return
"CUSOLVER_STATUS_ARCH_MISMATCH"
;
case
CUSOLVER_STATUS_MAPPING_ERROR
:
return
"CUSOLVER_STATUS_MAPPING_ERROR"
;
case
CUSOLVER_STATUS_EXECUTION_FAILED
:
return
"CUSOLVER_STATUS_EXECUTION_FAILED"
;
case
CUSOLVER_STATUS_INTERNAL_ERROR
:
return
"CUSOLVER_STATUS_INTERNAL_ERROR"
;
case
CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED
:
return
"CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"
;
case
CUSOLVER_STATUS_NOT_SUPPORTED
:
return
"CUSOLVER_STATUS_NOT_SUPPORTED "
;
case
CUSOLVER_STATUS_ZERO_PIVOT
:
return
"CUSOLVER_STATUS_ZERO_PIVOT"
;
case
CUSOLVER_STATUS_INVALID_LICENSE
:
return
"CUSOLVER_STATUS_INVALID_LICENSE"
;
}
return
"<unknown>"
;
}
#endif
#ifdef CURAND_H_
// cuRAND API errors
static
const
char
*
_cudaGetErrorEnum
(
curandStatus_t
error
)
{
switch
(
error
)
{
case
CURAND_STATUS_SUCCESS
:
return
"CURAND_STATUS_SUCCESS"
;
case
CURAND_STATUS_VERSION_MISMATCH
:
return
"CURAND_STATUS_VERSION_MISMATCH"
;
case
CURAND_STATUS_NOT_INITIALIZED
:
return
"CURAND_STATUS_NOT_INITIALIZED"
;
case
CURAND_STATUS_ALLOCATION_FAILED
:
return
"CURAND_STATUS_ALLOCATION_FAILED"
;
case
CURAND_STATUS_TYPE_ERROR
:
return
"CURAND_STATUS_TYPE_ERROR"
;
case
CURAND_STATUS_OUT_OF_RANGE
:
return
"CURAND_STATUS_OUT_OF_RANGE"
;
case
CURAND_STATUS_LENGTH_NOT_MULTIPLE
:
return
"CURAND_STATUS_LENGTH_NOT_MULTIPLE"
;
case
CURAND_STATUS_DOUBLE_PRECISION_REQUIRED
:
return
"CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"
;
case
CURAND_STATUS_LAUNCH_FAILURE
:
return
"CURAND_STATUS_LAUNCH_FAILURE"
;
case
CURAND_STATUS_PREEXISTING_FAILURE
:
return
"CURAND_STATUS_PREEXISTING_FAILURE"
;
case
CURAND_STATUS_INITIALIZATION_FAILED
:
return
"CURAND_STATUS_INITIALIZATION_FAILED"
;
case
CURAND_STATUS_ARCH_MISMATCH
:
return
"CURAND_STATUS_ARCH_MISMATCH"
;
case
CURAND_STATUS_INTERNAL_ERROR
:
return
"CURAND_STATUS_INTERNAL_ERROR"
;
}
return
"<unknown>"
;
}
#endif
#ifdef NVJPEGAPI
// nvJPEG API errors
static
const
char
*
_cudaGetErrorEnum
(
nvjpegStatus_t
error
)
{
switch
(
error
)
{
case
NVJPEG_STATUS_SUCCESS
:
return
"NVJPEG_STATUS_SUCCESS"
;
case
NVJPEG_STATUS_NOT_INITIALIZED
:
return
"NVJPEG_STATUS_NOT_INITIALIZED"
;
case
NVJPEG_STATUS_INVALID_PARAMETER
:
return
"NVJPEG_STATUS_INVALID_PARAMETER"
;
case
NVJPEG_STATUS_BAD_JPEG
:
return
"NVJPEG_STATUS_BAD_JPEG"
;
case
NVJPEG_STATUS_JPEG_NOT_SUPPORTED
:
return
"NVJPEG_STATUS_JPEG_NOT_SUPPORTED"
;
case
NVJPEG_STATUS_ALLOCATOR_FAILURE
:
return
"NVJPEG_STATUS_ALLOCATOR_FAILURE"
;
case
NVJPEG_STATUS_EXECUTION_FAILED
:
return
"NVJPEG_STATUS_EXECUTION_FAILED"
;
case
NVJPEG_STATUS_ARCH_MISMATCH
:
return
"NVJPEG_STATUS_ARCH_MISMATCH"
;
case
NVJPEG_STATUS_INTERNAL_ERROR
:
return
"NVJPEG_STATUS_INTERNAL_ERROR"
;
}
return
"<unknown>"
;
}
#endif
#ifdef NV_NPPIDEFS_H
// NPP API errors
static
const
char
*
_cudaGetErrorEnum
(
NppStatus
error
)
{
switch
(
error
)
{
case
NPP_NOT_SUPPORTED_MODE_ERROR
:
return
"NPP_NOT_SUPPORTED_MODE_ERROR"
;
case
NPP_ROUND_MODE_NOT_SUPPORTED_ERROR
:
return
"NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"
;
case
NPP_RESIZE_NO_OPERATION_ERROR
:
return
"NPP_RESIZE_NO_OPERATION_ERROR"
;
case
NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY
:
return
"NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"
;
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case
NPP_BAD_ARG_ERROR
:
return
"NPP_BAD_ARGUMENT_ERROR"
;
case
NPP_COEFF_ERROR
:
return
"NPP_COEFFICIENT_ERROR"
;
case
NPP_RECT_ERROR
:
return
"NPP_RECTANGLE_ERROR"
;
case
NPP_QUAD_ERROR
:
return
"NPP_QUADRANGLE_ERROR"
;
case
NPP_MEM_ALLOC_ERR
:
return
"NPP_MEMORY_ALLOCATION_ERROR"
;
case
NPP_HISTO_NUMBER_OF_LEVELS_ERROR
:
return
"NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"
;
case
NPP_INVALID_INPUT
:
return
"NPP_INVALID_INPUT"
;
case
NPP_POINTER_ERROR
:
return
"NPP_POINTER_ERROR"
;
case
NPP_WARNING
:
return
"NPP_WARNING"
;
case
NPP_ODD_ROI_WARNING
:
return
"NPP_ODD_ROI_WARNING"
;
#else
// These are for CUDA 5.5 or higher
case
NPP_BAD_ARGUMENT_ERROR
:
return
"NPP_BAD_ARGUMENT_ERROR"
;
case
NPP_COEFFICIENT_ERROR
:
return
"NPP_COEFFICIENT_ERROR"
;
case
NPP_RECTANGLE_ERROR
:
return
"NPP_RECTANGLE_ERROR"
;
case
NPP_QUADRANGLE_ERROR
:
return
"NPP_QUADRANGLE_ERROR"
;
case
NPP_MEMORY_ALLOCATION_ERR
:
return
"NPP_MEMORY_ALLOCATION_ERROR"
;
case
NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR
:
return
"NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"
;
case
NPP_INVALID_HOST_POINTER_ERROR
:
return
"NPP_INVALID_HOST_POINTER_ERROR"
;
case
NPP_INVALID_DEVICE_POINTER_ERROR
:
return
"NPP_INVALID_DEVICE_POINTER_ERROR"
;
#endif
case
NPP_LUT_NUMBER_OF_LEVELS_ERROR
:
return
"NPP_LUT_NUMBER_OF_LEVELS_ERROR"
;
case
NPP_TEXTURE_BIND_ERROR
:
return
"NPP_TEXTURE_BIND_ERROR"
;
case
NPP_WRONG_INTERSECTION_ROI_ERROR
:
return
"NPP_WRONG_INTERSECTION_ROI_ERROR"
;
case
NPP_NOT_EVEN_STEP_ERROR
:
return
"NPP_NOT_EVEN_STEP_ERROR"
;
case
NPP_INTERPOLATION_ERROR
:
return
"NPP_INTERPOLATION_ERROR"
;
case
NPP_RESIZE_FACTOR_ERROR
:
return
"NPP_RESIZE_FACTOR_ERROR"
;
case
NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR
:
return
"NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"
;
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case
NPP_MEMFREE_ERR
:
return
"NPP_MEMFREE_ERR"
;
case
NPP_MEMSET_ERR
:
return
"NPP_MEMSET_ERR"
;
case
NPP_MEMCPY_ERR
:
return
"NPP_MEMCPY_ERROR"
;
case
NPP_MIRROR_FLIP_ERR
:
return
"NPP_MIRROR_FLIP_ERR"
;
#else
case
NPP_MEMFREE_ERROR
:
return
"NPP_MEMFREE_ERROR"
;
case
NPP_MEMSET_ERROR
:
return
"NPP_MEMSET_ERROR"
;
case
NPP_MEMCPY_ERROR
:
return
"NPP_MEMCPY_ERROR"
;
case
NPP_MIRROR_FLIP_ERROR
:
return
"NPP_MIRROR_FLIP_ERROR"
;
#endif
case
NPP_ALIGNMENT_ERROR
:
return
"NPP_ALIGNMENT_ERROR"
;
case
NPP_STEP_ERROR
:
return
"NPP_STEP_ERROR"
;
case
NPP_SIZE_ERROR
:
return
"NPP_SIZE_ERROR"
;
case
NPP_NULL_POINTER_ERROR
:
return
"NPP_NULL_POINTER_ERROR"
;
case
NPP_CUDA_KERNEL_EXECUTION_ERROR
:
return
"NPP_CUDA_KERNEL_EXECUTION_ERROR"
;
case
NPP_NOT_IMPLEMENTED_ERROR
:
return
"NPP_NOT_IMPLEMENTED_ERROR"
;
case
NPP_ERROR
:
return
"NPP_ERROR"
;
case
NPP_SUCCESS
:
return
"NPP_SUCCESS"
;
case
NPP_WRONG_INTERSECTION_QUAD_WARNING
:
return
"NPP_WRONG_INTERSECTION_QUAD_WARNING"
;
case
NPP_MISALIGNED_DST_ROI_WARNING
:
return
"NPP_MISALIGNED_DST_ROI_WARNING"
;
case
NPP_AFFINE_QUAD_INCORRECT_WARNING
:
return
"NPP_AFFINE_QUAD_INCORRECT_WARNING"
;
case
NPP_DOUBLE_SIZE_WARNING
:
return
"NPP_DOUBLE_SIZE_WARNING"
;
case
NPP_WRONG_INTERSECTION_ROI_WARNING
:
return
"NPP_WRONG_INTERSECTION_ROI_WARNING"
;
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
/* These are 6.0 or higher */
case
NPP_LUT_PALETTE_BITSIZE_ERROR
:
return
"NPP_LUT_PALETTE_BITSIZE_ERROR"
;
case
NPP_ZC_MODE_NOT_SUPPORTED_ERROR
:
return
"NPP_ZC_MODE_NOT_SUPPORTED_ERROR"
;
case
NPP_QUALITY_INDEX_ERROR
:
return
"NPP_QUALITY_INDEX_ERROR"
;
case
NPP_CHANNEL_ORDER_ERROR
:
return
"NPP_CHANNEL_ORDER_ERROR"
;
case
NPP_ZERO_MASK_VALUE_ERROR
:
return
"NPP_ZERO_MASK_VALUE_ERROR"
;
case
NPP_NUMBER_OF_CHANNELS_ERROR
:
return
"NPP_NUMBER_OF_CHANNELS_ERROR"
;
case
NPP_COI_ERROR
:
return
"NPP_COI_ERROR"
;
case
NPP_DIVISOR_ERROR
:
return
"NPP_DIVISOR_ERROR"
;
case
NPP_CHANNEL_ERROR
:
return
"NPP_CHANNEL_ERROR"
;
case
NPP_STRIDE_ERROR
:
return
"NPP_STRIDE_ERROR"
;
case
NPP_ANCHOR_ERROR
:
return
"NPP_ANCHOR_ERROR"
;
case
NPP_MASK_SIZE_ERROR
:
return
"NPP_MASK_SIZE_ERROR"
;
case
NPP_MOMENT_00_ZERO_ERROR
:
return
"NPP_MOMENT_00_ZERO_ERROR"
;
case
NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR
:
return
"NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"
;
case
NPP_THRESHOLD_ERROR
:
return
"NPP_THRESHOLD_ERROR"
;
case
NPP_CONTEXT_MATCH_ERROR
:
return
"NPP_CONTEXT_MATCH_ERROR"
;
case
NPP_FFT_FLAG_ERROR
:
return
"NPP_FFT_FLAG_ERROR"
;
case
NPP_FFT_ORDER_ERROR
:
return
"NPP_FFT_ORDER_ERROR"
;
case
NPP_SCALE_RANGE_ERROR
:
return
"NPP_SCALE_RANGE_ERROR"
;
case
NPP_DATA_TYPE_ERROR
:
return
"NPP_DATA_TYPE_ERROR"
;
case
NPP_OUT_OFF_RANGE_ERROR
:
return
"NPP_OUT_OFF_RANGE_ERROR"
;
case
NPP_DIVIDE_BY_ZERO_ERROR
:
return
"NPP_DIVIDE_BY_ZERO_ERROR"
;
case
NPP_RANGE_ERROR
:
return
"NPP_RANGE_ERROR"
;
case
NPP_NO_MEMORY_ERROR
:
return
"NPP_NO_MEMORY_ERROR"
;
case
NPP_ERROR_RESERVED
:
return
"NPP_ERROR_RESERVED"
;
case
NPP_NO_OPERATION_WARNING
:
return
"NPP_NO_OPERATION_WARNING"
;
case
NPP_DIVIDE_BY_ZERO_WARNING
:
return
"NPP_DIVIDE_BY_ZERO_WARNING"
;
#endif
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
/* These are 7.0 or higher */
case
NPP_OVERFLOW_ERROR
:
return
"NPP_OVERFLOW_ERROR"
;
case
NPP_CORRUPTED_DATA_ERROR
:
return
"NPP_CORRUPTED_DATA_ERROR"
;
#endif
}
return
"<unknown>"
;
}
#endif
template
<
typename
T
>
void
check
(
T
result
,
char
const
*
const
func
,
const
char
*
const
file
,
int
const
line
)
{
if
(
result
)
{
fprintf
(
stderr
,
"CUDA error at %s:%d code=%d(%s)
\"
%s
\"
\n
"
,
file
,
line
,
static_cast
<
unsigned
int
>
(
result
),
_cudaGetErrorEnum
(
result
),
func
);
exit
(
EXIT_FAILURE
);
}
}
// This will output the proper CUDA error strings in the event
// that a CUDA host call returns an error
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
#endif // HELPER_CUDA_H
cuda/moe_comm_kernel.cu
View file @
7d695acb
...
...
@@ -7,7 +7,6 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <helper_cuda.h>
#include "cuda_stream_manager.h"
...
...
cuda/moe_compute_kernel.cu
View file @
7d695acb
...
...
@@ -7,7 +7,6 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <helper_cuda.h>
#include <c10/cuda/CUDAGuard.h>
#include "timer.hh"
...
...
cuda/moe_cuda_kernel.h
View file @
7d695acb
...
...
@@ -4,6 +4,7 @@
#include <vector>
#include <torch/extension.h>
#include <torch/torch.h>
#include "helper_cuda.h"
std
::
vector
<
torch
::
Tensor
>
moe_cuda_expert_count
(
torch
::
Tensor
gate
,
size_t
num_expert
);
...
...
cuda/moe_fused_kernel.cu
View file @
7d695acb
...
...
@@ -7,7 +7,6 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <helper_cuda.h>
#include <c10/cuda/CUDAGuard.h>
#include "cuda_stream_manager.h"
...
...
requirements.txt
View file @
7d695acb
torch
numpy
setup.py
View file @
7d695acb
...
...
@@ -3,11 +3,9 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import
os
CUDA_HELPER
=
os
.
environ
.
get
(
'CUDA_HELPER'
,
'/usr/local/cuda/samples/common/inc'
)
cxx_flags
=
[
'-I{}'
.
format
(
CUDA_HELPER
)
]
cxx_flags
=
[]
ext_libs
=
[]
if
os
.
environ
.
get
(
'USE_NCCL'
,
'0'
)
==
'1'
:
cxx_flags
.
append
(
'-DMOE_USE_NCCL'
)
ext_libs
.
append
(
'nccl'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment