Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
1635fd92
Unverified
Commit
1635fd92
authored
Sep 10, 2025
by
PanZezhong1725
Committed by
GitHub
Sep 10, 2025
Browse files
issue/440 feat: add softplus operator
parent
97f9ac7e
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
522 additions
and
4 deletions
+522
-4
include/infiniop.h
include/infiniop.h
+1
-0
include/infiniop/ops/softplus.h
include/infiniop/ops/softplus.h
+24
-0
scripts/python_test.py
scripts/python_test.py
+1
-0
src/infiniop/ops/softplus/cpu/softplus_cpu.cc
src/infiniop/ops/softplus/cpu/softplus_cpu.cc
+52
-0
src/infiniop/ops/softplus/cpu/softplus_cpu.h
src/infiniop/ops/softplus/cpu/softplus_cpu.h
+23
-0
src/infiniop/ops/softplus/cuda/kernel.cuh
src/infiniop/ops/softplus/cuda/kernel.cuh
+34
-0
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cu
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cu
+59
-0
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cuh
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cuh
+8
-0
src/infiniop/ops/softplus/operator.cc
src/infiniop/ops/softplus/operator.cc
+131
-0
test/infiniop/libinfiniop/op_register.py
test/infiniop/libinfiniop/op_register.py
+27
-4
test/infiniop/softplus.py
test/infiniop/softplus.py
+162
-0
No files found.
include/infiniop.h
View file @
1635fd92
...
...
@@ -16,6 +16,7 @@
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rope.h"
#include "infiniop/ops/rope_v2.h"
#include "infiniop/ops/softplus.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/swiglu.h"
#include "infiniop/ops/topkrouter.h"
...
...
include/infiniop/ops/softplus.h
0 → 100644
View file @
1635fd92
#ifndef __INFINIOP_SOFTPLUS_API_H__
#define __INFINIOP_SOFTPLUS_API_H__

#include "../operator_descriptor.h"

// Opaque handle for a softplus operator descriptor.
typedef struct InfiniopDescriptor *infiniopSoftplusDescriptor_t;

/// Create a softplus descriptor for output tensor `y` and input tensor `x`.
__C __export infiniStatus_t infiniopCreateSoftplusDescriptor(
    infiniopHandle_t handle,
    infiniopSoftplusDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y,
    infiniopTensorDescriptor_t x);

/// Query the workspace size (in bytes) required by `infiniopSoftplus`.
__C __export infiniStatus_t infiniopGetSoftplusWorkspaceSize(
    infiniopSoftplusDescriptor_t desc,
    size_t *size);

/// Compute y = softplus(x) = log(1 + exp(x)) elementwise.
__C __export infiniStatus_t infiniopSoftplus(
    infiniopSoftplusDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream);

/// Release a descriptor previously created by infiniopCreateSoftplusDescriptor.
__C __export infiniStatus_t infiniopDestroySoftplusDescriptor(
    infiniopSoftplusDescriptor_t desc);

#endif
scripts/python_test.py
View file @
1635fd92
...
...
@@ -24,6 +24,7 @@ def run_tests(args):
"rope.py"
,
"sub.py"
,
"swiglu.py"
,
"softplus.py"
,
]:
result
=
subprocess
.
run
(
f
"python
{
test
}
{
args
}
--debug"
,
text
=
True
,
encoding
=
"utf-8"
,
shell
=
True
...
...
src/infiniop/ops/softplus/cpu/softplus_cpu.cc
0 → 100644
View file @
1635fd92
#include "softplus_cpu.h"

namespace op::softplus::cpu {

Descriptor::~Descriptor() = default;

// Validate descriptors (dtype + matching shapes) and build the shared
// elementwise CPU descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {

    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();

    // Only floating-point dtypes are supported; output shape must equal input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);

    // create CPU elementwise descriptor
    CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);

    return INFINI_STATUS_SUCCESS;
}

// Dispatch the elementwise computation on the dtype recorded at creation time.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {

    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<SoftplusOp, fp16_t>(_info, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<SoftplusOp, float>(_info, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<SoftplusOp, double>(_info, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<SoftplusOp, bf16_t>(_info, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    return INFINI_STATUS_SUCCESS;
}

} // namespace op::softplus::cpu
src/infiniop/ops/softplus/cpu/softplus_cpu.h
0 → 100644
View file @
1635fd92
#ifndef __SOFTPLUS_CPU_H__
#define __SOFTPLUS_CPU_H__
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR
(
softplus
,
cpu
)
namespace op::softplus::cpu {

// Elementwise softplus functor: y = log(1 + exp(x)).
typedef struct SoftplusOp {
public:
    // Softplus is unary.
    static constexpr size_t num_inputs = 1;

    template <typename T>
    T operator()(const T &x) const {
        if (x > T(20)) {
            // For x > 20, log(1 + exp(x)) == x to machine precision, and
            // computing exp(x) directly would risk overflow for large x.
            return x;
        } else {
            // log1p is more accurate than log(1 + .) when exp(x) is tiny
            // (very negative x); this also matches the CUDA kernel, which
            // uses log1pf.
            return std::log1p(std::exp(x));
        }
    }
} SoftplusOp;

} // namespace op::softplus::cpu
#endif // __SOFTPLUS_CPU_H__
src/infiniop/ops/softplus/cuda/kernel.cuh
0 → 100644
View file @
1635fd92
#ifndef __SOFTPLUS_CUDA_H__
#define __SOFTPLUS_CUDA_H__

namespace op::softplus::cuda {

// Elementwise softplus functor for CUDA: y = log(1 + exp(x)).
typedef struct SoftplusOp {
public:
    // Softplus is unary.
    static constexpr size_t num_inputs = 1;

private:
    // Scalar float softplus with the same threshold trick as the CPU path:
    // for v > 20, log1p(exp(v)) == v to float precision and exp may overflow.
    static __device__ __forceinline__ float softplus_f32(float v) {
        return (v > 20.0f) ? v : log1pf(expf(v));
    }

public:
    template <typename T>
    __device__ __forceinline__ T operator()(const T &x) const {
        if constexpr (std::is_same_v<T, half>) {
            // promote to float for stability, then cast back
            return __float2half(softplus_f32(__half2float(x)));
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // promote to float for stability, then cast back
            return __float2bfloat16(softplus_f32(__bfloat162float(x)));
        } else if constexpr (std::is_same_v<T, half2>) {
            // process as two lanes
            float2 lanes = __half22float2(x);
            return __floats2half2_rn(softplus_f32(lanes.x), softplus_f32(lanes.y));
        } else {
            // default: float, double, etc.
            return (x > T(20)) ? x : log1p(exp(x));
        }
    }
} SoftplusOp;

} // namespace op::softplus::cuda

#endif // __SOFTPLUS_CUDA_H__
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cu
0 → 100644
View file @
1635fd92
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
#include "softplus_nvidia.cuh"

namespace op::softplus::nvidia {

Descriptor::~Descriptor() = default;

// Validate descriptors (dtype + matching shapes) and build the shared
// elementwise CUDA descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {

    auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();

    // Only floating-point dtypes are supported; output shape must equal input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);

    // create CUDA elementwise descriptor
    CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)

    return INFINI_STATUS_SUCCESS;
}

// Launch the elementwise kernel (block size 256) for the dtype recorded at
// creation time. Fails fast if the caller-provided workspace is too small.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {

    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }

    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::SoftplusOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::SoftplusOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::SoftplusOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::SoftplusOp, double>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    return INFINI_STATUS_SUCCESS;
}

} // namespace op::softplus::nvidia
src/infiniop/ops/softplus/nvidia/softplus_nvidia.cuh
0 → 100644
View file @
1635fd92
#ifndef __SOFTPLUS_CUDA_API_H__
#define __SOFTPLUS_CUDA_API_H__
#include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"
ELEMENTWISE_DESCRIPTOR
(
softplus
,
nvidia
)
#endif // __SOFTPLUS_CUDA_API_H__
src/infiniop/ops/softplus/operator.cc
0 → 100644
View file @
1635fd92
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/softplus.h"
#ifdef ENABLE_CPU_API
#include "cpu/softplus_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/softplus_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/softplus_metax.h"
#endif
// Create a softplus descriptor, dispatching on the device of `handle`.
__C infiniStatus_t infiniopCreateSoftplusDescriptor(
    infiniopHandle_t handle,
    infiniopSoftplusDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc) {

#define CREATE(CASE, NAMESPACE)                                                 \
    case CASE:                                                                  \
        return op::softplus::NAMESPACE::Descriptor::create(                     \
            handle,                                                             \
            reinterpret_cast<op::softplus::NAMESPACE::Descriptor **>(desc_ptr), \
            y_desc,                                                             \
            {x_desc})

    switch (handle->device) {
#ifdef ENABLE_CPU_API
        CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia implementation.
        CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef CREATE
}
// Report the workspace size required by the softplus descriptor.
__C infiniStatus_t infiniopGetSoftplusWorkspaceSize(
    infiniopSoftplusDescriptor_t desc,
    size_t *size) {

#define GET(CASE, NAMESPACE)                                                                    \
    case CASE:                                                                                  \
        *size = reinterpret_cast<op::softplus::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
        return INFINI_STATUS_SUCCESS

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia implementation.
        GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef GET
    // NOTE: the duplicate unreachable `return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
    // that followed the switch was removed: every case and the default already
    // return, and the sibling Create/Calculate/Destroy entry points carry no
    // trailing return either.
}
// Execute y = softplus(x), dispatching on the descriptor's device type.
__C infiniStatus_t infiniopSoftplus(
    infiniopSoftplusDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) {

#define CALCULATE(CASE, NAMESPACE)                                             \
    case CASE:                                                                 \
        return reinterpret_cast<const op::softplus::NAMESPACE::Descriptor *>(desc) \
            ->calculate(workspace, workspace_size, y, {x}, stream)

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia implementation.
        CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef CALCULATE
}
// Destroy a softplus descriptor, dispatching on its device type.
__C infiniStatus_t infiniopDestroySoftplusDescriptor(
    infiniopSoftplusDescriptor_t desc) {

#define DELETE(CASE, NAMESPACE)                                                  \
    case CASE:                                                                   \
        delete reinterpret_cast<const op::softplus::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        DELETE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia implementation.
        DELETE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef DELETE
}
test/infiniop/libinfiniop/op_register.py
View file @
1635fd92
...
...
@@ -490,6 +490,7 @@ def swiglu_(lib):
infiniopOperatorDescriptor_t
,
]
@
OpRegister
.
operator
def
conv_
(
lib
):
lib
.
infiniopCreateConvDescriptor
.
restype
=
c_int32
...
...
@@ -526,6 +527,7 @@ def conv_(lib):
infiniopOperatorDescriptor_t
,
]
@
OpRegister
.
operator
def
topkrouter_
(
lib
):
lib
.
infiniopCreateTopkrouterDescriptor
.
restype
=
c_int32
...
...
@@ -533,7 +535,7 @@ def topkrouter_(lib):
infiniopHandle_t
,
POINTER
(
infiniopOperatorDescriptor_t
),
infiniopTensorDescriptor_t
,
infiniopTensorDescriptor_t
infiniopTensorDescriptor_t
,
]
lib
.
infiniopGetTopkrouterWorkspaceSize
.
restype
=
c_int32
...
...
@@ -562,7 +564,6 @@ def topkrouter_(lib):
]
@
OpRegister
.
operator
def
dequantize_
(
lib
):
lib
.
infiniopCreateDequantizeDescriptor
.
restype
=
c_int32
...
...
@@ -596,3 +597,25 @@ def dequantize_(lib):
lib
.
infiniopDestroyDequantizeDescriptor
.
argtypes
=
[
infiniopOperatorDescriptor_t
,
]
@OpRegister.operator
def softplus_(lib):
    """Register ctypes signatures for the softplus C API entry points.

    Declares restype/argtypes so ctypes marshals arguments correctly
    instead of relying on defaults.
    """
    lib.infiniopCreateSoftplusDescriptor.restype = c_int32
    lib.infiniopCreateSoftplusDescriptor.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopOperatorDescriptor_t),
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
    ]

    # Previously missing: test/infiniop/softplus.py calls
    # infiniopGetSoftplusWorkspaceSize, and every other operator registers
    # its workspace-size getter here.
    lib.infiniopGetSoftplusWorkspaceSize.restype = c_int32
    lib.infiniopGetSoftplusWorkspaceSize.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]

    lib.infiniopSoftplus.restype = c_int32
    lib.infiniopSoftplus.argtypes = [
        infiniopOperatorDescriptor_t,
        c_void_p,
        c_size_t,
        c_void_p,
        c_void_p,
        c_void_p,
    ]

    lib.infiniopDestroySoftplusDescriptor.restype = c_int32
    lib.infiniopDestroySoftplusDescriptor.argtypes = [
        infiniopOperatorDescriptor_t,
    ]
test/infiniop/softplus.py
0 → 100644
View file @
1635fd92
import
ctypes
from
ctypes
import
c_uint64
from
enum
import
Enum
,
auto
import
torch
from
libinfiniop
import
(
LIBINFINIOP
,
InfiniDeviceNames
,
InfiniDtype
,
InfiniDtypeNames
,
TestTensor
,
TestWorkspace
,
check_error
,
debug
,
get_args
,
get_test_devices
,
get_tolerance
,
infiniopOperatorDescriptor_t
,
profile_operation
,
test_operator
,
)
# ==============================================================================
#  Configuration (Internal Use Only)
# ==============================================================================
# These are not meant to be imported from other modules

_TEST_CASES_ = [
    # tensor_shape, inplace
    # TODO: Uncomment the following line.
    # ((),),
    ((1, 3),),
    ((3, 3),),
    ((32, 20, 512),),
    ((33, 333, 333),),
    ((32, 256, 112, 112),),
    ((3, 3, 13, 9, 17),),
]


class Inplace(Enum):
    OUT_OF_PLACE = auto()
    INPLACE_X = auto()


# Inplace options applied for each test case in _TEST_CASES_
_INPLACE = [
    Inplace.OUT_OF_PLACE,
    Inplace.INPLACE_X,
]

# Form the test cases by appending each element of _INPLACE to each tuple in _TEST_CASES_
_TEST_CASES = [
    shape_case + (mode,)
    for shape_case in _TEST_CASES_
    for mode in _INPLACE
]

# Data types used for testing
_TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16]

# Tolerance map for different data types
_TOLERANCE_MAP = {
    InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
    InfiniDtype.F32: {"atol": 1e-7, "rtol": 1e-7},
    InfiniDtype.BF16: {"atol": 1e-3, "rtol": 1e-3},
}

# Runtime flags; overwritten from CLI args in __main__.
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
def
softplus
(
x
):
return
torch
.
nn
.
functional
.
softplus
(
x
).
to
(
x
.
dtype
)
def test(
    handle,
    device,
    shape,
    inplace=Inplace.OUT_OF_PLACE,
    dtype=torch.float16,
    sync=None,
):
    """Run one softplus case: build tensors, call the library, compare to torch."""
    # Random input in [-1, 1).
    x_torch_tensor = torch.rand(shape) * 2 - 1
    x = TestTensor(
        shape,
        x_torch_tensor.stride(),
        dtype,
        device,
        mode="manual",
        set_tensor=x_torch_tensor,
    )

    # In-place mode writes the result back into x.
    y = x if inplace == Inplace.INPLACE_X else TestTensor(shape, None, dtype, device)

    if y.is_broadcast():
        return

    print(
        f"Testing Softplus on {InfiniDeviceNames[device]} with shape: {shape} "
        f"dtype: {InfiniDtypeNames[dtype]} inplace: {inplace}"
    )

    # Reference answer computed before the library may overwrite x in place.
    ans = softplus(x.torch_tensor())

    if sync is not None:
        sync()

    descriptor = infiniopOperatorDescriptor_t()
    check_error(
        LIBINFINIOP.infiniopCreateSoftplusDescriptor(
            handle, ctypes.byref(descriptor), y.descriptor, x.descriptor
        )
    )

    # Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
    for tensor in [x, y]:
        tensor.destroy_desc()

    workspace_size = c_uint64(0)
    check_error(
        LIBINFINIOP.infiniopGetSoftplusWorkspaceSize(
            descriptor, ctypes.byref(workspace_size)
        )
    )
    workspace = TestWorkspace(workspace_size.value, y.device)

    def lib_softplus():
        LIBINFINIOP.infiniopSoftplus(
            descriptor,
            workspace.data(),
            workspace.size(),
            y.data(),
            x.data(),
            None,
        )

    lib_softplus()

    atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
    if DEBUG:
        debug(y.actual_tensor(), ans, atol=atol, rtol=rtol)
    assert torch.allclose(y.actual_tensor(), ans, atol=atol, rtol=rtol)

    # Profiling workflow
    if PROFILE:
        # fmt: off
        profile_operation("PyTorch", lambda: softplus(x.torch_tensor()), device, NUM_PRERUN, NUM_ITERATIONS)
        profile_operation("    lib", lambda: lib_softplus(), device, NUM_PRERUN, NUM_ITERATIONS)
        # fmt: on

    check_error(LIBINFINIOP.infiniopDestroySoftplusDescriptor(descriptor))
if __name__ == "__main__":
    args = get_args()

    # Configure testing options from the CLI.
    DEBUG = args.debug
    PROFILE = args.profile
    NUM_PRERUN = args.num_prerun
    NUM_ITERATIONS = args.num_iterations

    for device in get_test_devices(args):
        test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)

    print("\033[92mTest passed!\033[0m")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment