Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
45a3794b
Commit
45a3794b
authored
Mar 11, 2026
by
wooway777
Browse files
issue/1031 T1-1-17
parent
cb7f0b7d
Changes
108
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
897 additions
and
4 deletions
+897
-4
scripts/python_test.py
scripts/python_test.py
+5
-4
src/infinicore/ops/avg_pool1d/avg_pool1d.cc
src/infinicore/ops/avg_pool1d/avg_pool1d.cc
+68
-0
src/infinicore/ops/avg_pool1d/avg_pool1d_infiniop.cc
src/infinicore/ops/avg_pool1d/avg_pool1d_infiniop.cc
+69
-0
src/infinicore/ops/cross_entropy/cross_entropy.cc
src/infinicore/ops/cross_entropy/cross_entropy.cc
+45
-0
src/infinicore/ops/cross_entropy/cross_entropy_infiniop.cc
src/infinicore/ops/cross_entropy/cross_entropy_infiniop.cc
+64
-0
src/infinicore/ops/equal/equal.cc
src/infinicore/ops/equal/equal.cc
+31
-0
src/infinicore/ops/equal/equal_infiniop.cc
src/infinicore/ops/equal/equal_infiniop.cc
+57
-0
src/infinicore/ops/hardswish/hardswish.cc
src/infinicore/ops/hardswish/hardswish.cc
+38
-0
src/infinicore/ops/hardswish/hardswish_infiniop.cc
src/infinicore/ops/hardswish/hardswish_infiniop.cc
+61
-0
src/infinicore/ops/hardtanh/hardtanh.cc
src/infinicore/ops/hardtanh/hardtanh.cc
+38
-0
src/infinicore/ops/hardtanh/hardtanh_infiniop.cc
src/infinicore/ops/hardtanh/hardtanh_infiniop.cc
+63
-0
src/infinicore/pybind11/ops.hpp
src/infinicore/pybind11/ops.hpp
+10
-0
src/infinicore/pybind11/ops/avg_pool1d.hpp
src/infinicore/pybind11/ops/avg_pool1d.hpp
+37
-0
src/infinicore/pybind11/ops/cross_entropy.hpp
src/infinicore/pybind11/ops/cross_entropy.hpp
+26
-0
src/infinicore/pybind11/ops/equal.hpp
src/infinicore/pybind11/ops/equal.hpp
+26
-0
src/infinicore/pybind11/ops/hardswish.hpp
src/infinicore/pybind11/ops/hardswish.hpp
+24
-0
src/infinicore/pybind11/ops/hardtanh.hpp
src/infinicore/pybind11/ops/hardtanh.hpp
+28
-0
src/infiniop/ops/avg_pool1d/avg_pool1d.h
src/infiniop/ops/avg_pool1d/avg_pool1d.h
+103
-0
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.cc
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.cc
+96
-0
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.h
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.h
+8
-0
No files found.
scripts/python_test.py
View file @
45a3794b
...
...
@@ -17,12 +17,12 @@ def run_tests(args):
"causal_softmax.py"
,
"clip.py"
,
"conv.py"
,
#"dequantize_awq.py",
#
"dequantize_awq.py",
"gelu.py"
,
"gemm.py"
,
#"layer_norm.py",
#
"layer_norm.py",
"logsoftmax.py"
,
#"lp_norm.py",
#
"lp_norm.py",
"mul.py"
,
"ones.py"
,
"random_sample.py"
,
...
...
@@ -31,7 +31,7 @@ def run_tests(args):
"rms_norm.py"
,
"rope.py"
,
"sigmoid.py"
,
#"softmax.py",
#
"softmax.py",
"softplus.py"
,
"sub.py"
,
"swiglu.py"
,
...
...
@@ -42,6 +42,7 @@ def run_tests(args):
# "paged_attention.py",
# "paged_caching.py",
# "paged_attention_prefill.py"
"cross_entropy.py"
,
]:
result
=
subprocess
.
run
(
f
"python
{
test
}
{
args
}
--debug"
,
text
=
True
,
encoding
=
"utf-8"
,
shell
=
True
...
...
src/infinicore/ops/avg_pool1d/avg_pool1d.cc
0 → 100644
View file @
45a3794b
#include "infinicore/ops/avg_pool1d.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Lazily-constructed singleton registry mapping device types to AvgPool1d
// kernel implementations.
common::OpDispatcher<AvgPool1d::schema> &AvgPool1d::dispatcher() {
    static common::OpDispatcher<AvgPool1d::schema> table;
    return table;
}
// Run the AvgPool1d kernel registered for `output`'s device.
// A stride of 0 is interpreted as "use kernel_size" (PyTorch convention).
// Throws std::runtime_error when no implementation is registered.
void AvgPool1d::execute(Tensor output, Tensor input,
                        size_t kernel_size, size_t stride, size_t padding) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);

    const size_t effective_stride = (stride == 0) ? kernel_size : stride;

    infinicore::context::setDevice(output->device());

    const auto device_type = output->device().getType();
    const auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error(
            "No AvgPool1d implementation for device type: "
            + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, kernel_size, effective_stride, padding);
}
// Out-of-place 1-D average pooling.
//
// input        [N, C, L] tensor.
// kernel_size  pooling window width; must be non-zero.
// stride       window step; 0 means "use kernel_size" (PyTorch convention).
// padding      implicit zero padding on both ends of the length dimension.
//
// Returns a freshly allocated [N, C, L_out] tensor with
// L_out = (L + 2*padding - kernel_size) / stride + 1.
// Throws std::runtime_error on invalid shapes or parameters.
Tensor avg_pool1d(Tensor input, size_t kernel_size, size_t stride, size_t padding) {
    // Fix: reject kernel_size == 0 explicitly. Previously, kernel_size == 0
    // combined with the default stride (0 -> kernel_size) made stride 0 and
    // the out_width computation below divided by zero.
    if (kernel_size == 0) {
        throw std::runtime_error("AvgPool1d kernel_size must be non-zero");
    }
    if (stride == 0) {
        stride = kernel_size;
    }

    const auto &shape = input->shape();
    if (shape.size() != 3) {
        throw std::runtime_error("AvgPool1d expects tensors with shape [N, C, L]");
    }

    const size_t n = shape[0];
    const size_t c = shape[1];
    const size_t l_in = shape[2];

    // The window must fit inside the padded input at least once.
    if (l_in + 2 * padding < kernel_size) {
        throw std::runtime_error("AvgPool1d kernel_size is larger than padded length");
    }

    const size_t out_width = (l_in + 2 * padding - kernel_size) / stride + 1;

    Shape out_shape = {n, c, out_width};
    auto output = Tensor::empty(out_shape, input->dtype(), input->device());
    avg_pool1d_(output, input, kernel_size, stride, padding);
    return output;
}
// In-place variant: writes the pooling result into a caller-provided tensor.
void avg_pool1d_(Tensor output, Tensor input,
                 size_t kernel_size, size_t stride, size_t padding) {
    AvgPool1d::execute(output, input, kernel_size, stride, padding);
}

} // namespace infinicore::op
src/infinicore/ops/avg_pool1d/avg_pool1d_infiniop.cc
0 → 100644
View file @
45a3794b
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/avg_pool1d.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::avg_pool1d_impl::infiniop {

// Per-thread LRU cache of infiniop descriptors keyed by a hash of the call
// signature; the eviction callback destroys the descriptor.
thread_local common::OpCache<size_t, infiniopAvgPool1dDescriptor_t> caches(
    100,
    [](infiniopAvgPool1dDescriptor_t &desc) {
        if (desc == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyAvgPool1dDescriptor(desc));
        desc = nullptr;
    });
// infiniop-backed AvgPool1d kernel, registered for every device type.
// Creates (or re-uses from the thread-local cache) a descriptor for this
// exact call signature, then launches on the current stream.
void calculate(Tensor output, Tensor input,
               size_t kernel_size, size_t stride, size_t padding) {
    // Normalize before hashing so stride=0 and stride=kernel_size map to the
    // same cache entry.
    if (stride == 0) {
        stride = kernel_size;
    }

    size_t seed = hash_combine(output, input, kernel_size, stride, padding);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    auto desc_opt = cache.get(seed);
    infiniopAvgPool1dDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateAvgPool1dDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc(),
            kernel_size, stride, padding));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAvgPool1dWorkspaceSize(desc, &workspace_size));

    // Fix: only allocate when the backend actually requires a workspace.
    // The original allocated unconditionally (even for size 0), inconsistent
    // with the equal/hardswish/hardtanh wrappers in this change set.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }

    INFINICORE_CHECK_ERROR(infiniopAvgPool1d(
        desc, workspace_ptr, workspace_size,
        output->data(), input->data(), context::getStream()));
}
// Self-registration at load time for all device types. NOTE(review): the
// `false` flag presumably means "do not override existing registrations" —
// confirm against OpDispatcher::registerAll.
static bool registered = []() {
    AvgPool1d::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::avg_pool1d_impl::infiniop
src/infinicore/ops/cross_entropy/cross_entropy.cc
0 → 100644
View file @
45a3794b
#include "infinicore/ops/cross_entropy.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Singleton dispatch table for CrossEntropy device implementations.
common::OpDispatcher<CrossEntropy::schema> &CrossEntropy::dispatcher() {
    static common::OpDispatcher<CrossEntropy::schema> table;
    return table;
}
// Run the registered CrossEntropy kernel for `output`'s device.
// All three tensors must live on the same device.
void CrossEntropy::execute(Tensor output, Tensor input, Tensor target) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(input, target);

    infinicore::context::setDevice(output->device());

    const auto device_type = output->device().getType();
    const auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error(
            "No CrossEntropy implementation found for device type: "
            + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, target);
}
// Out-of-place token-wise cross entropy: the per-token loss tensor takes the
// target's shape and the logits' dtype/device.
Tensor cross_entropy(Tensor input, Tensor target) {
    auto loss = Tensor::empty(target->shape(), input->dtype(), input->device());
    cross_entropy_(loss, input, target);
    return loss;
}
// Variant writing the loss into a caller-provided tensor.
void cross_entropy_(Tensor output, Tensor input, Tensor target) {
    CrossEntropy::execute(output, input, target);
}

} // namespace infinicore::op
src/infinicore/ops/cross_entropy/cross_entropy_infiniop.cc
0 → 100644
View file @
45a3794b
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/cross_entropy.hpp"
#include <infiniop.h>
namespace infinicore::op::cross_entropy_impl::infiniop {

// Per-thread LRU descriptor cache; the eviction callback destroys the
// underlying infiniop descriptor.
thread_local common::OpCache<size_t, infiniopCrossEntropyDescriptor_t> caches(
    100,
    [](infiniopCrossEntropyDescriptor_t &desc) {
        if (desc == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyCrossEntropyDescriptor(desc));
        desc = nullptr;
    });
// infiniop-backed CrossEntropy kernel, registered for every device type.
void calculate(Tensor output, Tensor input, Tensor target) {
    size_t seed = hash_combine(output, input, target);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    auto desc_opt = cache.get(seed);
    infiniopCrossEntropyDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateCrossEntropyDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc(), target->desc()));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetCrossEntropyWorkspaceSize(desc, &workspace_size));

    // Fix: skip the allocation when no workspace is needed. The original
    // allocated unconditionally, inconsistent with the equal/hardswish/
    // hardtanh infiniop wrappers in this change set.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }

    INFINICORE_CHECK_ERROR(infiniopCrossEntropy(
        desc, workspace_ptr, workspace_size,
        output->data(), input->data(), target->data(), context::getStream()));
}
// Self-registration at load time for all device types (non-overriding flag).
static bool registered = []() {
    CrossEntropy::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::cross_entropy_impl::infiniop
src/infinicore/ops/equal/equal.cc
0 → 100644
View file @
45a3794b
#include "infinicore/ops/equal.hpp"

#include "../../utils.hpp"

#include <stdexcept>
namespace infinicore::op {

// Singleton dispatch table for Equal device implementations.
common::OpDispatcher<Equal::schema> &Equal::dispatcher() {
    static common::OpDispatcher<Equal::schema> table;
    return table;
}
// Dispatch elementwise equality to the kernel registered for `out`'s device.
void Equal::execute(Tensor out, Tensor a, Tensor b) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, a, b);
    infinicore::context::setDevice(out->device());

    const auto device_type = out->device().getType();
    const auto func = dispatcher().lookup(device_type);
    // Fix: the original invoked the lookup result unconditionally, so a
    // missing registration called through a null function (undefined
    // behavior). Fail loudly instead, matching every other op in this set.
    if (func == nullptr) {
        throw std::runtime_error(
            "No Equal implementation found for device type: "
            + std::to_string(static_cast<int>(device_type)));
    }
    func(out, a, b);
}
// Out-of-place elementwise equality; the result is a BOOL tensor shaped
// like `a`, on `a`'s device.
Tensor equal(Tensor a, Tensor b) {
    auto result = Tensor::empty(a->shape(), DataType::BOOL, a->device());
    equal_(result, a, b);
    return result;
}
// In-place elementwise equality. The destination must already be BOOL.
void equal_(Tensor out, Tensor a, Tensor b) {
    if (out->dtype() != DataType::BOOL) {
        throw std::runtime_error("Equal expects bool output tensor.");
    }
    Equal::execute(out, a, b);
}

} // namespace infinicore::op
src/infinicore/ops/equal/equal_infiniop.cc
0 → 100644
View file @
45a3794b
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/equal.hpp"
#include <infiniop.h>
namespace infinicore::op::equal_impl::infiniop {

// Per-thread LRU descriptor cache; evicted descriptors are destroyed.
thread_local common::OpCache<size_t, infiniopEqualDescriptor_t> caches(
    100,
    [](infiniopEqualDescriptor_t &desc) {
        if (desc == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyEqualDescriptor(desc));
        desc = nullptr;
    });
// infiniop-backed elementwise-equal kernel, registered for all device types.
void calculate(Tensor out, Tensor a, Tensor b) {
    size_t seed = hash_combine(out, a, b);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    infiniopEqualDescriptor_t desc = nullptr;
    auto desc_opt = cache.get(seed);
    if (desc_opt) {
        desc = *desc_opt;
    } else {
        INFINICORE_CHECK_ERROR(infiniopCreateEqualDescriptor(
            context::getInfiniopHandle(device), &desc,
            out->desc(), a->desc(), b->desc()));
        cache.put(seed, desc);
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetEqualWorkspaceSize(desc, &workspace_size));

    // Allocate a workspace only when the backend asks for one.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }

    INFINICORE_CHECK_ERROR(infiniopEqual(
        desc, workspace_ptr, workspace_size,
        out->data(), a->data(), b->data(), context::getStream()));
}
// Self-registration at load time for all device types (non-overriding flag).
static bool registered = []() {
    Equal::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::equal_impl::infiniop
src/infinicore/ops/hardswish/hardswish.cc
0 → 100644
View file @
45a3794b
#include "infinicore/ops/hardswish.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Singleton dispatch table for Hardswish device implementations.
common::OpDispatcher<Hardswish::schema> &Hardswish::dispatcher() {
    static common::OpDispatcher<Hardswish::schema> table;
    return table;
}
// Apply Hardswish via the kernel registered for `output`'s device.
void Hardswish::execute(Tensor output, Tensor input) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(output->device());

    const auto device_type = output->device().getType();
    const auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error(
            "No Hardswish implementation found for device type: "
            + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input);
}
// Out-of-place Hardswish: allocates an output matching the input's shape,
// dtype, and device.
Tensor hardswish(Tensor input) {
    auto result = Tensor::empty(input->shape(), input->dtype(), input->device());
    hardswish_(result, input);
    return result;
}
// Variant writing into a caller-provided tensor.
void hardswish_(Tensor output, Tensor input) {
    Hardswish::execute(output, input);
}

} // namespace infinicore::op
src/infinicore/ops/hardswish/hardswish_infiniop.cc
0 → 100644
View file @
45a3794b
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardswish.hpp"
#include <infiniop.h>
namespace infinicore::op::hardswish_impl::infiniop {

// Per-thread LRU descriptor cache; evicted descriptors are destroyed.
thread_local common::OpCache<size_t, infiniopHardSwishDescriptor_t> caches(
    100,
    [](infiniopHardSwishDescriptor_t &desc) {
        if (desc == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyHardSwishDescriptor(desc));
        desc = nullptr;
    });
// infiniop-backed Hardswish kernel, registered for every device type.
void calculate(Tensor output, Tensor input) {
    size_t seed = hash_combine(output, input);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    infiniopHardSwishDescriptor_t desc = nullptr;
    auto desc_opt = cache.get(seed);
    if (desc_opt) {
        desc = *desc_opt;
    } else {
        INFINICORE_CHECK_ERROR(infiniopCreateHardSwishDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc()));
        cache.put(seed, desc);
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetHardSwishWorkspaceSize(desc, &workspace_size));

    // Allocate a workspace only when the backend asks for one.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }

    INFINICORE_CHECK_ERROR(infiniopHardSwish(
        desc, workspace_ptr, workspace_size,
        output->data(), input->data(), context::getStream()));
}
// Self-registration at load time for all device types (non-overriding flag).
static bool registered = []() {
    Hardswish::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::hardswish_impl::infiniop
src/infinicore/ops/hardtanh/hardtanh.cc
0 → 100644
View file @
45a3794b
#include "infinicore/ops/hardtanh.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Singleton dispatch table for HardTanh device implementations.
common::OpDispatcher<HardTanh::schema> &HardTanh::dispatcher() {
    static common::OpDispatcher<HardTanh::schema> table;
    return table;
}
// Apply HardTanh (clamp to [min_val, max_val]) via the kernel registered for
// `output`'s device.
void HardTanh::execute(Tensor output, Tensor input, float min_val, float max_val) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(output->device());

    const auto device_type = output->device().getType();
    const auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error(
            "No HardTanh implementation found for device type: "
            + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, min_val, max_val);
}
// Out-of-place HardTanh: allocates an output matching the input.
Tensor hardtanh(Tensor input, float min_val, float max_val) {
    auto result = Tensor::empty(input->shape(), input->dtype(), input->device());
    hardtanh_(result, input, min_val, max_val);
    return result;
}
// Variant writing into a caller-provided tensor.
void hardtanh_(Tensor output, Tensor input, float min_val, float max_val) {
    HardTanh::execute(output, input, min_val, max_val);
}

} // namespace infinicore::op
src/infinicore/ops/hardtanh/hardtanh_infiniop.cc
0 → 100644
View file @
45a3794b
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardtanh.hpp"
#include <infiniop.h>
namespace infinicore::op::hardtanh_impl::infiniop {

// Per-thread LRU descriptor cache; evicted descriptors are destroyed.
thread_local common::OpCache<size_t, infiniopHardTanhDescriptor_t> caches(
    100,
    [](infiniopHardTanhDescriptor_t &desc) {
        if (desc == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyHardTanhDescriptor(desc));
        desc = nullptr;
    });
// infiniop-backed HardTanh kernel, registered for every device type.
// min_val/max_val take part in the cache key because they are baked into
// the descriptor at creation time.
void calculate(Tensor output, Tensor input, float min_val, float max_val) {
    size_t seed = hash_combine(output, input, min_val, max_val);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);

    infiniopHardTanhDescriptor_t desc = nullptr;
    auto desc_opt = cache.get(seed);
    if (desc_opt) {
        desc = *desc_opt;
    } else {
        INFINICORE_CHECK_ERROR(infiniopCreateHardTanhDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc(), min_val, max_val));
        cache.put(seed, desc);
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetHardTanhWorkspaceSize(desc, &workspace_size));

    // Allocate a workspace only when the backend asks for one.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }

    INFINICORE_CHECK_ERROR(infiniopHardTanh(
        desc, workspace_ptr, workspace_size,
        output->data(), input->data(), context::getStream()));
}
// Self-registration at load time for all device types (non-overriding flag).
static bool registered = []() {
    HardTanh::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::hardtanh_impl::infiniop
src/infinicore/pybind11/ops.hpp
View file @
45a3794b
...
...
@@ -6,9 +6,14 @@
#include "ops/add_rms_norm.hpp"
#include "ops/all.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/equal.hpp"
#include "ops/flash_attention.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/linear.hpp"
#include "ops/linear_w8a8i8.hpp"
...
...
@@ -45,12 +50,16 @@ inline void bind(py::module &m) {
bind_matmul
(
m
);
bind_mul
(
m
);
bind_mha_varlen
(
m
);
bind_hardswish
(
m
);
bind_hardtanh
(
m
);
bind_paged_attention
(
m
);
bind_paged_attention_prefill
(
m
);
bind_paged_caching
(
m
);
bind_random_sample
(
m
);
bind_cross_entropy
(
m
);
bind_rearrange
(
m
);
bind_rms_norm
(
m
);
bind_avg_pool1d
(
m
);
bind_silu
(
m
);
bind_swiglu
(
m
);
bind_rope
(
m
);
...
...
@@ -62,6 +71,7 @@ inline void bind(py::module &m) {
bind_var
(
m
);
bind_topk
(
m
);
bind_all
(
m
);
bind_equal
(
m
);
}
}
// namespace infinicore::ops
src/infinicore/pybind11/ops/avg_pool1d.hpp
0 → 100644
View file @
45a3794b
#pragma once
#include <optional>
#include <pybind11/pybind11.h>
#include "infinicore/ops/avg_pool1d.hpp"
namespace py = pybind11;

namespace infinicore::ops {

// Python bindings for AvgPool1d. `stride=None` is forwarded as 0, which the
// C++ layer treats as "use kernel_size" (PyTorch convention).
inline void bind_avg_pool1d(py::module &m) {
    m.def(
        "avg_pool1d",
        [](::infinicore::Tensor input, size_t kernel_size,
           std::optional<size_t> stride, size_t padding) {
            return op::avg_pool1d(input, kernel_size, stride.value_or(0), padding);
        },
        py::arg("input"),
        py::arg("kernel_size"),
        py::arg("stride") = py::none(),
        py::arg("padding") = 0,
        R"doc(AvgPool1d out-of-place.)doc");

    m.def(
        "avg_pool1d_",
        [](::infinicore::Tensor output, ::infinicore::Tensor input,
           size_t kernel_size, std::optional<size_t> stride, size_t padding) {
            op::avg_pool1d_(output, input, kernel_size, stride.value_or(0), padding);
        },
        py::arg("output"),
        py::arg("input"),
        py::arg("kernel_size"),
        py::arg("stride") = py::none(),
        py::arg("padding") = 0,
        R"doc(AvgPool1d in-place variant writing to provided output tensor.)doc");
}

} // namespace infinicore::ops
src/infinicore/pybind11/ops/cross_entropy.hpp
0 → 100644
View file @
45a3794b
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/cross_entropy.hpp"
namespace py = pybind11;

namespace infinicore::ops {

// Python bindings for token-wise cross entropy (no reduction).
inline void bind_cross_entropy(py::module &m) {
    m.def("cross_entropy", &op::cross_entropy,
          py::arg("logits"),
          py::arg("target"),
          R"doc(Token-wise cross entropy loss without reduction.)doc");

    m.def("cross_entropy_", &op::cross_entropy_,
          py::arg("loss"),
          py::arg("logits"),
          py::arg("target"),
          R"doc(Write cross entropy loss into a provided tensor.)doc");
}

} // namespace infinicore::ops
src/infinicore/pybind11/ops/equal.hpp
0 → 100644
View file @
45a3794b
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/equal.hpp"
namespace py = pybind11;

namespace infinicore::ops {

// Python bindings for elementwise equality.
inline void bind_equal(py::module &m) {
    m.def("equal", &op::equal,
          py::arg("a"),
          py::arg("b"),
          R"doc(Elementwise equality returning a bool tensor.)doc");

    m.def("equal_", &op::equal_,
          py::arg("out"),
          py::arg("a"),
          py::arg("b"),
          R"doc(In-place elementwise equality writing into `out`.)doc");
}

} // namespace infinicore::ops
src/infinicore/pybind11/ops/hardswish.hpp
0 → 100644
View file @
45a3794b
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/hardswish.hpp"
namespace py = pybind11;

namespace infinicore::ops {

// Python bindings for the Hardswish activation.
inline void bind_hardswish(py::module &m) {
    m.def("hardswish", &op::hardswish,
          py::arg("input"),
          R"doc(Out-of-place Hardswish activation.)doc");

    m.def("hardswish_", &op::hardswish_,
          py::arg("output"),
          py::arg("input"),
          R"doc(In-place Hardswish activation.)doc");
}

} // namespace infinicore::ops
src/infinicore/pybind11/ops/hardtanh.hpp
0 → 100644
View file @
45a3794b
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/hardtanh.hpp"
namespace py = pybind11;

namespace infinicore::ops {

// Python bindings for HardTanh; defaults mirror torch.nn.Hardtanh
// (min_val=-1.0, max_val=1.0).
inline void bind_hardtanh(py::module &m) {
    m.def("hardtanh", &op::hardtanh,
          py::arg("input"),
          py::arg("min_val") = -1.0f,
          py::arg("max_val") = 1.0f,
          R"doc(Apply the HardTanh activation.)doc");

    m.def("hardtanh_", &op::hardtanh_,
          py::arg("output"),
          py::arg("input"),
          py::arg("min_val") = -1.0f,
          py::arg("max_val") = 1.0f,
          R"doc(In-place HardTanh activation.)doc");
}

} // namespace infinicore::ops
src/infiniop/ops/avg_pool1d/avg_pool1d.h
0 → 100644
View file @
45a3794b
#ifndef __AVG_POOL1D_H__
#define __AVG_POOL1D_H__
#include "../../../utils.h"
#include "../../operator.h"
#include "../../tensor.h"
#include "infiniop/ops/avg_pool1d.h"
// DESCRIPTOR(NAMESPACE) declares op::avg_pool1d::NAMESPACE::Descriptor: an
// InfiniopDescriptor holding a validated AvgPool1dInfo, a backend-specific
// Opaque state pointer, and a precomputed workspace size. Each backend
// (e.g. cpu) instantiates this macro in its header and defines create(),
// calculate(), and the destructor in its .cc file. Comments cannot go
// inside the macro body — // before a backslash would break continuation.
#define DESCRIPTOR(NAMESPACE) \
namespace op::avg_pool1d::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
AvgPool1dInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
AvgPool1dInfo info, \
size_t workspace_size_, \
Opaque *opaque, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size_) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t y_desc, \
infiniopTensorDescriptor_t x_desc, \
size_t kernel_size, \
size_t stride, \
size_t padding); \
\
infiniStatus_t calculate( \
void *workspace, \
size_t workspace_size, \
void *y, \
const void *x, \
void *stream) const; \
}; \
}
// Validated, backend-agnostic description of one AvgPool1d launch: dtype,
// logical sizes, pooling parameters, and element strides of both tensors.
// Instances are constructed only through createAvgPool1dInfo().
class AvgPool1dInfo {
private:
    AvgPool1dInfo() = default;

public:
    infiniDtype_t dtype;
    size_t batch, channels, in_width, out_width;
    size_t kernel_size, stride, padding;
    ptrdiff_t y_stride_batch, y_stride_channel, y_stride_width;
    ptrdiff_t x_stride_batch, x_stride_channel, x_stride_width;

    // Validate descriptors/parameters and build an AvgPool1dInfo.
    // x must be [N, C, L]; y must be [N, C, L_out] with
    // L_out = (L + 2*padding - kernel_size) / stride + 1.
    static utils::Result<AvgPool1dInfo> createAvgPool1dInfo(
        infiniopTensorDescriptor_t y_desc,
        infiniopTensorDescriptor_t x_desc,
        size_t kernel_size,
        size_t stride,
        size_t padding) {
        CHECK_OR_RETURN(y_desc != nullptr && x_desc != nullptr,
                        INFINI_STATUS_NULL_POINTER);
        // Fix: the infiniop C API can be called directly, so reject
        // kernel_size == 0 and stride == 0 here instead of dividing by zero
        // in the out_width computation below.
        CHECK_OR_RETURN(kernel_size != 0 && stride != 0,
                        INFINI_STATUS_BAD_TENSOR_SHAPE);

        const infiniDtype_t dtype = y_desc->dtype();
        CHECK_OR_RETURN(dtype == x_desc->dtype(), INFINI_STATUS_BAD_TENSOR_DTYPE);
        CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_BF16,
                    INFINI_DTYPE_F32, INFINI_DTYPE_F64);

        CHECK_OR_RETURN(y_desc->ndim() == 3 && x_desc->ndim() == 3,
                        INFINI_STATUS_BAD_TENSOR_SHAPE);

        size_t batch = x_desc->dim(0);
        size_t channels = x_desc->dim(1);
        size_t in_width = x_desc->dim(2);
        CHECK_OR_RETURN(y_desc->dim(0) == batch, INFINI_STATUS_BAD_TENSOR_SHAPE);
        CHECK_OR_RETURN(y_desc->dim(1) == channels, INFINI_STATUS_BAD_TENSOR_SHAPE);

        // The window must fit inside the padded input at least once.
        size_t padded_len = in_width + 2 * padding;
        CHECK_OR_RETURN(padded_len >= kernel_size, INFINI_STATUS_BAD_TENSOR_SHAPE);
        size_t out_width = (padded_len - kernel_size) / stride + 1;
        CHECK_OR_RETURN(y_desc->dim(2) == out_width, INFINI_STATUS_BAD_TENSOR_SHAPE);

        return utils::Result<AvgPool1dInfo>(AvgPool1dInfo{
            dtype,
            batch, channels, in_width, out_width,
            kernel_size, stride, padding,
            y_desc->stride(0), y_desc->stride(1), y_desc->stride(2),
            x_desc->stride(0), x_desc->stride(1), x_desc->stride(2)});
    }
};
#endif
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.cc
0 → 100644
View file @
45a3794b
#include "avg_pool1d_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
#include <algorithm>
namespace op::avg_pool1d::cpu {

// The CPU backend keeps no Opaque state, so the default destructor suffices.
Descriptor::~Descriptor() = default;
// Validate shapes/dtypes and build a CPU descriptor. The CPU kernel needs
// neither a workspace nor opaque state.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    size_t kernel_size,
    size_t stride,
    size_t padding) {
    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);

    auto info = AvgPool1dInfo::createAvgPool1dInfo(
        y_desc, x_desc, kernel_size, stride, padding);
    CHECK_RESULT(info);

    *desc_ptr = new Descriptor(info.take(), /*workspace_size=*/0, nullptr,
                               handle->device, handle->device_id);
    return INFINI_STATUS_SUCCESS;
}
// Reference CPU kernel for 1-D average pooling.
//
// Semantics match torch.nn.AvgPool1d defaults: zero padding participates in
// the window and the divisor is always kernel_size (count_include_pad=true).
// Accumulation is performed in float for every element type T.
template <typename T>
infiniStatus_t calculateAvgPool1d(const AvgPool1dInfo &info, T *y, const T *x) {
    const float inv_kernel = 1.0f / static_cast<float>(info.kernel_size);
    // Fix: cast padding to signed once, so the window-start subtraction
    // below is pure signed arithmetic. The original subtracted a size_t
    // from a long long, which promotes to unsigned and relies on wraparound.
    const long long pad = static_cast<long long>(info.padding);

#pragma omp parallel for
    for (ptrdiff_t bc = 0; bc < ptrdiff_t(info.batch * info.channels); ++bc) {
        const ptrdiff_t b = bc / ptrdiff_t(info.channels);
        const ptrdiff_t c = bc % ptrdiff_t(info.channels);
        // Fix: keep element offsets signed (ptrdiff_t). The original folded
        // the (signed) strides into size_t, which is undefined behavior for
        // tensors with negative strides.
        const ptrdiff_t y_base = b * info.y_stride_batch + c * info.y_stride_channel;
        const ptrdiff_t x_base = b * info.x_stride_batch + c * info.x_stride_channel;

        for (size_t ow = 0; ow < info.out_width; ++ow) {
            const ptrdiff_t y_offset = y_base + ptrdiff_t(ow) * info.y_stride_width;

            // Window [start_w, end_w) in input coordinates; it may extend
            // into the implicit zero padding on either side.
            const long long start_w = static_cast<long long>(ow * info.stride) - pad;
            const long long end_w = start_w + static_cast<long long>(info.kernel_size);
            const long long valid_start = std::max(0LL, start_w);
            const long long valid_end = std::min(static_cast<long long>(info.in_width), end_w);

            float sum = 0.0f;
            for (long long iw = valid_start; iw < valid_end; ++iw) {
                const ptrdiff_t x_offset = x_base + ptrdiff_t(iw) * info.x_stride_width;
                sum += utils::cast<float>(x[x_offset]);
            }
            // Out-of-range taps contribute zero but still count in the
            // divisor (count_include_pad behavior).
            y[y_offset] = utils::cast<T>(sum * inv_kernel);
        }
    }
    return INFINI_STATUS_SUCCESS;
}
#define CALCULATE(TDATA) calculateAvgPool1d(_info, (TDATA *)y, (const TDATA *)x)

// Dispatch on the runtime dtype. workspace/workspace_size/stream are part of
// the common descriptor interface but unused by the synchronous CPU kernel.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) const {
    switch (_info.dtype) {
    case INFINI_DTYPE_F16:
        return CALCULATE(fp16_t);
    case INFINI_DTYPE_BF16:
        return CALCULATE(bf16_t);
    case INFINI_DTYPE_F32:
        return CALCULATE(float);
    case INFINI_DTYPE_F64:
        return CALCULATE(double);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
}

#undef CALCULATE

} // namespace op::avg_pool1d::cpu
src/infiniop/ops/avg_pool1d/cpu/avg_pool1d_cpu.h
0 → 100644
View file @
45a3794b
#ifndef __INFINIOP_AVG_POOL1D_CPU_H__
#define __INFINIOP_AVG_POOL1D_CPU_H__

#include "../avg_pool1d.h"

// Declare op::avg_pool1d::cpu::Descriptor; definitions live in
// avg_pool1d_cpu.cc.
DESCRIPTOR(cpu)

#endif // __INFINIOP_AVG_POOL1D_CPU_H__
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment