Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
52f0dcf0
Unverified
Commit
52f0dcf0
authored
Feb 12, 2026
by
thatPepe
Committed by
GitHub
Feb 12, 2026
Browse files
Merge pull request #1019 from InfiniTensor/issue/1008
Issue/1008
parents
d0f405ce
68026bd1
Changes
292
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
44 additions
and
14 deletions
+44
-14
test/infinicore/ops/upsample_bilinear.py
test/infinicore/ops/upsample_bilinear.py
+1
-1
test/infinicore/ops/upsample_nearest.py
test/infinicore/ops/upsample_nearest.py
+1
-1
test/infinicore/ops/vander.py
test/infinicore/ops/vander.py
+1
-1
test/infinicore/ops/var.py
test/infinicore/ops/var.py
+1
-1
test/infinicore/ops/var_mean.py
test/infinicore/ops/var_mean.py
+1
-1
test/infinicore/ops/vdot.py
test/infinicore/ops/vdot.py
+1
-1
test/infinicore/ops/where.py
test/infinicore/ops/where.py
+1
-1
test/infiniop/gelu.py
test/infiniop/gelu.py
+10
-0
test/infiniop/ones.py
test/infiniop/ones.py
+7
-0
test/infiniop/zeros.py
test/infiniop/zeros.py
+7
-0
xmake.lua
xmake.lua
+5
-3
xmake/iluvatar.lua
xmake/iluvatar.lua
+8
-4
No files found.
test/infinicore/ops/upsample_bilinear.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
...
...
test/infinicore/ops/upsample_nearest.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
...
...
test/infinicore/ops/vander.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
# Test cases format: (input_shape, input_strides_or_None, N)
...
...
test/infinicore/ops/var.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
...
...
test/infinicore/ops/var_mean.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
...
...
test/infinicore/ops/vdot.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
# Test cases format: (vec1_shape, vec2_shape, vec1_strides_or_None, vec2_strides_or_None)
...
...
test/infinicore/ops/where.py
View file @
52f0dcf0
...
...
@@ -3,8 +3,8 @@ import os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
import
torch
from
framework
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
# Test cases format: (condition_shape, cond_strides_or_None, x_shape_or_None, y_shape_or_None)
...
...
test/infiniop/gelu.py
View file @
52f0dcf0
...
...
@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype
,
InfiniDtypeNames
,
InfiniDeviceNames
,
InfiniDeviceEnum
,
infiniopOperatorDescriptor_t
,
)
from
enum
import
Enum
,
auto
...
...
@@ -83,6 +84,12 @@ def test(
dtype
=
torch
.
float16
,
sync
=
None
,
):
# Skip strided cases on Iluvatar: GELU with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if
device
==
InfiniDeviceEnum
.
ILUVATAR
and
(
input_stride
is
not
None
or
output_stride
is
not
None
):
return
input
=
TestTensor
(
shape
,
input_stride
,
dtype
,
device
)
if
inplace
==
Inplace
.
INPLACE
:
if
input_stride
!=
output_stride
:
...
...
@@ -141,6 +148,9 @@ def test(
lib_gelu
()
if
sync
is
not
None
:
sync
()
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
debug
(
output
.
actual_tensor
(),
output
.
torch_tensor
(),
atol
=
atol
,
rtol
=
rtol
)
...
...
test/infiniop/ones.py
View file @
52f0dcf0
...
...
@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype
,
InfiniDtypeNames
,
InfiniDeviceNames
,
InfiniDeviceEnum
,
infiniopOperatorDescriptor_t
,
)
from
enum
import
Enum
,
auto
...
...
@@ -112,6 +113,12 @@ def test(
dtype
=
None
,
sync
=
None
,
):
# Skip strided cases on Iluvatar: Ones with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if
device
==
InfiniDeviceEnum
.
ILUVATAR
and
(
x_stride
is
not
None
or
y_stride
is
not
None
):
return
if
dtype
in
[
InfiniDtype
.
F16
,
InfiniDtype
.
BF16
,
InfiniDtype
.
F32
,
InfiniDtype
.
F64
]:
x
=
TestTensor
(
shape
,
x_stride
,
dtype
,
device
)
elif
dtype
in
[
InfiniDtype
.
BYTE
,
InfiniDtype
.
U8
,
InfiniDtype
.
U16
,
InfiniDtype
.
U32
,
InfiniDtype
.
U64
,
...
...
test/infiniop/zeros.py
View file @
52f0dcf0
...
...
@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype
,
InfiniDtypeNames
,
InfiniDeviceNames
,
InfiniDeviceEnum
,
infiniopOperatorDescriptor_t
,
)
from
enum
import
Enum
,
auto
...
...
@@ -114,6 +115,12 @@ def test(
dtype
=
None
,
sync
=
None
,
):
# Skip strided cases on Iluvatar: Zeros with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if
device
==
InfiniDeviceEnum
.
ILUVATAR
and
(
x_stride
is
not
None
or
y_stride
is
not
None
):
return
if
dtype
in
[
InfiniDtype
.
F16
,
InfiniDtype
.
BF16
,
InfiniDtype
.
F32
,
InfiniDtype
.
F64
]:
x
=
TestTensor
(
shape
,
x_stride
,
dtype
,
device
)
elif
dtype
in
[
InfiniDtype
.
BYTE
,
InfiniDtype
.
U8
,
InfiniDtype
.
U16
,
InfiniDtype
.
U32
,
InfiniDtype
.
U64
,
...
...
xmake.lua
View file @
52f0dcf0
...
...
@@ -115,10 +115,12 @@ option("iluvatar-gpu")
set_description
(
"Whether to compile implementations for Iluvatar GPU"
)
option_end
()
option
(
"i
vcore-20
"
)
set_default
(
false
)
option
(
"i
luvatar_arch
"
)
set_default
(
"ivcore20"
)
set_showmenu
(
true
)
set_description
(
"Use ivcore20"
)
set_description
(
"Set Iluvatar GPU architecture (e.g. ivcore20)"
)
set_values
(
"ivcore20"
)
set_category
(
"option"
)
option_end
()
if
has_config
(
"iluvatar-gpu"
)
then
...
...
xmake/iluvatar.lua
View file @
52f0dcf0
toolchain
(
"iluvatar.toolchain"
)
local
iluvatar_arch
=
get_config
(
"iluvatar_arch"
)
or
"ivcore20"
toolchain
(
"iluvatar.toolchain"
)
set_toolset
(
"cc"
,
"clang"
)
set_toolset
(
"cxx"
,
"clang++"
)
set_toolset
(
"cu"
,
"clang++"
)
...
...
@@ -44,15 +46,15 @@ target("infiniop-iluvatar")
set_warnings
(
"all"
,
"error"
)
add_cuflags
(
"-Wno-error=unused-private-field"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
add_cuflags
(
"-fPIC"
,
"-x"
,
"ivcore"
,
"-std=c++17"
,
{
force
=
true
})
if
has_config
(
"ivcore-20"
)
then
add_cuflags
(
"--cuda-gpu-arch=ivcore20"
,
{
force
=
true
})
end
add_cuflags
(
"--cuda-gpu-arch="
..
iluvatar_arch
,
{
force
=
true
})
add_culdflags
(
"-fPIC"
)
add_cxflags
(
"-fPIC"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
add_cxxflags
(
"-fPIC"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
-- set_languages("cxx17") 天数似乎不能用这个配置
add_files
(
"../src/infiniop/devices/nvidia/*.cu"
,
"../src/infiniop/ops/*/nvidia/*.cu"
)
-- skip scaled_mm, adapt it later
-- remove_files("../src/infiniop/ops/scaled_mm/nvidia/*.cu")
-- 天数平台不支持部分 NVIDIA PTX 指令,AWQ 反量化改用 CUDA C++ 实现
add_files
(
"../src/infiniop/ops/dequantize_awq/iluvatar/*.cu"
)
...
...
@@ -75,6 +77,7 @@ target("infinirt-iluvatar")
set_warnings
(
"all"
,
"error"
)
add_cuflags
(
"-fPIC"
,
"-x"
,
"ivcore"
,
"-std=c++17"
,
{
force
=
true
})
add_cuflags
(
"--cuda-gpu-arch="
..
iluvatar_arch
,
{
force
=
true
})
add_culdflags
(
"-fPIC"
)
add_cxflags
(
"-fPIC"
)
add_cxxflags
(
"-fPIC"
)
...
...
@@ -97,6 +100,7 @@ target("infiniccl-iluvatar")
set_warnings
(
"all"
,
"error"
)
add_cuflags
(
"-fPIC"
,
"-x"
,
"ivcore"
,
"-std=c++17"
,
{
force
=
true
})
add_cuflags
(
"--cuda-gpu-arch="
..
iluvatar_arch
,
{
force
=
true
})
add_culdflags
(
"-fPIC"
)
add_cxflags
(
"-fPIC"
)
add_cxxflags
(
"-fPIC"
)
...
...
Prev
1
…
11
12
13
14
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment