Unverified Commit 52f0dcf0 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1019 from InfiniTensor/issue/1008

Issue/1008
parents d0f405ce 68026bd1
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import (
BaseOperatorTest,
TensorSpec,
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import (
BaseOperatorTest,
TensorSpec,
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner
# Test cases format: (input_shape, input_strides_or_None, N)
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import (
BaseOperatorTest,
TensorSpec,
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import (
BaseOperatorTest,
TensorSpec,
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner
# Test cases format: (vec1_shape, vec2_shape, vec1_strides_or_None, vec2_strides_or_None)
......
......@@ -3,8 +3,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
import torch
from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner
# Test cases format: (condition_shape, cond_strides_or_None, x_shape_or_None, y_shape_or_None)
......
......@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
InfiniDeviceEnum,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
......@@ -83,6 +84,12 @@ def test(
dtype=torch.float16,
sync=None,
):
# Skip strided cases on Iluvatar: GELU with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if device == InfiniDeviceEnum.ILUVATAR and (
input_stride is not None or output_stride is not None
):
return
input = TestTensor(shape, input_stride, dtype, device)
if inplace == Inplace.INPLACE:
if input_stride != output_stride:
......@@ -141,6 +148,9 @@ def test(
lib_gelu()
if sync is not None:
sync()
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
if DEBUG:
debug(output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol)
......
......@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
InfiniDeviceEnum,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
......@@ -112,6 +113,12 @@ def test(
dtype=None,
sync=None,
):
# Skip strided cases on Iluvatar: Ones with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if device == InfiniDeviceEnum.ILUVATAR and (
x_stride is not None or y_stride is not None
):
return
if dtype in [InfiniDtype.F16, InfiniDtype.BF16, InfiniDtype.F32, InfiniDtype.F64]:
x = TestTensor(shape, x_stride, dtype, device)
elif dtype in [InfiniDtype.BYTE, InfiniDtype.U8, InfiniDtype.U16, InfiniDtype.U32, InfiniDtype.U64,
......
......@@ -15,6 +15,7 @@ from libinfiniop import (
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
InfiniDeviceEnum,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
......@@ -114,6 +115,12 @@ def test(
dtype=None,
sync=None,
):
# Skip strided cases on Iluvatar: Zeros with non-contiguous tensors can hang the GPU (requires ixsmi -r to recover)
if device == InfiniDeviceEnum.ILUVATAR and (
x_stride is not None or y_stride is not None
):
return
if dtype in [InfiniDtype.F16, InfiniDtype.BF16, InfiniDtype.F32, InfiniDtype.F64]:
x = TestTensor(shape, x_stride, dtype, device)
elif dtype in [InfiniDtype.BYTE, InfiniDtype.U8, InfiniDtype.U16, InfiniDtype.U32, InfiniDtype.U64,
......
......@@ -115,10 +115,12 @@ option("iluvatar-gpu")
set_description("Whether to compile implementations for Iluvatar GPU")
option_end()
option("ivcore-20")
set_default(false)
option("iluvatar_arch")
set_default("ivcore20")
set_showmenu(true)
set_description("Use ivcore20")
set_description("Set Iluvatar GPU architecture (e.g. ivcore20)")
set_values("ivcore20")
set_category("option")
option_end()
if has_config("iluvatar-gpu") then
......
toolchain("iluvatar.toolchain")
local iluvatar_arch = get_config("iluvatar_arch") or "ivcore20"
toolchain("iluvatar.toolchain")
set_toolset("cc" , "clang" )
set_toolset("cxx" , "clang++")
set_toolset("cu" , "clang++")
......@@ -44,15 +46,15 @@ target("infiniop-iluvatar")
set_warnings("all", "error")
add_cuflags("-Wno-error=unused-private-field", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
if has_config("ivcore-20") then
add_cuflags("--cuda-gpu-arch=ivcore20", {force = true})
end
add_cuflags("--cuda-gpu-arch=" .. iluvatar_arch, {force = true})
add_culdflags("-fPIC")
add_cxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cxxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")
-- set_languages("cxx17") 天数似乎不能用这个配置
add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu")
-- skip scaled_mm, adapt it later
-- remove_files("../src/infiniop/ops/scaled_mm/nvidia/*.cu")
-- 天数平台不支持部分 NVIDIA PTX 指令,AWQ 反量化改用 CUDA C++ 实现
add_files("../src/infiniop/ops/dequantize_awq/iluvatar/*.cu")
......@@ -75,6 +77,7 @@ target("infinirt-iluvatar")
set_warnings("all", "error")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
add_cuflags("--cuda-gpu-arch=" .. iluvatar_arch, {force = true})
add_culdflags("-fPIC")
add_cxflags("-fPIC")
add_cxxflags("-fPIC")
......@@ -97,6 +100,7 @@ target("infiniccl-iluvatar")
set_warnings("all", "error")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
add_cuflags("--cuda-gpu-arch=" .. iluvatar_arch, {force = true})
add_culdflags("-fPIC")
add_cxflags("-fPIC")
add_cxxflags("-fPIC")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment