issue/486 Adapt seven operators to Hygon machines.

Co-authored-by: zhuyue <zhuyue@qiyuanlab.com>

issue/486 Adapt seven operators to Hygon machines.
Co-authored-by: zhuyue <zhuyue@qiyuanlab.com>
e698ef6b · gongchensu · GitHub · 3959c943 · e698ef6b · e698ef6b
Unverified Commit e698ef6b authored Sep 29, 2025 by gongchensu Committed by GitHub Sep 29, 2025
7 changed files
--- a/src/infinirt-test/main.cc
+++ b/src/infinirt-test/main.cc
@@ -21,7 +21,7 @@ void printUsage() {
              << "  moore" << std::endl
              << "  iluvatar" << std::endl
              << "  kunlun" << std::endl
-              << "  sugon" << std::endl
+              << "  hygon" << std::endl
              << std::endl;
    exit(EXIT_FAILURE);
 }
@@ -52,7 +52,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
        else PARSE_DEVICE("--moore", INFINI_DEVICE_MOORE)
        else PARSE_DEVICE("--iluvatar", INFINI_DEVICE_ILUVATAR)
        else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
-        else PARSE_DEVICE("--sugon", INFINI_DEVICE_SUGON)
+        else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
        else {
            printUsage();
        }

--- a/src/infinirt/cuda/infinirt_cuda.cuh
+++ b/src/infinirt/cuda/infinirt_cuda.cuh
@@ -3,7 +3,7 @@
 #include "../infinirt_impl.h"
 namespace infinirt::cuda {
-#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
+#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API)
 INFINIRT_DEVICE_API_IMPL
 #else
 INFINIRT_DEVICE_API_NOOP

--- a/src/infinirt/infinirt.cc
+++ b/src/infinirt/infinirt.cc
@@ -23,7 +23,7 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
        return INFINI_STATUS_NULL_POINTER;
    }
    for (size_t i = 0; i < INFINI_DEVICE_TYPE_COUNT; i++) {
-        if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_KUNLUN || i == INFINI_DEVICE_SUGON) {
+        if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_KUNLUN || i == INFINI_DEVICE_HYGON) {
            count_array[i] = 0;
            continue;
        }
@@ -77,6 +77,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
        case INFINI_DEVICE_ILUVATAR:                                   \
            _status = infinirt::cuda::API PARAMS;                      \
            break;                                                     \
+        case INFINI_DEVICE_HYGON:                                      \
+            _status = infinirt::cuda::API PARAMS;                      \
+            break;                                                     \
        default:                                                       \
            _status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;         \
        }                                                              \

--- a/test/infiniop/libinfiniop/devices.py
+++ b/test/infiniop/libinfiniop/devices.py
@@ -7,7 +7,7 @@ class InfiniDeviceEnum:
    MOORE = 5
    ILUVATAR = 6
    KUNLUN = 7
-    SUGON = 8
+    HYGON = 8
 InfiniDeviceNames = {
@@ -19,7 +19,7 @@ InfiniDeviceNames = {
    InfiniDeviceEnum.MOORE: "Moore",
    InfiniDeviceEnum.ILUVATAR: "Iluvatar",
    InfiniDeviceEnum.KUNLUN: "Kunlun",
-    InfiniDeviceEnum.SUGON: "Sugon",
+    InfiniDeviceEnum.HYGON: "Hygon",
 }
 # Mapping that maps InfiniDeviceEnum to torch device string
@@ -32,5 +32,5 @@ torch_device_map = {
    InfiniDeviceEnum.MOORE: "musa",
    InfiniDeviceEnum.ILUVATAR: "cuda",
    InfiniDeviceEnum.KUNLUN: "cuda",
-    InfiniDeviceEnum.SUGON: "cuda",
+    InfiniDeviceEnum.HYGON: "cuda",
 }
--- a/test/infiniop/libinfiniop/utils.py
+++ b/test/infiniop/libinfiniop/utils.py
@@ -341,6 +341,11 @@ def get_args():
        action="store_true",
        help="Run KUNLUN XPU test",
    )
+    parser.add_argument(
+        "--hygon",
+        action="store_true",
+        help="Run HYGON DCU test",
+    )
    return parser.parse_args()
@@ -648,6 +653,10 @@ def get_test_devices(args):
        import torch_xmlir
        devices_to_test.append(InfiniDeviceEnum.KUNLUN)
+    if args.hygon:
+        import torch
+        devices_to_test.append(InfiniDeviceEnum.HYGON)
    if not devices_to_test:
        devices_to_test = [InfiniDeviceEnum.CPU]

--- a/xmake.lua
+++ b/xmake.lua
@@ -124,15 +124,16 @@ if has_config("moore-gpu") then
    includes("xmake/moore.lua")
 end
-- 海光
+-- 海光DCU
-option("sugon-dcu")
+option("hygon-dcu")
    set_default(false)
    set_showmenu(true)
-    set_description("Whether to compile implementations for Sugon DCU")
+    set_description("Whether to compile implementations for Hygon DCU")
 option_end()
-if has_config("sugon-dcu") then
+if has_config("hygon-dcu") then
-    add_defines("ENABLE_SUGON_CUDA_API")
+    add_defines("ENABLE_HYGON_API")
+    includes("xmake/hygon.lua")
 end
 -- 昆仑芯
@@ -219,6 +220,9 @@ target("infinirt")
    if has_config("kunlun-xpu") then
        add_deps("infinirt-kunlun")
    end
+    if has_config("hygon-dcu") then
+        add_deps("infinirt-hygon")
+    end
    set_languages("cxx17")
    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
    add_files("src/infinirt/*.cc")
@@ -238,20 +242,6 @@ target("infiniop")
    if has_config("iluvatar-gpu") then
        add_deps("infiniop-iluvatar")
    end
-    if has_config("sugon-dcu") then
-        local builddir = string.format(
-            "build/%s/%s/%s",
-            get_config("plat"),
-            get_config("arch"),
-            get_config("mode")
-        )
-        add_shflags("-s", "-shared", "-fPIC")
-        add_links("cublas", "cudnn", "cudadevrt", "cudart_static", "rt", "pthread", "dl")
-        -- Using -linfiniop-nvidia will fail, manually link the target using full path
-        add_deps("nv-gpu", {inherit = false})
-        add_links(builddir.."/libinfiniop-nvidia.a")
-        set_toolchains("sugon-dcu-linker")
-    end
    if has_config("cambricon-mlu") then
        add_deps("infiniop-cambricon")
@@ -268,6 +258,9 @@ target("infiniop")
    if has_config("kunlun-xpu") then
        add_deps("infiniop-kunlun")
    end
+    if has_config("hygon-dcu") then
+        add_deps("infiniop-hygon")
+    end
    set_languages("cxx17")
    add_files("src/infiniop/devices/handle.cc")
    add_files("src/infiniop/ops/*/operator.cc")
@@ -306,6 +299,9 @@ target("infiniccl")
    if has_config("kunlun-xpu") then
        add_deps("infiniccl-kunlun")
    end
+    if has_config("hygon-dcu") then
+        add_deps("infiniccl-hygon")
+    end
    set_languages("cxx17")

--- a/xmake/hygon.lua
+++ b/xmake/hygon.lua
+-- Root of the Hygon DTK installation. Falls back to /opt/dtk (the same default
+-- the targets in this file use) so that path.join below never receives nil
+-- when the DTK_ROOT environment variable is unset.
+local dtk_root = os.getenv("DTK_ROOT") or "/opt/dtk"
+-- Toolchain for Hygon DCU builds: clang/clang++ for C/C++ sources and nvcc
+-- for the "cu" (compile) and "culd" (link) toolsets.
+toolchain("hygon.toolchain")
+    set_toolset("cc"  , "clang"  )
+    set_toolset("cxx" , "clang++")
+    -- Prefer the CUDA compiler shipped inside DTK; if it is not present at
+    -- <dtk_root>/cuda/bin/nvcc, fall back to whichever nvcc is on PATH.
+    local nvcc_path = path.join(dtk_root, "cuda", "bin", "nvcc")
+    if os.isfile(nvcc_path) then
+        set_toolset("cu"  , nvcc_path)
+        set_toolset("culd", nvcc_path)
+    else
+        set_toolset("cu"  , "nvcc")
+        set_toolset("culd", "nvcc")
+    end
+    -- cu-ccbin candidates are read from the CXX and CC environment variables.
+    set_toolset("cu-ccbin", "$(env CXX)", "$(env CC)")
+toolchain_end()
+-- Rule attached to the Hygon targets. After a target is loaded it removes the
+-- "cudadevrt" and "cudnn" entries from the target's syslinks.
+rule("hygon.env")
+    -- Declare ordering relative to cuda.env through add_orders, which avoids
+    -- the deprecation warning.
+    add_orders("cuda.env", "hygon.env")
+    after_load(function (target)
+        -- target:get may hand back nil, a single string, or a list; table.wrap
+        -- normalizes all three to a list so ipairs and # behave as intended.
+        local current = table.wrap(target:get("syslinks"))
+        -- Link names that must not be passed on for Hygon DCU.
+        local unwanted = {cudadevrt = true, cudnn = true}
+        local kept = {}
+        for _, link in ipairs(current) do
+            if not unwanted[link] then
+                table.insert(kept, link)
+            end
+        end
+        -- Only rewrite syslinks (and report it) when something was filtered out.
+        if #current > #kept then
+            target:set("syslinks", kept)
+            print("CUDA specific libraries removed for Hygon DCU. New syslinks: {" .. table.concat(kept, ", ") .. "}")
+        end
+    end)
+rule_end()
+-- Static library holding the infiniop device code and operator kernels that
+-- are compiled for Hygon DCU.
+target("infiniop-hygon")
+    set_kind("static")
+    add_deps("infini-utils")
+    -- Install step is deliberately a no-op.
+    on_install(function (target) end)
+    set_toolchains("hygon.toolchain")
+    add_rules("hygon.env")
+    set_values("cuda.rdc", false)
+    -- Hygon DCU uses the CUDA libraries provided by DTK.
+    add_links("cudart", "cublas", "curand", "cublasLt", "cudnn")
+    -- Add the DTK include and library search paths when the DTK directory exists.
+    local dtk_home = os.getenv("DTK_ROOT") or "/opt/dtk"
+    if os.isdir(dtk_home) then
+        add_includedirs(path.join(dtk_home, "include"), path.join(dtk_home, "cuda", "include"))
+        add_linkdirs(path.join(dtk_home, "lib"), path.join(dtk_home, "cuda", "lib64"))
+    end
+    set_warnings("all", "error")
+    add_cuflags("-Wno-error=unused-private-field")
+    -- Forced flags: silence return-statement warnings, build position-independent C++17.
+    add_cuflags("-Wno-return-type", "-fPIC", "-std=c++17", {force = true})
+    add_culdflags("-fPIC")
+    add_cxflags("-fPIC")
+    -- Hygon DCU specific architecture flags.
+    for _, arch_flag in ipairs({
+        "-arch=gfx906",
+        "-arch=gfx926",
+        "-arch=gfx928",
+        "-arch=gfx936",
+    }) do
+        add_cuflags(arch_flag)
+    end
+    -- Reuse the NVIDIA CUDA implementation through the HIP compatibility layer.
+    -- Only the seven operators supported on Hygon DCU are compiled:
+    -- rope, gemm, causal_softmax, random_sample, rearrange, rms_norm, swiglu.
+    for _, pattern in ipairs({
+        "../src/infiniop/devices/nvidia/*.cu",
+        "../src/infiniop/ops/rope/nvidia/*.cu",
+        "../src/infiniop/ops/gemm/nvidia/*.cu",
+        "../src/infiniop/ops/causal_softmax/nvidia/*.cu",
+        "../src/infiniop/ops/random_sample/nvidia/*.cu",
+        "../src/infiniop/ops/rearrange/nvidia/*.cu",
+        "../src/infiniop/ops/rms_norm/nvidia/*.cu",
+        "../src/infiniop/ops/swiglu/nvidia/*.cu",
+    }) do
+        add_files(pattern)
+    end
+    -- Generated ninetoothed C sources are added only when that option is enabled.
+    if has_config("ninetoothed") then
+        add_files("../build/ninetoothed/*.c", {cxflags = {"-Wno-return-type"}})
+    end
+target_end()
+-- Static library with the infinirt CUDA runtime sources compiled for Hygon DCU.
+target("infinirt-hygon")
+    set_kind("static")
+    add_deps("infini-utils")
+    -- Install step is deliberately a no-op.
+    on_install(function (target) end)
+    set_toolchains("hygon.toolchain")
+    add_rules("hygon.env")
+    set_values("cuda.rdc", false)
+    add_links("cudart", "curand")
+    -- Add the DTK include and library search paths when the DTK directory exists.
+    local dtk_home = os.getenv("DTK_ROOT") or "/opt/dtk"
+    if os.isdir(dtk_home) then
+        add_includedirs(path.join(dtk_home, "include"), path.join(dtk_home, "cuda", "include"))
+        add_linkdirs(path.join(dtk_home, "lib"), path.join(dtk_home, "cuda", "lib64"))
+    end
+    set_warnings("all", "error")
+    -- Forced flags: silence return-statement warnings, build position-independent C++17.
+    add_cuflags("-Wno-return-type", "-fPIC", "-std=c++17", {force = true})
+    add_culdflags("-fPIC")
+    add_cxflags("-fPIC")
+    -- Hygon DCU specific architecture flags.
+    for _, arch_flag in ipairs({
+        "-arch=gfx906",
+        "-arch=gfx926",
+        "-arch=gfx928",
+        "-arch=gfx936",
+    }) do
+        add_cuflags(arch_flag)
+    end
+    add_files("../src/infinirt/cuda/*.cu")
+target_end()
+-- Static library for the infiniccl CUDA backend on Hygon DCU. Toolchain, flags
+-- and sources are only configured when the "ccl" option is enabled; otherwise
+-- the target is declared with just its kind, dependency and no-op install.
+target("infiniccl-hygon")
+    set_kind("static")
+    add_deps("infinirt")
+    -- Install step is deliberately a no-op.
+    on_install(function (target) end)
+    if has_config("ccl") then
+        set_toolchains("hygon.toolchain")
+        add_rules("hygon.env")
+        set_values("cuda.rdc", false)
+        add_links("cudart", "curand")
+        -- Add the DTK include and library search paths when the DTK directory exists.
+        local dtk_home = os.getenv("DTK_ROOT") or "/opt/dtk"
+        if os.isdir(dtk_home) then
+            add_includedirs(path.join(dtk_home, "include"), path.join(dtk_home, "cuda", "include"))
+            add_linkdirs(path.join(dtk_home, "lib"), path.join(dtk_home, "cuda", "lib64"))
+        end
+        set_warnings("all", "error")
+        -- Forced flags: silence return-statement warnings, build position-independent C++17.
+        add_cuflags("-Wno-return-type", "-fPIC", "-std=c++17", {force = true})
+        add_culdflags("-fPIC")
+        add_cxflags("-fPIC")
+        -- Hygon DCU specific architecture flags.
+        for _, arch_flag in ipairs({
+            "-arch=gfx906",
+            "-arch=gfx926",
+            "-arch=gfx928",
+            "-arch=gfx936",
+        }) do
+            add_cuflags(arch_flag)
+        end
+        -- Link against NCCL (NVIDIA Collective Communications Library).
+        add_links("nccl")
+        add_files("../src/infiniccl/cuda/*.cu")
+    end
+target_end()