add_rules("mode.debug", "mode.release")

-- boost is used with stacktrace enabled for diagnostic backtraces
add_requires("boost", {configs = {stacktrace = true}})

add_requires("pybind11")

-- ANSI color codes for console messages printed by build hooks.
local GREEN = '\27[0;32m'   -- NOTE(review): currently unused in this file -- confirm before removing
local YELLOW = '\27[1;33m'
local NC = '\27[0m'  -- No Color (reset)

set_encodings("utf-8")
-- Project-wide include paths (vendored third-party headers included).
add_includedirs("include")
add_includedirs("third_party/spdlog/include")
add_includedirs("third_party/nlohmann_json/single_include/")

if is_mode("debug") then
    add_defines("DEBUG_MODE")
end
if is_plat("windows") then
    -- Dynamic MSVC runtime; force UTF-8 for both compiler and linker.
    set_runtimes("MD")
    add_ldflags("/utf-8", {force = true})
    add_cxxflags("/utf-8", {force = true})
end

-- CPU
option("cpu")
    set_default(true)
    set_showmenu(true)
    set_description("Whether to compile implementations for CPU")
option_end()

option("omp")
    set_default(true)
    set_showmenu(true)
    set_description("Enable or disable OpenMP support for cpu kernel")
option_end()

if has_config("cpu") then
    includes("xmake/cpu.lua")
    add_defines("ENABLE_CPU_API")
end

if has_config("omp") then
    add_defines("ENABLE_OMP")
end

-- Nvidia (英伟达)
option("nv-gpu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Nvidia GPU")
option_end()

if has_config("nv-gpu") then
    add_defines("ENABLE_NVIDIA_API")
    includes("xmake/nvidia.lua")
end

option("cudnn")
    set_default(true)
    set_showmenu(true)
    set_description("Whether to compile cudnn for Nvidia GPU")
option_end()

-- NOTE(review): cudnn defaults to true, so this define is added even when
-- nv-gpu is disabled -- confirm that is intended.
if has_config("cudnn") then
    add_defines("ENABLE_CUDNN_API")
end

option("cutlass")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile cutlass for Nvidia GPU")
option_end()

if has_config("cutlass") then
    add_defines("ENABLE_CUTLASS_API")
end

option("cuda_arch")
    set_showmenu(true)
    set_description("Set CUDA GPU architecture (e.g. sm_90)")
    set_values("sm_50", "sm_60", "sm_70", "sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_90a")
    set_category("option")
option_end()

-- Cambricon (寒武纪)
option("cambricon-mlu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Cambricon MLU")
option_end()

if has_config("cambricon-mlu") then
    add_defines("ENABLE_CAMBRICON_API")
    includes("xmake/bang.lua")
end

-- Huawei Ascend (华为昇腾)
option("ascend-npu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Huawei Ascend NPU")
option_end()

if has_config("ascend-npu") then
    add_defines("ENABLE_ASCEND_API")
    includes("xmake/ascend.lua")
end

-- Iluvatar (天数智芯)
option("iluvatar-gpu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Iluvatar GPU")
option_end()

option("iluvatar_arch")
    set_default("ivcore20")
    set_showmenu(true)
    set_description("Set Iluvatar GPU architecture (e.g. ivcore20)")
    set_values("ivcore20")
    set_category("option")
option_end()

if has_config("iluvatar-gpu") then
    add_defines("ENABLE_ILUVATAR_API")
    includes("xmake/iluvatar.lua")
end

-- Ali PPU
option("ali-ppu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Ali PPU")
option_end()

if has_config("ali-ppu") then
    add_defines("ENABLE_ALI_API")
    includes("xmake/ali.lua")
end

-- Qy GPU
option("qy-gpu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Qy GPU")
option_end()

if has_config("qy-gpu") then
    add_defines("ENABLE_QY_API")
    includes("xmake/qy.lua")
end

-- MetaX (沐曦)
option("metax-gpu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for MetaX GPU")
option_end()

option("use-mc")
    set_default(false)
    set_showmenu(true)
    set_description("Use MC version")
option_end()

if has_config("metax-gpu") then
    add_defines("ENABLE_METAX_API")
    -- The MC toolchain variant gets an additional define.
    if has_config("use-mc") then
        add_defines("ENABLE_METAX_MC_API")
    end
    includes("xmake/metax.lua")
end

-- Moore Threads (摩尔线程)
option("moore-gpu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Moore Threads GPU")
option_end()

option("moore-gpu-arch")
    set_default("mp_31")
    set_showmenu(true)
    set_description("Set Moore GPU architecture (e.g. mp_31)")
option_end()

if has_config("moore-gpu") then
    add_defines("ENABLE_MOORE_API")
    includes("xmake/moore.lua")
end

-- Hygon DCU (海光DCU)
option("hygon-dcu")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to compile implementations for Hygon DCU")
option_end()

if has_config("hygon-dcu") then
    add_defines("ENABLE_HYGON_API")
    -- Required by HIP headers included from torch ATen/hip.
    add_defines("__HIP_PLATFORM_AMD__")
    includes("xmake/hygon.lua")
end

-- Kunlun (昆仑芯)
option("kunlun-xpu")
    set_default(false)
    set_showmenu(true)
    set_description("Enable or disable Kunlun XPU kernel")
option_end()

if has_config("kunlun-xpu") then
    add_defines("ENABLE_KUNLUN_API")
    includes("xmake/kunlun.lua")
end

-- NineToothed (九齿)
option("ninetoothed")
    set_default(false)
    set_showmenu(true)
    -- BUGFIX: description typo "complie" corrected to "compile".
    set_description("Whether to compile NineToothed implementations")
option_end()

if has_config("ninetoothed") then
    add_defines("ENABLE_NINETOOTHED")
end

-- ATen
option("aten")
    set_default(false)
    set_showmenu(true)
    -- BUGFIX: description typo "Wether" corrected to "Whether".
    set_description("Whether to link aten and torch libraries")
option_end()

-- Flash-Attn
option("flash-attn")
    set_default("")
    set_showmenu(true)
    -- BUGFIX: description grammar "will not used" corrected.
    set_description("Path to flash-attention repo. If not set, flash-attention will not be used.")
option_end()

option("flash-attn-prebuilt")
    set_default("")
    set_showmenu(true)
    set_description("Path to prebuilt flash_attn .so file or directory containing it. Used for Hygon DCU.")
option_end()

if has_config("aten") then
    add_defines("ENABLE_ATEN")

    -- Flash attention is enabled when either a source checkout or a prebuilt
    -- shared object is supplied (via config option or FLASH_ATTN_PREBUILT env).
    local fa_src = get_config("flash-attn")
    local fa_prebuilt = get_config("flash-attn-prebuilt")
    if not fa_prebuilt or fa_prebuilt == "" then
        fa_prebuilt = os.getenv("FLASH_ATTN_PREBUILT")
    end
    if (fa_src and fa_src ~= "") or (fa_prebuilt and fa_prebuilt ~= "") then
        add_defines("ENABLE_FLASH_ATTN")
    end
end


-- Device graph (e.g. CUDA graph)
option("graph")
    set_default(false)
    set_showmenu(true)
    set_description("Whether to use device graph instantiating feature, such as cuda graph for nvidia")
option_end()

if has_config("graph") then
    add_defines("USE_INFINIRT_GRAPH")
end


-- InfiniCCL
option("ccl")
    set_default(false)
    set_showmenu(true)
    -- BUGFIX: description typo "Wether" corrected to "Whether".
    set_description("Whether to compile implementations for InfiniCCL")
option_end()

if has_config("ccl") then
    add_defines("ENABLE_CCL")
end

target("infini-utils")
    set_kind("static")
    -- Internal static library; nothing to install on its own.
    on_install(function (target) end)
    set_languages("cxx17")

    set_warnings("all", "error")

    if not is_plat("windows") then
        add_cxflags("-fPIC", "-Wno-unknown-pragmas")
        add_cxxflags("-fPIC", "-Wno-unknown-pragmas")
        if has_config("omp") then
            add_cxxflags("-fopenmp")
            add_ldflags("-fopenmp", {force = true})
        end
    else
        -- C4068 (unknown pragma): sources carry GCC-style pragmas.
        add_cxxflags("/wd4068")
        if has_config("omp") then
            add_cxxflags("/openmp")
        end
    end

    add_files("src/utils/*.cc")
target_end()

target("infinirt")
    set_kind("shared")

    -- Attach the runtime backend for each enabled device platform
    -- (order matches the original declaration order).
    local runtime_backends = {
        {"cpu", "infinirt-cpu"},
        {"nv-gpu", "infinirt-nvidia"},
        {"cambricon-mlu", "infinirt-cambricon"},
        {"ascend-npu", "infinirt-ascend"},
        {"metax-gpu", "infinirt-metax"},
        {"moore-gpu", "infinirt-moore"},
        {"iluvatar-gpu", "infinirt-iluvatar"},
        {"ali-ppu", "infinirt-ali"},
    }
    for _, backend in ipairs(runtime_backends) do
        if has_config(backend[1]) then
            add_deps(backend[2])
        end
    end

    if has_config("qy-gpu") then
        add_deps("infinirt-qy")
        -- Link pre-compiled CUDA objects produced by the qy rules.
        add_files("build/.objs/infinirt-qy/rules/qy.cuda/src/infinirt/cuda/*.cu.o", {public = true})
    end
    if has_config("kunlun-xpu") then
        add_deps("infinirt-kunlun")
    end
    if has_config("hygon-dcu") then
        add_deps("infinirt-hygon")
    end

    set_languages("cxx17")

    if not is_plat("windows") then
        add_cxflags("-fPIC")
        add_cxxflags("-fPIC")
        add_ldflags("-fPIC", {force = true})
    end

    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
    add_files("src/infinirt/*.cc")
    add_installfiles("include/infinirt.h", {prefixdir = "include"})
target_end()

target("infiniop")
    set_kind("shared")

    add_deps("infinirt")

    -- Per-backend operator implementations (declaration order preserved).
    local op_backends = {
        {"cpu", "infiniop-cpu"},
        {"nv-gpu", "infiniop-nvidia"},
        {"iluvatar-gpu", "infiniop-iluvatar"},
        {"ali-ppu", "infiniop-ali"},
    }
    for _, backend in ipairs(op_backends) do
        if has_config(backend[1]) then
            add_deps(backend[2])
        end
    end

    if has_config("qy-gpu") then
        add_deps("infiniop-qy")
        -- Link pre-compiled CUDA objects produced by the qy rules.
        add_files("build/.objs/infiniop-qy/rules/qy.cuda/src/infiniop/ops/*/nvidia/*.cu.o", {public = true})
        add_files("build/.objs/infiniop-qy/rules/qy.cuda/src/infiniop/ops/*/*/nvidia/*.cu.o", {public = true})
        add_files("build/.objs/infiniop-qy/rules/qy.cuda/src/infiniop/devices/nvidia/*.cu.o", {public = true})
    end

    local more_backends = {
        {"cambricon-mlu", "infiniop-cambricon"},
        {"ascend-npu", "infiniop-ascend"},
        {"metax-gpu", "infiniop-metax"},
        {"moore-gpu", "infiniop-moore"},
        {"kunlun-xpu", "infiniop-kunlun"},
        {"hygon-dcu", "infiniop-hygon"},
    }
    for _, backend in ipairs(more_backends) do
        if has_config(backend[1]) then
            add_deps(backend[2])
        end
    end

    set_languages("cxx17")
    add_files("src/infiniop/devices/handle.cc")
    add_files("src/infiniop/ops/*/operator.cc", "src/infiniop/ops/*/*/operator.cc")
    add_files("src/infiniop/*.cc")

    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
    add_installfiles("include/infiniop/(**/*.h)", {prefixdir = "include/infiniop"})
    add_installfiles("include/infiniop/*.h", {prefixdir = "include/infiniop"})
    add_installfiles("include/infiniop.h", {prefixdir = "include"})
    add_installfiles("include/infinicore.h", {prefixdir = "include"})
target_end()

target("infiniccl")
    set_kind("shared")
    add_deps("infinirt")

    -- Per-backend collective-communication implementations
    -- (declaration order preserved).
    local ccl_backends = {
        {"nv-gpu", "infiniccl-nvidia"},
        {"ascend-npu", "infiniccl-ascend"},
        {"cambricon-mlu", "infiniccl-cambricon"},
        {"metax-gpu", "infiniccl-metax"},
        {"iluvatar-gpu", "infiniccl-iluvatar"},
        {"ali-ppu", "infiniccl-ali"},
    }
    for _, backend in ipairs(ccl_backends) do
        if has_config(backend[1]) then
            add_deps(backend[2])
        end
    end

    if has_config("qy-gpu") then
        add_deps("infiniccl-qy")
        -- Link pre-compiled CUDA objects produced by the qy rules.
        add_files("build/.objs/infiniccl-qy/rules/qy.cuda/src/infiniccl/cuda/*.cu.o", {public = true})
    end

    if has_config("moore-gpu") then
        add_deps("infiniccl-moore")
    end
    if has_config("kunlun-xpu") then
        add_deps("infiniccl-kunlun")
    end
    if has_config("hygon-dcu") then
        add_deps("infiniccl-hygon")
    end

    set_languages("cxx17")

    add_files("src/infiniccl/*.cc")
    add_installfiles("include/infiniccl.h", {prefixdir = "include"})

    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
target_end()

target("infinicore_c_api")
    -- Aggregate target: builds the three C API shared libraries together.
    set_kind("phony")
    add_deps("infiniop", "infinirt", "infiniccl")
    after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()

target("infinicore_cpp_api")
    set_kind("shared")
    add_deps("infiniop", "infinirt", "infiniccl")
    set_languages("cxx17")
    -- Export only symbols explicitly marked visible.
    set_symbols("visibility")

    local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")

    add_includedirs("include")
    add_includedirs(INFINI_ROOT.."/include", { public = true })

    add_linkdirs(INFINI_ROOT.."/lib")
    add_links("infiniop", "infinirt", "infiniccl")

    -- Cache the config value instead of calling get_config() twice, and check
    -- nil before the empty-string comparison.
    local flash_attn_repo = get_config("flash-attn")
    if flash_attn_repo ~= nil and flash_attn_repo ~= "" then
        add_installfiles("(builddir)/$(plat)/$(arch)/$(mode)/flash-attn*.so", {prefixdir = "lib"})
        if has_config("nv-gpu") then
            add_deps("flash-attn-nvidia")
        end
    end

    if has_config("hygon-dcu") then
        local cuda_sdk = get_config("cuda") or os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH")
        local dtk_root = os.getenv("DTK_ROOT") or "/opt/dtk"
        -- Resolve a CUDA SDK root that actually contains an include/ directory,
        -- probing common nested layouts (root/cuda, root/cuda-12).
        local function normalize_cuda_root(root)
            if not root or root == "" or not os.isdir(root) then
                return nil
            end
            if os.isdir(path.join(root, "include")) then
                return root
            end
            local nested = {
                path.join(root, "cuda"),
                path.join(root, "cuda-12")
            }
            for _, cand in ipairs(nested) do
                if os.isdir(path.join(cand, "include")) then
                    return cand
                end
            end
            return root
        end

        -- Prefer xmake --cuda=... for deterministic SDK include/link paths.
        local normalized_cuda_sdk = normalize_cuda_root(cuda_sdk)
        if normalized_cuda_sdk then
            add_includedirs(path.join(normalized_cuda_sdk, "include"))
            add_linkdirs(path.join(normalized_cuda_sdk, "lib64"))
        end

        -- Keep DTK fallback paths for environments where only DTK_ROOT is set.
        if dtk_root and dtk_root ~= "" and os.isdir(dtk_root) then
            add_includedirs(path.join(dtk_root, "include"))
            add_includedirs(path.join(dtk_root, "cuda", "include"))
            add_linkdirs(path.join(dtk_root, "lib"))
            add_linkdirs(path.join(dtk_root, "cuda", "lib64"))
        end
    end

    on_load(function (target)
        if has_config("aten") then
            -- Hygon DCU: link prebuilt flash_attn BEFORE torch for correct symbol resolution order
            if has_config("hygon-dcu") then
                local fa_prebuilt = get_config("flash-attn-prebuilt")
                if not fa_prebuilt or fa_prebuilt == "" then
                    fa_prebuilt = os.getenv("FLASH_ATTN_PREBUILT")
                end

                local flash_so_dir = nil
                local flash_so_name = nil

                if fa_prebuilt and fa_prebuilt ~= "" then
                    if os.isfile(fa_prebuilt) then
                        flash_so_dir = path.directory(fa_prebuilt)
                        flash_so_name = path.filename(fa_prebuilt)
                    else
                        flash_so_dir = fa_prebuilt
                        local files = os.files(path.join(fa_prebuilt, "flash_attn_2_cuda*.so"))
                        if #files > 0 then
                            flash_so_name = path.filename(files[1])
                        end
                    end
                else
                    -- Fall back to the flash_attn module installed in the active python env.
                    local ok, so_path = pcall(function()
                        return os.iorunv("python", {"-c", "import flash_attn_2_cuda; print(flash_attn_2_cuda.__file__)"}):trim()
                    end)
                    if ok and so_path and so_path ~= "" and os.isfile(so_path) then
                        flash_so_dir = path.directory(so_path)
                        flash_so_name = path.filename(so_path)
                    end
                end

                if flash_so_dir and flash_so_name then
                    target:add("linkdirs", flash_so_dir)
                    target:add("ldflags", "-Wl,--no-as-needed", {force = true})
                    target:add("ldflags", "-l:" .. flash_so_name, {force = true})
                    target:add("ldflags", "-Wl,--as-needed", {force = true})
                    print("Flash Attention library: " .. path.join(flash_so_dir, flash_so_name))
                end
            end

            -- Locate the torch installation of the active python interpreter.
            local TORCH_DIR = os.iorunv("python", {"-c", "import torch, os; print(os.path.dirname(torch.__file__))"}):trim()

            -- Use sysincludedirs (-isystem) so that torch's bundled pybind11 headers
            -- do not shadow the xmake pybind11 package headers.
            target:add(
                "sysincludedirs",
                path.join(TORCH_DIR, "include"),
                path.join(TORCH_DIR, "include/torch/csrc/api/include"),
                { public = true })

            target:add(
                "linkdirs",
                path.join(TORCH_DIR, "lib"),
                { public = true }
            )

            local torch_libdir = path.join(TORCH_DIR, "lib")
            target:add("rpathdirs", torch_libdir)
            target:add("ldflags", "-Wl,--no-as-needed", {force = true})
            local torch_links = {"torch", "c10"}
            -- True when lib<name>.so* exists under torch's lib directory.
            local function has_torch_lib(name)
                return #os.files(path.join(torch_libdir, "lib" .. name .. ".so*")) > 0
            end
            if has_torch_lib("torch_cuda") then
                table.insert(torch_links, "torch_cuda")
            elseif has_torch_lib("torch_hip") then
                table.insert(torch_links, "torch_hip")
            end
            if has_torch_lib("c10_cuda") then
                table.insert(torch_links, "c10_cuda")
            elseif has_torch_lib("c10_hip") then
                table.insert(torch_links, "c10_hip")
            end
            -- BUGFIX: `target:add("links", table.unpack(torch_links), {public = true})`
            -- truncated table.unpack to its FIRST value because the call was not the
            -- last argument in the list, so only "torch" was actually linked.
            -- Add each library individually instead.
            for _, libname in ipairs(torch_links) do
                target:add("links", libname, { public = true })
            end
            -- Hard-pin runtime dependency entries to avoid linker dropping HIP torch libs.
            target:add("ldflags", "-L" .. torch_libdir, {force = true})
            if has_torch_lib("torch_hip") then
                target:add("ldflags", "-l:libtorch_hip.so", {force = true})
            end
            if has_torch_lib("c10_hip") then
                target:add("ldflags", "-l:libc10_hip.so", {force = true})
            end
            if has_torch_lib("torch_cuda") then
                target:add("ldflags", "-l:libtorch_cuda.so", {force = true})
            end
            if has_torch_lib("c10_cuda") then
                target:add("ldflags", "-l:libc10_cuda.so", {force = true})
            end
            target:add("ldflags", "-Wl,--as-needed", {force = true})
            print("Torch libraries: " .. table.concat(torch_links, ", "))
        end
    end)

    -- Add InfiniCore C++ source files (needed for RoPE and other nn modules)
    add_files("src/infinicore/*.cc")
    add_files("src/infinicore/adaptor/*.cc")
    add_files("src/infinicore/context/*.cc")
    add_files("src/infinicore/context/*/*.cc")
    add_files("src/infinicore/tensor/*.cc")
    add_files("src/infinicore/graph/*.cc")
    add_files("src/infinicore/nn/*.cc")
    add_files("src/infinicore/ops/*/*.cc")
    add_files("src/utils/*.cc")

    set_installdir(INFINI_ROOT)
    add_installfiles("include/infinicore/(**.h)",    {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**.hpp)",    {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**/*.h)",  {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**/*.hpp)",{prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore.h",          {prefixdir = "include"})
    add_installfiles("include/infinicore.hpp",        {prefixdir = "include"})

    -- Hygon DCU: copy the prebuilt flash_attn shared object next to the installed
    -- libraries so the runtime loader can find it.
    after_install(function (target)
        if not has_config("hygon-dcu") then return end
        local fa_prebuilt = get_config("flash-attn-prebuilt")
        if not fa_prebuilt or fa_prebuilt == "" then
            fa_prebuilt = os.getenv("FLASH_ATTN_PREBUILT")
        end

        local flash_so_path = nil
        if fa_prebuilt and fa_prebuilt ~= "" then
            if os.isfile(fa_prebuilt) then
                flash_so_path = fa_prebuilt
            else
                local files = os.files(path.join(fa_prebuilt, "flash_attn_2_cuda*.so"))
                if #files > 0 then flash_so_path = files[1] end
            end
        else
            local ok, so_path = pcall(function()
                return os.iorunv("python", {"-c", "import flash_attn_2_cuda; print(flash_attn_2_cuda.__file__)"}):trim()
            end)
            if ok and so_path and so_path ~= "" and os.isfile(so_path) then
                flash_so_path = so_path
            end
        end

        if flash_so_path then
            local installdir = target:installdir()
            local libdir = path.join(installdir, "lib")
            os.mkdir(libdir)
            os.cp(flash_so_path, libdir)
            print("Copied prebuilt flash_attn library to " .. libdir)
        end
    end)

    after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()

target("_infinicore")
    -- Python extension module built from the pybind11 bindings.
    add_packages("boost")
    if is_mode("debug") then
        -- Real stack traces in debug builds via libbacktrace.
        add_defines("BOOST_STACKTRACE_USE_BACKTRACE")
        add_links("backtrace")
    else
        add_defines("BOOST_STACKTRACE_USE_NOOP")
    end

    set_default(false)
    add_rules("python.library", {soabi = true})
    add_packages("pybind11")
    set_languages("cxx17")

    add_deps("infinicore_cpp_api")

    set_kind("shared")

    local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
    add_includedirs(INFINI_ROOT.."/include", { public = true })

    add_linkdirs(INFINI_ROOT.."/lib")
    add_links("infiniop", "infinirt", "infiniccl")

    add_files("src/infinicore/pybind11/**.cc")

    set_installdir("python/infinicore")
target_end()

-- Editable install switch for the Python package below.
option("editable")
    set_default(false)
    set_showmenu(true)
    set_description("Install the `infinicore` Python package in editable mode")
option_end()

target("infinicore")
    set_kind("phony")

    set_default(false)

    add_deps("_infinicore")

    -- Install the Python package via pip, passing --editable when requested.
    on_install(function (target)
        local pip_install_args = {}

        if has_config("editable") then
            table.insert(pip_install_args, "--editable")
        end

        os.execv("python", table.join({"-m", "pip", "install"}, pip_install_args, {"."}))
    end)
target_end()

-- Tests
includes("xmake/test.lua")