Unverified commit ac4aae48, authored by Shijie, committed by GitHub
Browse files

Merge branch 'main' into dev_topkrouter

parents a15aa367 2f3f4076
#ifndef __TOPKROUTER_KUNLUN_H__
#define __TOPKROUTER_KUNLUN_H__
#include "../topkrouter.h"
// Declares op::topkrouter::kunlun::Descriptor via the shared DESCRIPTOR macro.
DESCRIPTOR(kunlun)
#endif // __TOPKROUTER_KUNLUN_H__
#include "../../../devices/kunlun/kunlun_common.h"
#include "../../../devices/kunlun/kunlun_handle.h"
#include "../../../devices/kunlun/kunlun_kernel_common.h"
#include "kernel.h"
#include "topkrouter_kunlun.h"
#include <memory>
#include <stdint.h>
namespace op::topkrouter::kunlun {
// Device-specific state owned by the descriptor: a shared reference to the
// Kunlun handle's internal runtime state.
struct Descriptor::Opaque {
    std::shared_ptr<device::kunlun::Handle::Internal> internal;
};
// Release the device-specific state allocated in create().
Descriptor::~Descriptor() {
    delete _opaque;
}
// Build a Kunlun topk-router descriptor from the input tensor descriptor.
// Rejects inputs whose innermost dimension is not contiguous (stride != 1).
// NOTE(review): `correction_bias_desc` is accepted but not validated here —
// confirm whether its shape/dtype should be checked against `x_desc`.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t x_desc,
    infiniopTensorDescriptor_t correction_bias_desc) {
    auto result = TopkrouterInfo::create(x_desc);
    CHECK_RESULT(result);
    auto info = result.take();
    if (info.x_strides[1] != 1) {
        return INFINI_STATUS_BAD_TENSOR_STRIDES;
    }
    *desc_ptr = new Descriptor(
        new Opaque{reinterpret_cast<device::kunlun::Handle *>(handle)->internal()},
        std::move(info),
        0, // workspace size: this implementation needs no extra workspace
        handle->device, handle->device_id);
    return INFINI_STATUS_SUCCESS;
}
// Dispatch the topk-router kernel on the input dtype (F32/F16/BF16).
// Writes the top-`topk` routing values to d_values_out and the matching
// expert indices to d_indices_out for each of the N rows of `width` logits.
// NOTE(review): the <<<N, BLOCK_SIZE, stream>>> triple follows the Kunlun XPU
// launch convention (cluster count, core count, stream), not CUDA's
// (grid, block, shared-mem) — confirm against the XTDK toolchain docs.
template <int BLOCK_SIZE = 64>
infiniStatus_t launch_topkrouter(float *d_values_out, int *d_indices_out, const void *d_input, const float *d_correction_bias,
                                 const float routed_scaling_factor, const size_t N, const size_t width, const size_t topk, infiniDtype_t xtype,
                                 kunlunStream_t stream) {
    if (xtype == INFINI_DTYPE_F32) {
        topkrouter_kernel<float, BLOCK_SIZE, 256, 8, 4, 2>
            <<<N, BLOCK_SIZE, stream>>>(
                d_values_out,
                d_indices_out,
                (float *)d_input,
                (const float *)d_correction_bias,
                routed_scaling_factor,
                N,
                width,
                topk);
    } else if (xtype == INFINI_DTYPE_F16) {
        topkrouter_kernel<half, BLOCK_SIZE, 256, 8, 4, 2>
            <<<N, BLOCK_SIZE, stream>>>(
                d_values_out,
                d_indices_out,
                (half *)d_input,
                (const float *)d_correction_bias,
                routed_scaling_factor,
                N,
                width,
                topk);
    } else if (xtype == INFINI_DTYPE_BF16) {
        topkrouter_kernel<bfloat16_t, BLOCK_SIZE, 256, 8, 4, 2>
            <<<N, BLOCK_SIZE, stream>>>(
                d_values_out,
                d_indices_out,
                (bfloat16_t *)d_input,
                (const float *)d_correction_bias,
                routed_scaling_factor,
                N,
                width,
                topk);
    } else {
        // Unsupported input dtype.
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    return INFINI_STATUS_SUCCESS;
}
// Execute the topk-router op on `x` (shape from _info: N rows of `width`
// logits), writing routing weights to `values` and expert indices to
// `indices` on the given Kunlun stream.
// Returns INSUFFICIENT_WORKSPACE when the caller's workspace is too small,
// otherwise propagates the dispatcher's status.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    float *values,
    int *indices,
    const void *x,
    const float *correction_bias,
    const float routed_scaling_factor,
    const size_t topk,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    size_t N = _info.N;
    size_t width = _info.width;
    auto kunlun_stream = reinterpret_cast<kunlunStream_t>(stream);
    // Fix: propagate the launch status. Previously the return value of
    // launch_topkrouter (e.g. INFINI_STATUS_BAD_TENSOR_DTYPE for an
    // unsupported dtype) was discarded and SUCCESS was always returned.
    return launch_topkrouter<64>(values, indices, x, correction_bias,
                                 routed_scaling_factor, N, width, topk,
                                 _info.xtype, kunlun_stream);
}
} // namespace op::topkrouter::kunlun
......@@ -11,6 +11,9 @@
#ifdef ENABLE_METAX_API
#include "metax/topkrouter_metax.h"
#endif
#ifdef ENABLE_KUNLUN_API
#include "kunlun/topkrouter_kunlun.h"
#endif
__C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, infiniopTopkrouterDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x_desc,
......@@ -32,6 +35,9 @@ __C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, i
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_KUNLUN_API
CREATE(INFINI_DEVICE_KUNLUN, kunlun);
#endif
}
......@@ -58,6 +64,9 @@ __C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescript
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_KUNLUN_API
GET(INFINI_DEVICE_KUNLUN, kunlun);
#endif
}
......@@ -87,6 +96,9 @@ __C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_KUNLUN_API
CALCULATE(INFINI_DEVICE_KUNLUN, kunlun);
#endif
}
......@@ -113,6 +125,9 @@ __C infiniStatus_t infiniopDestroyTopkrouterDescriptor(infiniopTopkrouterDescrip
#endif
#ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_KUNLUN_API
DESTROY(INFINI_DEVICE_KUNLUN, kunlun);
#endif
}
......
#ifndef __INFINIOP_HEAP_KUNLUN_H__
#define __INFINIOP_HEAP_KUNLUN_H__
#include "xpu/kernel/xtdk_simd_xpu2.h"
// Swap a key/value pair between two slots of shared-memory (SM) arrays.
template <typename TK, typename TV>
static __device__ inline void sm_swap_kv(_shared_ptr_ TK *k0, _shared_ptr_ TV *v0,
                                         _shared_ptr_ TK *k1, _shared_ptr_ TV *v1) {
    TK key_backup = *k1;
    TV value_backup = *v1;
    *k1 = *k0;
    *v1 = *v0;
    *k0 = key_backup;
    *v0 = value_backup;
}
// Sift the element at `idx` down a min-heap stored in shared memory until the
// min-heap property (parent key <= child keys) holds below `idx`.
// `heap_key`/`heap_value` are parallel arrays of `heap_capacity` elements.
template <typename TK, typename TV>
static __device__ inline void update_sm_min_heap(_shared_ptr_ TK *heap_key,
                                                 _shared_ptr_ TV *heap_value, int idx, int heap_capacity) {
    while (idx < heap_capacity) {
        int child_l = idx * 2 + 1;
        int child_r = idx * 2 + 2;
        int child_min = child_l;
        if (child_r >= heap_capacity) {
            if (child_l >= heap_capacity) { // idx is leaf node, shift finished
                break;
            } else { // if child_r does not exist while child_l does, choose child_l
                child_min = child_l;
            }
        } else { // both child L & R exists
            // pick the smaller child (the bool comparison promotes to 0/1)
            child_min = child_l + (heap_key[child_l] > heap_key[child_r]);
        }
        if (heap_key[idx] <= heap_key[child_min]) {
            break; // heap property already satisfied here
        }
        sm_swap_kv(&heap_key[idx], &heap_value[idx], &heap_key[child_min], &heap_value[child_min]);
        idx = child_min;
    }
}
// Build a min-heap in place over the first `size` key/value pairs by sifting
// down every internal node, starting from the last parent.
template <typename TK, typename TV>
static __device__ inline void make_sm_min_heap(
    _shared_ptr_ TK *heap_key, _shared_ptr_ TV *heap_value, int size) {
    int last_parent = size / 2 - 1;
    for (int node = last_parent; node >= 0; node--) {
        update_sm_min_heap(heap_key, heap_value, node, size);
    }
}
// Heapsort extraction phase for an already-built min-heap: repeatedly move the
// current minimum (root) to the end, then restore the heap over the shrunken
// prefix. Leaves the arrays sorted in DESCENDING key order.
template <typename TK, typename TV>
static __device__ inline void sort_sm_min_heap(
    _shared_ptr_ TK *heap_key, _shared_ptr_ TV *heap_value, int heap_capacity) {
    for (int i = heap_capacity - 1; i > 0; i--) {
        sm_swap_kv(&heap_key[0], &heap_value[0], &heap_key[i], &heap_value[i]);
        update_sm_min_heap(heap_key, heap_value, 0, i);
    }
}
// Sift the element at `idx` down a max-heap stored in shared memory until the
// max-heap property (parent key >= child keys) holds below `idx`.
template <typename TK, typename TV>
static __device__ inline void update_sm_max_heap(_shared_ptr_ TK *heap_key,
                                                 _shared_ptr_ TV *heap_value, int idx, int heap_capacity) {
    while (idx < heap_capacity) {
        int child_l = idx * 2 + 1;
        int child_r = idx * 2 + 2;
        int child_max = child_l;
        if (child_r >= heap_capacity) {
            if (child_l >= heap_capacity) { // idx is leaf node, shift finished
                break;
            } else { // if child_r does not exist while child_l does, choose child_l
                child_max = child_l;
            }
        } else { // both child L & R exists
            // pick the larger child (the bool comparison promotes to 0/1)
            child_max = child_l + (heap_key[child_l] < heap_key[child_r]);
        }
        if (heap_key[idx] >= heap_key[child_max]) {
            break; // heap property already satisfied here
        }
        sm_swap_kv(&heap_key[idx], &heap_value[idx], &heap_key[child_max], &heap_value[child_max]);
        idx = child_max;
    }
}
// Build a max-heap in place over the first `size` key/value pairs by sifting
// down every internal node, starting from the last parent.
template <typename TK, typename TV>
static __device__ inline void make_sm_max_heap(
    _shared_ptr_ TK *heap_key, _shared_ptr_ TV *heap_value, int size) {
    int last_parent = size / 2 - 1;
    for (int node = last_parent; node >= 0; node--) {
        update_sm_max_heap(heap_key, heap_value, node, size);
    }
}
// Heapsort extraction phase for an already-built max-heap: repeatedly move the
// current maximum (root) to the end, then restore the heap over the shrunken
// prefix. Leaves the arrays sorted in ASCENDING key order.
template <typename TK, typename TV>
static __device__ inline void sort_sm_max_heap(_shared_ptr_ TK *heap_key,
                                               _shared_ptr_ TV *heap_value, int heap_capacity) {
    for (int i = heap_capacity - 1; i > 0; i--) {
        sm_swap_kv(&heap_key[0], &heap_value[0], &heap_key[i], &heap_value[i]);
        update_sm_max_heap(heap_key, heap_value, 0, i);
    }
}
// Swap a key/value pair between two slots of local-memory (LM) arrays.
template <typename TK, typename TV>
static __device__ inline void lm_swap_kv(TK *k0, TV *v0,
                                         TK *k1, TV *v1) {
    TK key_backup = *k1;
    TV value_backup = *v1;
    *k1 = *k0;
    *v1 = *v0;
    *k0 = key_backup;
    *v0 = value_backup;
}
// Sift the element at `idx` down a min-heap stored in local memory until the
// min-heap property (parent key <= child keys) holds below `idx`.
template <typename TK, typename TV>
static __device__ inline void update_lm_min_heap(TK *heap_key, TV *heap_value, int idx, int heap_capacity) {
    while (idx < heap_capacity) {
        int child_l = idx * 2 + 1;
        int child_r = idx * 2 + 2;
        int child_min = child_l;
        if (child_r >= heap_capacity) {
            if (child_l >= heap_capacity) { // idx is leaf node, shift finished
                break;
            } else { // if child_r does not exist while child_l does, choose child_l
                child_min = child_l;
            }
        } else { // both child L & R exists
            // pick the smaller child (the bool comparison promotes to 0/1)
            child_min = child_l + (heap_key[child_l] > heap_key[child_r]);
        }
        if (heap_key[idx] <= heap_key[child_min]) {
            break; // heap property already satisfied here
        }
        lm_swap_kv(&heap_key[idx], &heap_value[idx], &heap_key[child_min], &heap_value[child_min]);
        idx = child_min;
    }
}
// Build a min-heap in place over the first `size` key/value pairs held in
// local memory, sifting down every internal node from the last parent.
template <typename TK, typename TV>
static __device__ inline void make_lm_min_heap(
    TK *heap_key, TV *heap_value, int size) {
    int last_parent = size / 2 - 1;
    for (int node = last_parent; node >= 0; node--) {
        update_lm_min_heap(heap_key, heap_value, node, size);
    }
}
// Heapsort extraction phase for an already-built local-memory min-heap.
// Leaves the arrays sorted in DESCENDING key order.
template <typename TK, typename TV>
static __device__ inline void sort_lm_min_heap(TK *heap_key, TV *heap_value, int heap_capacity) {
    for (int i = heap_capacity - 1; i > 0; i--) {
        lm_swap_kv(&heap_key[0], &heap_value[0], &heap_key[i], &heap_value[i]);
        update_lm_min_heap(heap_key, heap_value, 0, i);
    }
}
// Sift the element at `idx` down a max-heap stored in local memory until the
// max-heap property (parent key >= child keys) holds below `idx`.
template <typename TK, typename TV>
static __device__ inline void update_lm_max_heap(TK *heap_key, TV *heap_value, int idx, int heap_capacity) {
    while (idx < heap_capacity) {
        int child_l = idx * 2 + 1;
        int child_r = idx * 2 + 2;
        int child_max = child_l;
        if (child_r >= heap_capacity) {
            if (child_l >= heap_capacity) { // idx is leaf node, shift finished
                break;
            } else { // if child_r does not exist while child_l does, choose child_l
                child_max = child_l;
            }
        } else { // both child L & R exists
            // pick the larger child (the bool comparison promotes to 0/1)
            child_max = child_l + (heap_key[child_l] < heap_key[child_r]);
        }
        if (heap_key[idx] >= heap_key[child_max]) {
            break; // heap property already satisfied here
        }
        lm_swap_kv(&heap_key[idx], &heap_value[idx], &heap_key[child_max], &heap_value[child_max]);
        idx = child_max;
    }
}
// Build a max-heap in place over the first `size` key/value pairs held in
// local memory, sifting down every internal node from the last parent.
template <typename TK, typename TV>
static __device__ inline void make_lm_max_heap(
    TK *heap_key, TV *heap_value, int size) {
    int last_parent = size / 2 - 1;
    for (int node = last_parent; node >= 0; node--) {
        update_lm_max_heap(heap_key, heap_value, node, size);
    }
}
// Heapsort extraction phase for an already-built local-memory max-heap.
// Leaves the arrays sorted in ASCENDING key order.
template <typename TK, typename TV>
static __device__ inline void sort_lm_max_heap(TK *heap_key, TV *heap_value, int heap_capacity) {
    for (int i = heap_capacity - 1; i > 0; i--) {
        lm_swap_kv(&heap_key[0], &heap_value[0], &heap_key[i], &heap_value[i]);
        update_lm_max_heap(heap_key, heap_value, 0, i);
    }
}
// Ceiling division: number of `b`-sized blocks needed to cover `a` elements.
// NOTE(review): assumes a >= 0, b > 0, and that a + b - 1 does not overflow TID.
template <typename TID>
__device__ TID roundup_div_p(TID a, TID b) {
    TID biased = a + b - 1;
    return biased / b;
}
// Return the smaller of two values (strict '<', so `a` wins ties).
template <typename T>
__device__ T min_p(T a, T b) {
    if (a < b) {
        return a;
    }
    return b;
}
// Split `len` elements into `nthreads` near-equal chunks of whole
// `align`-sized blocks, and return thread `tid`'s half-open range
// [*start, *end). The first `block_cnt % nthreads` threads receive one extra
// block each; both bounds are clamped to `len` so the last (partial) block is
// still covered exactly once.
template <typename TID>
static __device__ inline void partition(int tid, int nthreads, TID len, int align, TID *start, TID *end) {
    TID block_cnt = roundup_div_p<TID>(len, align);
    TID remain_block = block_cnt % nthreads;
    TID start_block = block_cnt / nthreads * static_cast<TID>(tid) + min_p<TID>(tid, remain_block);
    TID end_block = start_block + block_cnt / nthreads + (tid < remain_block);
    *start = min_p<TID>(start_block * align, len);
    *end = min_p<TID>(end_block * align, len);
}
// Generic fallback: no conversion is defined for this (TX, TY) pair, so this
// is an intentional no-op. Real conversions live in the specializations below.
template <typename TX, typename TY>
static __device__ void primitive_cast(const TX *x, TY *y, int len) {
    return;
}
// float -> int conversion using XPU SIMD: 16 lanes per iteration, rounding
// toward zero (vfloat2fix.rz), stored through a masked vector store.
// NOTE(review): iterates in steps of 16 — assumes len is a multiple of 16
// (or that the buffers are padded accordingly); TODO confirm at call sites.
template <>
__device__ void primitive_cast(const float *x, int *y, int len) {
    for (int i = 0; i < len; i += 16) {
        float32x16_t Y = vload_lm_float32x16(x);
        __asm__ __volatile__("vfloat2fix.rz vr0, %0\t\n"
                             "vstore_mask16.mz vr0{mr1}, 0(%1)" ::"v"(Y),
                             "r"(y)
                             : "vr0");
        x += 16;
        y += 16;
    }
    mfence_lm(); // make the vector stores visible before returning
}
// int -> float conversion using XPU SIMD: 16 lanes per iteration, rounding to
// nearest (vfix2float.rn), stored through a masked vector store.
// NOTE(review): iterates in steps of 16 — assumes len is a multiple of 16
// (or that the buffers are padded accordingly); TODO confirm at call sites.
template <>
__device__ void primitive_cast(const int *x, float *y, int len) {
    for (int i = 0; i < len; i += 16) {
        int32x16_t Y = vload_lm_int32x16(x);
        __asm__ __volatile__("vfix2float.rn vr0, %0\t\n"
                             "vstore_mask16.mz vr0{mr1}, 0(%1)" ::"v"(Y),
                             "r"(y)
                             : "vr0");
        x += 16;
        y += 16;
    }
    mfence_lm(); // make the vector stores visible before returning
}
// Load 32 consecutive floats from local memory into two 16-lane vectors.
static __device__ inline void vload2_lm(const float *ptr, float32x16_t &vl, float32x16_t &vh) {
    vl = __builtin_xpu2_vload_mask16_mr1(ptr, 0);
    vh = __builtin_xpu2_vload_mask16_mr1(ptr + 16, 0);
}
// Store two 16-lane vectors to 32 consecutive floats in local memory.
static __device__ inline void vstore2_lm(float *ptr, float32x16_t &vl, float32x16_t &vh) {
    vstore_lm_float32x16(ptr, vl);
    vstore_lm_float32x16(ptr + 16, vh);
}
// float -> float "cast": a plain vectorized copy, 32 elements per iteration.
// Skips the copy entirely when source and destination alias.
// NOTE(review): iterates in steps of 32 — assumes len is a multiple of 32
// (or padded buffers); TODO confirm at call sites.
template <>
__device__ void primitive_cast(const float *x, float *y, int len) {
    if (x == y) {
        return; // in-place: nothing to do
    } else { // just copy
        float32x16_t vec_x_0;
        float32x16_t vec_x_1;
        for (int i = 0; i < len; i += 32) {
            vload2_lm(x + i, vec_x_0, vec_x_1);
            vstore2_lm(y + i, vec_x_0, vec_x_1);
        }
        mfence_lm(); // make the vector stores visible before returning
    }
}
#endif
......@@ -23,7 +23,7 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return INFINI_STATUS_NULL_POINTER;
}
for (size_t i = 0; i < INFINI_DEVICE_TYPE_COUNT; i++) {
if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_QY || i == INFINI_DEVICE_KUNLUN || i == INFINI_DEVICE_HYGON) {
if (i == INFINI_DEVICE_ILUVATAR || i == INFINI_DEVICE_HYGON || i == INFINI_DEVICE_QY) {
count_array[i] = 0;
continue;
}
......
......@@ -13,6 +13,7 @@ from .datatypes import to_torch_dtype, to_infinicore_dtype
from .devices import InfiniDeviceNames, torch_device_map
from .tensor import TensorSpec, TensorInitializer
from .utils import (
clone_torch_tensor,
create_test_comparator,
infinicore_tensor_from_torch,
)
......@@ -321,7 +322,7 @@ class BaseOperatorTest(ABC):
for item in input_sequence:
if isinstance(item, torch.Tensor):
if clone:
cloned_item = item.clone().detach()
cloned_item = clone_torch_tensor(item)
infini_item = infinicore_tensor_from_torch(cloned_item)
cloned_tensors.append(cloned_item)
else:
......@@ -340,7 +341,7 @@ class BaseOperatorTest(ABC):
if isinstance(inp, torch.Tensor):
# Clone only if this input will be used for comparison
if comparison_target == i:
cloned_inp = inp.clone().detach()
cloned_inp = clone_torch_tensor(inp)
infini_tensor = infinicore_tensor_from_torch(cloned_inp)
cloned_tensors.append(cloned_inp)
else:
......@@ -362,7 +363,7 @@ class BaseOperatorTest(ABC):
if isinstance(value, torch.Tensor):
# Check if this tensor is used for output comparison
if key == "out" and comparison_target == "out":
cloned_value = value.clone().detach()
cloned_value = clone_torch_tensor(value)
infini_kwargs[key] = infinicore_tensor_from_torch(cloned_value)
cloned_tensors.append(cloned_value)
elif key == "out" and isinstance(comparison_target, int):
......@@ -566,12 +567,12 @@ class BaseOperatorTest(ABC):
elif comparison_target == "out":
# Compare output tensor from kwargs (explicit output)
torch_comparison = kwargs.get("out")
infini_comparison = infini_kwargs.get("out")
infini_comparison = cloned_tensors[0]
elif isinstance(comparison_target, int):
# Compare specific input tensor (in-place operation on input)
if 0 <= comparison_target < len(inputs):
torch_comparison = inputs[comparison_target]
infini_comparison = infini_inputs[comparison_target]
infini_comparison = cloned_tensors[0]
else:
raise ValueError(
f"Invalid comparison target index: {comparison_target}"
......
......@@ -118,6 +118,13 @@ def get_tolerance(tolerance_map, tensor_dtype, default_atol=0, default_rtol=1e-3
return tolerance["atol"], tolerance["rtol"]
def clone_torch_tensor(torch_tensor):
    """Return a detached deep copy of ``torch_tensor``.

    Non-contiguous inputs are passed through ``rearrange_tensor`` so the copy
    keeps the original's stride layout instead of being made contiguous.
    """
    copied = torch_tensor.detach().clone()
    if torch_tensor.is_contiguous():
        return copied
    return rearrange_tensor(copied, torch_tensor.stride())
def infinicore_tensor_from_torch(torch_tensor):
infini_device = infinicore.device(torch_tensor.device.type, 0)
if torch_tensor.is_contiguous():
......@@ -152,6 +159,10 @@ def convert_infinicore_to_torch(infini_result):
dtype=to_torch_dtype(infini_result.dtype),
device=infini_result.device.type,
)
if not infini_result.is_contiguous():
torch_result_from_infini = rearrange_tensor(
torch_result_from_infini, infini_result.stride()
)
temp_tensor = infinicore_tensor_from_torch(torch_result_from_infini)
temp_tensor.copy_(infini_result)
return torch_result_from_infini
......@@ -223,7 +234,10 @@ def compare_results(
return result_equal
# Convert infinicore result to PyTorch tensor for comparison
torch_result_from_infini = convert_infinicore_to_torch(infini_result)
if isinstance(infini_result, torch.Tensor):
torch_result_from_infini = infini_result
else:
torch_result_from_infini = convert_infinicore_to_torch(infini_result)
# Debug mode: detailed comparison
if debug_mode:
......
......@@ -49,8 +49,8 @@ _TEST_CASES_DATA = [
((13, 4), 0, False, None, (3,), (3,)),
((13, 4), 1, False, (20, 1), (10,), (10,)),
# 3D in-place cases
((4, 5, 6), 1, True, None, (4, 1, 6), (4, 1, 6)),
((4, 5, 6), -1, False, (30, 6, 1), (4, 5), (4, 5)),
((4, 5, 6), 1, True, None, (6, 6, 1), (6, 6, 1)),
((4, 5, 6), -1, False, (30, 6, 1), (5, 1), (5, 1)),
]
# Tolerance configuration
......
......@@ -28,7 +28,6 @@ _TEST_CASES_DATA = [
((4, 48, 6), None, None),
# Strided tensors
((1, 2048), (4096, 1), (4096, 1)),
((6, 2560), (2048, 1), (2560, 1)),
# Mixed cases
((8, 16, 32), None, None),
# Large tensors
......
......@@ -31,12 +31,12 @@ _TEST_CASES_DATA = [
((4, 5, 6), 1, False, None, None, None),
((4, 5, 6), -1, True, None, None, None),
# 3D in-place cases
((4, 5, 6), 1, False, None, (4, 1, 6), (4, 1, 6)),
((4, 5, 6), -1, False, (30, 6, 1), (64, 1, 5), (64, 1, 5)),
((4, 5, 6), 1, False, None, (30, 6, 1), (30, 6, 1)),
((4, 5, 6), -1, False, (30, 6, 1), (30, 6, 1), (30, 6, 1)),
# Strided inputs and outputs
((13, 4), None, False, (4, 1), (12, 1), (24, 1)),
((13, 4), 0, False, (1, 4), (64, 1), (1, 4)),
((13, 4), 1, False, (1, 4), (64, 1), (1, 4)),
((13, 4), None, False, (4, 1), (4, 1), (4, 1)),
((13, 4), 0, False, (13, 1), (13, 1), (13, 1)),
((13, 4), 1, False, (13, 1), (13, 1), (13, 1)),
]
# Tolerance configuration
......
......@@ -33,7 +33,8 @@ _TEST_CASES_ = [
# w (weight) types
# Note: 'None' means the same as input dtype
_X_DTYPES = [] # [InfiniDtype.F32, InfiniDtype.BF16, InfiniDtype.F16]
# _X_DTYPES = [InfiniDtype.F32, InfiniDtype.BF16, InfiniDtype.F16]
_X_DTYPES = [] # CPU CI
# x types used for testing
_VALUE_DTYPES = [InfiniDtype.F32]
......@@ -194,6 +195,7 @@ def test(
lib_topkrouter()
lable_values, lable_indices = torch_topkrouter(x.actual_tensor(), correction_bias.actual_tensor(), routed_scaling_factor, topk)
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
if DEBUG:
......
......@@ -326,6 +326,7 @@ target("infiniccl")
end
if has_config("qy-gpu") then
add_deps("infiniccl-qy")
add_files("build/.objs/infiniccl-qy/rules/qy.cuda/src/infiniccl/cuda/*.cu.o", {public = true})
end
if has_config("moore-gpu") then
......@@ -347,12 +348,45 @@ target("infiniccl")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
target_end()
-- Phony aggregate target: building it builds the three core libraries.
-- Fix: the target name line was duplicated (merge artifact), opening the
-- same target twice; declare it exactly once.
target("infinicore_c_api")
    set_kind("phony")
    add_deps("infiniop", "infinirt", "infiniccl")
    after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
-- Shared C++ API library: compiles the infinicore C++ sources on top of the
-- core libraries and installs the public headers under INFINI_ROOT.
target("infinicore_cpp_api")
    set_kind("shared")
    add_deps("infiniop", "infinirt", "infiniccl")
    set_languages("cxx17")
    -- INFINI_ROOT defaults to ~/.infini (HOMEPATH on Windows).
    local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
    add_includedirs("include")
    add_includedirs(INFINI_ROOT.."/include", { public = true })
    add_linkdirs(INFINI_ROOT.."/lib")
    add_links("infiniop", "infinirt", "infiniccl")
    -- Add InfiniCore C++ source files (needed for RoPE and other nn modules)
    add_files("src/infinicore/*.cc")
    add_files("src/infinicore/context/*.cc")
    add_files("src/infinicore/context/*/*.cc")
    add_files("src/infinicore/tensor/*.cc")
    add_files("src/infinicore/nn/*.cc")
    add_files("src/infinicore/ops/*/*.cc")
    -- Install the library and every public header (flat and nested).
    set_installdir(INFINI_ROOT)
    add_installfiles("include/infinicore/(**.h)", {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**.hpp)", {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**/*.h)", {prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore/(**/*.hpp)",{prefixdir = "include/infinicore"})
    add_installfiles("include/infinicore.h", {prefixdir = "include"})
    add_installfiles("include/infinicore.hpp", {prefixdir = "include"})
    after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
target("_infinicore")
add_packages("boost")
if is_mode("debug") then
......@@ -378,6 +412,7 @@ target("_infinicore")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc")
add_files("src/infinicore/pybind11/**.cc")
......
......@@ -89,6 +89,7 @@ target("infinicore-test")
add_files(os.projectdir().."/src/infinicore/nn/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*/*.cc")
set_installdir(INFINI_ROOT)
target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment