Unverified Commit 9a05446f authored by PanZezhong1725, committed by GitHub

issue/461 InfiniCore inference runtime


Co-authored-by: Jiacheng Huang <huangjiacheng0709@outlook.com>
Co-authored-by: wooway777 <wooway777@gmail.com>
parent 37411f6d
......@@ -3,6 +3,8 @@
#include <iostream>
#include <tuple>
#include "infini_status_string.h"
#define CHECK_OR_RETURN(CONDITION, ERROR) \
do { \
if (!(CONDITION)) { \
......@@ -26,7 +28,10 @@
#define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR)
#define CHECK_STATUS(API) CHECK_API_OR(API, INFINI_STATUS_SUCCESS, return api_result_)
#define CHECK_STATUS(API) \
CHECK_API_OR(API, INFINI_STATUS_SUCCESS, \
std::cerr << "Error: " << infini_status_string(api_result_) << std::endl; \
return api_result_)
#define CHECK_DTYPE(DT, ...) \
do { \
......
#ifndef INFINI_STATUS_STRING_H
#define INFINI_STATUS_STRING_H
#include <infinicore.h>
inline const char *infini_status_string(infiniStatus_t status) {
switch (status) {
case INFINI_STATUS_SUCCESS:
return "Success";
case INFINI_STATUS_INTERNAL_ERROR:
return "Internal Error";
case INFINI_STATUS_NOT_IMPLEMENTED:
return "Not Implemented";
case INFINI_STATUS_BAD_PARAM:
return "Bad Parameter";
case INFINI_STATUS_NULL_POINTER:
return "Null Pointer";
case INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED:
return "Device Type Not Supported";
case INFINI_STATUS_DEVICE_NOT_FOUND:
return "Device Not Found";
case INFINI_STATUS_DEVICE_NOT_INITIALIZED:
return "Device Not Initialized";
case INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED:
return "Device Architecture Not Supported";
case INFINI_STATUS_BAD_TENSOR_DTYPE:
return "Bad Tensor Data Type";
case INFINI_STATUS_BAD_TENSOR_SHAPE:
return "Bad Tensor Shape";
case INFINI_STATUS_BAD_TENSOR_STRIDES:
return "Bad Tensor Strides";
case INFINI_STATUS_INSUFFICIENT_WORKSPACE:
return "Insufficient Workspace";
default:
return "Unknown Error";
}
}
#endif /* INFINI_STATUS_STRING_H */
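/*
 * Illustrative usage sketch (not part of the header): infini_status_string turns an
 * infiniStatus_t into a readable label, which is what the CHECK_STATUS macro above
 * prints before propagating the error. `run_step` below is a hypothetical function
 * returning infiniStatus_t, not a real InfiniCore API:
 *
 *   infiniStatus_t status = run_step();
 *   if (status != INFINI_STATUS_SUCCESS) {
 *       std::cerr << "Error: " << infini_status_string(status) << std::endl;
 *       return status;
 *   }
 */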
from .base import TestConfig, TestRunner, TestCase
from .utils import (
create_infinicore_tensor,
compare_results,
debug,
get_tolerance,
profile_operation,
rearrange_tensor,
)
from .config import get_test_devices, get_args
from .devices import InfiniDeviceEnum, InfiniDeviceNames, torch_device_map
from .datatypes import to_torch_dtype, to_infinicore_dtype
__all__ = [
"TestConfig",
"TestRunner",
"TestCase",
"create_infinicore_tensor",
"compare_results",
"debug",
"get_tolerance",
"profile_operation",
"rearrange_tensor",
"get_test_devices",
"get_args",
"InfiniDeviceEnum",
"InfiniDeviceNames",
"torch_device_map",
"to_torch_dtype",
"to_infinicore_dtype",
]
import torch
import infinicore
from .devices import InfiniDeviceNames
from .utils import synchronize_device
class TestCase:
"""Base test case class"""
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
def __str__(self):
return f"TestCase{self.args}"
class TestConfig:
"""Test configuration"""
def __init__(
self,
tensor_dtypes,
tolerance_map,
debug=False,
bench=False,
num_prerun=10,
num_iterations=1000,
):
self.tensor_dtypes = tensor_dtypes
self.tolerance_map = tolerance_map
self.debug = debug
self.bench = bench
self.num_prerun = num_prerun
self.num_iterations = num_iterations
class TestRunner:
"""Test runner"""
def __init__(self, test_cases, test_config):
self.test_cases = test_cases
self.config = test_config
self.failed_tests = [] # Track failures
def run_tests(self, devices, test_func):
"""Run tests and track failures"""
for device in devices:
print(f"\n{'='*60}")
print(f"Testing on {InfiniDeviceNames[device]}")
print(f"{'='*60}")
# filter unsupported data types
tensor_dtypes = self._filter_tensor_dtypes_by_device(
device, self.config.tensor_dtypes
)
for test_case in self.test_cases:
for dtype in tensor_dtypes:
try:
test_func(device, test_case, dtype, self.config)
print(f"✓ {test_case} with {dtype} passed")
except Exception as e:
error_msg = f"{test_case} with {dtype} on {InfiniDeviceNames[device]}: {e}"
print(f"✗ {error_msg}")
self.failed_tests.append(error_msg)
if self.config.debug:
raise
# Return whether any tests failed
return len(self.failed_tests) == 0
def _filter_tensor_dtypes_by_device(self, device, tensor_dtypes):
"""Filter data types based on device"""
        if device in ():  # currently empty: no device needs dtype filtering yet
            # Filter out data types (e.g. bfloat16) that the listed devices do not support
return [dt for dt in tensor_dtypes if dt != infinicore.bfloat16]
else:
return tensor_dtypes
def print_summary(self):
"""Print test summary"""
if self.failed_tests:
print(f"\n\033[91m{len(self.failed_tests)} tests failed:\033[0m")
for failure in self.failed_tests:
print(f" - {failure}")
return False
else:
print("\n\033[92mAll tests passed!\033[0m")
return True
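# Minimal usage sketch (illustrative only; `my_test_func` is a hypothetical callback
# with the signature run_tests expects: (device, test_case, dtype, config), and
# InfiniDeviceEnum comes from .devices):
#
#   config = TestConfig(
#       tensor_dtypes=[infinicore.float32],
#       tolerance_map={infinicore.float32: {"atol": 0, "rtol": 1e-3}},
#   )
#   runner = TestRunner([TestCase((2, 3), (3, 4))], config)
#   ok = runner.run_tests([InfiniDeviceEnum.CPU], my_test_func)
#   runner.print_summary()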
import argparse
from .devices import InfiniDeviceEnum
def get_args():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(description="Test Operator")
parser.add_argument(
"--bench",
action="store_true",
help="Whether to benchmark performance",
)
parser.add_argument(
"--num_prerun",
type=lambda x: max(0, int(x)),
default=10,
help="Set the number of pre-runs before benchmarking. Default is 10.",
)
parser.add_argument(
"--num_iterations",
type=lambda x: max(0, int(x)),
default=1000,
help="Set the number of iterations for benchmarking. Default is 1000.",
)
parser.add_argument(
"--debug",
action="store_true",
help="Whether to turn on debug mode.",
)
# Device options
device_group = parser.add_argument_group("Device options")
device_group.add_argument("--cpu", action="store_true", help="Run CPU test")
device_group.add_argument(
"--nvidia", action="store_true", help="Run NVIDIA GPU test"
)
device_group.add_argument(
"--cambricon", action="store_true", help="Run Cambricon MLU test"
)
device_group.add_argument(
"--ascend", action="store_true", help="Run ASCEND NPU test"
)
device_group.add_argument(
"--iluvatar", action="store_true", help="Run Iluvatar GPU test"
)
device_group.add_argument("--metax", action="store_true", help="Run METAX GPU test")
device_group.add_argument(
"--moore", action="store_true", help="Run MTHREADS GPU test"
)
device_group.add_argument(
"--kunlun", action="store_true", help="Run KUNLUN XPU test"
)
return parser.parse_args()
def get_test_devices(args):
"""
Determine which devices to test based on command line arguments
"""
devices_to_test = []
if args.cpu:
devices_to_test.append(InfiniDeviceEnum.CPU)
if args.nvidia:
devices_to_test.append(InfiniDeviceEnum.NVIDIA)
if args.iluvatar:
devices_to_test.append(InfiniDeviceEnum.ILUVATAR)
if args.cambricon:
try:
import torch_mlu
devices_to_test.append(InfiniDeviceEnum.CAMBRICON)
except ImportError:
print("Warning: torch_mlu not available, skipping Cambricon tests")
if args.ascend:
try:
import torch
import torch_npu
torch.npu.set_device(0) # Ascend NPU needs explicit device initialization
devices_to_test.append(InfiniDeviceEnum.ASCEND)
except ImportError:
print("Warning: torch_npu not available, skipping Ascend tests")
if args.metax:
import torch
devices_to_test.append(InfiniDeviceEnum.METAX)
if args.moore:
try:
import torch
import torch_musa
devices_to_test.append(InfiniDeviceEnum.MOORE)
except ImportError:
print("Warning: torch_musa not available, skipping Moore tests")
if args.kunlun:
try:
import torch_xmlir
devices_to_test.append(InfiniDeviceEnum.KUNLUN)
except ImportError:
print("Warning: torch_xmlir not available, skipping Kunlun tests")
# Default to CPU if no devices specified
if not devices_to_test:
devices_to_test = [InfiniDeviceEnum.CPU]
return devices_to_test
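# Example invocation (sketch): operator test scripts that call get_args/get_test_devices
# accept these flags on the command line, e.g.
#   python <operator_test>.py --cpu --nvidia --bench --num_prerun 5 --num_iterations 200
# With no device flag given, the tests fall back to CPU.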
import torch
import infinicore
def to_torch_dtype(infini_dtype):
"""Convert infinicore data type to PyTorch data type"""
if infini_dtype == infinicore.float16:
return torch.float16
elif infini_dtype == infinicore.float32:
return torch.float32
elif infini_dtype == infinicore.bfloat16:
return torch.bfloat16
elif infini_dtype == infinicore.int32:
return torch.int32
elif infini_dtype == infinicore.int64:
return torch.int64
else:
raise ValueError(f"Unsupported infinicore dtype: {infini_dtype}")
def to_infinicore_dtype(torch_dtype):
"""Convert PyTorch data type to infinicore data type"""
if torch_dtype == torch.float32:
return infinicore.float32
elif torch_dtype == torch.float16:
return infinicore.float16
elif torch_dtype == torch.bfloat16:
return infinicore.bfloat16
elif torch_dtype == torch.int32:
return infinicore.int32
elif torch_dtype == torch.int64:
return infinicore.int64
else:
raise ValueError(f"Unsupported torch dtype: {torch_dtype}")
class InfiniDeviceEnum:
CPU = 0
NVIDIA = 1
CAMBRICON = 2
ASCEND = 3
METAX = 4
MOORE = 5
ILUVATAR = 6
KUNLUN = 7
SUGON = 8
InfiniDeviceNames = {
InfiniDeviceEnum.CPU: "CPU",
InfiniDeviceEnum.NVIDIA: "NVIDIA",
InfiniDeviceEnum.CAMBRICON: "Cambricon",
InfiniDeviceEnum.ASCEND: "Ascend",
InfiniDeviceEnum.METAX: "Metax",
InfiniDeviceEnum.MOORE: "Moore",
InfiniDeviceEnum.ILUVATAR: "Iluvatar",
InfiniDeviceEnum.KUNLUN: "Kunlun",
InfiniDeviceEnum.SUGON: "Sugon",
}
# Maps an InfiniDeviceEnum value to its torch device string
torch_device_map = {
InfiniDeviceEnum.CPU: "cpu",
InfiniDeviceEnum.NVIDIA: "cuda",
InfiniDeviceEnum.CAMBRICON: "mlu",
InfiniDeviceEnum.ASCEND: "npu",
InfiniDeviceEnum.METAX: "cuda",
InfiniDeviceEnum.MOORE: "musa",
InfiniDeviceEnum.ILUVATAR: "cuda",
InfiniDeviceEnum.KUNLUN: "cuda",
InfiniDeviceEnum.SUGON: "cuda",
}
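# Lookup sketch: a device enum resolves to a display name and a torch device string, e.g.
#   InfiniDeviceNames[InfiniDeviceEnum.ASCEND]  # -> "Ascend"
#   torch_device_map[InfiniDeviceEnum.ASCEND]   # -> "npu"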
import torch
import time
import infinicore
from .datatypes import to_infinicore_dtype, to_torch_dtype
def create_infinicore_tensor(torch_tensor, device_str):
"""Create infinicore tensor from PyTorch tensor"""
infini_device = infinicore.device(device_str, 0)
return infinicore.from_blob(
torch_tensor.data_ptr(),
list(torch_tensor.shape),
dtype=to_infinicore_dtype(torch_tensor.dtype),
device=infini_device,
)
def synchronize_device(torch_device):
"""Device synchronization"""
if torch_device == "cuda":
torch.cuda.synchronize()
elif torch_device == "npu":
torch.npu.synchronize()
elif torch_device == "mlu":
torch.mlu.synchronize()
def timed_op(func, num_iterations, device):
"""Timed operation"""
synchronize_device(device)
start = time.time()
for _ in range(num_iterations):
func()
synchronize_device(device)
return (time.time() - start) / num_iterations
def profile_operation(desc, func, torch_device, num_prerun, num_iterations):
"""
Performance profiling workflow
"""
# Warm-up runs
for _ in range(num_prerun):
func()
# Timed execution
elapsed = timed_op(lambda: func(), num_iterations, torch_device)
print(f" {desc} time: {elapsed * 1000 :6f} ms")
def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
"""
Debug function to compare two tensors and print differences
"""
if actual.dtype == torch.bfloat16 or desired.dtype == torch.bfloat16:
actual = actual.to(torch.float32)
desired = desired.to(torch.float32)
print_discrepancy(actual, desired, atol, rtol, equal_nan, verbose)
import numpy as np
np.testing.assert_allclose(
actual.cpu(), desired.cpu(), rtol, atol, equal_nan, verbose=True
)
def print_discrepancy(
actual, expected, atol=0, rtol=1e-3, equal_nan=True, verbose=True
):
"""Print detailed tensor differences"""
if actual.shape != expected.shape:
raise ValueError("Tensors must have the same shape to compare.")
import torch
import sys
is_terminal = sys.stdout.isatty()
actual_isnan = torch.isnan(actual)
expected_isnan = torch.isnan(expected)
# Calculate difference mask
nan_mismatch = (
actual_isnan ^ expected_isnan if equal_nan else actual_isnan | expected_isnan
)
diff_mask = nan_mismatch | (
torch.abs(actual - expected) > (atol + rtol * torch.abs(expected))
)
diff_indices = torch.nonzero(diff_mask, as_tuple=False)
delta = actual - expected
# Display formatting
col_width = [18, 20, 20, 20]
decimal_places = [0, 12, 12, 12]
total_width = sum(col_width) + sum(decimal_places)
def add_color(text, color_code):
if is_terminal:
return f"\033[{color_code}m{text}\033[0m"
else:
return text
if verbose:
for idx in diff_indices:
index_tuple = tuple(idx.tolist())
actual_str = f"{actual[index_tuple]:<{col_width[1]}.{decimal_places[1]}f}"
expected_str = (
f"{expected[index_tuple]:<{col_width[2]}.{decimal_places[2]}f}"
)
delta_str = f"{delta[index_tuple]:<{col_width[3]}.{decimal_places[3]}f}"
print(
f" > Index: {str(index_tuple):<{col_width[0]}}"
f"actual: {add_color(actual_str, 31)}"
f"expect: {add_color(expected_str, 32)}"
f"delta: {add_color(delta_str, 33)}"
)
print(add_color(" INFO:", 35))
print(f" - Actual dtype: {actual.dtype}")
print(f" - Desired dtype: {expected.dtype}")
print(f" - Atol: {atol}")
print(f" - Rtol: {rtol}")
print(
f" - Mismatched elements: {len(diff_indices)} / {actual.numel()} ({len(diff_indices) / actual.numel() * 100}%)"
)
print(
f" - Min(actual) : {torch.min(actual):<{col_width[1]}} | Max(actual) : {torch.max(actual):<{col_width[2]}}"
)
print(
f" - Min(desired): {torch.min(expected):<{col_width[1]}} | Max(desired): {torch.max(expected):<{col_width[2]}}"
)
print(
f" - Min(delta) : {torch.min(delta):<{col_width[1]}} | Max(delta) : {torch.max(delta):<{col_width[2]}}"
)
print("-" * total_width + "\n")
return diff_indices
def get_tolerance(tolerance_map, tensor_dtype, default_atol=0, default_rtol=1e-3):
"""
Get tolerance settings based on data type
"""
tolerance = tolerance_map.get(
tensor_dtype, {"atol": default_atol, "rtol": default_rtol}
)
return tolerance["atol"], tolerance["rtol"]
def compare_results(
infini_result, torch_result, dtype, config, device_str, tolerance_map=None
):
"""
Compare infinicore result with PyTorch reference result
Args:
infini_result: infinicore tensor result
torch_result: PyTorch tensor reference result
dtype: infinicore data type
config: test config
device_str: torch device string
tolerance_map: optional tolerance map (defaults to config's tolerance_map)
Returns:
bool: True if results match within tolerance
"""
# Convert infinicore result to PyTorch tensor for comparison
torch_result_from_infini = torch.zeros(
torch_result.shape, dtype=to_torch_dtype(dtype), device=device_str
)
temp_tensor = create_infinicore_tensor(torch_result_from_infini, device_str)
temp_tensor.copy_(infini_result)
# Retrieve tolerance - use provided map or config's map
if tolerance_map is None:
tolerance_map = config.tolerance_map
atol, rtol = get_tolerance(tolerance_map, dtype)
# Debug mode: detailed comparison
if config.debug:
debug(torch_result_from_infini, torch_result, atol=atol, rtol=rtol)
# Check if results match within tolerance
return torch.allclose(torch_result_from_infini, torch_result, atol=atol, rtol=rtol)
def rearrange_tensor(tensor, new_strides):
"""
Given a PyTorch tensor and a list of new strides, return a new PyTorch tensor with the given strides.
"""
import torch
shape = tensor.shape
new_size = [0] * len(shape)
left = 0
right = 0
for i in range(len(shape)):
if new_strides[i] > 0:
new_size[i] = (shape[i] - 1) * new_strides[i] + 1
right += new_strides[i] * (shape[i] - 1)
else: # TODO: Support negative strides in the future
# new_size[i] = (shape[i] - 1) * (-new_strides[i]) + 1
# left += new_strides[i] * (shape[i] - 1)
raise ValueError("Negative strides are not supported yet")
# Create a new tensor with zeros
new_tensor = torch.zeros(
(right - left + 1,), dtype=tensor.dtype, device=tensor.device
)
# Generate indices for original tensor based on original strides
indices = [torch.arange(s) for s in shape]
mesh = torch.meshgrid(*indices, indexing="ij")
# Flatten indices for linear indexing
linear_indices = [m.flatten() for m in mesh]
# Calculate new positions based on new strides
new_positions = sum(
linear_indices[i] * new_strides[i] for i in range(len(shape))
).to(tensor.device)
offset = -left
new_positions += offset
# Copy the original data to the new tensor
new_tensor.view(-1).index_add_(0, new_positions, tensor.view(-1))
new_tensor.set_(new_tensor.untyped_storage(), offset, shape, tuple(new_strides))
return new_tensor
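# Usage sketch for rearrange_tensor (illustrative): give a contiguous (2, 3) tensor a
# padded row stride of 4 while keeping its values and logical shape.
#   t = torch.arange(6, dtype=torch.float32).reshape(2, 3)
#   padded = rearrange_tensor(t, [4, 1])
#   assert padded.stride() == (4, 1) and torch.equal(padded, t)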
import torch
import infinicore
import sys
import os
# Framework path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from framework import (
TestConfig,
TestRunner,
TestCase,
create_infinicore_tensor,
compare_results,
get_args,
get_test_devices,
profile_operation,
to_torch_dtype,
InfiniDeviceNames,
torch_device_map,
)
# ==============================================================================
# Test Setup
# ==============================================================================
# Test cases
_TEST_CASES = [
# (a_shape, b_shape, result_shape, a_stride, b_stride, c_stride)
TestCase((2, 3), (3, 4), (2, 4), None, None, None),
TestCase((128, 256), (256, 64), (128, 64), None, None, None),
TestCase((2, 4, 2048), (2, 2048, 2048), (2, 4, 2048), None, None, None),
TestCase((1, 2048), (2048, 2048), (1, 2048), (4096, 1), (4096, 1), (4096, 1)),
TestCase((6, 2048), (2048, 2560), (6, 2560), (2048, 1), (1, 2048), (2560, 1)),
TestCase((4, 8 * 6, 64), (4, 64, 6), (4, 8 * 6, 6), None, None, None),
]
# Data types - now using infinicore native types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
# Tolerance
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
infinicore.float32: {"atol": 0, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 0, "rtol": 5e-2},
}
# ==============================================================================
# Test Method
# ==============================================================================
def test_matmul(device, test_case, dtype, config):
"""
Test matmul operation
Args:
device: device enum
test_case: test case
dtype: infinicore data type
config: test config
"""
a_shape, b_shape, result_shape, a_stride, b_stride, c_stride = test_case.args
print(
f"Testing Matmul on {InfiniDeviceNames[device]} with "
f"a_shape:{a_shape}, b_shape:{b_shape}, result_shape:{result_shape}, "
f"a_stride:{a_stride}, b_stride:{b_stride}, c_stride:{c_stride}, "
f"dtype:{dtype}"
)
# Create PyTorch tensors
device_str = torch_device_map[device]
torch_dtype = to_torch_dtype(dtype)
torch_a = torch.rand(a_shape, dtype=torch_dtype, device=device_str)
torch_b = torch.rand(b_shape, dtype=torch_dtype, device=device_str)
# Calculate PyTorch reference result
def torch_matmul():
return torch.matmul(torch_a, torch_b)
torch_result = torch_matmul()
# Create infinicore tensors
infini_a = create_infinicore_tensor(torch_a, device_str)
infini_b = create_infinicore_tensor(torch_b, device_str)
# Out-of-place matmul
def infini_matmul():
return infinicore.matmul(infini_a, infini_b)
infini_result = infini_matmul()
# Validate results using common method
is_valid = compare_results(infini_result, torch_result, dtype, config, device_str)
assert is_valid, "Matmul test failed"
# Performance test
if config.bench:
profile_operation(
"PyTorch",
torch_matmul,
device_str,
config.num_prerun,
config.num_iterations,
)
profile_operation(
"Infinicore",
infini_matmul,
device_str,
config.num_prerun,
config.num_iterations,
)
def test_matmul_inplace(device, test_case, dtype, config):
"""
Test in-place matmul operation
Args:
device: device enum
test_case: test case
dtype: infinicore data type
config: test config
"""
a_shape, b_shape, result_shape, a_stride, b_stride, c_stride = test_case.args
print(
f"Testing In-place Matmul on {InfiniDeviceNames[device]} with "
f"a_shape:{a_shape}, b_shape:{b_shape}, result_shape:{result_shape}, "
f"dtype:{dtype}"
)
device_str = torch_device_map[device]
torch_dtype = to_torch_dtype(dtype)
# Create PyTorch tensors
torch_a = torch.rand(a_shape, dtype=torch_dtype, device=device_str)
torch_b = torch.rand(b_shape, dtype=torch_dtype, device=device_str)
# Create pre-allocated result tensor
torch_preallocated = torch.zeros(result_shape, dtype=torch_dtype, device=device_str)
# Calculate PyTorch reference result using in-place operation
def torch_matmul_inplace():
torch.matmul(torch_a, torch_b, out=torch_preallocated)
# Execute in-place operation
torch_matmul_inplace()
# Create infinicore tensors
infini_a = create_infinicore_tensor(torch_a, device_str)
infini_b = create_infinicore_tensor(torch_b, device_str)
infini_c = infinicore.empty(
result_shape, dtype=dtype, device=infinicore.device(device_str, 0)
)
# Test in-place matmul
def infini_matmul_inplace():
infinicore.matmul(infini_a, infini_b, out=infini_c)
# Execute in-place operation
infini_matmul_inplace()
# Validate results using common method
is_valid = compare_results(infini_c, torch_preallocated, dtype, config, device_str)
assert is_valid, "In-place matmul test failed"
# Performance test
if config.bench:
profile_operation(
"PyTorch In-place",
torch_matmul_inplace,
device_str,
config.num_prerun,
config.num_iterations,
)
profile_operation(
"Infinicore In-place",
infini_matmul_inplace,
device_str,
config.num_prerun,
config.num_iterations,
)
# ==============================================================================
# Main Execution Function
# ==============================================================================
def main():
args = get_args()
# Create test configuration
config = TestConfig(
tensor_dtypes=_TENSOR_DTYPES,
tolerance_map=_TOLERANCE_MAP,
debug=args.debug,
bench=args.bench,
num_prerun=args.num_prerun,
num_iterations=args.num_iterations,
)
# Create test runner
runner = TestRunner(_TEST_CASES, config)
# Get test devices
devices = get_test_devices(args)
print("Starting matmul tests...")
all_passed = True
# Run out-of-place tests
print("\n--- Testing Out-of-place Matmul ---")
out_of_place_passed = runner.run_tests(devices, test_matmul)
all_passed = all_passed and out_of_place_passed
# Run in-place tests
print("\n--- Testing In-place Matmul ---")
in_place_passed = runner.run_tests(devices, test_matmul_inplace)
all_passed = all_passed and in_place_passed
runner.print_summary()
sys.exit(0 if all_passed else 1)
if __name__ == "__main__":
main()
import infinicore
import torch
def test():
shape = [2, 3, 4]
shape2 = [3, 4, 2]
torch_tensor_ans = torch.rand(shape, dtype=torch.float32, device="cpu")
torch_tensor_result = torch.zeros(shape, dtype=torch.float32, device="cpu")
t_cpu = infinicore.from_blob(
torch_tensor_ans.data_ptr(),
shape,
dtype=infinicore.float32,
device=infinicore.device("cpu", 0),
)
t_gpu = t_cpu.to(infinicore.device("cuda", 0))
t_gpu = t_gpu.permute([1, 2, 0])
t_gpu2 = infinicore.empty(
shape2, dtype=infinicore.float32, device=infinicore.device("cuda", 0)
)
t_gpu2.copy_(t_gpu)
t_gpu2 = t_gpu2.permute([2, 0, 1]).contiguous()
t_result = infinicore.from_blob(
torch_tensor_result.data_ptr(),
shape,
dtype=infinicore.float32,
device=infinicore.device("cpu", 0),
)
t_result.copy_(t_gpu2)
assert torch.equal(torch_tensor_ans, torch_tensor_result)
print("Test passed")
if __name__ == "__main__":
test()
Subproject commit f1d748e5e3edfa4b1778edea003bac94781bc7b7
add_rules("mode.debug", "mode.release")
add_requires("boost", {configs = {stacktrace = true}})
add_requires("pybind11")
-- Define color codes
......@@ -9,6 +10,7 @@ local NC = '\27[0m' -- No Color
set_encodings("utf-8")
add_includedirs("include")
add_includedirs("third_party/spdlog/include")
if is_mode("debug") then
add_defines("DEBUG_MODE")
......@@ -317,14 +319,33 @@ target("infinicore_c_api")
after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
target("infinicore")
target("_infinicore")
add_packages("boost")
if is_mode("debug") then
add_defines("BOOST_STACKTRACE_USE_BACKTRACE")
add_links("backtrace")
else
add_defines("BOOST_STACKTRACE_USE_NOOP")
end
set_default(false)
add_rules("python.library", {soabi = true})
add_packages("pybind11")
set_languages("cxx17")
set_kind("shared")
add_deps("infinicore_c_api")
local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
add_includedirs(INFINI_ROOT.."/include", { public = true })
add_linkdirs(INFINI_ROOT.."/lib")
add_links("infiniop", "infinirt", "infiniccl")
add_files("src/infinicore/*.cc")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/op/*/*.cc")
add_files("src/infinicore/pybind11/**.cc")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
target_end()
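-- Build/install sketch (illustrative): the Python extension target defined above is not
-- built by default (set_default(false)), so it can be built and installed explicitly with
--   xmake build _infinicore
--   xmake install _infinicore
-- The install step copies the module into the directory chosen by set_installdir above
-- (INFINI_ROOT, defaulting to ~/.infini).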
......