"vscode:/vscode.git/clone" did not exist on "9cc0c41644208da61430287e78b98ad6a9dec3f7"
Commit cb7f0b7d authored by wooway777's avatar wooway777
Browse files

Revert "Merge pull request #1056 from InfiniTensor/issue/1031"

This reverts commit 7f295448, reversing
changes made to e60985dc.
parent 037140c0
...@@ -50,13 +50,7 @@ from infinicore.dtype import ( ...@@ -50,13 +50,7 @@ from infinicore.dtype import (
from infinicore.ops.add import add from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.all import all from infinicore.ops.all import all
from infinicore.ops.asinh import asinh
from infinicore.ops.attention import attention from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm
from infinicore.ops.bilinear import bilinear
from infinicore.ops.cross_entropy import cross_entropy
from infinicore.ops.equal import equal
from infinicore.ops.fmod import fmod
from infinicore.ops.kv_caching import kv_caching from infinicore.ops.kv_caching import kv_caching
from infinicore.ops.matmul import matmul from infinicore.ops.matmul import matmul
from infinicore.ops.mha_varlen import mha_varlen from infinicore.ops.mha_varlen import mha_varlen
...@@ -134,18 +128,12 @@ __all__ = [ ...@@ -134,18 +128,12 @@ __all__ = [
"add_rms_norm_", "add_rms_norm_",
"attention", "attention",
"kv_caching", "kv_caching",
"asinh",
"baddbmm",
"bilinear",
"fmod",
"matmul", "matmul",
"equal",
"mul", "mul",
"narrow", "narrow",
"squeeze", "squeeze",
"unsqueeze", "unsqueeze",
"rearrange", "rearrange",
"cross_entropy",
"empty", "empty",
"empty_like", "empty_like",
"from_blob", "from_blob",
......
from .adaptive_max_pool1d import adaptive_max_pool1d
from .avg_pool1d import avg_pool1d
from .causal_softmax import causal_softmax from .causal_softmax import causal_softmax
from .embedding import embedding from .embedding import embedding
from .flash_attention import flash_attention from .flash_attention import flash_attention
from .hardswish import hardswish
from .hardtanh import hardtanh
from .linear import linear from .linear import linear
from .linear_w8a8i8 import linear_w8a8i8 from .linear_w8a8i8 import linear_w8a8i8
from .random_sample import random_sample from .random_sample import random_sample
...@@ -15,7 +11,6 @@ from .silu_and_mul import silu_and_mul ...@@ -15,7 +11,6 @@ from .silu_and_mul import silu_and_mul
from .swiglu import swiglu from .swiglu import swiglu
__all__ = [ __all__ = [
"adaptive_max_pool1d",
"causal_softmax", "causal_softmax",
"embedding", "embedding",
"flash_attention", "flash_attention",
...@@ -25,9 +20,6 @@ __all__ = [ ...@@ -25,9 +20,6 @@ __all__ = [
"RopeAlgo", "RopeAlgo",
"rope", "rope",
"silu", "silu",
"hardswish",
"hardtanh",
"avg_pool1d",
"swiglu", "swiglu",
"linear_w8a8i8", "linear_w8a8i8",
"silu_and_mul", "silu_and_mul",
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def adaptive_max_pool1d(
    input: Tensor,
    output_size: int,
    *,
    out=None,
) -> Tensor:
    r"""Applies a 1D adaptive max pooling over an input signal composed of
    several input planes.

    The output size is H_out. The algorithm used is fairly simple:

    .. math::
        \text{start} = \left\lfloor \frac{i \cdot L_{in}}{L_{out}} \right\rfloor
        \text{end} = \left\lceil \frac{(i + 1) \cdot L_{in}}{L_{out}} \right\rceil

    where :math:`L_{in}` is the size of the input dimension, and :math:`L_{out}` is the size of the output dimension.

    Args:
        input (Tensor): Input tensor of shape (N, C, L_in)
        output_size (int): The target output size (L_out)
        out (Tensor, optional): Output tensor.

    Returns:
        Tensor: The result of the adaptive max pooling operation.
    """
    if out is not None:
        # Fill the caller-provided tensor in place.
        _infinicore.adaptive_max_pool1d_(out._underlying, input._underlying, output_size)
        return out
    return Tensor(_infinicore.adaptive_max_pool1d(input._underlying, output_size))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def avg_pool1d(
    input: Tensor,
    kernel_size: int,
    stride: int | None = None,
    padding: int = 0,
    *,
    out=None,
) -> Tensor:
    """Apply 1D average pooling over the last dimension of *input*.

    Args:
        input (Tensor): Input tensor (expected shape (N, C, L_in)).
        kernel_size (int): Size of the pooling window.
        stride (int | None): Step between windows. ``None`` is forwarded to
            the backend as 0, which the backend treats as "use kernel_size".
        padding (int): Implicit zero padding on both sides.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: The pooled tensor (*out* when provided).
    """
    effective_stride = 0 if stride is None else stride
    if out is not None:
        _infinicore.avg_pool1d_(
            out._underlying, input._underlying, kernel_size, effective_stride, padding
        )
        return out
    return Tensor(
        _infinicore.avg_pool1d(input._underlying, kernel_size, effective_stride, padding)
    )
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def hardswish(input: Tensor, inplace: bool = False, *, out=None) -> Tensor:
    r"""Apply the Hardswish activation function element-wise.

    Args:
        input (Tensor): Input tensor.
        inplace (bool): If True, overwrite ``input`` with the result.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: ``input`` when ``inplace``, ``out`` when provided,
        otherwise a freshly allocated tensor.
    """
    # Fast path: delegate to ntops when enabled for this device and no
    # explicit output tensor was requested.
    if (
        infinicore.use_ntops
        and input.device.type in ("cuda", "musa")
        and out is None
        and hasattr(infinicore.ntops.torch, "hardswish")
    ):
        try:
            return infinicore.ntops.torch.hardswish(input, inplace=inplace)
        except AttributeError:
            # Defensive: fall back to the infinicore kernel.
            pass
    if inplace:
        _infinicore.hardswish_(input._underlying, input._underlying)
        return input
    if out is not None:
        _infinicore.hardswish_(out._underlying, input._underlying)
        return out
    return Tensor(_infinicore.hardswish(input._underlying))
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def hardtanh(
    input: Tensor,
    min_val: float = -1.0,
    max_val: float = 1.0,
    inplace: bool = False,
    *,
    out=None,
) -> Tensor:
    """Clamp the input tensor to the range [min_val, max_val].

    Args:
        input (Tensor): Tensor to clamp.
        min_val (float): Lower bound of the clamping range.
        max_val (float): Upper bound of the clamping range.
        inplace (bool): If True, write the result back into ``input``.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: ``input`` when ``inplace``, ``out`` when provided,
        otherwise a freshly allocated tensor.

    Raises:
        ValueError: If ``min_val`` exceeds ``max_val``.
    """
    if min_val > max_val:
        raise ValueError("min_val must be less than or equal to max_val")
    # Fast path: delegate to ntops when enabled for this device and no
    # explicit output tensor was requested.
    if (
        infinicore.use_ntops
        and input.device.type in ("cuda", "musa")
        and out is None
        and hasattr(infinicore.ntops.torch, "hardtanh")
    ):
        try:
            return infinicore.ntops.torch.hardtanh(
                input, min_val=min_val, max_val=max_val, inplace=inplace
            )
        except AttributeError:
            # Defensive: fall back to the infinicore kernel.
            pass
    lo, hi = float(min_val), float(max_val)
    if inplace:
        _infinicore.hardtanh_(input._underlying, input._underlying, lo, hi)
        return input
    if out is not None:
        _infinicore.hardtanh_(out._underlying, input._underlying, lo, hi)
        return out
    return Tensor(_infinicore.hardtanh(input._underlying, lo, hi))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def asinh(input, *, out=None):
    """Element-wise inverse hyperbolic sine.

    Args:
        input (Tensor): Input tensor.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: ``asinh(input)``; *out* when provided.
    """
    if out is not None:
        _infinicore.asinh_(out._underlying, input._underlying)
        return out
    return Tensor(_infinicore.asinh(input._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def baddbmm(input, batch1, batch2, *, beta=1.0, alpha=1.0, out=None):
    """Batched ``beta * input + alpha * (batch1 @ batch2)``.

    Args:
        input (Tensor): Tensor to add, broadcastable with the product.
        batch1 (Tensor): First batch of matrices.
        batch2 (Tensor): Second batch of matrices.
        beta (float): Multiplier for ``input``.
        alpha (float): Multiplier for ``batch1 @ batch2``.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: The result tensor (*out* when provided).
    """
    # Same raw-argument tuple for the allocating and in-place kernels.
    raw_args = (
        input._underlying,
        batch1._underlying,
        batch2._underlying,
        float(beta),
        float(alpha),
    )
    if out is not None:
        _infinicore.baddbmm_(out._underlying, *raw_args)
        return out
    return Tensor(_infinicore.baddbmm(*raw_args))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def bilinear(input1, input2, weight, bias=None, *, out=None):
    """Bilinear transform of two inputs: ``x1^T W x2 (+ bias)``.

    Args:
        input1 (Tensor): First input tensor.
        input2 (Tensor): Second input tensor.
        weight (Tensor): Weight tensor.
        bias (Tensor, optional): Optional bias tensor.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: The transformed tensor (*out* when provided).
    """
    # Unwrap the optional bias once; the backend accepts None.
    raw_bias = None if bias is None else bias._underlying
    if out is not None:
        _infinicore.bilinear_(
            out._underlying,
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
        return out
    return Tensor(
        _infinicore.bilinear(
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
    )
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def cross_entropy(
    logits,
    target,
    weight=None,
    *,
    ignore_index=None,
    reduction="none",
    out=None,
):
    """
    Token-wise cross entropy without reduction. The output tensor has the same
    shape as target and uses the logits dtype.

    Args:
        logits (Tensor): Unnormalized class scores.
        target (Tensor): Target class indices, same leading shape as logits.
        weight: Unsupported; must be None.
        ignore_index: Unsupported; must be None.
        reduction (str | None): Only ``"none"`` (or None) is accepted.
        out (Tensor, optional): Pre-allocated output tensor.

    Raises:
        NotImplementedError: For any unsupported option above.
    """
    if weight is not None:
        raise NotImplementedError("class weights are not supported yet.")
    if ignore_index is not None:
        raise NotImplementedError("ignore_index is not supported yet.")
    if reduction not in (None, "none"):
        raise NotImplementedError("Only reduction='none' is implemented.")
    if out is not None:
        _infinicore.cross_entropy_(
            out._underlying,
            logits._underlying,
            target._underlying,
        )
        return out
    return Tensor(_infinicore.cross_entropy(logits._underlying, target._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def equal(input, other, *, out=None):
    """Element-wise equality comparison of two tensors.

    Args:
        input (Tensor): First operand.
        other (Tensor): Second operand.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: Element-wise comparison result (*out* when provided).
    """
    if out is not None:
        _infinicore.equal_(out._underlying, input._underlying, other._underlying)
        return out
    return Tensor(_infinicore.equal(input._underlying, other._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def fmod(input, other, *, out=None):
    """Element-wise remainder of division (C-style fmod).

    Args:
        input (Tensor): Dividend tensor.
        other (Tensor): Divisor tensor.
        out (Tensor, optional): Pre-allocated output tensor.

    Returns:
        Tensor: Element-wise remainder (*out* when provided).
    """
    if out is not None:
        _infinicore.fmod_(out._underlying, input._underlying, other._underlying)
        return out
    return Tensor(_infinicore.fmod(input._underlying, other._underlying))
import ml_dtypes
import numpy as np import numpy as np
import torch import torch
import infinicore import infinicore
try:
import ml_dtypes
except ModuleNotFoundError:
ml_dtypes = None
def to_torch_dtype(infini_dtype): def to_torch_dtype(infini_dtype):
"""Convert infinicore data type to PyTorch data type""" """Convert infinicore data type to PyTorch data type"""
...@@ -61,9 +57,7 @@ def numpy_to_infinicore_dtype(numpy_dtype): ...@@ -61,9 +57,7 @@ def numpy_to_infinicore_dtype(numpy_dtype):
return infinicore.float64 return infinicore.float64
elif numpy_dtype == np.float16: elif numpy_dtype == np.float16:
return infinicore.float16 return infinicore.float16
elif hasattr(np, "bfloat16") and numpy_dtype == np.bfloat16: elif numpy_dtype == ml_dtypes.bfloat16:
return infinicore.bfloat16
elif ml_dtypes is not None and numpy_dtype == ml_dtypes.bfloat16:
return infinicore.bfloat16 return infinicore.bfloat16
elif numpy_dtype == np.int8: elif numpy_dtype == np.int8:
return infinicore.int8 return infinicore.int8
...@@ -92,13 +86,6 @@ def infinicore_to_numpy_dtype(infini_dtype): ...@@ -92,13 +86,6 @@ def infinicore_to_numpy_dtype(infini_dtype):
elif infini_dtype == infinicore.int16: elif infini_dtype == infinicore.int16:
return np.int16 return np.int16
elif infini_dtype == infinicore.bfloat16: elif infini_dtype == infinicore.bfloat16:
if hasattr(np, "bfloat16"):
return np.bfloat16
if ml_dtypes is None:
raise ModuleNotFoundError(
"ml_dtypes is required for bfloat16 numpy conversion. "
"Please install ml_dtypes."
)
return ml_dtypes.bfloat16 return ml_dtypes.bfloat16
elif infini_dtype == infinicore.int32: elif infini_dtype == infinicore.int32:
return np.int32 return np.int32
......
...@@ -17,12 +17,12 @@ def run_tests(args): ...@@ -17,12 +17,12 @@ def run_tests(args):
"causal_softmax.py", "causal_softmax.py",
"clip.py", "clip.py",
"conv.py", "conv.py",
# "dequantize_awq.py", #"dequantize_awq.py",
"gelu.py", "gelu.py",
"gemm.py", "gemm.py",
# "layer_norm.py", #"layer_norm.py",
"logsoftmax.py", "logsoftmax.py",
# "lp_norm.py", #"lp_norm.py",
"mul.py", "mul.py",
"ones.py", "ones.py",
"random_sample.py", "random_sample.py",
...@@ -31,7 +31,7 @@ def run_tests(args): ...@@ -31,7 +31,7 @@ def run_tests(args):
"rms_norm.py", "rms_norm.py",
"rope.py", "rope.py",
"sigmoid.py", "sigmoid.py",
# "softmax.py", #"softmax.py",
"softplus.py", "softplus.py",
"sub.py", "sub.py",
"swiglu.py", "swiglu.py",
...@@ -42,7 +42,6 @@ def run_tests(args): ...@@ -42,7 +42,6 @@ def run_tests(args):
# "paged_attention.py", # "paged_attention.py",
# "paged_caching.py", # "paged_caching.py",
# "paged_attention_prefill.py" # "paged_attention_prefill.py"
"cross_entropy.py",
]: ]:
result = subprocess.run( result = subprocess.run(
f"python {test} {args} --debug", text=True, encoding="utf-8", shell=True f"python {test} {args} --debug", text=True, encoding="utf-8", shell=True
......
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Process-wide registry mapping a device type to the backend kernel
// implementing AdaptiveMaxPool1d.
common::OpDispatcher<AdaptiveMaxPool1d::schema> &AdaptiveMaxPool1d::dispatcher() {
    static common::OpDispatcher<AdaptiveMaxPool1d::schema> instance;
    return instance;
}

// Validates device placement, activates the output's device, and routes
// the call to the registered backend kernel.
void AdaptiveMaxPool1d::execute(Tensor y, Tensor x, size_t output_size) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    auto kernel = dispatcher().lookup(y->device().getType());
    kernel(y, x, output_size);
}

// Out-of-place entry point: the result keeps every leading dimension of x
// and replaces the last one with output_size.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size) {
    infinicore::Shape result_shape = x->shape();
    result_shape.back() = output_size;
    auto result = Tensor::empty(result_shape, x->dtype(), x->device());
    adaptive_max_pool1d_(result, x, output_size);
    return result;
}

// Out-parameter entry point: writes the pooled values into y.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size) {
    AdaptiveMaxPool1d::execute(y, x, output_size);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::adaptive_max_pool1d_impl::infiniop {

// Thread-local cache of infiniop descriptors keyed by a hash of the operand
// tensors and output size. The eviction callback destroys the descriptor and
// nulls the handle so it cannot be double-freed.
thread_local common::OpCache<size_t, infiniopAdaptiveMaxPool1dDescriptor_t> caches(
    100, // capacity
    [](infiniopAdaptiveMaxPool1dDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyAdaptiveMaxPool1dDescriptor(desc));
            desc = nullptr;
        }
    });

// Backend kernel registered with AdaptiveMaxPool1d::dispatcher(): creates (or
// reuses a cached) infiniop descriptor, allocates the required workspace, and
// launches the op on the current stream.
void calculate(Tensor y, Tensor x, size_t out) {
    // Cache key covers both tensors and the output size, so a descriptor is
    // only reused for an identical configuration.
    size_t seed = hash_combine(y, x, out);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopAdaptiveMaxPool1dDescriptor_t desc = nullptr;
    if (!desc_opt) {
        // Cache miss: build a fresh descriptor and remember it.
        INFINICORE_CHECK_ERROR(infiniopCreateAdaptiveMaxPool1dDescriptor(
            context::getInfiniopHandle(y->device()), &desc,
            y->desc(), x->desc(), out));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Workspace size depends on the descriptor; allocate per call.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAdaptiveMaxPool1dWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAdaptiveMaxPool1d(
        desc, workspace->data(), workspace_size,
        y->data(), x->data(), context::getStream()));
}

// Self-registration at static-init time for all device types handled by this
// infiniop backend.
static bool registered = []() {
    AdaptiveMaxPool1d::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::adaptive_max_pool1d_impl::infiniop
#include "infinicore/ops/asinh.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Process-wide registry mapping a device type to the backend kernel
// implementing Asinh.
common::OpDispatcher<Asinh::schema> &Asinh::dispatcher() {
    static common::OpDispatcher<Asinh::schema> instance;
    return instance;
}

// Validates device placement, activates the output's device, and routes
// the call to the registered backend kernel.
void Asinh::execute(Tensor y, Tensor x) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    auto kernel = dispatcher().lookup(y->device().getType());
    kernel(y, x);
}

// Out-of-place entry point: allocates a like-shaped result and fills it.
Tensor asinh(Tensor x) {
    auto result = Tensor::empty(x->shape(), x->dtype(), x->device());
    asinh_(result, x);
    return result;
}

// Out-parameter entry point: writes asinh(x) into y.
void asinh_(Tensor y, Tensor x) {
    Asinh::execute(y, x);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/asinh.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::asinh_impl::infiniop {

// Thread-local cache of infiniop descriptors keyed by a hash of the operand
// tensors. The eviction callback destroys the descriptor and nulls the handle
// so it cannot be double-freed.
thread_local common::OpCache<size_t, infiniopAsinhDescriptor_t> caches(
    100, // capacity
    [](infiniopAsinhDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyAsinhDescriptor(desc));
            desc = nullptr;
        }
    });

// Backend kernel registered with Asinh::dispatcher(): creates (or reuses a
// cached) infiniop descriptor, allocates the required workspace, and launches
// the op on the current stream.
void calculate(Tensor y, Tensor x) {
    // Cache key covers both tensors, so a descriptor is only reused for an
    // identical configuration.
    size_t seed = hash_combine(y, x);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopAsinhDescriptor_t desc = nullptr;
    if (!desc_opt) {
        // Cache miss: build a fresh descriptor and remember it.
        INFINICORE_CHECK_ERROR(infiniopCreateAsinhDescriptor(
            context::getInfiniopHandle(y->device()), &desc,
            y->desc(), x->desc()));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Workspace size depends on the descriptor; allocate per call.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAsinhWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAsinh(
        desc, workspace->data(), workspace_size,
        y->data(), x->data(), context::getStream()));
}

// Self-registration at static-init time for all device types handled by this
// infiniop backend.
static bool registered = []() {
    Asinh::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::asinh_impl::infiniop
#include "infinicore/ops/avg_pool1d.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Process-wide registry mapping a device type to the backend kernel
// implementing AvgPool1d.
common::OpDispatcher<AvgPool1d::schema> &AvgPool1d::dispatcher() {
    static common::OpDispatcher<AvgPool1d::schema> instance;
    return instance;
}

// Validates device placement and dispatches to the registered backend.
// A stride of 0 is the sentinel for "use kernel_size", mirroring the
// Python-side default of stride=None.
void AvgPool1d::execute(
    Tensor output,
    Tensor input,
    size_t kernel_size,
    size_t stride,
    size_t padding) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    if (stride == 0) {
        stride = kernel_size;
    }
    infinicore::context::setDevice(output->device());
    const auto device_type = output->device().getType();
    auto kernel = dispatcher().lookup(device_type);
    if (kernel == nullptr) {
        throw std::runtime_error(
            "No AvgPool1d implementation for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    kernel(output, input, kernel_size, stride, padding);
}

// Out-of-place entry point: validates the [N, C, L] layout, computes the
// pooled length, allocates the result, and delegates.
Tensor avg_pool1d(Tensor input, size_t kernel_size, size_t stride, size_t padding) {
    if (stride == 0) {
        stride = kernel_size;
    }
    const auto &in_shape = input->shape();
    if (in_shape.size() != 3) {
        throw std::runtime_error("AvgPool1d expects tensors with shape [N, C, L]");
    }
    const size_t batch = in_shape[0];
    const size_t channels = in_shape[1];
    const size_t length = in_shape[2];
    if (length + 2 * padding < kernel_size) {
        throw std::runtime_error("AvgPool1d kernel_size is larger than padded length");
    }
    // Standard pooling output-length formula.
    const size_t pooled = (length + 2 * padding - kernel_size) / stride + 1;
    auto output = Tensor::empty(Shape{batch, channels, pooled}, input->dtype(), input->device());
    avg_pool1d_(output, input, kernel_size, stride, padding);
    return output;
}

// Out-parameter entry point: writes the pooled values into output.
void avg_pool1d_(Tensor output, Tensor input, size_t kernel_size, size_t stride, size_t padding) {
    AvgPool1d::execute(output, input, kernel_size, stride, padding);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/avg_pool1d.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::avg_pool1d_impl::infiniop {

// Thread-local cache of infiniop descriptors keyed by a hash of the operands
// and pooling parameters. The eviction callback destroys the descriptor and
// nulls the handle so it cannot be double-freed.
thread_local common::OpCache<size_t, infiniopAvgPool1dDescriptor_t> caches(
    100,
    [](infiniopAvgPool1dDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyAvgPool1dDescriptor(desc));
            desc = nullptr;
        }
    });

// Backend kernel registered with AvgPool1d::dispatcher(): resolves the
// stride sentinel, creates (or reuses a cached) infiniop descriptor,
// allocates the required workspace, and launches the op on the current
// stream.
void calculate(
    Tensor output,
    Tensor input,
    size_t kernel_size,
    size_t stride,
    size_t padding) {
    // stride == 0 is the sentinel for "use kernel_size"; resolve it before
    // hashing so equivalent calls share a descriptor.
    if (stride == 0) {
        stride = kernel_size;
    }
    size_t seed = hash_combine(output, input, kernel_size, stride, padding);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);
    auto desc_opt = cache.get(seed);
    infiniopAvgPool1dDescriptor_t desc = nullptr;
    if (!desc_opt) {
        // Cache miss: build a fresh descriptor and remember it.
        INFINICORE_CHECK_ERROR(infiniopCreateAvgPool1dDescriptor(
            context::getInfiniopHandle(device),
            &desc,
            output->desc(),
            input->desc(),
            kernel_size,
            stride,
            padding));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Workspace size depends on the descriptor; allocate per call.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAvgPool1dWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAvgPool1d(
        desc,
        workspace->data(),
        workspace_size,
        output->data(),
        input->data(),
        context::getStream()));
}

// Self-registration at static-init time for all device types handled by this
// infiniop backend.
static bool registered = []() {
    AvgPool1d::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::avg_pool1d_impl::infiniop
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment