Unverified Commit a23c4d13 authored by Tianyu Xiong's avatar Tianyu Xiong Committed by GitHub
Browse files

issue/347 Add support for BOOL/BF16 and printing utils in infiniop-test

* utils: add printing support for int8_t, bf16_t and fp16_t

* utils: add support for BF16 in infiniop-test

* utils: add support for BOOL in infiniop-test
parent 50eaee63
...@@ -141,10 +141,8 @@ typedef enum { ...@@ -141,10 +141,8 @@ typedef enum {
inline size_t ggmlTypeSize(GGML_TYPE ggml_type) { inline size_t ggmlTypeSize(GGML_TYPE ggml_type) {
switch (ggml_type) { switch (ggml_type) {
case GGML_TYPE_F32: case GGML_TYPE_Q8_K:
return 4; return 1;
case GGML_TYPE_F16:
return 2;
case GGML_TYPE_I8: case GGML_TYPE_I8:
return 1; return 1;
case GGML_TYPE_I16: case GGML_TYPE_I16:
...@@ -153,10 +151,14 @@ inline size_t ggmlTypeSize(GGML_TYPE ggml_type) { ...@@ -153,10 +151,14 @@ inline size_t ggmlTypeSize(GGML_TYPE ggml_type) {
return 4; return 4;
case GGML_TYPE_I64: case GGML_TYPE_I64:
return 8; return 8;
case GGML_TYPE_F64:
return 8;
case GGML_TYPE_BF16: case GGML_TYPE_BF16:
return 2; return 2;
case GGML_TYPE_F16:
return 2;
case GGML_TYPE_F32:
return 4;
case GGML_TYPE_F64:
return 8;
default: default:
throw std::runtime_error("GGML_TYPE_SIZE: Unsupported GGML_TYPE"); throw std::runtime_error("GGML_TYPE_SIZE: Unsupported GGML_TYPE");
} }
......
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) { inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) {
switch (type) { switch (type) {
case GGML_TYPE_Q8_K:
return INFINI_DTYPE_BOOL;
case GGML_TYPE_I8: case GGML_TYPE_I8:
return INFINI_DTYPE_I8; return INFINI_DTYPE_I8;
case GGML_TYPE_I16: case GGML_TYPE_I16:
...@@ -14,10 +16,10 @@ inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) { ...@@ -14,10 +16,10 @@ inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) {
return INFINI_DTYPE_I32; return INFINI_DTYPE_I32;
case GGML_TYPE_I64: case GGML_TYPE_I64:
return INFINI_DTYPE_I64; return INFINI_DTYPE_I64;
case GGML_TYPE_F16:
return INFINI_DTYPE_F16;
case GGML_TYPE_BF16: case GGML_TYPE_BF16:
return INFINI_DTYPE_BF16; return INFINI_DTYPE_BF16;
case GGML_TYPE_F16:
return INFINI_DTYPE_F16;
case GGML_TYPE_F32: case GGML_TYPE_F32:
return INFINI_DTYPE_F32; return INFINI_DTYPE_F32;
case GGML_TYPE_F64: case GGML_TYPE_F64:
......
...@@ -9,12 +9,16 @@ ...@@ -9,12 +9,16 @@
inline double getVal(void *ptr, GGML_TYPE ggml_type) { inline double getVal(void *ptr, GGML_TYPE ggml_type) {
switch (ggml_type) { switch (ggml_type) {
case GGML_TYPE_BF16:
return utils::cast<float>(*(bf16_t *)ptr);
case GGML_TYPE_F16: case GGML_TYPE_F16:
return utils::cast<double>(*(fp16_t *)ptr); return utils::cast<float>(*(fp16_t *)ptr);
case GGML_TYPE_F32: case GGML_TYPE_F32:
return *(float *)ptr; return *(float *)ptr;
case GGML_TYPE_F64: case GGML_TYPE_F64:
return *(double *)ptr; return *(double *)ptr;
case GGML_TYPE_Q8_K:
return *(bool *)ptr;
case GGML_TYPE_I8: case GGML_TYPE_I8:
return *(int8_t *)ptr; return *(int8_t *)ptr;
case GGML_TYPE_I16: case GGML_TYPE_I16:
...@@ -30,12 +34,16 @@ inline double getVal(void *ptr, GGML_TYPE ggml_type) { ...@@ -30,12 +34,16 @@ inline double getVal(void *ptr, GGML_TYPE ggml_type) {
inline size_t ggmlSizeOf(GGML_TYPE ggml_type) { inline size_t ggmlSizeOf(GGML_TYPE ggml_type) {
switch (ggml_type) { switch (ggml_type) {
case GGML_TYPE_BF16:
return sizeof(bf16_t);
case GGML_TYPE_F16: case GGML_TYPE_F16:
return sizeof(fp16_t); return sizeof(fp16_t);
case GGML_TYPE_F32: case GGML_TYPE_F32:
return sizeof(float); return sizeof(float);
case GGML_TYPE_F64: case GGML_TYPE_F64:
return sizeof(double); return sizeof(double);
case GGML_TYPE_Q8_K:
return sizeof(bool);
case GGML_TYPE_I8: case GGML_TYPE_I8:
return sizeof(int8_t); return sizeof(int8_t);
case GGML_TYPE_I16: case GGML_TYPE_I16:
......
#include "tensor.hpp" #include "tensor.hpp"
#include "gguf.hpp"
#include "utils.hpp" #include "utils.hpp"
#include <cstring> #include <cstring>
#include <infinirt.h> #include <infinirt.h>
...@@ -19,6 +20,40 @@ void printData(const T *data, const std::vector<size_t> &shape, const std::vecto ...@@ -19,6 +20,40 @@ void printData(const T *data, const std::vector<size_t> &shape, const std::vecto
} }
} }
// The type int8_t is represented by signed char, with a range of –128 to 127.
// It may contain non-printable characters and thus cannot be printed directly.
template <>
void printData(const int8_t *data, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides, size_t dim) {
if (dim == shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << static_cast<int>(*(data + i * strides[dim])) << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
printData(data + i * strides[dim], shape, strides, dim + 1);
std::cout << std::endl;
}
}
}
template <>
void printData(const bf16_t *data, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides, size_t dim) {
if (dim == shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << utils::cast<float>(*(data + i * strides[dim])) << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
printData(data + i * strides[dim], shape, strides, dim + 1);
std::cout << std::endl;
}
}
}
template <> template <>
void printData(const fp16_t *data, const std::vector<size_t> &shape, void printData(const fp16_t *data, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides, size_t dim) { const std::vector<ptrdiff_t> &strides, size_t dim) {
...@@ -26,6 +61,7 @@ void printData(const fp16_t *data, const std::vector<size_t> &shape, ...@@ -26,6 +61,7 @@ void printData(const fp16_t *data, const std::vector<size_t> &shape,
for (size_t i = 0; i < shape[dim]; i++) { for (size_t i = 0; i < shape[dim]; i++) {
std::cout << utils::cast<float>(*(data + i * strides[dim])) << " "; std::cout << utils::cast<float>(*(data + i * strides[dim])) << " ";
} }
std::cout << std::endl;
} else if (dim < shape.size() - 1) { } else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) { for (size_t i = 0; i < shape[dim]; i++) {
printData(data + i * strides[dim], shape, strides, dim + 1); printData(data + i * strides[dim], shape, strides, dim + 1);
...@@ -227,6 +263,8 @@ void Tensor::debug() const { ...@@ -227,6 +263,8 @@ void Tensor::debug() const {
auto tensor = to(INFINI_DEVICE_CPU, 0); auto tensor = to(INFINI_DEVICE_CPU, 0);
std::cout << "Tensor: " << tensor->info() << std::endl; std::cout << "Tensor: " << tensor->info() << std::endl;
switch (_ggml_type) { switch (_ggml_type) {
case GGML_TYPE_BF16:
printData((bf16_t *)(tensor->data()), _shape, _strides, 0);
case GGML_TYPE_F16: case GGML_TYPE_F16:
printData((fp16_t *)(tensor->data()), _shape, _strides, 0); printData((fp16_t *)(tensor->data()), _shape, _strides, 0);
break; break;
...@@ -236,6 +274,9 @@ void Tensor::debug() const { ...@@ -236,6 +274,9 @@ void Tensor::debug() const {
case GGML_TYPE_F64: case GGML_TYPE_F64:
printData((double *)(tensor->data()), _shape, _strides, 0); printData((double *)(tensor->data()), _shape, _strides, 0);
break; break;
case GGML_TYPE_Q8_K:
printData((bool *)(tensor->data()), _shape, _strides, 0);
break;
case GGML_TYPE_I8: case GGML_TYPE_I8:
printData((int8_t *)(tensor->data()), _shape, _strides, 0); printData((int8_t *)(tensor->data()), _shape, _strides, 0);
break; break;
...@@ -245,6 +286,9 @@ void Tensor::debug() const { ...@@ -245,6 +286,9 @@ void Tensor::debug() const {
case GGML_TYPE_I32: case GGML_TYPE_I32:
printData((int32_t *)(tensor->data()), _shape, _strides, 0); printData((int32_t *)(tensor->data()), _shape, _strides, 0);
break; break;
case GGML_TYPE_I64:
printData((int64_t *)(tensor->data()), _shape, _strides, 0);
break;
default: default:
std::cout << "Unsupported GGML type" << std::endl; std::cout << "Unsupported GGML type" << std::endl;
break; break;
......
import gguf
from typing import List from typing import List
import gguf
import numpy as np import numpy as np
from gguf import GGMLQuantizationType from gguf import GGMLQuantizationType
from ml_dtypes import bfloat16
def np_dtype_to_ggml(tensor_dtype: np.dtype): def np_dtype_to_ggml(tensor_dtype: np.dtype):
if tensor_dtype == np.float16: if tensor_dtype == bfloat16:
return GGMLQuantizationType.BF16
elif tensor_dtype == np.float16:
return GGMLQuantizationType.F16 return GGMLQuantizationType.F16
elif tensor_dtype == np.float32: elif tensor_dtype == np.float32:
return GGMLQuantizationType.F32 return GGMLQuantizationType.F32
elif tensor_dtype == np.float64: elif tensor_dtype == np.float64:
return GGMLQuantizationType.F64 return GGMLQuantizationType.F64
elif tensor_dtype == np.bool:
return GGMLQuantizationType.Q8_K
elif tensor_dtype == np.int8: elif tensor_dtype == np.int8:
return GGMLQuantizationType.I8 return GGMLQuantizationType.I8
elif tensor_dtype == np.int16: elif tensor_dtype == np.int16:
...@@ -21,7 +27,7 @@ def np_dtype_to_ggml(tensor_dtype: np.dtype): ...@@ -21,7 +27,7 @@ def np_dtype_to_ggml(tensor_dtype: np.dtype):
return GGMLQuantizationType.I64 return GGMLQuantizationType.I64
else: else:
raise ValueError( raise ValueError(
"Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now" "Only BF16, F16, F32, F64, BOOL, I8, I16, I32, I64 tensors are supported for now"
) )
...@@ -37,6 +43,7 @@ def contiguous_gguf_strides(shape: tuple[int, ...]) -> list[int]: ...@@ -37,6 +43,7 @@ def contiguous_gguf_strides(shape: tuple[int, ...]) -> list[int]:
acc *= size acc *= size
return strides[::-1] return strides[::-1]
def process_zero_stride_tensor(tensor, stride=None): def process_zero_stride_tensor(tensor, stride=None):
if stride: if stride:
slices = tuple(slice(0, 1) if s == 0 else slice(None) for s in stride) slices = tuple(slice(0, 1) if s == 0 else slice(None) for s in stride)
...@@ -44,6 +51,7 @@ def process_zero_stride_tensor(tensor, stride=None): ...@@ -44,6 +51,7 @@ def process_zero_stride_tensor(tensor, stride=None):
else: else:
return tensor return tensor
class InfiniopTestCase: class InfiniopTestCase:
op_name: str op_name: str
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment