Unverified Commit a23c4d13 authored by Tianyu Xiong's avatar Tianyu Xiong Committed by GitHub
Browse files

issue/347 Add support for BOOL/BF16 and printing utils in infiniop-test

* utils: add printing support for int8_t, bf16_t and fp16_t

* utils: add support for BF16 in infiniop-test

* utils: add support for BOOL in infiniop-test
parent 50eaee63
......@@ -141,10 +141,8 @@ typedef enum {
inline size_t ggmlTypeSize(GGML_TYPE ggml_type) {
switch (ggml_type) {
case GGML_TYPE_F32:
return 4;
case GGML_TYPE_F16:
return 2;
case GGML_TYPE_Q8_K:
return 1;
case GGML_TYPE_I8:
return 1;
case GGML_TYPE_I16:
......@@ -153,10 +151,14 @@ inline size_t ggmlTypeSize(GGML_TYPE ggml_type) {
return 4;
case GGML_TYPE_I64:
return 8;
case GGML_TYPE_F64:
return 8;
case GGML_TYPE_BF16:
return 2;
case GGML_TYPE_F16:
return 2;
case GGML_TYPE_F32:
return 4;
case GGML_TYPE_F64:
return 8;
default:
throw std::runtime_error("GGML_TYPE_SIZE: Unsupported GGML_TYPE");
}
......
......@@ -6,6 +6,8 @@
inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) {
switch (type) {
case GGML_TYPE_Q8_K:
return INFINI_DTYPE_BOOL;
case GGML_TYPE_I8:
return INFINI_DTYPE_I8;
case GGML_TYPE_I16:
......@@ -14,10 +16,10 @@ inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) {
return INFINI_DTYPE_I32;
case GGML_TYPE_I64:
return INFINI_DTYPE_I64;
case GGML_TYPE_F16:
return INFINI_DTYPE_F16;
case GGML_TYPE_BF16:
return INFINI_DTYPE_BF16;
case GGML_TYPE_F16:
return INFINI_DTYPE_F16;
case GGML_TYPE_F32:
return INFINI_DTYPE_F32;
case GGML_TYPE_F64:
......
......@@ -9,12 +9,16 @@
inline double getVal(void *ptr, GGML_TYPE ggml_type) {
switch (ggml_type) {
case GGML_TYPE_BF16:
return utils::cast<float>(*(bf16_t *)ptr);
case GGML_TYPE_F16:
return utils::cast<double>(*(fp16_t *)ptr);
return utils::cast<float>(*(fp16_t *)ptr);
case GGML_TYPE_F32:
return *(float *)ptr;
case GGML_TYPE_F64:
return *(double *)ptr;
case GGML_TYPE_Q8_K:
return *(bool *)ptr;
case GGML_TYPE_I8:
return *(int8_t *)ptr;
case GGML_TYPE_I16:
......@@ -30,12 +34,16 @@ inline double getVal(void *ptr, GGML_TYPE ggml_type) {
inline size_t ggmlSizeOf(GGML_TYPE ggml_type) {
switch (ggml_type) {
case GGML_TYPE_BF16:
return sizeof(bf16_t);
case GGML_TYPE_F16:
return sizeof(fp16_t);
case GGML_TYPE_F32:
return sizeof(float);
case GGML_TYPE_F64:
return sizeof(double);
case GGML_TYPE_Q8_K:
return sizeof(bool);
case GGML_TYPE_I8:
return sizeof(int8_t);
case GGML_TYPE_I16:
......
#include "tensor.hpp"
#include "gguf.hpp"
#include "utils.hpp"
#include <cstring>
#include <infinirt.h>
......@@ -19,6 +20,40 @@ void printData(const T *data, const std::vector<size_t> &shape, const std::vecto
}
}
// The type int8_t is an alias for signed char (range -128 to 127), so
// streaming it directly would emit raw (possibly non-printable) characters.
// This specialization widens each element to int before printing.
//
// Recursively prints `data` along dimension `dim`: the innermost dimension
// is printed as one space-separated line; each outer dimension recurses into
// its sub-tensors, separating them with a blank line. `strides` are element
// strides (not bytes) and may describe a non-contiguous layout.
template <>
void printData(const int8_t *data, const std::vector<size_t> &shape,
               const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Guard against a 0-dimensional shape: `shape.size() - 1` would wrap
    // around (size_t underflow) and `shape[dim]` would then be read out of
    // bounds in the loop condition below.
    if (shape.empty()) {
        return;
    }
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << static_cast<int>(*(data + i * strides[dim])) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            printData(data + i * strides[dim], shape, strides, dim + 1);
            std::cout << std::endl;
        }
    }
}
// Specialization for bf16_t: each element is converted to float via
// utils::cast before streaming, since bf16_t has no stream operator.
// Recursion layout matches the other printData overloads: the innermost
// dimension is printed as one space-separated line; outer dimensions
// recurse with a blank line between sub-tensors. `strides` are element
// strides and may be non-contiguous.
template <>
void printData(const bf16_t *data, const std::vector<size_t> &shape,
               const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Guard against a 0-dimensional shape: `shape.size() - 1` would
    // underflow and `shape[dim]` would be indexed out of bounds.
    if (shape.empty()) {
        return;
    }
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << utils::cast<float>(*(data + i * strides[dim])) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            printData(data + i * strides[dim], shape, strides, dim + 1);
            std::cout << std::endl;
        }
    }
}
template <>
void printData(const fp16_t *data, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides, size_t dim) {
......@@ -26,6 +61,7 @@ void printData(const fp16_t *data, const std::vector<size_t> &shape,
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << utils::cast<float>(*(data + i * strides[dim])) << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
printData(data + i * strides[dim], shape, strides, dim + 1);
......@@ -227,6 +263,8 @@ void Tensor::debug() const {
auto tensor = to(INFINI_DEVICE_CPU, 0);
std::cout << "Tensor: " << tensor->info() << std::endl;
switch (_ggml_type) {
case GGML_TYPE_BF16:
printData((bf16_t *)(tensor->data()), _shape, _strides, 0);
case GGML_TYPE_F16:
printData((fp16_t *)(tensor->data()), _shape, _strides, 0);
break;
......@@ -236,6 +274,9 @@ void Tensor::debug() const {
case GGML_TYPE_F64:
printData((double *)(tensor->data()), _shape, _strides, 0);
break;
case GGML_TYPE_Q8_K:
printData((bool *)(tensor->data()), _shape, _strides, 0);
break;
case GGML_TYPE_I8:
printData((int8_t *)(tensor->data()), _shape, _strides, 0);
break;
......@@ -245,6 +286,9 @@ void Tensor::debug() const {
case GGML_TYPE_I32:
printData((int32_t *)(tensor->data()), _shape, _strides, 0);
break;
case GGML_TYPE_I64:
printData((int64_t *)(tensor->data()), _shape, _strides, 0);
break;
default:
std::cout << "Unsupported GGML type" << std::endl;
break;
......
import gguf
from typing import List
import gguf
import numpy as np
from gguf import GGMLQuantizationType
from ml_dtypes import bfloat16
def np_dtype_to_ggml(tensor_dtype: np.dtype):
if tensor_dtype == np.float16:
if tensor_dtype == bfloat16:
return GGMLQuantizationType.BF16
elif tensor_dtype == np.float16:
return GGMLQuantizationType.F16
elif tensor_dtype == np.float32:
return GGMLQuantizationType.F32
elif tensor_dtype == np.float64:
return GGMLQuantizationType.F64
elif tensor_dtype == np.bool:
return GGMLQuantizationType.Q8_K
elif tensor_dtype == np.int8:
return GGMLQuantizationType.I8
elif tensor_dtype == np.int16:
......@@ -21,7 +27,7 @@ def np_dtype_to_ggml(tensor_dtype: np.dtype):
return GGMLQuantizationType.I64
else:
raise ValueError(
"Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
"Only BF16, F16, F32, F64, BOOL, I8, I16, I32, I64 tensors are supported for now"
)
......@@ -37,6 +43,7 @@ def contiguous_gguf_strides(shape: tuple[int, ...]) -> list[int]:
acc *= size
return strides[::-1]
def process_zero_stride_tensor(tensor, stride=None):
if stride:
slices = tuple(slice(0, 1) if s == 0 else slice(None) for s in stride)
......@@ -44,6 +51,7 @@ def process_zero_stride_tensor(tensor, stride=None):
else:
return tensor
class InfiniopTestCase:
op_name: str
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment