Unverified commit afb70224, authored by kwyss-nvidia and committed by GitHub
Browse files

Kwyss/new shape owns data (#1708)

* Reapply "Allow NVTEShape to own data." (#1703)

This reverts commit 91405eb4.
Signed-off-by: Keith Wyss <kwyss@nvidia.com>

* Update code so that data is replaced by an array.
Signed-off-by: Keith Wyss <kwyss@nvidia.com>

* Specify unambiguous Tensor constructor in tests.
Signed-off-by: Keith Wyss <kwyss@nvidia.com>

* Fix assumption in test of 2D shape.
Signed-off-by: Keith Wyss <kwyss@nvidia.com>

* Remove row and col
Signed-off-by: Keith Wyss <kwyss@nvidia.com>

---------
Signed-off-by: Keith Wyss <kwyss@nvidia.com>
parent 21ec6e04
...@@ -116,10 +116,10 @@ void performTest(const size_t N, const size_t H) { ...@@ -116,10 +116,10 @@ void performTest(const size_t N, const size_t H) {
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor input("input", { N, H }, itype); Tensor input("input", std::vector<size_t>{ N, H }, itype);
Tensor output("output", { N, H }, otype); Tensor output("output", std::vector<size_t>{ N, H }, otype);
Tensor igrad("igrad", { N, H }, itype); Tensor igrad("igrad", std::vector<size_t>{ N, H }, itype);
Tensor ograd("ograd", { N, H }, itype); Tensor ograd("ograd", std::vector<size_t>{ N, H }, itype);
fillUniform(&input); fillUniform(&input);
fillUniform(&ograd); fillUniform(&ograd);
...@@ -171,10 +171,10 @@ void performTestGLU(const size_t N, const size_t H) { ...@@ -171,10 +171,10 @@ void performTestGLU(const size_t N, const size_t H) {
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor input("input", {N, H * 2}, itype); Tensor input("input", std::vector<size_t>{N, H * 2}, itype);
Tensor output("output", {N, H}, otype); Tensor output("output", std::vector<size_t>{N, H}, otype);
Tensor igrad("igrad", { N, H * 2 }, itype); Tensor igrad("igrad", std::vector<size_t>{ N, H * 2 }, itype);
Tensor ograd("ograd", { N, H }, itype); Tensor ograd("ograd", std::vector<size_t>{ N, H }, itype);
fillUniform(&input); fillUniform(&input);
fillUniform(&ograd); fillUniform(&ograd);
......
...@@ -70,7 +70,7 @@ void performTest(const std::vector<size_t>& shape) { ...@@ -70,7 +70,7 @@ void performTest(const std::vector<size_t>& shape) {
Tensor output_c("output_c", shape, otype); Tensor output_c("output_c", shape, otype);
// dbias has the same data type with "output grad" // dbias has the same data type with "output grad"
Tensor dbias("dbias", {H}, itype); Tensor dbias("dbias", std::vector<size_t>{H}, itype);
fillUniform(&input); fillUniform(&input);
setRandomScale(&output_c); setRandomScale(&output_c);
......
...@@ -79,7 +79,7 @@ void performTest(const std::vector<size_t>& shape) { ...@@ -79,7 +79,7 @@ void performTest(const std::vector<size_t>& shape) {
Tensor output_c("output_c", shape, otype); Tensor output_c("output_c", shape, otype);
// dbias has the same data type with "output grad" // dbias has the same data type with "output grad"
Tensor dbias("dbias", {H}, itype); Tensor dbias("dbias", std::vector<size_t>{H}, itype);
fillUniform(&input); fillUniform(&input);
fillUniform(&grad); fillUniform(&grad);
......
...@@ -280,7 +280,7 @@ void runTestCase(const ProcessingMethod processing_method, const std::vector<siz ...@@ -280,7 +280,7 @@ void runTestCase(const ProcessingMethod processing_method, const std::vector<siz
Tensor grad("grad", shape, itype); Tensor grad("grad", shape, itype);
Tensor output_c("output_c", shape, otype, rowwise, colwise, Tensor output_c("output_c", shape, otype, rowwise, colwise,
opts.block_scaling_dim == 2 ? NVTE_BLOCK_SCALING_2D : NVTE_BLOCK_SCALING_1D); opts.block_scaling_dim == 2 ? NVTE_BLOCK_SCALING_2D : NVTE_BLOCK_SCALING_1D);
Tensor output_dbias("output_dbias", {cols}, itype); Tensor output_dbias("output_dbias", std::vector<size_t>{cols}, itype);
std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(rows * cols);
...@@ -355,7 +355,7 @@ void runTestCaseOneDimensionalBlocks(const ProcessingMethod processing_method, ...@@ -355,7 +355,7 @@ void runTestCaseOneDimensionalBlocks(const ProcessingMethod processing_method,
Tensor grad("grad", shape, itype); Tensor grad("grad", shape, itype);
Tensor output_c("output_c", shape, otype, rowwise, colwise, Tensor output_c("output_c", shape, otype, rowwise, colwise,
opts.block_scaling_dim == 2 ? NVTE_BLOCK_SCALING_2D : NVTE_BLOCK_SCALING_1D); opts.block_scaling_dim == 2 ? NVTE_BLOCK_SCALING_2D : NVTE_BLOCK_SCALING_1D);
Tensor output_dbias("output_dbias", {cols}, itype); Tensor output_dbias("output_dbias", std::vector<size_t>{cols}, itype);
std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(rows * cols);
......
...@@ -230,7 +230,7 @@ void performTest_x1(const ProcessingMethod processing_method, ...@@ -230,7 +230,7 @@ void performTest_x1(const ProcessingMethod processing_method,
Tensor input("input", shape, itype); Tensor input("input", shape, itype);
Tensor grad("grad", shape, itype); Tensor grad("grad", shape, itype);
Tensor output_c("output_c", shape, otype, rowwise, colwise, NVTE_MXFP8_1D_SCALING); Tensor output_c("output_c", shape, otype, rowwise, colwise, NVTE_MXFP8_1D_SCALING);
Tensor output_dbias("output_dbias", { cols }, itype); Tensor output_dbias("output_dbias", std::vector<size_t>{ cols }, itype);
std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<InputType[]> ref_output_dbias = std::make_unique<InputType[]>(cols); std::unique_ptr<InputType[]> ref_output_dbias = std::make_unique<InputType[]>(cols);
...@@ -368,7 +368,7 @@ void performTest_x2(const ProcessingMethod processing_method, ...@@ -368,7 +368,7 @@ void performTest_x2(const ProcessingMethod processing_method,
Tensor input("input", shape, itype); Tensor input("input", shape, itype);
Tensor grad("grad", shape, itype); Tensor grad("grad", shape, itype);
Tensor output("output", shape, otype, true, true, NVTE_MXFP8_1D_SCALING); Tensor output("output", shape, otype, true, true, NVTE_MXFP8_1D_SCALING);
Tensor output_dbias("output_dbias", { cols }, itype); Tensor output_dbias("output_dbias", std::vector<size_t>{ cols }, itype);
std::unique_ptr<OutputType[]> ref_output_c_rowwise = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_c_rowwise = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<OutputType[]> ref_output_c_colwise = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_c_colwise = std::make_unique<OutputType[]>(rows * cols);
......
...@@ -204,8 +204,8 @@ void performTest_x1(const size_t rows, ...@@ -204,8 +204,8 @@ void performTest_x1(const size_t rows,
// std::cout << "blocks_X: " << blocks_X << std::endl; // std::cout << "blocks_X: " << blocks_X << std::endl;
// std::cout << "scales_stride: " << scales_stride << std::endl; // std::cout << "scales_stride: " << scales_stride << std::endl;
Tensor grad("grad", { rows, cols }, itype); Tensor grad("grad", std::vector<size_t>{ rows, cols }, itype);
Tensor input("input", { rows, cols * 2 }, itype); Tensor input("input", std::vector<size_t>{ rows, cols * 2 }, itype);
const size_t output_cols = (IS_DGATED ? 2 : 1) * cols; const size_t output_cols = (IS_DGATED ? 2 : 1) * cols;
...@@ -289,8 +289,8 @@ void performTest_x2(const size_t rows, ...@@ -289,8 +289,8 @@ void performTest_x2(const size_t rows,
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor grad("grad", { rows, cols }, itype); Tensor grad("grad", std::vector<size_t>{ rows, cols }, itype);
Tensor input("input", { rows, cols * 2 }, itype); Tensor input("input", std::vector<size_t>{ rows, cols * 2 }, itype);
const size_t output_cols = (IS_DGATED ? 2 : 1) * cols; const size_t output_cols = (IS_DGATED ? 2 : 1) * cols;
......
...@@ -47,8 +47,8 @@ void performTest(const size_t N, const size_t H) { ...@@ -47,8 +47,8 @@ void performTest(const size_t N, const size_t H) {
DType itype = TypeInfo<InputType>::dtype; DType itype = TypeInfo<InputType>::dtype;
DType otype = TypeInfo<OutputType>::dtype; DType otype = TypeInfo<OutputType>::dtype;
Tensor input("input", { N, H }, itype); Tensor input("input", std::vector<size_t>{ N, H }, itype);
Tensor output("output", { N, H }, otype, true, true); Tensor output("output", std::vector<size_t>{ N, H }, otype, true, true);
std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(N * H); std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(N * H);
std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(N * H); std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(N * H);
......
...@@ -112,8 +112,8 @@ void performTest(const size_t N, const size_t H) { ...@@ -112,8 +112,8 @@ void performTest(const size_t N, const size_t H) {
} }
} }
Tensor input("input", { N, H }, itype); Tensor input("input", std::vector<size_t>{ N, H }, itype);
Tensor output("output", { N, H }, otype, true, true); Tensor output("output", std::vector<size_t>{ N, H }, otype, true, true);
std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(N * H); std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(N * H);
std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(N * H); std::unique_ptr<OutputType[]> ref_output_t = std::make_unique<OutputType[]>(N * H);
......
...@@ -65,11 +65,11 @@ void performTest(const size_t N, const size_t H) { ...@@ -65,11 +65,11 @@ void performTest(const size_t N, const size_t H) {
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor input("input", {N, H}, itype); Tensor input("input", std::vector<size_t>{N, H}, itype);
Tensor output("output", {N, H}, otype, true, true); Tensor output("output", std::vector<size_t>{N, H}, otype, true, true);
// dbias has the same data type with "output grad" // dbias has the same data type with "output grad"
Tensor dbias("dbias", {H}, itype); Tensor dbias("dbias", std::vector<size_t>{H}, itype);
fillUniform(&input); fillUniform(&input);
setRandomScale(&output); setRandomScale(&output);
......
...@@ -76,12 +76,12 @@ void performTest(const size_t N, const size_t H) { ...@@ -76,12 +76,12 @@ void performTest(const size_t N, const size_t H) {
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor input("input", {N, H}, itype); Tensor input("input", std::vector<size_t>{N, H}, itype);
Tensor gelu_input("gelu_input", {N, H}, itype); Tensor gelu_input("gelu_input", std::vector<size_t>{N, H}, itype);
Tensor output("output", {N, H}, otype, true, true); Tensor output("output", std::vector<size_t>{N, H}, otype, true, true);
// dbias has the same data type with "output grad" // dbias has the same data type with "output grad"
Tensor dbias("dbias", {H}, itype); Tensor dbias("dbias", std::vector<size_t>{H}, itype);
fillUniform(&input); fillUniform(&input);
fillUniform(&gelu_input); fillUniform(&gelu_input);
......
...@@ -74,9 +74,9 @@ void performTest(const size_t N, const size_t H) { ...@@ -74,9 +74,9 @@ void performTest(const size_t N, const size_t H) {
DType itype = TypeInfo<IType>::dtype; DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype; DType otype = TypeInfo<OType>::dtype;
Tensor grad("grad", {N, H}, itype); Tensor grad("grad", std::vector<size_t>{N, H}, itype);
Tensor input("input", {N, H * 2}, itype); Tensor input("input", std::vector<size_t>{N, H * 2}, itype);
Tensor output("output", {N, H * 2}, otype, true, true); Tensor output("output", std::vector<size_t>{N, H * 2}, otype, true, true);
fillUniform(&grad); fillUniform(&grad);
fillUniform(&input); fillUniform(&input);
......
...@@ -153,11 +153,11 @@ void performTest( ...@@ -153,11 +153,11 @@ void performTest(
DType itype = TypeInfo<Type>::dtype; DType itype = TypeInfo<Type>::dtype;
Tensor data_in("data_in", { batches, heads, rows, cols }, itype); Tensor data_in("data_in", std::vector<size_t>{ batches, heads, rows, cols }, itype);
Tensor softmax_out("softmax_out", { batches, heads, rows, cols }, itype); Tensor softmax_out("softmax_out", std::vector<size_t>{ batches, heads, rows, cols }, itype);
Tensor softmax_in("softmax_in", { batches, heads, rows, cols }, itype); Tensor softmax_in("softmax_in", std::vector<size_t>{ batches, heads, rows, cols }, itype);
Tensor grads_in("grads_in", { batches, heads, rows, cols }, itype); Tensor grads_in("grads_in", std::vector<size_t>{ batches, heads, rows, cols }, itype);
Tensor grads_out("grads_out", { batches, heads, rows, cols }, itype); Tensor grads_out("grads_out", std::vector<size_t>{ batches, heads, rows, cols }, itype);
const size_t elements_total = batches * heads * rows * cols; const size_t elements_total = batches * heads * rows * cols;
std::unique_ptr<Type[]> softmax_out_ref = std::make_unique<Type[]>(elements_total); std::unique_ptr<Type[]> softmax_out_ref = std::make_unique<Type[]>(elements_total);
......
...@@ -214,10 +214,10 @@ void performTest_x1(const size_t rows, ...@@ -214,10 +214,10 @@ void performTest_x1(const size_t rows,
const size_t blocks_num = rowwise ? blocks_num_rowwise : blocks_num_colwise; const size_t blocks_num = rowwise ? blocks_num_rowwise : blocks_num_colwise;
const size_t scales_stride = rowwise ? blocks_X_rowwise : blocks_X_colwise; const size_t scales_stride = rowwise ? blocks_X_rowwise : blocks_X_colwise;
Tensor input("input", { rows, cols }, itype, rowwise, colwise, NVTE_MXFP8_1D_SCALING); Tensor input("input", std::vector<size_t>{ rows, cols }, itype, rowwise, colwise, NVTE_MXFP8_1D_SCALING);
// Output data are written to the rowwise ptr regardless of the scaling direction // Output data are written to the rowwise ptr regardless of the scaling direction
Tensor output("output", { rows, cols }, otype, true, false); Tensor output("output", std::vector<size_t>{ rows, cols }, otype, true, false);
std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<fp8e8m0[]> scales = std::make_unique<fp8e8m0[]>(blocks_num); std::unique_ptr<fp8e8m0[]> scales = std::make_unique<fp8e8m0[]>(blocks_num);
...@@ -267,11 +267,11 @@ void performTest_quantize_then_dequantize(const size_t rows, ...@@ -267,11 +267,11 @@ void performTest_quantize_then_dequantize(const size_t rows,
// input --> quantized --> output (dequantized) // input --> quantized --> output (dequantized)
// input == output // input == output
Tensor input("input", { rows, cols }, in_type); Tensor input("input", std::vector<size_t>{ rows, cols }, in_type);
Tensor quantized("quantized", { rows, cols }, intermed_type, rowwise, colwise, NVTE_MXFP8_1D_SCALING); Tensor quantized("quantized", std::vector<size_t>{ rows, cols }, intermed_type, rowwise, colwise, NVTE_MXFP8_1D_SCALING);
// Output data are written to the rowwise ptr regardless of the scaling direction // Output data are written to the rowwise ptr regardless of the scaling direction
Tensor output("output", { rows, cols }, out_type, true, false); Tensor output("output", std::vector<size_t>{ rows, cols }, out_type, true, false);
// fillCase<EncodingType>(&input, InputsFillCase::minNorm_to_maxNorm); // fillCase<EncodingType>(&input, InputsFillCase::minNorm_to_maxNorm);
fillCase<EncodingType>(&input, InputsFillCase::uniform); fillCase<EncodingType>(&input, InputsFillCase::uniform);
...@@ -333,8 +333,8 @@ void performTest_x2(const size_t rows, ...@@ -333,8 +333,8 @@ void performTest_x2(const size_t rows,
const size_t blocks_num_rowwise = blocks_Y_rowwise * blocks_X_rowwise; const size_t blocks_num_rowwise = blocks_Y_rowwise * blocks_X_rowwise;
const size_t blocks_num_colwise = blocks_Y_colwise * blocks_X_colwise; const size_t blocks_num_colwise = blocks_Y_colwise * blocks_X_colwise;
Tensor input("input", { rows, cols }, itype, true, true, NVTE_MXFP8_1D_SCALING); Tensor input("input", std::vector<size_t>{ rows, cols }, itype, true, true, NVTE_MXFP8_1D_SCALING);
Tensor output("output", { rows, cols }, otype); Tensor output("output", std::vector<size_t>{ rows, cols }, otype);
std::unique_ptr<OutputType[]> ref_output_rowwise = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_rowwise = std::make_unique<OutputType[]>(rows * cols);
std::unique_ptr<OutputType[]> ref_output_colwise = std::make_unique<OutputType[]>(rows * cols); std::unique_ptr<OutputType[]> ref_output_colwise = std::make_unique<OutputType[]>(rows * cols);
......
...@@ -81,9 +81,9 @@ void performTest() { ...@@ -81,9 +81,9 @@ void performTest() {
for (size_t tensor_id = 0; tensor_id < num_tensors; ++tensor_id) { for (size_t tensor_id = 0; tensor_id < num_tensors; ++tensor_id) {
const size_t height = tensor_dims[tensor_id].first; const size_t height = tensor_dims[tensor_id].first;
const size_t width = tensor_dims[tensor_id].second; const size_t width = tensor_dims[tensor_id].second;
input_list.emplace_back(Tensor("input_" + std::to_string(tensor_id), { height, width }, itype)); input_list.emplace_back(Tensor("input_" + std::to_string(tensor_id), std::vector<size_t>{ height, width }, itype));
output_list.emplace_back(Tensor("output_" + std::to_string(tensor_id), output_list.emplace_back(Tensor("output_" + std::to_string(tensor_id),
{ height, width }, otype, true, true)); std::vector<size_t>{ height, width }, otype, true, true));
auto& input = input_list.back(); auto& input = input_list.back();
auto& output = output_list.back(); auto& output = output_list.back();
......
...@@ -85,8 +85,8 @@ void performTest() { ...@@ -85,8 +85,8 @@ void performTest() {
const size_t height = tensor_dims[tensor_id].first; const size_t height = tensor_dims[tensor_id].first;
const size_t width = tensor_dims[tensor_id].second; const size_t width = tensor_dims[tensor_id].second;
const size_t padded_height = (height + align - 1) / align * align; const size_t padded_height = (height + align - 1) / align * align;
input_list.emplace_back(Tensor("input_" + std::to_string(tensor_id), { height, width }, itype)); input_list.emplace_back(Tensor("input_" + std::to_string(tensor_id), std::vector<size_t>{ height, width }, itype));
output_list.emplace_back(Tensor("output_" + std::to_string(tensor_id), { padded_height, width }, otype)); output_list.emplace_back(Tensor("output_" + std::to_string(tensor_id), std::vector<size_t>{ padded_height, width }, otype));
auto& input = input_list.back(); auto& input = input_list.back();
auto& output = output_list.back(); auto& output = output_list.back();
......
...@@ -48,16 +48,16 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma, ...@@ -48,16 +48,16 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma,
return; return;
} }
Tensor input("input", { N, H }, itype); Tensor input("input", std::vector<size_t>{ N, H }, itype);
Tensor z("z", { N, H }, otype); Tensor z("z", std::vector<size_t>{ N, H }, otype);
Tensor gamma("gamma", { H }, wtype); Tensor gamma("gamma", std::vector<size_t>{ H }, wtype);
Tensor beta("beta", { H }, wtype); Tensor beta("beta", std::vector<size_t>{ H }, wtype);
Tensor mu("mu", { N }, DType::kFloat32); Tensor mu("mu", std::vector<size_t>{ N }, DType::kFloat32);
Tensor rsigma("rsigma", { N }, DType::kFloat32); Tensor rsigma("rsigma", std::vector<size_t>{ N }, DType::kFloat32);
Tensor dz("dz", { N, H }, wtype); Tensor dz("dz", std::vector<size_t>{ N, H }, wtype);
Tensor dx("dx", { N, H }, itype); Tensor dx("dx", std::vector<size_t>{ N, H }, itype);
Tensor dgamma("dgamma", { H }, wtype); Tensor dgamma("dgamma", std::vector<size_t>{ H }, wtype);
Tensor dbeta("dbeta", { H }, wtype); Tensor dbeta("dbeta", std::vector<size_t>{ H }, wtype);
Tensor workspace_fwd, workspace_bwd; Tensor workspace_fwd, workspace_bwd;
fillUniform(&input); fillUniform(&input);
......
...@@ -116,12 +116,12 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma, ...@@ -116,12 +116,12 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma,
DType wtype = TypeInfo<WeightType>::dtype; DType wtype = TypeInfo<WeightType>::dtype;
DType otype = TypeInfo<OutputType>::dtype; DType otype = TypeInfo<OutputType>::dtype;
Tensor input("input", { N, H }, itype); Tensor input("input", std::vector<size_t>{ N, H }, itype);
Tensor z("z", { N, H }, otype, true, is_training, NVTE_MXFP8_1D_SCALING); Tensor z("z", std::vector<size_t>{ N, H }, otype, true, is_training, NVTE_MXFP8_1D_SCALING);
Tensor gamma("gamma", { H }, wtype); Tensor gamma("gamma", std::vector<size_t>{ H }, wtype);
Tensor beta("beta", { H }, wtype); Tensor beta("beta", std::vector<size_t>{ H }, wtype);
Tensor mu("mu", { N }, DType::kFloat32); Tensor mu("mu", std::vector<size_t>{ N }, DType::kFloat32);
Tensor rsigma("rsigma", { N }, DType::kFloat32); Tensor rsigma("rsigma", std::vector<size_t>{ N }, DType::kFloat32);
Tensor workspace; Tensor workspace;
...@@ -164,7 +164,7 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma, ...@@ -164,7 +164,7 @@ void performTest(const size_t N, const size_t H, const bool zero_centered_gamma,
nvte_enable_zero_centered_gamma_in_weight_dtype(false); nvte_enable_zero_centered_gamma_in_weight_dtype(false);
} }
Tensor dequantized_output("dequantized_output", { N, H }, DType::kFloat32, true, true); Tensor dequantized_output("dequantized_output", std::vector<size_t>{ N, H }, DType::kFloat32, true, true);
dequantize_2x<OutputType, fp8e8m0>(z, dequantized_output, is_training); dequantize_2x<OutputType, fp8e8m0>(z, dequantized_output, is_training);
......
...@@ -58,8 +58,8 @@ void performTestQ(const size_t N) { ...@@ -58,8 +58,8 @@ void performTestQ(const size_t N) {
DType itype = TypeInfo<InputType>::dtype; DType itype = TypeInfo<InputType>::dtype;
DType otype = TypeInfo<OutputType>::dtype; DType otype = TypeInfo<OutputType>::dtype;
Tensor input("input", { N }, itype); Tensor input("input", std::vector<size_t>{ N }, itype);
Tensor output("output", { N }, otype); Tensor output("output", std::vector<size_t>{ N }, otype);
std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(N); std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(N);
...@@ -89,8 +89,8 @@ void performTestDQ(const size_t N) { ...@@ -89,8 +89,8 @@ void performTestDQ(const size_t N) {
DType itype = TypeInfo<InputType>::dtype; DType itype = TypeInfo<InputType>::dtype;
DType otype = TypeInfo<OutputType>::dtype; DType otype = TypeInfo<OutputType>::dtype;
Tensor input("input", { N }, itype); Tensor input("input", std::vector<size_t>{ N }, itype);
Tensor output("output", { N }, otype); Tensor output("output", std::vector<size_t>{ N }, otype);
std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(N); std::unique_ptr<OutputType[]> ref_output = std::make_unique<OutputType[]>(N);
......
...@@ -37,8 +37,8 @@ void performTest(const size_t N, const size_t H) { ...@@ -37,8 +37,8 @@ void performTest(const size_t N, const size_t H) {
DType dtype = TypeInfo<Type>::dtype; DType dtype = TypeInfo<Type>::dtype;
Tensor input("input", { N, H }, dtype); Tensor input("input", std::vector<size_t>{ N, H }, dtype);
Tensor output("output", { H, N }, dtype); Tensor output("output", std::vector<size_t>{ H, N }, dtype);
std::unique_ptr<Type[]> ref_output = std::make_unique<Type[]>(N * H); std::unique_ptr<Type[]> ref_output = std::make_unique<Type[]>(N * H);
......
...@@ -112,8 +112,8 @@ struct scale_inv_meta { ...@@ -112,8 +112,8 @@ struct scale_inv_meta {
size_t type_size; size_t type_size;
}; };
NVTEShape convertShape(const std::vector<size_t>& shape) { NVTEShape convertShape(const std::vector<size_t>& s) {
return {shape.data(), shape.size()}; return nvte_make_shape(s.data(), s.size());
} }
std::pair<scale_inv_meta, scale_inv_meta> get_scales(const NVTEShape& shape, std::pair<scale_inv_meta, scale_inv_meta> get_scales(const NVTEShape& shape,
...@@ -240,7 +240,7 @@ Tensor::Tensor(const std::string& name, ...@@ -240,7 +240,7 @@ Tensor::Tensor(const std::string& name,
std::vector<size_t> normalized_shape_v = {product(shape, 0, shape.ndim - 1), std::vector<size_t> normalized_shape_v = {product(shape, 0, shape.ndim - 1),
shape.data[shape.ndim - 1]}; shape.data[shape.ndim - 1]};
NVTEShape normalized_shape = convertShape(normalized_shape_v); NVTEShape normalized_shape = convertShape(normalized_shape_v);
NVTEShape columnwise_shape{nullptr, 0}; NVTEShape columnwise_shape = {};
std::vector<size_t> columnwise_shape_vec; std::vector<size_t> columnwise_shape_vec;
if (scaling_mode == NVTE_DELAYED_TENSOR_SCALING || scaling_mode == NVTE_BLOCK_SCALING_1D || scaling_mode == NVTE_BLOCK_SCALING_2D) { if (scaling_mode == NVTE_DELAYED_TENSOR_SCALING || scaling_mode == NVTE_BLOCK_SCALING_1D || scaling_mode == NVTE_BLOCK_SCALING_2D) {
...@@ -257,8 +257,7 @@ Tensor::Tensor(const std::string& name, ...@@ -257,8 +257,7 @@ Tensor::Tensor(const std::string& name,
} }
if (columnwise) { if (columnwise) {
columnwise_shape.data = columnwise_shape_vec.data(); columnwise_shape = nvte_make_shape(columnwise_shape_vec.data(), columnwise_shape_vec.size());
columnwise_shape.ndim = columnwise_shape_vec.size();
} }
tensor_ = TensorWrapper(scaling_mode); tensor_ = TensorWrapper(scaling_mode);
...@@ -739,8 +738,6 @@ void fillUniform(Tensor *t) { ...@@ -739,8 +738,6 @@ void fillUniform(Tensor *t) {
template<typename InputEncoding, InputsFillCase Case> template<typename InputEncoding, InputsFillCase Case>
void fillCase_special(Tensor *t) { void fillCase_special(Tensor *t) {
const size_t size = product(t->rowwise_shape()); const size_t size = product(t->rowwise_shape());
const size_t rows = t->rowwise_shape().data[0];
const size_t cols = t->rowwise_shape().data[1];
if constexpr (Case == InputsFillCase::zeros) { if constexpr (Case == InputsFillCase::zeros) {
TRANSFORMER_ENGINE_TYPE_SWITCH_FP16_FP32_ONLY(t->dtype(), InputType, { TRANSFORMER_ENGINE_TYPE_SWITCH_FP16_FP32_ONLY(t->dtype(), InputType, {
...@@ -760,9 +757,7 @@ void fillCase_special(Tensor *t) { ...@@ -760,9 +757,7 @@ void fillCase_special(Tensor *t) {
std::uniform_real_distribution<> dis_sign(-1.0, 1.0); std::uniform_real_distribution<> dis_sign(-1.0, 1.0);
TRANSFORMER_ENGINE_TYPE_SWITCH_FP16_FP32_ONLY(t->dtype(), InputType, { TRANSFORMER_ENGINE_TYPE_SWITCH_FP16_FP32_ONLY(t->dtype(), InputType, {
InputType *data = t->rowwise_cpu_dptr<InputType>(); InputType *data = t->rowwise_cpu_dptr<InputType>();
for (size_t i = 0; i < rows; ++i) { for (size_t idx = 0; idx < size; ++idx) {
for (size_t j = 0; j < cols; ++j) {
const size_t idx = i * cols + j;
const bool is_negative = (dis_sign(t->gen()) < 0.0); const bool is_negative = (dis_sign(t->gen()) < 0.0);
double val = dis(t->gen()); double val = dis(t->gen());
if (is_negative) { if (is_negative) {
...@@ -770,7 +765,6 @@ void fillCase_special(Tensor *t) { ...@@ -770,7 +765,6 @@ void fillCase_special(Tensor *t) {
} }
data[idx] = static_cast<InputType>(val); data[idx] = static_cast<InputType>(val);
} }
}
}); });
} }
t->set_scale_inv(1.0); t->set_scale_inv(1.0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment