Commit 57deee08 authored by yuguo

[DCU] cpp test compile pass

parent ab122dac
@@ -2,6 +2,7 @@
 #
 # See LICENSE for license information.
 
+if(USE_CUDA)
 add_executable(test_util
                test_nvrtc.cpp
                test_string.cpp
@@ -10,6 +11,16 @@ add_executable(test_util
 find_package(OpenMP REQUIRED)
 target_link_libraries(test_util PUBLIC CUDA::cudart GTest::gtest_main ${TE_LIB} CUDA::nvrtc CUDNN::cudnn OpenMP::OpenMP_CXX)
+else()
+add_executable(test_util
+               test_nvrtc_hip.cpp
+               test_string.cpp
+               ../test_common.hip)
+find_package(OpenMP REQUIRED)
+target_link_libraries(test_util PUBLIC hip::host hip::device GTest::gtest_main ${TE_LIB} OpenMP::OpenMP_CXX)
+endif()
 
 target_compile_options(test_util PRIVATE -O2 -fopenmp)
 include(GoogleTest)
...
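Note on this hunk: when `USE_CUDA` is off, the test binary builds `test_nvrtc_hip.cpp` and `../test_common.hip` and links `hip::host`/`hip::device` in place of `CUDA::cudart`, `CUDA::nvrtc`, and `CUDNN::cudnn`. A minimal configure sketch for the DCU/HIP path follows; the `USE_CUDA` option name comes from this diff, but the exact top-level invocation and compiler choice are assumptions:

```
# Hypothetical configure/build for the HIP/DCU branch (USE_CUDA=OFF is
# the option this diff introduces; the hipcc compiler is an assumption).
cmake -S . -B build -DUSE_CUDA=OFF -DCMAKE_CXX_COMPILER=hipcc
cmake --build build --target test_util
```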
@@ -6,6 +6,7 @@
 #pragma once
 
 #include <ATen/ATen.h>
+#include "common/utils.cuh"
 
 // Forward/backward compatiblity hack around
 // https://github.com/pytorch/pytorch/commit/3aeb78079bcd68282fe9117088e138b77318e288
@@ -267,8 +268,6 @@
   AT_ERROR(#NAME, " not implemented for '", toString(TYPEIN), "'"); \
   }
 
-constexpr uint32_t THREADS_PER_WARP = 32;
-
 template <typename T>
 __device__ __forceinline__ T
 reduce_block_into_lanes(T *x, T val, int lanes = 1,
...
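The hard-coded `constexpr uint32_t THREADS_PER_WARP = 32;` removed here presumably moves behind the newly included `common/utils.cuh`, which matters on DCU: AMD-style wavefronts are 64 lanes wide, so a fixed 32 would under-count on that hardware. A minimal sketch of a platform-aware definition, assuming `common/utils.cuh` does something along these lines (its contents are not shown in this diff):

```cpp
#include <cstdint>

// Hypothetical sketch of a platform-aware warp-size constant; the real
// definition lives in common/utils.cuh, which this diff does not show.
#if defined(__HIP_PLATFORM_AMD__)
constexpr uint32_t THREADS_PER_WARP = 64;  // AMD/DCU wavefront width
#else
constexpr uint32_t THREADS_PER_WARP = 32;  // NVIDIA warp width
#endif
```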
@@ -91,6 +91,12 @@ class _BatchedLinear(torch.autograd.Function):
         # TODO Support Float8 Current Scaling  # pylint: disable=fixme
         if fp8 and FP8GlobalStateManager.get_fp8_recipe().float8_current_scaling():
             raise NotImplementedError("BatchedLinear does not yet support Float8 Current Scaling")
+        # TODO Support Float8 Delayed Scaling  # pylint: disable=fixme
+        if fp8 and FP8GlobalStateManager.get_fp8_recipe().delayed():
+            raise NotImplementedError("BatchedLinear does not yet support Float8 Delayed Scaling")
+        # TODO Support Float8 Per Tensor Scaling  # pylint: disable=fixme
+        if fp8 and FP8GlobalStateManager.get_fp8_recipe().float8_per_tensor_scaling():
+            raise NotImplementedError("BatchedLinear does not yet support Float8 Per Tensor Scaling")
 
         # Make sure input dimensions are compatible
         in_features = weights[0].shape[-1]
...
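These guards make `_BatchedLinear` fail fast on the FP8 recipes it cannot yet handle, rather than computing with an unsupported recipe. A hedged sketch of how a caller hits the new delayed-scaling guard; `fp8_autocast` and `DelayedScaling` are existing Transformer Engine APIs, while `run_batched_linear` is a hypothetical stand-in for the call path that reaches `_BatchedLinear.forward` with `fp8=True`:

```python
import transformer_engine.pytorch as te
from transformer_engine.common.recipe import DelayedScaling

# DelayedScaling().delayed() is True, so the new guard fires.
with te.fp8_autocast(enabled=True, fp8_recipe=DelayedScaling()):
    try:
        run_batched_linear(inputs, weights)  # hypothetical entry point
    except NotImplementedError as err:
        print(err)  # BatchedLinear does not yet support Float8 Delayed Scaling
```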