Unverified commit 6b311da2, authored by Tim Moon, committed by GitHub

Refactor logging macros (#382)



* Do not include logging macros in installed C headers
Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Debug logging macros
Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Debug C++ tests

Use Google style for header includes.
Signed-off-by: Tim Moon <tmoon@nvidia.com>
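For context, Google-style include ordering groups headers as: the file's own header first, then C system headers, then C++ standard library headers, then other libraries' headers, then project headers, alphabetized within each group. A minimal sketch of the convention (the file names are illustrative, not taken from this PR):

    // example.cpp -- illustrative Google-style include ordering
    #include "example.h"              // 1. the file's own header

    #include <cuda_runtime_api.h>     // 2. C / system headers

    #include <cstdint>                // 3. C++ standard library headers
    #include <vector>

    #include <pybind11/pybind11.h>    // 4. other libraries' headers

    #include "common/util/logging.h"  // 5. project headers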

* Update CUDA driver macros

Incorporating changes from #389.
Co-authored-by: Tim Moon <tmoon@nvidia.com>
Co-authored-by: Jan Bielak <jbielak@nvidia.com>
Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Use core error checking macros in PyTorch extensions

Hack to get around macro redefinition warning.
Signed-off-by: Tim Moon <tmoon@nvidia.com>
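The hunks below show only the resulting includes, not the hack itself. One common pattern for avoiding a macro redefinition warning, offered here as an assumption rather than as what this PR necessarily does, is to drop any pre-existing definition before the authoritative header redefines it:

    // Hypothetical sketch: remove a colliding local definition so that
    // including the core header does not trigger a redefinition warning.
    #ifdef NVTE_CHECK_CUDA
    #undef NVTE_CHECK_CUDA
    #endif
    #include "common/util/logging.h"  // core definition now applies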

* Fix missing arg when getting CUDA driver error string
Signed-off-by: Tim Moon <tmoon@nvidia.com>
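The driver API's cuGetErrorString takes two arguments, a CUresult error code and a const char ** output pointer; the old helper (visible in the cuda_driver.h hunk below) passed only the output pointer. A minimal sketch of the corrected lookup using the repo's cuda_driver::call wrapper; the helper function name here is hypothetical:

    #include <cuda.h>

    // Fetch the human-readable description for a CUDA driver error code.
    inline const char *driver_error_string(CUresult status) {
      const char *desc = nullptr;
      ::transformer_engine::cuda_driver::call("cuGetErrorString", status, &desc);
      return desc;
    }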

* Reuse logging header in frameworks
Signed-off-by: Tim Moon <tmoon@nvidia.com>

---------
Signed-off-by: Tim Moon <tmoon@nvidia.com>
Co-authored-by: Jan Bielak <jbielak@nvidia.com>
parent 91b754e0
@@ -146,9 +146,6 @@ void nvte_multi_cast_transpose(size_t num_tensors,
  *  - `cast_output` is the result of the cast
  *  - `transposed_output` is the transposed result of the cast.
  *
- * Calling this function with workspace being an empty tensor will not perform the operation,
- * but instead set the shape and type of the workspace tensor to the required values.
- *
  * \param[in]  input        Input tensor of shape [N, H].
  * \param[in]  geglu_input  Tensor used as input to the forward of GeGLU operation.
  *                          Shape [N, H * 2].
@@ -43,30 +43,23 @@ inline CUresult call(const char *symbol, ArgTs... args) {
 }  // namespace transformer_engine
-namespace {
-/*! \brief Throw exception if CUDA driver call has failed */
-inline void check_cuda_driver_(CUresult status) {
-  if (status != CUDA_SUCCESS) {
-    const char *description;
-    transformer_engine::cuda_driver::call("cuGetErrorString", &description);
-    NVTE_ERROR(transformer_engine::concat_strings("CUDA Error: ", description));
-  }
-}
-/*! \brief Call CUDA driver function and throw exception if it fails */
-template <typename... ArgTs>
-inline void call_and_check_cuda_driver_(const char *symbol,
-                                        ArgTs &&... args) {
-  check_cuda_driver_(transformer_engine::cuda_driver::call(symbol,
-                                                           std::forward<ArgTs>(args)...));
-}
-}  // namespace
-#define NVTE_CHECK_CUDA_DRIVER(ans) { check_cuda_driver_(ans); }
-#define NVTE_CALL_CHECK_CUDA_DRIVER(func, ...) \
-  { call_and_check_cuda_driver_(#func, __VA_ARGS__); }
+#define NVTE_CHECK_CUDA_DRIVER(expr)                                    \
+  do {                                                                  \
+    const CUresult status_NVTE_CHECK_CUDA_DRIVER = (expr);              \
+    if (status_NVTE_CHECK_CUDA_DRIVER != CUDA_SUCCESS) {                \
+      const char *desc_NVTE_CHECK_CUDA_DRIVER;                          \
+      ::transformer_engine::cuda_driver::call(                          \
+          "cuGetErrorString", status_NVTE_CHECK_CUDA_DRIVER,            \
+          &desc_NVTE_CHECK_CUDA_DRIVER);                                \
+      NVTE_ERROR("CUDA Error: ", desc_NVTE_CHECK_CUDA_DRIVER);          \
+    }                                                                   \
+  } while (false)
+#define NVTE_CALL_CHECK_CUDA_DRIVER(symbol, ...)                        \
+  do {                                                                  \
+    NVTE_CHECK_CUDA_DRIVER(                                             \
+        ::transformer_engine::cuda_driver::call(#symbol, __VA_ARGS__)); \
+  } while (false)
 #endif  // TRANSFORMER_ENGINE_COMMON_UTIL_CUDA_DRIVER_H_
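Note the switch from bare-brace macro bodies to the do { ... } while (false) idiom: an invocation plus its trailing semicolon now forms exactly one statement, so the macros compose safely with unbraced if/else. A usage sketch; the surrounding function and its arguments are illustrative, not from this diff:

    #include <cuda.h>

    // Allocate device memory through the dynamically loaded driver API.
    // Any status other than CUDA_SUCCESS throws via NVTE_ERROR.
    void alloc_device_buffer(CUdeviceptr *ptr, size_t bytes) {
      if (bytes > 0)
        NVTE_CALL_CHECK_CUDA_DRIVER(cuMemAlloc, ptr, bytes);  // expands to a single statement
    }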
/*************************************************************************
 * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * See LICENSE for license information.
 ************************************************************************/

#ifndef TRANSFORMER_ENGINE_COMMON_UTIL_LOGGING_H_
#define TRANSFORMER_ENGINE_COMMON_UTIL_LOGGING_H_

#include <stdexcept>

#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <cudnn.h>
#include <nvrtc.h>

#include "../util/string.h"

#define NVTE_ERROR(...)                                                 \
  do {                                                                  \
    throw ::std::runtime_error(                                         \
        ::transformer_engine::concat_strings(                           \
            __FILE__ ":", __LINE__, " in function ", __func__, ": ",    \
            ::transformer_engine::concat_strings(__VA_ARGS__)));        \
  } while (false)

#define NVTE_CHECK(expr, ...)                                           \
  do {                                                                  \
    if (!(expr)) {                                                      \
      NVTE_ERROR("Assertion failed: " #expr ". ",                       \
                 ::transformer_engine::concat_strings(__VA_ARGS__));    \
    }                                                                   \
  } while (false)

#define NVTE_CHECK_CUDA(expr)                                           \
  do {                                                                  \
    const cudaError_t status_NVTE_CHECK_CUDA = (expr);                  \
    if (status_NVTE_CHECK_CUDA != cudaSuccess) {                        \
      NVTE_ERROR("CUDA Error: ",                                        \
                 cudaGetErrorString(status_NVTE_CHECK_CUDA));           \
    }                                                                   \
  } while (false)

#define NVTE_CHECK_CUBLAS(expr)                                         \
  do {                                                                  \
    const cublasStatus_t status_NVTE_CHECK_CUBLAS = (expr);             \
    if (status_NVTE_CHECK_CUBLAS != CUBLAS_STATUS_SUCCESS) {            \
      NVTE_ERROR("cuBLAS Error: ",                                      \
                 cublasGetStatusString(status_NVTE_CHECK_CUBLAS));      \
    }                                                                   \
  } while (false)

#define NVTE_CHECK_CUDNN(expr)                                          \
  do {                                                                  \
    const cudnnStatus_t status_NVTE_CHECK_CUDNN = (expr);               \
    if (status_NVTE_CHECK_CUDNN != CUDNN_STATUS_SUCCESS) {              \
      NVTE_ERROR("cuDNN Error: ",                                       \
                 cudnnGetErrorString(status_NVTE_CHECK_CUDNN),          \
                 ". "                                                   \
                 "For more information, enable cuDNN error logging "    \
                 "by setting CUDNN_LOGERR_DBG=1 and "                   \
                 "CUDNN_LOGDEST_DBG=stderr in the environment.");       \
    }                                                                   \
  } while (false)

#define NVTE_CHECK_NVRTC(expr)                                          \
  do {                                                                  \
    const nvrtcResult status_NVTE_CHECK_NVRTC = (expr);                 \
    if (status_NVTE_CHECK_NVRTC != NVRTC_SUCCESS) {                     \
      NVTE_ERROR("NVRTC Error: ",                                       \
                 nvrtcGetErrorString(status_NVTE_CHECK_NVRTC));         \
    }                                                                   \
  } while (false)

#endif  // TRANSFORMER_ENGINE_COMMON_UTIL_LOGGING_H_
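Each of these macros throws std::runtime_error on failure, with the file, line, and function baked into the message by NVTE_ERROR. A brief usage sketch; the function and its arguments are illustrative, not from this diff:

    #include <cuda_runtime_api.h>

    // Validate arguments, then error-check a runtime-API call, one line each.
    void copy_device_buffer(void *dst, const void *src, size_t bytes,
                            cudaStream_t stream) {
      NVTE_CHECK(dst != nullptr && src != nullptr,
                 "null buffer passed to copy_device_buffer");
      NVTE_CHECK_CUDA(cudaMemcpyAsync(dst, src, bytes,
                                      cudaMemcpyDeviceToDevice, stream));
    }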
@@ -7,19 +7,18 @@
 #ifndef TRANSFORMER_ENGINE_JAX_CSRC_FP8_MODULES_H_
 #define TRANSFORMER_ENGINE_JAX_CSRC_FP8_MODULES_H_
+#include <cuda_runtime_api.h>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <vector>
-#include <cuda_runtime_api.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
-#include "transformer_engine/fused_attn.h"
-#include "transformer_engine/logging.h"
-#include "transformer_engine/transformer_engine.h"
+#include "common/util/logging.h"
+#include <transformer_engine/fused_attn.h>
+#include <transformer_engine/transformer_engine.h>
 namespace transformer_engine {
 namespace jax {
@@ -7,15 +7,16 @@
 #ifndef TRANSFORMER_ENGINE_JAX_CSRC_UTILS_H_
 #define TRANSFORMER_ENGINE_JAX_CSRC_UTILS_H_
+#include <pybind11/pybind11.h>
 #include <cstdint>
 #include <numeric>
 #include <stdexcept>
 #include <string>
 #include <type_traits>
-#include "transformer_engine/fused_attn.h"
-#include "transformer_engine/logging.h"
-#include <pybind11/pybind11.h>
+#include "common/util/logging.h"
+#include <transformer_engine/fused_attn.h>
 namespace transformer_engine {
 namespace jax {
@@ -5,22 +5,23 @@
 ************************************************************************/
 #pragma once
-#include <cstdlib>
-#include <vector>
+#include <cublasLt.h>
-#include "paddle/extension.h"
-#include "paddle/phi/backends/all_context.h"
+#include "common/util/logging.h"
 #include <transformer_engine/activation.h>
 #include <transformer_engine/cast.h>
 #include <transformer_engine/fused_attn.h>
 #include <transformer_engine/gemm.h>
 #include <transformer_engine/layer_norm.h>
-#include <transformer_engine/logging.h>
 #include <transformer_engine/rmsnorm.h>
 #include <transformer_engine/softmax.h>
 #include <transformer_engine/transformer_engine.h>
 #include <transformer_engine/transpose.h>
+#include <cstdlib>
+#include <vector>
+#include "paddle/extension.h"
+#include "paddle/phi/backends/all_context.h"
 namespace transformer_engine {
 namespace paddle_ext {
@@ -6,8 +6,9 @@
 #include <cub/cub.cuh>
 #include <vector>
-#include "../common.h"
+#include "common.h"
+#include "common/common.h"
 namespace transformer_engine {
 namespace paddle_ext {
@@ -4,19 +4,22 @@
  * See LICENSE for license information.
  ************************************************************************/
-#include "userbuffers/userbuffers.h"
-#include <stdio.h>
-#include <stdlib.h>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDAGuard.h>
 #include <cuda.h>
 #include <cuda_fp8.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <torch/cuda.h>
 #include <torch/custom_class.h>
 #include <torch/extension.h>
 #include <torch/types.h>
+#include "common/util/logging.h"
+#include "userbuffers/userbuffers.h"
 #define HALF_BYTES 2
 #define UB_MAX_SM 32
@@ -7,39 +7,40 @@
 #ifndef TRANSFORMER_ENGINE_PYTORCH_CSRC_COMMON_H_
 #define TRANSFORMER_ENGINE_PYTORCH_CSRC_COMMON_H_
-#include <transformer_engine/gemm.h>
-#include <transformer_engine/layer_norm.h>
-#include <transformer_engine/rmsnorm.h>
-#include <transformer_engine/transpose.h>
-#include <transformer_engine/activation.h>
-#include <transformer_engine/logging.h>
-#include <transformer_engine/transformer_engine.h>
-#include <transformer_engine/cast.h>
-#include <transformer_engine/softmax.h>
-#include <transformer_engine/fused_attn.h>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <random>
+#include <stdexcept>
+#include <vector>
 #include <ATen/ATen.h>
-#include <ATen/cudnn/Handle.h>
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/macros/Macros.h>
 #include <ATen/Dispatch.h>
-#include <ATen/native/DispatchStub.h>
+#include <ATen/cuda/CUDAContext.h>
 #include <ATen/cuda/CUDAGeneratorImpl.h>
 #include <ATen/cuda/CUDAGraphsUtils.cuh>
-#include <torch/extension.h>
-#include <torch/torch.h>
+#include <ATen/cudnn/Handle.h>
+#include <ATen/native/DispatchStub.h>
+#include <c10/macros/Macros.h>
+#include <cublasLt.h>
 #include <cuda.h>
-#include <cuda_runtime.h>
 #include <cuda_bf16.h>
-#include <cublasLt.h>
+#include <cuda_runtime.h>
 #include <cudnn.h>
-#include <stdexcept>
-#include <memory>
-#include <iomanip>
-#include <random>
-#include <cstring>
-#include <vector>
-#include <iostream>
+#include <torch/extension.h>
+#include <torch/torch.h>
+#include "common/util/logging.h"
+#include <transformer_engine/activation.h>
+#include <transformer_engine/cast.h>
+#include <transformer_engine/fused_attn.h>
+#include <transformer_engine/gemm.h>
+#include <transformer_engine/layer_norm.h>
+#include <transformer_engine/rmsnorm.h>
+#include <transformer_engine/softmax.h>
+#include <transformer_engine/transformer_engine.h>
+#include <transformer_engine/transpose.h>
 namespace transformer_engine {
@@ -5,7 +5,7 @@
 ************************************************************************/
 #include "common.h"
-#include "../common.h"
+#include "common/common.h"
 NVTE_Fused_Attn_Backend get_fused_attn_backend(
     const transformer_engine::DType q_dtype,