// Copyright (c) OpenMMLab. All rights reserved. #pragma once #include "src/fastertransformer/utils/Tensor.h" #include #include #include #include namespace fastertransformer { enum QuantPolicy { kNone = 0x00, // reserve 0x01 and 0x02 for backward compatibility kReserve1 = 0x01, kReserve2 = 0x02, // quantize cache kv kCacheKVInt8 = 0x04, }; enum CmpMode { kCmpNone, kCmpRead, kCmpWrite, }; extern CmpMode compare_mode; template void Compare(T* ptr, size_t size, std::string key, CmpMode mode, cudaStream_t stream); template void CheckNan(const T* ptr, size_t size, std::string key, cudaStream_t stream); namespace detail { template std::string to_string(T x) { return std::to_string(x); } inline std::string to_string(std::string x) { return x; } } // namespace detail template std::string Concat(std::string key, Args&&... args) { std::vector args_str{detail::to_string((Args &&) args)...}; for (const auto& s : args_str) { key.append("_"); key.append(s); } return key; } std::string format(const std::pair& p); size_t curandStateGetSize(); bool isDebug(); } // namespace fastertransformer