Unverified commit 9602c2aa, authored by Yineng Zhang and committed by GitHub
Browse files

keep the parts needed for moe_kernels (#3218)

parent e81d7f11
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <functional>
#include <numeric>
#include <optional>
#include <sstream>
namespace tensorrt_llm::common::stl_utils
{
/// @brief Hand-written inclusive prefix scan over [first, last).
///
/// Writes the running combination of the inputs to the output range: the first output equals the
/// first input, and every following output is op(previous output value, current input).
///
/// @param first  Iterator to the first input element.
/// @param last   Iterator one past the last input element.
/// @param dFirst Iterator to the first output slot.
/// @param op     Binary operation combining the running value (left) with the next input (right).
/// @return Iterator one past the last element written; equals dFirst when the input is empty.
template <typename TInputIt, typename TOutputIt, typename TBinOp>
constexpr TOutputIt basicInclusiveScan(TInputIt first, TInputIt last, TOutputIt dFirst, TBinOp op)
{
    // Nothing to scan: leave the output untouched.
    if (first == last)
    {
        return dFirst;
    }

    // Seed the running value with the first input and emit it unchanged.
    auto running = *first;
    *dFirst = running;
    ++dFirst;

    // Each remaining input is read only after the previous output was written.
    for (++first; first != last; ++first)
    {
        running = op(std::move(running), *first);
        *dFirst = running;
        ++dFirst;
    }
    return dFirst;
}
/// @brief Inclusive prefix sum of [first, last) written to the range starting at dFirst.
///
/// Dispatches to std::inclusive_scan, except when __GNUC__ is defined and reports 8 or lower, in
/// which case the local basicInclusiveScan fallback with std::plus<> is used instead.
///
/// @param first  Iterator to the first input element.
/// @param last   Iterator one past the last input element.
/// @param dFirst Iterator to the first output slot.
/// @return Iterator one past the last element written.
template <typename TInputIt, typename TOutputIt>
constexpr TOutputIt inclusiveScan(TInputIt first, TInputIt last, TOutputIt dFirst)
{
#if !defined(__GNUC__) || __GNUC__ > 8
    return std::inclusive_scan(first, last, dFirst);
#else
    return basicInclusiveScan(first, last, dFirst, std::plus<>{});
#endif
}
/// @brief Hand-written exclusive prefix scan over [first, last).
///
/// The first output equals init; every following output is op(previous output value, previous
/// input). The last input element therefore never contributes to a written value.
///
/// @param first  Iterator to the first input element.
/// @param last   Iterator one past the last input element.
/// @param dFirst Iterator to the first output slot.
/// @param init   Value written to the first output slot and used as the initial running value.
/// @param op     Binary operation combining the running value (left) with an input (right).
/// @return Iterator one past the last element written; equals dFirst when the input is empty.
template <typename TInputIt, typename TOutputIt, typename T, typename TBinOp>
constexpr TOutputIt basicExclusiveScan(TInputIt first, TInputIt last, TOutputIt dFirst, T init, TBinOp op)
{
    // Nothing to scan: leave the output untouched.
    if (first == last)
    {
        return dFirst;
    }

    for (;;)
    {
        // Read the current input before the output slot is written.
        T next{op(init, *first)};
        *dFirst = init;
        ++dFirst;
        ++first;
        if (first == last)
        {
            return dFirst;
        }
        init = std::move(next);
    }
}
/// @brief Exclusive prefix sum of [first, last), seeded with init, written to the range at dFirst.
///
/// Dispatches to std::exclusive_scan, except when __GNUC__ is defined and reports 8 or lower, in
/// which case the local basicExclusiveScan fallback with std::plus<> is used instead.
///
/// @param first  Iterator to the first input element.
/// @param last   Iterator one past the last input element.
/// @param dFirst Iterator to the first output slot.
/// @param init   Initial value of the scan.
/// @return Iterator one past the last element written.
template <typename TInputIt, typename TOutputIt, typename T>
constexpr TOutputIt exclusiveScan(TInputIt first, TInputIt last, TOutputIt dFirst, T init)
{
#if !defined(__GNUC__) || __GNUC__ > 8
    return std::exclusive_scan(first, last, dFirst, std::move(init));
#else
    return basicExclusiveScan(first, last, dFirst, std::move(init), std::plus<>{});
#endif
}
/// @brief Compile-time trait: is `std::ostream& << T` a well-formed expression?
///
/// Primary template; chosen when the stream-insertion expression does not compile for T.
template <typename T, typename = void>
struct HasOperatorOutput : std::false_type
{
};

/// Partial specialization selected when inserting a T into a std::ostream is well-formed.
/// The expression is cast to void so that the second template argument matches the primary
/// template's default of `void`.
template <typename T>
struct HasOperatorOutput<T, decltype(static_cast<void>(std::declval<std::ostream&>() << std::declval<T>()))>
    : std::true_type
{
};
/// @brief Render a value as text through its stream-insertion operator.
///
/// Participates in overload resolution only when HasOperatorOutput<T>::value is true.
///
/// @param t Value to format.
/// @return The characters produced by inserting t into a default-constructed std::ostringstream.
template <typename T>
std::string toString(T const& t, typename std::enable_if_t<HasOperatorOutput<T>::value, int> = 0)
{
    std::ostringstream buffer;
    buffer << t;
    return buffer.str();
}
/// @brief Render an optional value as text.
///
/// Participates in overload resolution only when HasOperatorOutput<T>::value is true.
///
/// @param t Optional to format.
/// @return "None" when t is empty; otherwise the characters produced by inserting the contained
///         value into a default-constructed std::ostringstream.
template <typename T>
std::string toString(std::optional<T> const& t, typename std::enable_if_t<HasOperatorOutput<T>::value, int> = 0)
{
    if (!t.has_value())
    {
        return "None";
    }

    std::ostringstream buffer;
    buffer << *t;
    return buffer.str();
}
} // namespace tensorrt_llm::common::stl_utils
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if ENABLE_BF16
#include <cuda_bf16.h>
#endif // ENABLE_BF16
#include <cuda_fp16.h>
#include <memory> // std::make_unique
#include <sstream> // std::stringstream
#include <string>
#include <unordered_set>
#include <vector>
namespace tensorrt_llm::common
{
#if ENABLE_BF16
/// @brief Stream-insertion for __nv_bfloat16: the value is passed through __bfloat162float and the
/// result is written to the stream.
/// @return The same stream, to allow chaining.
static inline std::basic_ostream<char>& operator<<(std::basic_ostream<char>& stream, __nv_bfloat16 const& val)
{
    return stream << __bfloat162float(val);
}
#endif // ENABLE_BF16
/// @brief Stream-insertion for __half: the value is passed through __half2float and the result is
/// written to the stream.
/// @return The same stream, to allow chaining.
static inline std::basic_ostream<char>& operator<<(std::basic_ostream<char>& stream, __half const& val)
{
    return stream << __half2float(val);
}
/// @brief Identity overload of fmtstr for an already-built string.
/// @param s String to return.
/// @return A copy of s; no formatting is applied.
inline std::string fmtstr(std::string const& s)
{
    return std::string(s);
}
/// @brief Identity overload of fmtstr for a temporary / moved-from-able string.
///
/// The parameter is a named rvalue reference, which is an lvalue inside the function; a plain
/// `return s;` therefore copy-constructs the result under C++17. The explicit std::move makes the
/// overload transfer the buffer instead of duplicating it.
///
/// @param s String whose contents are moved into the result; left in a valid but unspecified state.
/// @return The original contents of s; no formatting is applied.
inline std::string fmtstr(std::string&& s)
{
    return std::move(s);
}
/// @brief Variadic overload of fmtstr taking a printf-style format string.
///
/// Only the declaration is visible here; presumably the result is the formatted text -- confirm
/// against the definition.
///
/// @param format printf-style format string, followed by the values it refers to.
#if defined(_MSC_VER)
std::string fmtstr(char const* format, ...);
#else
// On non-MSVC compilers the format attribute marks parameter 1 as a printf format string whose
// arguments start at parameter 2, so mismatched arguments can be diagnosed at compile time.
std::string fmtstr(char const* format, ...) __attribute__((format(printf, 1, 2)));
#endif
// __PRETTY_FUNCTION__ is used for neat debugging printing but is not supported on Windows
// The alternative is __FUNCSIG__, which is similar but not identical
#if defined(_WIN32)
#define __PRETTY_FUNCTION__ __FUNCSIG__
#endif
/// Separator placed between elements by the arr2out / arr2str / vec2str helpers when the caller
/// does not supply one.
constexpr char const* kDefaultDelimiter = ", ";
/// @brief Write the first `size` elements of `arr` to `out` as "(e0<delim>e1<delim>...)", casting
/// each element to U before insertion.
///
/// An empty range produces "()".
///
/// @tparam U     Type each element is static_cast to before being streamed.
/// @param out    Destination stream.
/// @param arr    Pointer to the first element; not dereferenced when size is 0.
/// @param size   Number of elements to print.
/// @param delim  Separator inserted between consecutive elements.
/// @return out, to allow chaining.
template <typename U, typename TStream, typename T>
inline TStream& arr2outCasted(TStream& out, T* arr, size_t size, char const* delim = kDefaultDelimiter)
{
    out << "(";
    for (size_t idx = 0; idx < size; ++idx)
    {
        // The separator goes in front of every element except the first.
        if (idx != 0)
        {
            out << delim;
        }
        out << static_cast<U>(arr[idx]);
    }
    out << ")";
    return out;
}
template <typename TStream, typename T>
inline TStream& arr2out(TStream& out, T* arr, size_t size, char const* delim = kDefaultDelimiter)
{
return arr2outCasted<T>(out, arr, size, delim);
}
/// @brief Format the first `size` elements of `arr` as a string via arr2out.
///
/// @param arr    Pointer to the first element.
/// @param size   Number of elements to print.
/// @param delim  Separator inserted between consecutive elements.
/// @return The text arr2out wrote into a std::stringstream.
template <typename T>
inline std::string arr2str(T* arr, size_t size, char const* delim = kDefaultDelimiter)
{
    std::stringstream buffer;
    arr2out(buffer, arr, size, delim);
    return buffer.str();
}
/// @brief Format the contents of a std::vector as a string via arr2str.
///
/// @param vec    Vector whose elements are printed in order.
/// @param delim  Separator inserted between consecutive elements.
/// @return The string arr2str produces for the vector's data pointer and size.
template <typename T>
inline std::string vec2str(std::vector<T> const& vec, char const* delim = kDefaultDelimiter)
{
    T const* const elements = vec.data();
    auto const count = vec.size();
    return arr2str(elements, count, delim);
}
/// @brief Check whether `str` begins with `prefix`.
///
/// An empty prefix matches every string, including the empty string.
///
/// @param str    String to inspect.
/// @param prefix Candidate leading substring.
/// @return true when the first prefix.size() characters of str equal prefix.
inline bool strStartsWith(std::string const& str, std::string const& prefix)
{
    // compare() clamps the inspected range to str's length, so a prefix longer than str never matches.
    return str.compare(0, prefix.size(), prefix) == 0;
}
/// @brief Split a string into a set of strings using a delimiter
/// @param input     String to split.
/// @param delimiter Character separating the pieces of `input`.
/// @return The pieces as an unordered set, so duplicates collapse and order is not preserved.
/// @note Only the declaration is visible here; handling of empty pieces and surrounding whitespace
///       is decided by the definition -- confirm there before relying on it.
std::unordered_set<std::string> str2set(std::string const& input, char delimiter);
} // namespace tensorrt_llm::common
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <chrono>
#include <ctime>
#include <iomanip>
#include <sstream>
#include "tensorrt_llm/common/timestampUtils.h"
namespace tensorrt_llm::common
{
/// @brief Format the current wall-clock time as "MM-DD-YYYY HH:MM:SS.uuuuuu" in the local time zone.
///
/// The calendar fields come from the system clock converted to local time; the trailing six digits
/// are the microseconds elapsed within the current second, zero-padded.
///
/// The broken-down time is written into a local std::tm through localtime_r / localtime_s rather
/// than std::localtime, which hands back a pointer to shared static storage and is therefore
/// unsafe when several threads format timestamps at once. If the conversion fails, the
/// zero-initialized std::tm is formatted instead of dereferencing a null pointer.
///
/// @return The formatted timestamp.
std::string getCurrentTimestamp()
{
    auto const now = std::chrono::system_clock::now();
    std::time_t const nowTime = std::chrono::system_clock::to_time_t(now);

    std::tm localTm{};
#if defined(_WIN32)
    // Microsoft CRT signature: localtime_s(destination, source).
    localtime_s(&localTm, &nowTime);
#else
    // POSIX signature: localtime_r(source, destination).
    localtime_r(&nowTime, &localTm);
#endif

    // Isolate the sub-second remainder so it can be appended as a fixed-width microsecond field.
    auto const sinceEpoch = now.time_since_epoch();
    auto const wholeSeconds = std::chrono::duration_cast<std::chrono::seconds>(sinceEpoch);
    auto const micros = std::chrono::duration_cast<std::chrono::microseconds>(sinceEpoch - wholeSeconds);

    std::ostringstream stream;
    stream << std::put_time(&localTm, "%m-%d-%Y %H:%M:%S");
    stream << "." << std::setfill('0') << std::setw(6) << micros.count();
    return stream.str();
}
} // namespace tensorrt_llm::common
......@@ -14,12 +14,35 @@
* limitations under the License.
*/
#pragma once
#include <array>
#include <cstddef>
#include <stdexcept>
#include <string>
/// Expands to a TllmException constructed with the __FILE__ and __LINE__ of the expansion site; the
/// macro arguments are passed to tensorrt_llm::common::fmtstr to build the message.
/// The macro only creates the exception object -- the caller still has to `throw` it.
#define NEW_TLLM_EXCEPTION(...) \
tensorrt_llm::common::TllmException(__FILE__, __LINE__, tensorrt_llm::common::fmtstr(__VA_ARGS__))
namespace tensorrt_llm::common
{
/// @brief Get the current local-time timestamp in the format "MM-DD-YYYY HH:MM:SS.uuuuuu",
/// where "uuuuuu" is the zero-padded microsecond part of the current second.
std::string getCurrentTimestamp();
/// @brief Exception type derived from std::runtime_error that additionally stores a fixed-size
/// array of frame addresses and a frame count.
///
/// Only the declaration is visible here; the member functions are defined elsewhere.
class TllmException : public std::runtime_error
{
public:
    /// Capacity of the stored call-stack array.
    static auto constexpr MAX_FRAMES = 128;

    /// @param file Source file associated with the error (NEW_TLLM_EXCEPTION passes __FILE__).
    /// @param line Source line associated with the error (NEW_TLLM_EXCEPTION passes __LINE__).
    /// @param msg  Error message.
    explicit TllmException(char const* file, std::size_t line, std::string const& msg);

    ~TllmException() noexcept override;

    /// @return A textual trace; presumably rendered from the stored frames -- confirm in the definition.
    [[nodiscard]] std::string getTrace() const;

    /// @param name Symbol name to process; presumably a mangled C++ name -- confirm in the definition.
    /// @return The resulting string.
    static std::string demangle(char const* name);

private:
    /// Frame addresses; value-initialized so unused slots are null.
    std::array<void*, MAX_FRAMES> mCallstack{};
    /// Number of meaningful entries in mCallstack. Initialized in-class so the count is never
    /// indeterminate, matching the value-initialized array it describes.
    int mNbFrames{0};
};
} // namespace tensorrt_llm::common
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment