Commit 546b4279 authored by limm's avatar limm
Browse files

Add csrc and mmdeploy modules.

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CORE_STACKTRACE_H_
#define MMDEPLOY_SRC_CORE_STACKTRACE_H_
#include <memory>
#include <string>
namespace mmdeploy {
// Captures and renders a stack trace of the current thread.
// Implemented with the pimpl idiom so the header stays free of any
// backtrace-library dependency; all special members are declared here and
// defined out-of-line where Impl is complete.
class Stacktrace {
 public:
  ~Stacktrace();
  // Constructs an empty trace (no capture). NOTE(review): whether this
  // captures anything is decided by the out-of-line definition — confirm.
  Stacktrace() noexcept;
  // Captures the current stack; the int parameter presumably skips that many
  // innermost frames — TODO confirm against the .cpp.
  explicit Stacktrace(int);
  Stacktrace& operator=(const Stacktrace&);
  Stacktrace& operator=(Stacktrace&& other) noexcept;
  Stacktrace(const Stacktrace&);
  Stacktrace(Stacktrace&&) noexcept;
  // Renders the captured frames as a human-readable string.
  std::string to_string() const;

 private:
  struct Impl;  // defined in the .cpp (pimpl)
  std::unique_ptr<Impl> impl_;
};
} // namespace mmdeploy
#endif // MMDEPLOY_SRC_CORE_STACKTRACE_H_
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_TYPES_VALUE_H_
#define MMDEPLOY_TYPES_VALUE_H_
#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <type_traits>
#include <variant>
#include <vector>
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/mpl/priority_tag.h"
#include "mmdeploy/core/mpl/static_any.h"
#include "mmdeploy/core/mpl/type_traits.h"
#include "mmdeploy/core/status_code.h"
namespace mmdeploy {
// Discriminator tag for the alternatives stored in Value's internal union.
// The order here must stay in sync with Value::ValueData and the switch
// statements in Value (copy ctor, _size, _empty, ValueData::destroy).
enum class ValueType : int {
  kNull = 0,  // empty value (default state)
  kBool,
  kInt,      // signed 64-bit integer
  kUInt,     // unsigned 64-bit integer
  kFloat,    // double
  kString,
  kBinary,   // byte blob
  kArray,
  kObject,   // string-keyed map
  kPointer,  // shared_ptr<Value> indirection (transparently unwrapped)
  kDynamic,  // reserved; not handled by Value's copy ctor / ValueData(Type)
  kAny,      // type-erased payload (StaticAny)
};
class Value;
// Byte type for Value::Binary. Uses std::byte where available; for older
// toolchains falls back to an equivalent scoped enum over unsigned char.
// NOTE(review): the guard keys on the GCC version (presumably tracking
// libstdc++'s std::byte support) rather than __cplusplus — confirm this is
// the intended condition for non-GCC compilers.
#if __GNUC__ >= 8
using Byte = std::byte;
#else
enum class Byte : unsigned char {};
#endif
namespace detail {
class ValueRef;
}
// Bidirectional iterator over a Value that holds either an array or an
// object. Both an object iterator and an array iterator are stored; which
// one is meaningful is decided at each use by value_->is_array(). T is
// Value or const Value (see Value::iterator / Value::const_iterator).
template <typename T>
class ValueIterator {
 public:
  using value_type = Value;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type*;
  using reference = value_type&;
  using iterator_category = std::bidirectional_iterator_tag;
  using object_iterator_t = typename T::Object::iterator;
  using array_iterator_t = typename T::Array::iterator;

  ValueIterator() = default;
  // Object-mode iterator: `value` is the container being iterated.
  ValueIterator(T* value, object_iterator_t iter) : value_(value), object_iter_(iter) {}
  // Array-mode iterator.
  ValueIterator(T* value, array_iterator_t iter) : value_(value), array_iter_(iter) {}

  ValueIterator& operator++() {
    if (value_->is_array()) {
      ++array_iter_;
    } else {
      ++object_iter_;
    }
    return *this;
  }
  ValueIterator operator++(int) {
    auto it = *this;
    ++(*this);
    return it;
  }
  // Dereferencing an object iterator yields the mapped value; use key() for
  // the key.
  T& operator*() {
    if (value_->is_array()) {
      return *array_iter_;
    } else {
      return object_iter_->second;
    }
  }
  const T& operator*() const {
    if (value_->is_array()) {
      return *array_iter_;
    } else {
      return object_iter_->second;
    }
  }
  T* operator->() {
    if (value_->is_array()) {
      return &(*array_iter_);
    } else {
      return &object_iter_->second;
    }
  }
  const T* operator->() const {
    if (value_->is_array()) {
      return &(*array_iter_);
    } else {
      return &object_iter_->second;
    }
  }
  // Key of the current element; only valid in object mode, otherwise throws.
  // No return after throw_exception — presumably it is [[noreturn]]; confirm.
  const std::string& key() {
    if (value_->is_object()) {
      return object_iter_->first;
    }
    throw_exception(eInvalidArgument);
  }
  // Compares owner and BOTH sub-iterators; the inactive one was
  // value-initialized on both sides, so the comparison stays well-defined
  // for iterators created by the same container.
  bool operator==(const ValueIterator& other) const {
    return value_ == other.value_ && object_iter_ == other.object_iter_ &&
           array_iter_ == other.array_iter_;
  }
  bool operator!=(const ValueIterator& other) const { return !(*this == other); }

 private:
  T* value_{};
  object_iterator_t object_iter_{};
  array_iterator_t array_iter_{};
};
class Dynamic;
class Value;
// Marker wrapper: a value tagged for storage in Value as a type-erased
// payload (ValueType::kAny). See Value's EraseType constructor.
template <class T>
struct EraseType {
  T value;
};
// Marker wrapper: a value tagged for serialization through the archive
// mechanism instead of type erasure.
template <class T>
struct ArchiveType {
  T value;
};
// Wraps `v` (preserving its value category via the T&& member) so it will be
// stored by type erasure.
template <class T>
EraseType<T&&> cast_by_erasure(T&& v) {
  return {std::forward<T>(v)};
}
// Wraps `v` so it will be handled by the archive path.
template <class T>
ArchiveType<T&&> cast_by_archive(T&& v) {
  return {std::forward<T>(v)};
}
// Trait: true when T is exactly mmdeploy::Value (no reference/cv stripping).
template <typename T>
struct is_value : std::is_same<T, Value> {};
template <typename T>
inline constexpr bool is_value_v = is_value<T>::value;
namespace detail {
// Trait: matches `const T*` only; used to select the const overloads of
// Value::_get_ptr.
template <typename T>
struct is_pointer_to_const : std::false_type {};
template <typename T>
struct is_pointer_to_const<const T*> : std::true_type {};
// Trait: matches `const T&` only; used to select the const Value::_get_ref.
template <typename T>
struct is_const_reference : std::false_type {};
template <typename T>
struct is_const_reference<const T&> : std::true_type {};
}  // namespace detail
// A JSON-like dynamic value, modeled closely after nlohmann::json: a type
// tag (type_) plus a tagged union (data_). Small alternatives (bool,
// integers, double) are stored inline; string/binary/array/object/pointer/
// any are heap-allocated and owned via raw pointers in the union, released
// by ValueData::destroy(type_) from the destructor.
//
// Beyond JSON it supports:
//  - kPointer: a shared_ptr<Value> indirection. Most public accessors call
//    _unwrap() first, which transparently follows pointer chains.
//  - kAny: a type-erased payload (StaticAny) reached via EraseType.
class Value {
 public:
  // STL-style container typedefs.
  using value_type = Value;
  using reference = value_type&;
  using const_reference = const value_type&;
  using difference_type = std::ptrdiff_t;
  using size_type = std::size_t;
  using pointer = value_type*;
  using const_pointer = const value_type*;
  using iterator = ValueIterator<Value>;
  using const_iterator = ValueIterator<const Value>;
  using Type = ValueType;
  // Concrete storage types for each ValueType alternative.
  using Boolean = bool;
  using Integer = int64_t;
  using Unsigned = uint64_t;
  using Float = double;
  using String = std::string;
  using Binary = std::vector<Byte>;
  using Array = std::vector<Value>;
  using Object = std::map<std::string, Value>;
  using Pointer = std::shared_ptr<Value>;
  using Dynamic = ::mmdeploy::Dynamic;
  using Any = ::mmdeploy::StaticAny;
  using ValueRef = detail::ValueRef;
  // Shorthand aliases so users can write Value::kArray etc.
  static constexpr const auto kNull = ValueType::kNull;
  static constexpr const auto kBool = ValueType::kBool;
  static constexpr const auto kInt = ValueType::kInt;
  static constexpr const auto kUInt = ValueType::kUInt;
  static constexpr const auto kFloat = ValueType::kFloat;
  static constexpr const auto kString = ValueType::kString;
  static constexpr const auto kBinary = ValueType::kBinary;
  static constexpr const auto kArray = ValueType::kArray;
  static constexpr const auto kObject = ValueType::kObject;
  static constexpr const auto kPointer = ValueType::kPointer;
  static constexpr const auto kDynamic = ValueType::kDynamic;
  static constexpr const auto kAny = ValueType::kAny;

  // Constructs a default-initialized value of the given type (e.g. an empty
  // array for kArray); ValueData(Type) allocates the heap alternatives.
  Value(const ValueType v) : type_(v), data_(v) {}
  // Default/null construction.
  Value(std::nullptr_t = nullptr) noexcept : Value(ValueType::kNull) {}
  // From a ValueRef (initializer-list machinery): moves or copies the
  // referenced value.
  template <typename T, std::enable_if_t<std::is_same_v<T, ValueRef>, int> = 0>
  Value(const T& ref) : Value(ref.moved_or_copied()) {}
  // Deep copy for heap-backed alternatives. kPointer copies the shared_ptr
  // itself (the pointee is shared, not cloned). kDynamic is unsupported and
  // falls through to the throwing default.
  Value(const Value& other) : type_(other.type_) {
    switch (type_) {
      case ValueType::kNull:
        break;
      case ValueType::kBool:
        data_ = other.data_.boolean;
        break;
      case ValueType::kInt:
        data_ = other.data_.number_integer;
        break;
      case ValueType::kUInt:
        data_ = other.data_.number_unsigned;
        break;
      case ValueType::kFloat:
        data_ = other.data_.number_float;
        break;
      case ValueType::kString:
        data_ = *other.data_.string;
        break;
      case ValueType::kBinary:
        data_ = *other.data_.binary;
        break;
      case ValueType::kArray:
        data_ = *other.data_.array;
        break;
      case ValueType::kObject:
        data_ = *other.data_.object;
        break;
      case ValueType::kPointer:
        data_ = *other.data_.pointer;
        break;
      case ValueType::kAny:
        data_.any = create<Any>(*other.data_.any);
        break;
      default:
        throw_exception(eInvalidArgument);
    }
  }
  // bool is handled by a constrained template (rather than an overload) so
  // that pointers and arithmetic types don't implicitly convert to it.
  template <class T, std::enable_if_t<std::is_same<std::decay_t<T>, bool>::value, bool> = true>
  Value(T&& value) : type_(kBool), data_(Boolean{value}) {}
  // One overload per exact-width integer avoids ambiguous promotions.
  Value(int8_t value) : type_(kInt), data_(Integer{value}) {}
  Value(int16_t value) : type_(kInt), data_(Integer{value}) {}
  Value(int32_t value) : type_(kInt), data_(Integer{value}) {}
  Value(int64_t value) : type_(kInt), data_(Integer{value}) {}
  Value(uint8_t value) : type_(kUInt), data_(Unsigned{value}) {}
  Value(uint16_t value) : type_(kUInt), data_(Unsigned{value}) {}
  Value(uint32_t value) : type_(kUInt), data_(Unsigned{value}) {}
  Value(uint64_t value) : type_(kUInt), data_(Unsigned{value}) {}
  Value(float value) : type_(kFloat), data_(Float{value}) {}
  Value(double value) : type_(kFloat), data_(Float{value}) {}
  Value(Binary value) : type_(kBinary), data_(std::move(value)) {}
  Value(Array value) : type_(kArray), data_(std::move(value)) {}
  Value(Object value) : type_(kObject), data_(std::move(value)) {}
  Value(Pointer value) : type_(kPointer), data_(std::move(value)) {}
  // Anything a String can be built from (const char*, string_view, ...).
  template <class T, std::enable_if_t<std::is_constructible<String, T>::value, bool> = true>
  Value(T&& value) : type_(kString), data_(String{std::forward<T>(value)}) {}
  // Types registered for type erasure are routed through cast_by_erasure.
  template <typename T, std::enable_if_t<is_cast_by_erasure<std::decay_t<T>>::value, bool> = true>
  Value(T&& value) : Value(cast_by_erasure(std::forward<T>(value))) {}
  // Stores an erased payload as kAny.
  template <typename T>
  Value(EraseType<T>&& value) : type_(Type::kAny) {
    data_.any = create<Any>(std::forward<T>(value.value));
  }
  // nlohmann-style brace construction; defined after detail::ValueRef below.
  Value(std::initializer_list<ValueRef> init, bool type_deduction = true,
        Type manual_type = Type::kArray);
  // Move: steal the union bits wholesale, leave `other` as null so its
  // destructor is a no-op.
  Value(Value&& other) noexcept : type_(other.type_), data_(other.data_) {
    other.type_ = ValueType::kNull;
    other.data_ = {};
  }
  // copy-and-swap
  // Unified assignment: `other` is a by-value copy (or move); swapping hands
  // our old payload to it for destruction on scope exit.
  Value& operator=(Value other) noexcept {
    using std::swap;
    swap(type_, other.type_);
    swap(data_, other.data_);
    return *this;
  }
  // The union cannot know its active member; pass it the tag.
  ~Value() { data_.destroy(type_); }

  operator Type() const noexcept { return type(); }
  // All the public queries below unwrap pointer chains first.
  Type type() const noexcept { return _unwrap().type_; }
  bool is_null() const noexcept { return _unwrap()._is_null(); }
  bool is_array() const noexcept { return _unwrap()._is_array(); }
  bool is_object() const noexcept { return _unwrap()._is_object(); }
  // is_any<T>() additionally checks the erased payload's type id.
  template <typename T = void>
  bool is_any() const noexcept {
    return _unwrap()._is_any<T>();
  }
  bool is_boolean() const noexcept { return _unwrap()._is_boolean(); }
  bool is_string() const noexcept { return _unwrap()._is_string(); }
  bool is_binary() const noexcept { return _unwrap()._is_binary(); }
  bool is_number() const noexcept { return _unwrap()._is_number(); }
  bool is_number_integer() const noexcept { return _unwrap()._is_number_integer(); }
  bool is_number_unsigned() const noexcept { return _unwrap()._is_number_unsigned(); }
  bool is_number_float() const noexcept { return _unwrap()._is_number_float(); }
  // Intentionally NOT unwrapped: reports whether this node itself is a
  // pointer (unwrapping would always make this false).
  bool is_pointer() const noexcept { return _is_pointer(); }
  size_t size() const noexcept { return _unwrap()._size(); }
  bool empty() const noexcept { return _unwrap()._empty(); }

 private:
  // Raw (non-unwrapping) predicates on this node's own tag.
  constexpr Type _type() const noexcept { return type_; }
  constexpr bool _is_null() const noexcept { return type_ == Type::kNull; }
  constexpr bool _is_array() const noexcept { return type_ == Type::kArray; }
  constexpr bool _is_object() const noexcept { return type_ == Type::kObject; }
  template <typename T = void>
  constexpr bool _is_any() const noexcept {
    if (type_ != Type::kAny) {
      return false;
    }
    if constexpr (std::is_void_v<T>) {
      return true;
    } else {
      return traits::TypeId<T>::value == data_.any->type();
    }
  }
  constexpr bool _is_boolean() const noexcept { return type_ == Type::kBool; }
  constexpr bool _is_string() const noexcept { return type_ == Type::kString; }
  constexpr bool _is_binary() const noexcept { return type_ == Type::kBinary; }
  constexpr bool _is_number() const noexcept { return _is_number_integer() || _is_number_float(); }
  constexpr bool _is_number_integer() const noexcept {
    return type_ == Type::kInt || type_ == Type::kUInt;
  }
  constexpr bool _is_number_unsigned() const noexcept { return type_ == Type::kUInt; }
  constexpr bool _is_number_float() const noexcept { return type_ == Type::kFloat; }
  constexpr bool _is_pointer() const noexcept { return type_ == Type::kPointer; }
  // nlohmann semantics: null has size 0, scalars have size 1.
  size_t _size() const noexcept {
    switch (_type()) {
      case ValueType::kNull:
        return 0;
      case ValueType::kArray:
        return data_.array->size();
      case ValueType::kObject:
        return data_.object->size();
      default:
        return 1;
    }
  }
  bool _empty() const noexcept {
    switch (_type()) {
      case Type::kNull:
        return true;
      case Type::kArray:
        return data_.array->empty();
      case Type::kObject:
        return data_.object->empty();
      default:
        return false;
    }
  }

 private:
  // Tag-dispatched accessors: the (unused) pointer parameter selects the
  // overload; each returns the payload when the tag matches, else nullptr.
  Boolean* get_impl_ptr(Boolean*) noexcept { return _is_boolean() ? &data_.boolean : nullptr; }
  const Boolean* get_impl_ptr(const Boolean*) const noexcept {
    return _is_boolean() ? &data_.boolean : nullptr;
  }
  Integer* get_impl_ptr(Integer*) noexcept {
    return _is_number_integer() ? &data_.number_integer : nullptr;
  }
  const Integer* get_impl_ptr(const Integer*) const noexcept {
    return _is_number_integer() ? &data_.number_integer : nullptr;
  }
  Unsigned* get_impl_ptr(Unsigned*) noexcept {
    return _is_number_unsigned() ? &data_.number_unsigned : nullptr;
  }
  const Unsigned* get_impl_ptr(const Unsigned*) const noexcept {
    return _is_number_unsigned() ? &data_.number_unsigned : nullptr;
  }
  Float* get_impl_ptr(Float*) noexcept {
    return _is_number_float() ? &data_.number_float : nullptr;
  }
  const Float* get_impl_ptr(const Float*) const noexcept {
    return _is_number_float() ? &data_.number_float : nullptr;
  }
  String* get_impl_ptr(String*) noexcept { return _is_string() ? data_.string : nullptr; }
  const String* get_impl_ptr(const String*) const noexcept {
    return _is_string() ? data_.string : nullptr;
  }
  Binary* get_impl_ptr(Binary*) noexcept { return _is_binary() ? data_.binary : nullptr; }
  const Binary* get_impl_ptr(const Binary*) const noexcept {
    return _is_binary() ? data_.binary : nullptr;
  }
  Array* get_impl_ptr(Array*) noexcept { return _is_array() ? data_.array : nullptr; }
  const Array* get_impl_ptr(const Array*) const noexcept {
    return _is_array() ? data_.array : nullptr;
  }
  Object* get_impl_ptr(Object*) noexcept { return _is_object() ? data_.object : nullptr; }
  const Object* get_impl_ptr(const Object*) const noexcept {
    return _is_object() ? data_.object : nullptr;
  }
  Pointer* get_impl_ptr(Pointer*) noexcept { return _is_pointer() ? data_.pointer : nullptr; }
  const Pointer* get_impl_ptr(const Pointer*) const noexcept {
    return _is_pointer() ? data_.pointer : nullptr;
  }
  Any* get_impl_ptr(Any*) noexcept { return _is_any() ? data_.any : nullptr; }
  const Any* get_impl_ptr(const Any*) const noexcept { return _is_any() ? data_.any : nullptr; }
  // Extracts a typed pointer from a kAny payload; nullptr on tag or type
  // mismatch (static_any_cast presumably checks the type id — confirm).
  template <typename T>
  T* get_erased_ptr(EraseType<T>*) noexcept {
    return _is_any() ? static_any_cast<T>(data_.any) : nullptr;
  }
  template <typename T>
  const T* get_erased_ptr(const EraseType<T>*) const noexcept {
    return _is_any() ? static_any_cast<T>(const_cast<const Any*>(data_.any)) : nullptr;
  }
  // Shared implementation for get_ref / _get_ref: pointer lookup + throw on
  // mismatch. The extra parens in decltype((...)) preserve reference-ness.
  template <typename T, typename This>
  static auto get_ref_impl(This& obj)
      -> decltype((*obj.template get_ptr<std::add_pointer_t<T>>())) {
    auto p = obj.template get_ptr<std::add_pointer_t<T>>();
    if (p) {
      return *p;
    }
    throw_exception(eInvalidArgument);
  }
  // _get_ptr overload set: SFINAE picks get_impl_ptr for the built-in
  // alternatives and get_erased_ptr for erased types; the trailing return
  // type removes non-viable overloads from the set.
  template <typename T, std::enable_if_t<std::is_pointer<T>::value, bool> = true>
  auto _get_ptr() noexcept -> decltype(std::declval<Value&>().get_impl_ptr(std::declval<T>())) {
    return get_impl_ptr(static_cast<T>(nullptr));
  }
  template <typename T, std::enable_if_t<detail::is_pointer_to_const<T>::value, bool> = true>
  auto _get_ptr() const noexcept
      -> decltype(std::declval<const Value&>().get_impl_ptr(std::declval<T>())) {
    return get_impl_ptr(static_cast<T>(nullptr));
  }
  template <typename T, std::enable_if_t<std::is_pointer<T>::value, bool> = true>
  auto _get_ptr() noexcept -> decltype(std::declval<Value&>().get_erased_ptr(std::declval<T>())) {
    return get_erased_ptr(static_cast<T>(nullptr));
  }
  template <typename T, std::enable_if_t<detail::is_pointer_to_const<T>::value, bool> = true>
  auto _get_ptr() const noexcept
      -> decltype(std::declval<const Value&>().get_erased_ptr(std::declval<T>())) {
    return get_erased_ptr(static_cast<T>(nullptr));
  }
  // T* -> EraseType<T>*
  template <
      typename T, typename T0 = std::remove_pointer_t<T>,
      std::enable_if_t<std::is_pointer<T>::value && is_cast_by_erasure<T0>::value, bool> = true>
  auto _get_ptr() noexcept
      -> decltype(std::declval<Value&>().get_erased_ptr(std::declval<EraseType<T0>*>())) {
    return get_erased_ptr(static_cast<EraseType<T0>*>(nullptr));
  }
  // const T* -> const EraseType<T>*
  template <typename T, typename T0 = std::remove_const_t<std::remove_pointer_t<T>>,
            std::enable_if_t<detail::is_pointer_to_const<T>::value && is_cast_by_erasure<T0>::value,
                             bool> = true>
  auto _get_ptr() const noexcept
      -> decltype(std::declval<Value&>().get_erased_ptr(std::declval<const EraseType<T0>*>())) {
    return get_erased_ptr(static_cast<const EraseType<T0>*>(nullptr));
  }
  // Detection idiom: has_get_ptr<T> is std::true_type iff _get_ptr<T*> is
  // well-formed for this Value.
  template <typename T>
  static auto test_get_ptr(T) -> decltype(std::declval<Value&>()._get_ptr<T>(), std::true_type{});
  static std::false_type test_get_ptr(...);
  template <typename T>
  using has_get_ptr = decltype(test_get_ptr(std::declval<std::add_pointer_t<T>>()));
  template <typename T, std::enable_if_t<std::is_reference<T>::value, bool> = true>
  auto _get_ref() -> decltype((get_ref_impl<T>(std::declval<Value&>()))) {
    return get_ref_impl<T>(*this);
  }
  template <typename T, std::enable_if_t<detail::is_const_reference<T>::value, bool> = true>
  auto _get_ref() const -> decltype((get_ref_impl<T>(std::declval<Value&>()))) {
    return get_ref_impl<T>(*this);
  }
  // _get<T> overloads, selected by the kind of T:
  // (1) T == Value: plain copy.
  template <typename T,
            std::enable_if_t<std::is_same<std::remove_const_t<T>, Value>::value, bool> = true>
  Value _get() const {
    return *this;
  }
  // (2) non-arithmetic types reachable via _get_ptr: copy out by const ref.
  template <typename T,
            std::enable_if_t<!std::is_arithmetic<T>::value && has_get_ptr<T>::value, bool> = true>
  auto _get() const
      -> std::remove_reference_t<decltype(std::declval<Value&>()._get_ref<const T&>())> {
    return get_ref<const T&>();
  }
  // (3) arithmetic T: converting read from any numeric/bool alternative.
  template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, bool> = true>
  T _get() const {
    switch (_type()) {
      case kInt:
        return static_cast<T>(*_get_ptr<const Integer*>());
      case kUInt:
        return static_cast<T>(*_get_ptr<const Unsigned*>());
      case kFloat:
        return static_cast<T>(*_get_ptr<const Float*>());
      case kBool:
        return static_cast<T>(*_get_ptr<const Boolean*>());
      default:
        throw_exception(eInvalidArgument);
    }
  }
  // (4) const char*: borrows the internal string's buffer — the pointer is
  // only valid while this Value (and its string) lives.
  template <typename T, std::enable_if_t<std::is_same<T, const char*>::value, bool> = true>
  const char* _get() const {
    if (_is_string()) {
      return data_.string->c_str();
    }
    throw_exception(eInvalidArgument);
  }
  template <typename T>
  T& _get_to(T& v) const {
    v = get<T>();
    return v;
  }

 public:
  // Public accessors; each unwraps pointer chains, then forwards to the
  // private implementation. Trailing-return decltypes keep them SFINAE-
  // friendly for unsupported T.
  template <typename T>
  auto get_ptr() noexcept -> decltype(std::declval<Value&>()._get_ptr<T>()) {
    return _unwrap()._get_ptr<T>();
  }
  template <typename T>
  auto get_ptr() const noexcept -> decltype(std::declval<const Value&>()._get_ptr<T>()) {
    return _unwrap()._get_ptr<T>();
  }
  template <typename T>
  auto get_ref() -> decltype((std::declval<Value&>()._get_ref<T>())) {
    return _unwrap()._get_ref<T>();
  }
  template <typename T>
  auto get_ref() const -> decltype((std::declval<const Value&>()._get_ref<T>())) {
    return _unwrap()._get_ref<T>();
  }
  template <typename T>
  auto get() -> decltype(std::declval<Value&>()._get<T>()) {
    return _unwrap()._get<T>();
  }
  template <typename T>
  auto get() const -> decltype(std::declval<const Value&>()._get<T>()) {
    return _unwrap()._get<T>();
  }
  template <typename T>
  auto get_to(T& v) const -> decltype((std::declval<const Value&>()._get_to(v))) {
    return _unwrap()._get_to(v);
  }
  // Direct container access; throws if the value is not of that kind.
  Array& array() & { return get_ref<Array&>(); }
  Array&& array() && { return static_cast<Array&&>(get_ref<Array&>()); }
  const Array& array() const& { return get_ref<const Array&>(); }
  const Array&& array() const&& { return static_cast<const Array&&>(get_ref<const Array&>()); }
  Object& object() & { return get_ref<Object&>(); }
  Object&& object() && { return static_cast<Object&&>(get_ref<Object&>()); }
  const Object& object() const& { return get_ref<const Object&>(); }
  const Object&& object() const&& { return static_cast<const Object&&>(get_ref<const Object&>()); }
  // Array indexing (no bounds check beyond vector's) and object indexing
  // (non-const key form default-inserts, like std::map).
  value_type& operator[](size_t idx) & {
    return static_cast<value_type&>(_unwrap()._subscript(idx));
  }
  value_type&& operator[](size_t idx) && {
    return static_cast<value_type&&>(_unwrap()._subscript(idx));
  }
  const value_type& operator[](size_t idx) const& {
    return static_cast<const value_type&>(_unwrap()._subscript(idx));
  }
  const value_type&& operator[](size_t idx) const&& {
    return static_cast<const value_type&&>(_unwrap()._subscript(idx));
  }
  value_type& operator[](const Object::key_type& idx) & {
    return static_cast<value_type&>(_unwrap()._subscript(idx));
  }
  value_type&& operator[](const Object::key_type& idx) && {
    return static_cast<value_type&&>(_unwrap()._subscript(idx));
  }
  const value_type& operator[](const Object::key_type& idx) const& {
    return static_cast<const value_type&>(_unwrap()._subscript(idx));
  }
  const value_type&& operator[](const Object::key_type& idx) const&& {
    return static_cast<const value_type&&>(_unwrap()._subscript(idx));
  }
  reference front() { return _unwrap()._front(); }
  const_reference front() const { return _unwrap()._front(); }
  reference back() { return _unwrap()._back(); }
  const_reference back() const { return _unwrap()._back(); }
  void push_back(Value&& val) { _unwrap()._push_back(std::move(val)); }
  void push_back(const Value& val) { _unwrap()._push_back(val); }
  template <typename Key>
  bool contains(Key&& key) const {
    return _unwrap()._contains(std::forward<Key>(key));
  }
  template <typename Key>
  iterator find(Key&& key) {
    return _unwrap()._find(std::forward<Key>(key));
  }
  template <typename Key>
  const_iterator find(Key&& key) const {
    return _unwrap()._find(std::forward<Key>(key));
  }
  // Object lookup with fallback: returns default_value when key is absent.
  template <typename T>
  T value(const typename Object::key_type& key, const T& default_value) const {
    return _unwrap()._value(key, default_value);
  }
  iterator begin() { return _unwrap()._begin(); }
  iterator end() { return _unwrap()._end(); }
  const_iterator begin() const { return _unwrap()._begin(); }
  const_iterator end() const { return _unwrap()._end(); }
  // Shallow-merges object v into this object (see _update).
  void update(const_reference v) { return _unwrap()._update(v); }

 private:
  reference _front() {
    if (_is_array()) {
      return (*data_.array).front();
    }
    throw_exception(eInvalidArgument);
  }
  const_reference _front() const {
    if (_is_array()) {
      return (*data_.array).front();
    }
    throw_exception(eInvalidArgument);
  }
  reference _back() {
    if (_is_array()) {
      return (*data_.array).back();
    }
    throw_exception(eInvalidArgument);
  }
  const_reference _back() const {
    if (_is_array()) {
      return (*data_.array).back();
    }
    throw_exception(eInvalidArgument);
  }
  // push_back on null implicitly converts the value to an empty array first
  // (nlohmann behavior).
  void _push_back(Value&& val) {
    if (!(_is_null() || _is_array())) {
      throw_exception(eInvalidArgument);
    }
    if (_is_null()) {
      *this = Type::kArray;
    }
    data_.array->push_back(std::move(val));
  }
  void _push_back(const Value& val) {
    if (!(_is_null() || _is_array())) {
      throw_exception(eInvalidArgument);
    }
    if (_is_null()) {
      *this = Type::kArray;
    }
    data_.array->push_back(val);
  }
  // contains() is permissive (false for non-objects); find() throws instead.
  template <typename Key>
  bool _contains(Key&& key) const {
    return _is_object() && data_.object->find(std::forward<Key>(key)) != data_.object->end();
  }
  template <typename Key>
  iterator _find(Key&& key) {
    if (_is_object()) {
      auto iter = data_.object->find(std::forward<Key>(key));
      return {this, iter};
    }
    throw_exception(eInvalidArgument);
  }
  template <typename Key>
  const_iterator _find(Key&& key) const {
    if (_is_object()) {
      auto iter = data_.object->find(std::forward<Key>(key));
      return {this, iter};
    }
    throw_exception(eInvalidArgument);
  }
  template <typename T>
  T _value(const typename Object::key_type& key, const T& default_value) const {
    if (_is_object()) {
      const auto it = _find(key);
      if (it != _end()) {
        return (*it)._get<T>();
      }
      return default_value;
    }
    throw_exception(eInvalidArgument);
  }
  // Iteration is only defined for arrays and objects.
  iterator _begin() {
    if (_is_array()) {
      return {this, data_.array->begin()};
    } else if (_is_object()) {
      return {this, data_.object->begin()};
    } else {
      throw_exception(eInvalidArgument);
    }
  }
  iterator _end() {
    if (_is_array()) {
      return {this, data_.array->end()};
    } else if (_is_object()) {
      return {this, data_.object->end()};
    } else {
      throw_exception(eInvalidArgument);
    }
  }
  const_iterator _begin() const {
    if (_is_array()) {
      return {this, data_.array->begin()};
    } else if (_is_object()) {
      return {this, data_.object->begin()};
    } else {
      throw_exception(eInvalidArgument);
    }
  }
  const_iterator _end() const {
    if (_is_array()) {
      return {this, data_.array->end()};
    } else if (_is_object()) {
      return {this, data_.object->end()};
    } else {
      throw_exception(eInvalidArgument);
    }
  }
  // Merge: null becomes an empty object first; existing keys are
  // overwritten (shallow, no recursive merge — see the free update()
  // function at the end of this header for a depth-limited deep merge).
  void _update(const_reference v) {
    if (_is_null()) {
      type_ = ValueType::kObject;
      data_.object = create<Object>();
    }
    if (!(_is_object() && v._is_object())) {
      throw_exception(eInvalidArgument);
    }
    for (auto it = v._begin(); it != v._end(); ++it) {
      data_.object->operator[](it.key()) = *it;
    }
  }
  // Follows kPointer links until reaching a non-pointer node; a null
  // shared_ptr terminates the walk at the pointer node itself.
  Value& _unwrap() {
    auto p = this;
    while (p->_is_pointer() && *p->data_.pointer) {
      p = p->data_.pointer->get();
    }
    return *p;
  }
  const Value& _unwrap() const {
    auto p = this;
    while (p->_is_pointer() && *p->data_.pointer) {
      p = p->data_.pointer->get();
    }
    return *p;
  }

 private:
  // Centralized heap alloc/free for the union's owned payloads.
  template <typename T, typename... Args>
  static T* create(Args&&... args) {
    return new T(std::forward<Args>(args)...);
  }
  template <typename T>
  static void release(T* ptr) {
    delete ptr;
  }
  value_type& _subscript(size_t idx) {
    if (_is_array()) {
      return (*data_.array)[idx];
    }
    throw_exception(eInvalidArgument);
  }
  const value_type& _subscript(size_t idx) const {
    if (_is_array()) {
      return (*data_.array)[idx];
    }
    throw_exception(eInvalidArgument);
  }
  // Keyed subscript on null converts to an empty object (like nlohmann).
  reference _subscript(const Object::key_type& key) {
    if (_is_null()) {
      type_ = Type::kObject;
      data_.object = create<Object>();
    }
    if (_is_object()) {
      return (*data_.object)[key];
    }
    throw_exception(eInvalidArgument);
  }
  const_reference _subscript(const Object::key_type& key) const {
    if (_is_object()) {
      return (*data_.object)[key];
    }
    throw_exception(eInvalidArgument);
  }

 private:
  // Tagged union; the active member is tracked by Value::type_. Heap-backed
  // members are raw owning pointers, released via destroy(type).
  union ValueData {
    Boolean boolean;
    Integer number_integer;
    Unsigned number_unsigned;
    Float number_float;
    String* string;
    Binary* binary;
    Array* array;
    Object* object;
    Dynamic* dynamic;
    Pointer* pointer;
    Any* any;
    ValueData() = default;
    ValueData(Boolean v) noexcept : boolean(v) {}
    ValueData(Integer v) noexcept : number_integer(v) {}
    ValueData(Unsigned v) noexcept : number_unsigned(v) {}
    ValueData(Float v) noexcept : number_float(v) {}
    // Default-initializes the alternative for the given tag; kDynamic is not
    // supported here and hits the throwing default.
    ValueData(Type type) {
      switch (type) {
        case Type::kBool:
          boolean = Boolean{};
          break;
        case Type::kInt:
          number_integer = Integer{};
          break;
        case Type::kUInt:
          number_unsigned = Unsigned{};
          break;
        case Type::kFloat:
          number_float = Float{};
          break;
        case Type::kString:
          string = create<String>();
          break;
        case Type::kBinary:
          binary = create<Binary>();
          break;
        case Type::kArray:
          array = create<Array>();
          break;
        case Type::kObject:
          object = create<Object>();
          break;
        case Type::kPointer:
          pointer = create<Pointer>();
          break;
        case Type::kAny:
          any = create<Any>();
          break;
        case Type::kNull:
          object = nullptr;
          break;
        default:
          throw_exception(eNotSupported);
      }
    }
    ValueData(const String& value) { string = create<String>(value); }
    ValueData(String&& value) { string = create<String>(std::move(value)); }
    ValueData(const Binary& value) { binary = create<Binary>(value); }
    ValueData(Binary&& value) { binary = create<Binary>(std::move(value)); }
    ValueData(const Object& value) { object = create<Object>(value); }
    ValueData(Object&& value) { object = create<Object>(std::move(value)); }
    ValueData(const Array& value) { array = create<Array>(value); }
    ValueData(Array&& value) { array = create<Array>(std::move(value)); }
    ValueData(const Pointer& value) { pointer = create<Pointer>(value); }
    ValueData(Pointer&& value) { pointer = create<Pointer>(std::move(value)); }
    // nlohmann/json used an iterative implementation
    // Frees the alternative selected by `t`; deep recursion on nested
    // arrays/objects happens via the elements' own destructors.
    void destroy(ValueType t) {
      switch (t) {
        case ValueType::kString:
          release(string);
          break;
        case ValueType::kBinary:
          release(binary);
          break;
        case ValueType::kArray:
          release(array);
          break;
        case ValueType::kObject:
          release(object);
          break;
        case ValueType::kPointer:
          release(pointer);
          break;
        case ValueType::kAny:
          release(any);
          break;
        default:
          break;
      }
    }
  };
  ValueType type_ = ValueType::kNull;  // active alternative of data_
  ValueData data_ = {};
};
namespace detail {
// Helper for brace-initializing Values (mirrors nlohmann::json's json_ref):
// binds either an owned temporary or a borrowed lvalue, and
// moved_or_copied() later moves or copies accordingly.
class ValueRef {
 public:
  // Rvalue: take ownership by moving into owned_value_.
  ValueRef(Value&& value)
      : owned_value_(std::move(value)), value_ref_(&owned_value_), is_rvalue_(true) {}
  // Lvalue: borrow. const is cast away, but moved_or_copied() only copies
  // through this pointer when is_rvalue_ is false, so the referent is never
  // mutated.
  ValueRef(const Value& value) : value_ref_(const_cast<Value*>(&value)), is_rvalue_(false) {}
  // Nested brace list: materialize an owned Value from it.
  ValueRef(std::initializer_list<ValueRef> init)
      : owned_value_(init), value_ref_(&owned_value_), is_rvalue_(true) {}
  // Anything else Value can be constructed from: build an owned Value.
  template <typename... Args, std::enable_if_t<std::is_constructible_v<Value, Args...>, int> = 0>
  ValueRef(Args&&... args)
      : owned_value_(std::forward<Args>(args)...), value_ref_(&owned_value_), is_rvalue_(true) {}
  ValueRef(ValueRef&&) = default;
  ValueRef(const ValueRef&) = delete;
  ValueRef& operator=(const ValueRef&) = delete;
  ValueRef& operator=(ValueRef&&) = delete;
  ~ValueRef() = default;
  // Moves out the owned value, or copies the borrowed one. mutable
  // owned_value_ allows the move from a const method (single-use contract).
  Value moved_or_copied() const {
    if (is_rvalue_) {
      return std::move(*value_ref_);
    }
    return *value_ref_;
  }
  const Value& operator*() const { return *static_cast<const Value*>(value_ref_); }
  const Value* operator->() const { return static_cast<const Value*>(value_ref_); }

 private:
  mutable Value owned_value_;
  Value* value_ref_ = nullptr;
  const bool is_rvalue_ = true;
};
} // namespace detail
// Brace-list construction with nlohmann-style type deduction: the list is an
// object iff every element is a 2-element array whose first element is a
// string (i.e. {{"key", value}, ...}); otherwise it is an array. Callers can
// force a kind via (init, false, manual_type).
inline Value::Value(std::initializer_list<ValueRef> init, bool type_deduction, Type manual_type) {
  bool is_an_object = true;
  for (const auto& x : init) {
    if (!(x->_is_array() && x->_size() == 2 && x->_front()._is_string())) {
      is_an_object = false;
      break;
    }
  }
  if (!type_deduction) {
    // Forced array always wins; forced object on a non-object shape throws.
    if (manual_type == Type::kArray) {
      is_an_object = false;
    }
    if (manual_type == Type::kObject && !is_an_object) {
      throw_exception(eInvalidArgument);
    }
  }
  if (is_an_object) {
    type_ = Type::kObject;
    data_ = Type::kObject;
    for (const auto& x : init) {
      // Each element is a ["key", value] pair; move key and value out of the
      // materialized temporary.
      auto e = x.moved_or_copied();
      data_.object->emplace(std::move(*((*e.data_.array)[0].data_.string)),
                            std::move((*e.data_.array)[1]));
    }
  } else {
    type_ = Type::kArray;
    // Array ctor copies/moves each ValueRef via Value's ValueRef ctor.
    data_.array = create<Array>(init.begin(), init.end());
  }
}
// Wraps `v` in a shared node, producing a kPointer Value that refers to it.
inline Value make_pointer(Value v) {
  auto node = std::make_shared<Value>(std::move(v));
  return Value(std::move(node));
}
// Depth-limited recursive merge of `src` into `dst`. Keys missing from
// `dst` are inserted; when both sides hold objects for the same key and
// depth allows, they are merged recursively. Otherwise existing entries in
// `dst` are kept as-is. A negative depth is a no-op.
inline void update(Value::Object& dst, const Value::Object& src, int depth) {
  if (depth < 0) {
    return;
  }
  for (auto it = src.begin(); it != src.end(); ++it) {
    const auto [pos, inserted] = dst.insert(*it);
    const bool both_objects = pos->second.is_object() && it->second.is_object();
    if (!inserted && both_objects) {
      update(pos->second.object(), it->second.object(), depth - 1);
    }
  }
}
} // namespace mmdeploy
#endif // MMDEPLOY_TYPES_VALUE_H_
# Copyright (c) OpenMMLab. All rights reserved.
# Device backends: the CPU implementation is always built; CUDA and ACL
# (Ascend NPU) are added only when selected in the build configuration.
add_subdirectory(cpu)
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_subdirectory(cuda)
endif ()
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(acl)
endif ()
# Copyright (c) OpenMMLab. All rights reserved.
# ACL device module: compiles every .cpp in this directory into an mmdeploy
# module library.
project(mmdeploy_acl_device)
file(GLOB_RECURSE SRCS "*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/device_impl.h"
namespace mmdeploy::framework {
// Static-initialization hook: registers "npu" as an alias of the "cpu"
// platform so code addressing the NPU device resolves to the CPU platform
// implementation (presumably because ACL memory is host-accessible here —
// TODO confirm the rationale).
class AclPlatformRegisterer {
 public:
  AclPlatformRegisterer() { gPlatformRegistry().AddAlias("npu", "cpu"); }
};
// Global instance whose constructor runs at load time to perform the
// registration.
AclPlatformRegisterer g_acl_platform_registerer;
}  // namespace mmdeploy::framework
# Copyright (c) OpenMMLab. All rights reserved.
# CPU device module: builds the host platform implementation and links the
# threading library (CpuStreamImpl runs tasks on a worker thread).
project(mmdeploy_cpu_device)
file(GLOB_RECURSE SRCS "*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads)
add_library(mmdeploy::device::cpu ALIAS ${PROJECT_NAME})
// Copyright (c) OpenMMLab. All rights reserved.
#include "cpu_device.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
namespace mmdeploy::framework {
// Host memory block for the CPU platform. Either owns an aligned allocation
// (first Init overload) or wraps externally-provided memory (raw pointer or
// shared_ptr) without taking ownership.
// NOTE(review): calling Init a second time does not release a previous
// owned allocation — callers appear to Init exactly once; confirm.
class CpuHostMemory : public NonCopyable {
 public:
  CpuHostMemory() : size_(), data_(), owned_data_{false} {}
  // Allocates `size` bytes aligned to `alignment` (raised to at least
  // sizeof(void*)). The allocation size is rounded up to a multiple of the
  // alignment, as required by aligned_alloc.
  Result<void> Init(size_t size, size_t alignment) {
    alignment = std::max(alignment, sizeof(void*));
    auto space = (size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
    data_ = _aligned_malloc(space, alignment);
#elif defined(ANDROID)
    // posix_memalign leaves the out-pointer unspecified on failure, so the
    // return code must be checked explicitly.
    if (posix_memalign(&data_, alignment, space) != 0) {
      data_ = nullptr;
    }
#else
    data_ = aligned_alloc(alignment, space);
#endif
    if (!data_) {
      return Status(eOutOfMemory);
    }
    aligned_data_ = data_;
    size_ = size;
    owned_data_ = true;
    return success();
  }
  // Wraps externally-managed memory; keeps the shared_ptr alive for the
  // lifetime of this object but never frees the memory itself.
  Result<void> Init(size_t size, std::shared_ptr<void> data) {
    size_ = size;
    external_ = std::move(data);
    data_ = external_.get();
    owned_data_ = false;
    return success();
  }
  // Wraps a raw external pointer; the caller retains ownership.
  Result<void> Init(size_t size, void* data) {
    size_ = size;
    data_ = data;
    owned_data_ = false;
    return success();
  }
  ~CpuHostMemory() {
    if (data_) {
      if (owned_data_) {
#ifdef _MSC_VER
        _aligned_free(data_);
#else
        std::free(data_);
#endif
        owned_data_ = false;
      }
      data_ = nullptr;
    }
    external_.reset();
    size_ = 0;
  }
  size_t size() const { return size_; }
  // For owned allocations returns the aligned pointer; for external memory
  // returns the wrapped pointer as-is.
  void* data() const { return owned_data_ ? aligned_data_ : data_; }

 private:
  size_t size_;
  void* data_;
  void* aligned_data_{nullptr};
  bool owned_data_;
  std::shared_ptr<void> external_;  // keeps external shared memory alive
};
////////////////////////////////////////////////////////////////////////////////
/// CpuPlatformImpl
// Binds the calling context to `device`. The CPU platform has no per-thread
// device state, so this only reports the "previous" device (echoing the
// requested one) and always succeeds.
Result<void> CpuPlatformImpl::BindDevice(Device device, Device* prev) {
  // do nothing
  if (prev) {
    *prev = device;
  }
  return success();
}
// Factory for CPU buffer implementations bound to `device`.
shared_ptr<BufferImpl> CpuPlatformImpl::CreateBuffer(Device device) {
  auto buffer = std::make_shared<CpuBufferImpl>(device);
  return buffer;
}
// Factory for CPU stream implementations bound to `device`.
shared_ptr<StreamImpl> CpuPlatformImpl::CreateStream(Device device) {
  auto stream = std::make_shared<CpuStreamImpl>(device);
  return stream;
}
// Factory for CPU event implementations bound to `device`.
shared_ptr<EventImpl> CpuPlatformImpl::CreateEvent(Device device) {
  auto event = std::make_shared<CpuEventImpl>(device);
  return event;
}
int CpuPlatformImpl::GetPlatformId() const noexcept { return 0; }
const char* CpuPlatformImpl::GetPlatformName() const noexcept { return "cpu"; }
// Validates that [src_offset, src_offset + copy_size) fits in the source and
// [dst_offset, dst_offset + copy_size) fits in the destination.
// The comparisons are rearranged so `offset + copy_size` cannot wrap around
// size_t and falsely pass the bounds check.
bool CpuPlatformImpl::CheckCopyParam(size_t src_size, size_t dst_size, size_t src_offset,
                                     size_t dst_offset, size_t copy_size) {
  if (copy_size > src_size || src_offset > src_size - copy_size) {
    return false;
  }
  if (copy_size > dst_size || dst_offset > dst_size - copy_size) {
    return false;
  }
  return true;
}
// Advances `ptr` by `offset` bytes.
inline void* OffsetPtr(void* ptr, size_t offset) {
  auto* bytes = static_cast<uint8_t*>(ptr);
  return bytes + offset;
}
// Const overload: advances `ptr` by `offset` bytes.
inline const void* OffsetPtr(const void* ptr, size_t offset) {
  auto* bytes = static_cast<const uint8_t*>(ptr);
  return bytes + offset;
}
// Shared implementation for all Copy overloads. Performs the memcpy
// synchronously when no stream is given, otherwise queues it on the CPU
// stream's worker thread.
Result<void> CpuPlatformImpl::CopyImpl(const void* src, void* dst, size_t src_size, size_t dst_size,
                                       size_t src_offset, size_t dst_offset, size_t size,
                                       Stream st) {
  if (!CheckCopyParam(src_size, dst_size, src_offset, dst_offset, size)) {
    return Status(eInvalidArgument);
  }
  auto copy_task = [src, dst, src_offset, dst_offset, size] {
    std::memcpy(OffsetPtr(dst, dst_offset), OffsetPtr(src, src_offset), size);
  };
  if (!st) {
    // no stream: copy eagerly on the calling thread
    copy_task();
    return success();
  }
  if (st.GetDevice().platform_id() != 0) {
    return Status(eInvalidArgument);
  }
  if (auto* cpu_stream = static_cast<CpuStreamImpl*>(st.GetNative())) {
    return cpu_stream->Enqueue(std::move(copy_task));
  }
  return Status(eInvalidArgument);
}
// Copies `size` bytes from host memory into `dst` starting at `dst_offset`.
// The destination buffer must live on the CPU platform.
Result<void> CpuPlatformImpl::Copy(const void* host_ptr, Buffer dst, size_t size, size_t dst_offset,
                                   Stream stream) {
  auto* dst_ptr = dst.GetNative();
  if (dst_ptr == nullptr || dst.GetDevice().platform_id() != 0) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(host_ptr, dst_ptr, size, dst.GetSize(), 0, dst_offset, size, stream);
}
// Copies `size` bytes from `src` (starting at `src_offset`) into host memory.
// The source buffer must live on the CPU platform.
Result<void> CpuPlatformImpl::Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
                                   Stream stream) {
  auto* src_ptr = src.GetNative();
  if (src_ptr == nullptr || src.GetDevice().platform_id() != 0) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(src_ptr, host_ptr, src.GetSize(), size, src_offset, 0, size, stream);
}
// Buffer-to-buffer copy; both buffers must live on the CPU platform.
Result<void> CpuPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t src_offset,
                                   size_t dst_offset, Stream stream) {
  auto* src_ptr = src.GetNative();
  auto* dst_ptr = dst.GetNative();
  if (src_ptr == nullptr || dst_ptr == nullptr) {
    return Status(eInvalidArgument);
  }
  const auto src_platform = src.GetDevice().platform_id();
  if (src_platform != 0 || src_platform != dst.GetDevice().platform_id()) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(src_ptr, dst_ptr, src.GetSize(), dst.GetSize(), src_offset, dst_offset, size,
                  stream);
}
// Lazily creates the per-platform default stream exactly once; any exception
// thrown during construction is reported as eFail.
Result<Stream> CpuPlatformImpl::GetDefaultStream(int32_t device_id) {
  try {
    std::call_once(init_flag_, [&] { default_stream_ = Stream(GetDevice(device_id)); });
    return default_stream_;
  } catch (...) {
    return Status(eFail);
  }
}
////////////////////////////////////////////////////////////////////////////////
/// CpuBufferImpl
CpuBufferImpl::CpuBufferImpl(Device device) : BufferImpl(device) {}
// Returns a pointer to the start of this (sub-)buffer, or nullptr when the
// buffer has not been initialized.
void* CpuBufferImpl::GetNative(ErrorCode* ec) {
  if (memory_) {
    if (ec) *ec = ErrorCode::eSuccess;
    return OffsetPtr(memory_->data(), offset_);
  }
  if (ec) *ec = eInvalidArgument;
  return nullptr;
}
Allocator CpuBufferImpl::GetAllocator() const { return {}; }
// Returns the logical size of this (sub-)buffer; 0 with eInvalidArgument when
// the buffer has no backing memory.
size_t CpuBufferImpl::GetSize(ErrorCode* ec) {
  if (memory_) {
    if (ec) *ec = ErrorCode::eSuccess;
    return size_;
  }
  if (ec) *ec = eInvalidArgument;
  return 0;
}
// int CpuBufferImpl::Fill(uint8_t pattern, size_t size, size_t offset,
// Stream& st) {
// if (!memory_ || !memory_->handle) {
// return Status(eInvalidArgument);
// }
// if (offset + size >= size_) {
// return Status(eInvalidArgument);
// }
// auto task = [=] {
// auto data = OffsetPtr(memory_->handle, offset);
// std::memset(data, pattern, size);
// };
// if (!st) {
// task();
// return M_SUCCESS;
// }
// if (st.GetDevice() != Device()) {
// return Status(eInvalidArgument);
// }
// auto cpu_stream = static_cast<CpuStreamImpl*>(st.GetNative());
// if (!cpu_stream) {
// return Status(eInvalidArgument);
// }
// return cpu_stream->Enqueue(std::move(task));
// }
// Allocates `size` bytes of host memory with the requested alignment.
// A custom allocator is not supported on the CPU platform (asserted).
Result<void> CpuBufferImpl::Init(size_t size, Allocator allocator, size_t alignment,
                                 uint64_t flags) {
  assert(!allocator && "CPU device doesn't support allocators yet");
  memory_ = std::make_shared<CpuHostMemory>();
  OUTCOME_TRY(memory_->Init(size, alignment));
  size_ = size;
  return success();
}
// Wraps externally managed memory; ownership stays with `native`.
Result<void> CpuBufferImpl::Init(size_t size, std::shared_ptr<void> native, uint64_t flags) {
  auto memory = std::make_shared<CpuHostMemory>();
  OUTCOME_TRY(memory->Init(size, std::move(native)));
  memory_ = std::move(memory);
  size_ = size;
  return success();
}
// Creates a view into this buffer sharing the same backing memory.
// `offset` is relative to this buffer's own offset.
Result<BufferImplPtr> CpuBufferImpl::SubBuffer(size_t offset, size_t size, uint64_t flags) {
  // reject sub-buffers of an uninitialized buffer instead of dereferencing null
  if (!memory_) {
    return Status(eInvalidArgument);
  }
  if (offset_ + offset + size > memory_->size()) {
    return Status(eInvalidArgument);
  }
  auto impl = std::make_shared<CpuBufferImpl>(device_);
  impl->memory_ = memory_;  // shared backing storage keeps memory alive
  impl->offset_ = offset_ + offset;
  impl->size_ = size;
  return impl;
}
////////////////////////////////////////////////////////////////////////////////
/// CpuStreamImpl
CpuStreamImpl::CpuStreamImpl(Device device) : StreamImpl(device) {}
// Signals the worker thread to stop and joins it.
CpuStreamImpl::~CpuStreamImpl() {
  {
    std::lock_guard lock(mutex_);
    abort_ = true;
  }
  cv_.notify_one();
  // Init() may never have been called, in which case no worker thread exists;
  // joining a non-joinable std::thread throws std::system_error.
  if (thread_.joinable()) {
    thread_.join();
  }
}
// Starts the worker thread that drains the task queue; `flags` is unused.
Result<void> CpuStreamImpl::Init(uint64_t flags) {
  thread_ = std::thread(&CpuStreamImpl::InternalThreadEntry, this);
  return success();
}
// Wrapping a native stream handle is meaningless for the CPU backend.
Result<void> CpuStreamImpl::Init(std::shared_ptr<void> native, uint64_t flags) {
  return Status(eNotSupported);
}
// Appends a task to the queue and wakes the worker thread. Notifying after
// releasing the lock avoids waking the worker only to block on the mutex.
Result<void> CpuStreamImpl::Enqueue(Task task) {
  std::unique_lock lock{mutex_};
  task_queue_.push(std::move(task));
  lock.unlock();
  cv_.notify_one();
  return success();
}
// Makes work enqueued after this call wait until `event` is signalled.
// NOTE(review): the lambda captures `event` by reference; if the Event is
// destroyed before the queued task runs this dangles — confirm callers keep
// the event alive for the duration of the stream's work.
Result<void> CpuStreamImpl::DependsOn(Event& event) {
  return Enqueue([&] { event.Wait().value(); });
}
// Non-blocking idle check: succeeds only when no tasks remain queued.
Result<void> CpuStreamImpl::Query() {
  std::lock_guard lock(mutex_);
  if (!task_queue_.empty()) {
    return Status(eFail);
  }
  return success();
}
// Blocks until the worker has drained the queue (or the stream is shutting
// down). The trailing notify forwards the wake-up to any other waiter since
// the worker only notifies once per completed task.
Result<void> CpuStreamImpl::Wait() {
  {
    std::unique_lock lock(mutex_);
    cv_.wait(lock, [this] { return task_queue_.empty() || abort_; });
  }
  cv_.notify_one();
  return success();
}
// Queues a CPU kernel (a plain std::function wrapped by CpuKernelImpl).
// The kernel must have been created for this stream's device.
Result<void> CpuStreamImpl::Submit(Kernel& kernel) {
  if (kernel.GetDevice() != GetDevice()) {
    return Status(eInvalidArgument);
  }
  if (auto* task = static_cast<Task*>(kernel.GetNative())) {
    return Enqueue(*task);
  }
  return Status(eInvalidArgument);
}
// The "native handle" of a CPU stream is the implementation object itself.
void* CpuStreamImpl::GetNative(ErrorCode* ec) {
  if (ec) *ec = ErrorCode::eSuccess;
  return this;
}
// Worker loop: executes queued tasks one at a time, outside the lock.
void CpuStreamImpl::InternalThreadEntry() {
  while (true) {
    Task task;
    {
      std::unique_lock lock(mutex_);
      cv_.wait(lock, [this] { return !task_queue_.empty() || abort_; });
      if (abort_) {
        break;
      }
      // The task is only *moved out* here and popped after it has run, so
      // Query()/Wait() never observe an empty queue while a task is still
      // executing.
      task = std::move(task_queue_.front());
    }
    if (task) {
      task();
    }
    {
      std::lock_guard lock(mutex_);
      task_queue_.pop();
    }
    // wake threads blocked in Wait()
    cv_.notify_one();
  }
}
////////////////////////////////////////////////////////////////////////////////
/// CpuEventImpl
CpuEventImpl::CpuEventImpl(Device device) : EventImpl(device) {}
// Arms a fresh promise/future pair; `flags` is unused.
Result<void> CpuEventImpl::Init(uint64_t flags) {
  Reset();
  return success();
};
// There is no native event handle on the CPU backend.
Result<void> CpuEventImpl::Init(std::shared_ptr<void> native, uint64_t flags) {
  return Status(eNotSupported);
};
// Non-blocking readiness check of the recorded completion.
Result<void> CpuEventImpl::Query() {
  const bool ready =
      future_.wait_for(std::chrono::microseconds::zero()) == std::future_status::ready;
  if (!ready) {
    return Status(eNotReady);
  }
  return success();
}
// Re-arms the event and queues a task that fulfils it, i.e. the event becomes
// ready once all work previously enqueued on `stream` has finished.
Result<void> CpuEventImpl::Record(Stream& stream) {
  if (stream.GetDevice() != device_) {
    return Status(eInvalidArgument);
  }
  auto cpu_stream = static_cast<CpuStreamImpl*>(stream.GetNative());
  if (!cpu_stream) return Status(eInvalidArgument);
  Reset();
  return cpu_stream->Enqueue([this] { promise_.set_value(); });
}
// Blocks until the event is signalled (promise fulfilled by the stream task).
Result<void> CpuEventImpl::Wait() {
  future_.wait();
  return success();
};
// Discards any previous state and arms a fresh promise/future pair.
void CpuEventImpl::Reset() {
  promise_ = std::promise<void>();
  future_ = promise_.get_future();
}
// The "native handle" of a CPU event is the implementation object itself.
void* CpuEventImpl::GetNative(ErrorCode* ec) {
  if (ec) *ec = ErrorCode::eSuccess;
  return this;
}
////////////////////////////////////////////////////////////////////////////////
// Wraps an arbitrary callable as a Kernel bound to CPU device 0.
Kernel CreateCpuKernel(std::function<void()> task) {
  return Kernel(std::make_shared<CpuKernelImpl>(gCpuPlatform().GetDevice(0), std::move(task)));
}
////////////////////////////////////////////////////////////////////////////////
/// CpuPlatformRegisterer
// Singleton accessor for the CPU platform implementation.
CpuPlatformImpl& gCpuPlatform() {
  static Platform platform("cpu");
  return Access::get<CpuPlatformImpl>(platform);
}
// Registers the CPU platform with the global registry at static-init time.
class CpuPlatformRegisterer {
 public:
  CpuPlatformRegisterer() {
    gPlatformRegistry().Register([] { return std::make_shared<CpuPlatformImpl>(); });
  }
};
CpuPlatformRegisterer g_cpu_platform_registerer;
} // namespace mmdeploy::framework
// Copyright (c) OpenMMLab. All rights reserved.
#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include "mmdeploy/core/device_impl.h"
#include "mmdeploy/core/types.h"
namespace mmdeploy::framework {
// CPU implementation of the platform interface. Platform id is fixed to 0;
// copies are plain memcpy, optionally ordered via a CpuStreamImpl worker.
class CpuPlatformImpl : public PlatformImpl {
 public:
  int GetPlatformId() const noexcept override;
  const char* GetPlatformName() const noexcept override;
  Result<void> BindDevice(Device device, Device* prev) override;
  shared_ptr<BufferImpl> CreateBuffer(Device device) override;
  shared_ptr<StreamImpl> CreateStream(Device device) override;
  shared_ptr<EventImpl> CreateEvent(Device device) override;
  Result<void> Copy(const void* host_ptr, Buffer dst, size_t size, size_t dst_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, Buffer dst, size_t size, size_t src_offset, size_t dst_offset,
                    Stream stream) override;
  Result<Stream> GetDefaultStream(int32_t device_id) override;
  // Convenience: a device handle on this platform.
  Device GetDevice(int device_id) const { return Device(GetPlatformId(), device_id); }

 private:
  // Validates that [offset, offset + copy_size) fits in both buffers.
  static bool CheckCopyParam(size_t src_size, size_t dst_size, size_t src_offset, size_t dst_offset,
                             size_t copy_size);
  // Shared memcpy implementation; synchronous when `st` is null.
  static Result<void> CopyImpl(const void* src, void* dst, size_t src_size, size_t dst_size,
                               size_t src_offset, size_t dst_offset, size_t size, Stream st);
  Stream default_stream_;     // lazily created by GetDefaultStream
  std::once_flag init_flag_;  // guards default_stream_ creation
};
CpuPlatformImpl& gCpuPlatform();
class CpuHostMemory;
// CPU buffer: a (possibly offset) view into a shared CpuHostMemory block.
class CpuBufferImpl : public BufferImpl {
 public:
  explicit CpuBufferImpl(Device device);
  Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) override;
  Result<void> Init(size_t size, std::shared_ptr<void> native, uint64_t flags) override;
  Result<BufferImplPtr> SubBuffer(size_t offset, size_t size, uint64_t flags) override;
  void* GetNative(ErrorCode* ec) override;
  Allocator GetAllocator() const override;
  size_t GetSize(ErrorCode* ec) override;

 private:
  std::shared_ptr<CpuHostMemory> memory_;  // shared with sub-buffers
  size_t offset_{0};                       // byte offset into memory_
  size_t size_{0};                         // logical size of this view
};
// CPU stream: a FIFO of tasks executed by a dedicated worker thread.
class CpuStreamImpl : public StreamImpl {
 public:
  using Task = std::function<void()>;
  explicit CpuStreamImpl(Device device);
  ~CpuStreamImpl() override;
  // Spawns the worker thread; must be called before use.
  Result<void> Init(uint64_t flags) override;
  // Wrapping a native handle is not supported for CPU streams.
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;
  Result<void> Enqueue(Task task);
  Result<void> DependsOn(Event& event) override;
  Result<void> Query() override;
  Result<void> Wait() override;
  Result<void> Submit(Kernel& kernel) override;
  // Returns `this` (there is no OS-level stream handle).
  void* GetNative(ErrorCode* ec) override;

 private:
  void InternalThreadEntry();
  std::mutex mutex_;             // guards task_queue_ and abort_
  std::condition_variable cv_;
  std::queue<Task> task_queue_;
  std::thread thread_;
  // NOTE(review): this member is never referenced in cpu_device.cpp and may
  // shadow state held by the StreamImpl base — confirm before removing.
  Device device_;
  bool abort_{false};            // set by the destructor to stop the worker
};
// CPU event built on a promise/future pair; Record() re-arms the pair and
// fulfils it from a task queued on the recording stream.
class CpuEventImpl : public EventImpl {
 public:
  explicit CpuEventImpl(Device device);
  ~CpuEventImpl() override = default;
  Result<void> Init(uint64_t flags) override;
  // Native event handles are not supported on the CPU backend.
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;
  Result<void> Query() override;
  Result<void> Record(Stream& stream) override;
  Result<void> Wait() override;
  // Returns `this` (there is no OS-level event handle).
  void* GetNative(ErrorCode* ec) override;

 private:
  // Arms a fresh promise/future pair.
  void Reset();
  std::shared_future<void> future_;
  std::promise<void> promise_;
};
// CPU kernel: simply wraps a callable; CpuStreamImpl::Submit retrieves it
// through GetNative() and enqueues it.
class CpuKernelImpl : public KernelImpl {
 public:
  using Task = CpuStreamImpl::Task;
  explicit CpuKernelImpl(Device device, Task task) : KernelImpl(device), task_(std::move(task)) {}
  // Returns a pointer to the stored Task.
  void* GetNative(ErrorCode* ec) override {
    if (ec) *ec = ErrorCode::eSuccess;
    return &task_;
  }

 private:
  Task task_;
};
} // namespace mmdeploy::framework
# Copyright (c) OpenMMLab. All rights reserved.
# Build target for the CUDA device backend module.
project(mmdeploy_cuda_device)
set(SRCS cuda_device.cpp)
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
# CUDA runtime headers and driver API (cuCtxGetCurrent in cuda_device.cpp).
target_include_directories(${PROJECT_NAME} PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE ${CUDA_LIBRARIES} cuda)
add_library(mmdeploy::device::cuda ALIAS ${PROJECT_NAME})
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_DEVICE_CUDA_BUDDY_ALLOCATOR_H_
#define MMDEPLOY_SRC_DEVICE_CUDA_BUDDY_ALLOCATOR_H_
#include <cuda_runtime.h>
#include <atomic>
#include <chrono>
#include <list>
#include <mutex>
#include <vector>
#include "mmdeploy/core/logger.h"
#include "mmdeploy/device/cuda/default_allocator.h"
namespace mmdeploy::cuda {
class BuddyAllocator {
public:
using size_type = std::size_t;
BuddyAllocator(size_type size, size_type block_size) {
block_size_ = block_size;
block_count_ = size / block_size_;
if (!IsPowerOfTwo(block_count_)) {
block_count_ = RoundToPowerOfTwo(block_count_);
MMDEPLOY_WARN("Rounding up block_count to next power of 2 {}", block_count_);
}
base_ = LogPowerOfTwo(block_count_);
size_ = block_size_ * block_count_;
memory_ = gDefaultAllocator().Allocate(size_);
tree_.resize(block_count_ * 2);
free_.resize(base_ + 1);
Build(1, 0);
Add(1, 0);
MMDEPLOY_ERROR("size = {}, block_size = {}, block_count = {}", size_, block_size_,
block_count_);
size = size_;
for (int i = 0; i <= base_; ++i) {
MMDEPLOY_ERROR("level {}, size = {}", i, size);
size /= 2;
}
}
~BuddyAllocator() {
for (int i = 0; i < free_.size(); ++i) {
MMDEPLOY_ERROR("free_[{}].size(): {}", i, free_[i].size());
}
gDefaultAllocator().Deallocate(memory_, size_);
}
[[nodiscard]] void* Allocate(size_type n) {
std::lock_guard lock{mutex_};
if (n > size_) {
return nullptr;
}
auto n_level = GetLevel(n);
auto level = n_level;
for (; level >= 0; --level) {
if (!free_[level].empty()) {
break;
}
}
if (level < 0) {
MMDEPLOY_WARN("failed to allocate memory size = {} bytes", n);
return nullptr;
}
for (; level < n_level; ++level) {
auto index = free_[level].front();
Split(index, level);
}
auto index = free_[level].front();
Del(index, level);
auto offset = (index ^ (1 << level)) << (base_ - level);
auto p = static_cast<uint8_t*>(memory_) + offset * block_size_;
return p;
}
void Deallocate(void* p, size_type n) {
std::lock_guard lock{mutex_};
auto offset = static_cast<uint8_t*>(p) - static_cast<uint8_t*>(memory_);
if (offset < 0 || offset % block_size_) {
MMDEPLOY_ERROR("invalid address: {}", p);
}
offset /= static_cast<long>(block_size_);
auto level = GetLevel(n);
auto index = (offset >> (base_ - level)) ^ (1 << level);
Add(index, level);
while (index > 1) {
auto buddy = index ^ 1;
if (tree_[buddy] != free_[level].end()) {
Merge(index, level);
index /= 2;
--level;
} else {
break;
}
}
}
private:
void Add(size_type index, size_type level) {
assert(tree_[index] == free_[level].end());
tree_[index] = free_[level].insert(free_[level].end(), index);
}
void Del(size_type index, size_type level) {
assert(tree_[index] != free_[level].end());
free_[level].erase(tree_[index]);
tree_[index] = free_[level].end();
}
void Split(size_type index, size_type level) {
Del(index, level);
Add(index * 2, level + 1);
Add(index * 2 + 1, level + 1);
}
void Merge(size_type index, size_type level) {
Del(index, level);
Del(index ^ 1, level);
Add(index / 2, level - 1);
}
size_type GetLevel(size_type size) const {
size = RoundToPowerOfTwo((size + block_size_ - 1) / block_size_);
return base_ - LogPowerOfTwo(size);
}
static bool IsPowerOfTwo(size_type n) { return (n & (n - 1)) == 0; }
static size_type RoundToPowerOfTwo(size_type n) {
--n;
n |= (n >> 1);
n |= (n >> 2);
n |= (n >> 4);
n |= (n >> 8);
n |= (n >> 16);
n |= (n >> 32);
return ++n;
}
static size_type LogPowerOfTwo(size_type v) {
size_type r{};
r |= ((v & 0xFFFFFFFF00000000) != 0) << 5;
r |= ((v & 0xFFFF0000FFFF0000) != 0) << 4;
r |= ((v & 0xFF00FF00FF00FF00) != 0) << 3;
r |= ((v & 0xF0F0F0F0F0F0F0F0) != 0) << 2;
r |= ((v & 0xCCCCCCCCCCCCCCCC) != 0) << 1;
r |= ((v & 0xAAAAAAAAAAAAAAAA) != 0);
return r;
}
void Build(size_type index, size_type level) {
if (index < tree_.size()) {
tree_[index] = free_[level].end();
index *= 2;
++level;
Build(index, level);
Build(index + 1, level);
}
}
private:
size_type size_;
size_type block_size_;
size_type block_count_;
size_type base_;
void* memory_;
std::vector<std::list<size_type>::iterator> tree_;
std::vector<std::list<size_type> > free_;
std::mutex mutex_;
};
// Process-wide buddy allocator: 1 GiB slab split into 64 KiB blocks.
inline BuddyAllocator& gBuddyAllocator() {
  static BuddyAllocator v(1U << 30, 1024 * 64);
  return v;
}
} // namespace mmdeploy::cuda
#endif // MMDEPLOY_SRC_DEVICE_CUDA_BUDDY_ALLOCATOR_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "cuda_device.h"
#include <cuda.h>
#include "mmdeploy/device/device_allocator.h"
namespace mmdeploy::framework {
// Advances `ptr` by `offset` bytes.
inline void* OffsetPtr(void* ptr, size_t offset) {
  auto* bytes = static_cast<uint8_t*>(ptr);
  return bytes + offset;
}
// Const overload: advances `ptr` by `offset` bytes.
inline const void* OffsetPtr(const void* ptr, size_t offset) {
  auto* bytes = static_cast<const uint8_t*>(ptr);
  return bytes + offset;
}
// Translates the framework's MemcpyKind into the CUDA runtime enum;
// unknown values fall back to cudaMemcpyDefault (direction inferred by UVA).
cudaMemcpyKind MapMemcpyKindToCuda(MemcpyKind kind) {
  switch (kind) {
    case MemcpyKind::HtoD:
      return cudaMemcpyHostToDevice;
    case MemcpyKind::DtoH:
      return cudaMemcpyDeviceToHost;
    case MemcpyKind::DtoD:
      return cudaMemcpyDeviceToDevice;
    default:
      return cudaMemcpyDefault;
  }
}
namespace cuda {
// Thin cudaMalloc/cudaFree allocator; used as the base of the default
// allocator chain.
class Mallocator : public AllocatorImpl {
 public:
  Block Allocate(size_t size) noexcept override {
    if (size == 0) {
      return Block{};
    }
    // `Block block;` left `handle` uninitialized and `size` was set even when
    // cudaMalloc failed, so callers could observe a garbage non-null handle.
    // Return an empty block (null handle, zero size) on failure instead.
    Block block{};
    if (auto status = cudaMalloc(&block.handle, size); status != cudaSuccess) {
      return Block{};
    }
    block.size = size;
    return block;
  }
  void Deallocate(Block& block) noexcept override {
    if (!block.handle) {
      return;
    }
    cudaFree(block.handle);
  }
  // The raw cudaMalloc allocator can free any device pointer.
  bool Owns(const Block& block) const noexcept override { return true; }
};
// Builds the default CUDA allocator stack: raw cudaMalloc, wrapped in a
// caching tree allocator, wrapped in a mutex for thread safety.
Allocator CreateDefaultAllocator() {
  using namespace device_allocator;
  AllocatorImplPtr allocator = std::make_shared<Mallocator>();
  allocator = std::make_shared<Tree>(allocator, -1, .5);
  allocator = std::make_shared<Locked>(allocator);
  MMDEPLOY_DEBUG("Default CUDA allocator initialized");
  return Access::create<Allocator>(allocator);
}
} // namespace cuda
// ! this class doesn't handle device id
// ! this class doesn't handle device id
// Owns (or wraps) a chunk of CUDA device memory backing CudaBufferImpl.
class CudaDeviceMemory : public NonCopyable {
 public:
  explicit CudaDeviceMemory(int device_id) : device_id_(device_id), size_(), owned_block_() {}
  // Allocates `size` bytes via `allocator` on device `device_id_`.
  // cudaMalloc guarantees 256-byte alignment, so only alignments dividing 256
  // can be honored.
  Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) {
    if (alignment > 256 || 256 % alignment != 0) {
      return Status(eNotSupported);
    }
    allocator_ = std::move(allocator);
    CudaDeviceGuard guard(device_id_);
    block_ = Access::get<AllocatorImpl>(allocator_).Allocate(size);
    // a null handle is acceptable only for zero-sized allocations
    if (size && !block_.handle) {
      return Status(eOutOfMemory);
    }
    size_ = size;
    owned_block_ = true;
    return success();
  }
  // Wraps externally managed device memory; ownership stays with `data`.
  Result<void> Init(size_t size, std::shared_ptr<void> data, uint64_t flags) {
    size_ = size;
    external_ = std::move(data);
    block_.handle = external_.get();
    block_.size = size;
    owned_block_ = false;
    return success();
  }
  ~CudaDeviceMemory() {
    if (block_.handle) {
      if (owned_block_) {
        // deallocation must happen on the owning device
        CudaDeviceGuard guard(device_id_);
        Access::get<AllocatorImpl>(allocator_).Deallocate(block_);
        owned_block_ = false;
      }
      block_.handle = nullptr;
    }
    external_.reset();
    size_ = 0;
  }
  size_t size() const { return size_; }
  void* data() const { return block_.handle; }
  const Allocator& allocator() const { return allocator_; }

 private:
  int device_id_;
  size_t size_;
  AllocatorImpl::Block block_;
  bool owned_block_;  // true when `allocator_` must free `block_`
  Allocator allocator_;
  std::shared_ptr<void> external_;  // keeps wrapped memory alive
};
// Factory for CUDA buffer implementations.
shared_ptr<BufferImpl> CudaPlatformImpl::CreateBuffer(Device device) {
  return std::make_shared<CudaBufferImpl>(device);
}
// Factory for CUDA stream implementations.
shared_ptr<StreamImpl> CudaPlatformImpl::CreateStream(Device device) {
  return std::make_shared<CudaStreamImpl>(device);
}
// Factory for CUDA event implementations.
shared_ptr<EventImpl> CudaPlatformImpl::CreateEvent(Device device) {
  return std::make_shared<CudaEventImpl>(device);
}
// Makes `device` current for the calling thread, optionally reporting the
// previously current device in `prev`.
Result<void> CudaPlatformImpl::BindDevice(Device device, Device* prev) {
  if (device.platform_id() != platform_id_) {
    return Status(eInvalidArgument);
  }
  // skip null device
  if (device.device_id() == -1) {
    return success();
  }
  int prev_device_id = -1;
  if (prev) {
    // Query the driver first: cudaGetDevice would itself initialize a CUDA
    // context; only report a real previous device when a context exists.
    CUcontext ctx{};
    cuCtxGetCurrent(&ctx);
    if (ctx) {
      cudaGetDevice(&prev_device_id);
      *prev = Device(platform_id_, prev_device_id);
    } else {
      // cuda is not initialized return a null device as previous
      *prev = Device(platform_id_, -1);
    }
  }
  if (device.device_id() != prev_device_id) {
    // NOTE(review): the cudaSetDevice return code is ignored — an invalid
    // device id fails silently here; confirm whether eFail should be returned.
    cudaSetDevice(device.device_id());
  }
  return success();
}
// A copy is valid when the stream runs on a (CUDA) device and each endpoint
// is either host memory or memory on that same device.
bool CudaPlatformImpl::CheckCopyDevice(const Device& src, const Device& dst, const Device& st) {
  return st.is_device() && (src.is_host() || src == st) && (dst.is_host() || dst == st);
}
// Host-to-device copy; the stream's device performs the transfer.
Result<void> CudaPlatformImpl::Copy(const void* host_ptr, Buffer dst, size_t size,
                                    size_t dst_offset, Stream stream) {
  if (!CheckCopyDevice(Device{0, 0}, dst.GetDevice(), stream.GetDevice())) {
    return Status(eInvalidArgument);
  }
  if (size == 0) {
    return success();  // nothing to transfer
  }
  auto* dst_ptr = dst.GetNative();
  if (dst_ptr == nullptr) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(stream.GetDevice(), host_ptr, dst_ptr, size, dst.GetSize(), 0, dst_offset, size,
                  stream);
}
// Device-to-host copy; the stream's device performs the transfer.
Result<void> CudaPlatformImpl::Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
                                    Stream stream) {
  if (!CheckCopyDevice(src.GetDevice(), Device{0, 0}, stream.GetDevice())) {
    return Status(eInvalidArgument);
  }
  if (size == 0) {
    return success();  // nothing to transfer
  }
  auto* src_ptr = src.GetNative();
  if (src_ptr == nullptr) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(stream.GetDevice(), src_ptr, host_ptr, src.GetSize(), size, src_offset, 0, size,
                  stream);
}
// Buffer-to-buffer copy (device-to-device or mixed with host memory).
Result<void> CudaPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t src_offset,
                                    size_t dst_offset, Stream stream) {
  if (!CheckCopyDevice(src.GetDevice(), dst.GetDevice(), stream.GetDevice())) {
    return Status(eInvalidArgument);
  }
  if (size == 0) {
    return success();  // nothing to transfer
  }
  auto* src_ptr = src.GetNative();
  auto* dst_ptr = dst.GetNative();
  if (src_ptr == nullptr || dst_ptr == nullptr) {
    return Status(eInvalidArgument);
  }
  return CopyImpl(stream.GetDevice(), src_ptr, dst_ptr, src.GetSize(), dst.GetSize(), src_offset,
                  dst_offset, size, stream);
}
// Validates that the copy range fits in both buffers. Rearranged so
// `offset + copy_size` cannot wrap around size_t and falsely pass.
bool CudaPlatformImpl::CheckCopyParam(size_t src_size, size_t dst_size, size_t src_offset,
                                      size_t dst_offset, size_t copy_size) {
  if (copy_size > src_size || src_offset > src_size - copy_size) {
    return false;
  }
  if (copy_size > dst_size || dst_offset > dst_size - copy_size) {
    return false;
  }
  return true;
}
// Shared implementation for all Copy overloads. Uses cudaMemcpyAsync when a
// stream is supplied, otherwise a synchronous cudaMemcpy; cudaMemcpyDefault
// lets the runtime infer the transfer direction from the pointers (UVA).
Result<void> CudaPlatformImpl::CopyImpl(Device device, const void* src, void* dst, size_t src_size,
                                        size_t dst_size, size_t src_offset, size_t dst_offset,
                                        size_t size, Stream st) {
  if (!CheckCopyParam(src_size, dst_size, src_offset, dst_offset, size)) {
    return Status(eInvalidArgument);
  }
  auto p_dst = OffsetPtr(dst, dst_offset);
  auto p_src = OffsetPtr(src, src_offset);
  // the copy must be issued with the stream's device current
  CudaDeviceGuard guard(device);
  if (st) {
    auto cuda_stream = ::mmdeploy::framework::GetNative<cudaStream_t>(st);
    // TODO: how about default stream cudaStream_t(0)?
    if (!cuda_stream) {
      return Status(eInvalidArgument);
    }
    auto err = cudaMemcpyAsync(p_dst, p_src, size, cudaMemcpyDefault, cuda_stream);
    if (err != cudaSuccess) {
      return Status(eFail);
    }
  } else {
    auto err = cudaMemcpy(p_dst, p_src, size, cudaMemcpyDefault);
    if (err != cudaSuccess) {
      return Status(eFail);
    }
  }
  return success();
}
// Returns the lazily-initialized default stream for `device_id`.
Result<Stream> CudaPlatformImpl::GetDefaultStream(int32_t device_id) {
  // Reject negative ids explicitly; the previous bare `>=` compare relied on
  // the signed id being promoted to an unsigned type to catch them.
  if (device_id < 0 || static_cast<size_t>(device_id) >= per_device_data_.size()) {
    return Status(eInvalidArgument);
  }
  return per_device_data_[device_id]->default_stream();
}
// One-time lazy initialization of the per-device default stream/allocator,
// performed with the device made current.
void CudaPlatformImpl::PerDeviceData::init() {
  std::call_once(init_flag_, [&] {
    CudaDeviceGuard guard(device_id_);
    default_stream_ = Stream(gCudaPlatform().GetDevice(device_id_));
    default_allocator_ = cuda::CreateDefaultAllocator();
  });
}
// Enumerates the visible CUDA devices and prepares (empty) per-device slots;
// streams/allocators are created lazily by PerDeviceData::init().
CudaPlatformImpl::CudaPlatformImpl() {
  int count{};
  if (auto err = cudaGetDeviceCount(&count); err != cudaSuccess) {
    MMDEPLOY_ERROR("error getting device count: {}", cudaGetErrorString(err));
    throw_exception(eFail);
  }
  per_device_data_storage_.reserve(count);
  per_device_data_.reserve(count);
  for (int device_id = 0; device_id < count; ++device_id) {
    per_device_data_storage_.push_back(std::make_unique<PerDeviceData>(device_id));
    per_device_data_.push_back(per_device_data_storage_.back().get());
  }
}
// Default caching allocator for `device_id`.
// NOTE(review): unlike GetDefaultStream there is no bounds check here; an
// out-of-range id is undefined behavior — confirm callers validate the id.
Allocator CudaPlatformImpl::GetDefaultAllocator(int32_t device_id) {
  return per_device_data_[device_id]->default_allocator();
}
////////////////////////////////////////////////////////////////////////////////
/// CudaStreamImpl
CudaStreamImpl::CudaStreamImpl(Device device) : StreamImpl(device), stream_(), owned_stream_() {}
// Destroys the stream only when this object created it; wrapped external
// streams are merely released.
CudaStreamImpl::~CudaStreamImpl() {
  CudaDeviceGuard guard(device_.device_id());
  if (owned_stream_) {
    if (auto status = cudaStreamDestroy(stream_); status != cudaSuccess) {
      // TODO: signal error
    }
    owned_stream_ = false;
  }
  external_.reset();
}
// Creates an owned, non-blocking stream on this object's device.
// Note: `flags` is currently ignored; cudaStreamNonBlocking is always used.
Result<void> CudaStreamImpl::Init(uint64_t flags) {
  CudaDeviceGuard guard(device_);
  if (auto status = cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking);
      status != cudaSuccess) {
    return Status(eFail);
  }
  owned_stream_ = true;
  return success();
}
// Wraps an externally created cudaStream_t without taking ownership.
Result<void> CudaStreamImpl::Init(std::shared_ptr<void> native, uint64_t flags) {
  // ! nullptr is valid for cudaStream_t
  external_ = std::move(native);
  stream_ = static_cast<cudaStream_t>(external_.get());
  owned_stream_ = false;
  return success();
}
// Makes future work on this stream wait for `event` (must be on same device).
// NOTE(review): the cudaStreamWaitEvent return code is ignored — confirm
// whether a failure here should surface as eFail.
Result<void> CudaStreamImpl::DependsOn(Event& event) {
  if (event.GetDevice() == device_) {
    CudaDeviceGuard guard(device_);
    auto native_event = ::mmdeploy::framework::GetNative<cudaEvent_t>(event);
    cudaStreamWaitEvent(stream_, native_event, 0);
    return success();
  }
  return Status(eInvalidArgument);
}
// Non-blocking check whether all work queued on the stream has completed.
Result<void> CudaStreamImpl::Query() {
  CudaDeviceGuard guard(device_);
  if (cudaStreamQuery(stream_) != cudaSuccess) {
    return Status(eFail);
  }
  return success();
}
// Blocks until all work queued on the stream has completed.
Result<void> CudaStreamImpl::Wait() {
  CudaDeviceGuard guard(device_);
  if (cudaStreamSynchronize(stream_) != cudaSuccess) {
    return Status(eFail);
  }
  return success();
}
// Launches a CUDA kernel (a std::function<void(cudaStream_t)> wrapped by
// CudaKernelImpl) on this stream.
Result<void> CudaStreamImpl::Submit(Kernel& kernel) {
  auto task = ::mmdeploy::framework::GetNative<CudaTask*>(kernel);
  if (task) {
    CudaDeviceGuard guard(device_);
    (*task)(stream_);
    return success();
  }
  return Status(eInvalidArgument);
}
// Exposes the raw cudaStream_t (may be null for the legacy default stream).
void* CudaStreamImpl::GetNative(ErrorCode* ec) {
  if (ec) *ec = ErrorCode::eSuccess;
  return stream_;
}
////////////////////////////////////////////////////////////////////////////////
/// CudaEventImpl
CudaEventImpl::CudaEventImpl(Device device) : EventImpl(device), event_(), owned_event_() {}
// Destroys the event only when this object created it; wrapped external
// events are merely released.
CudaEventImpl::~CudaEventImpl() {
  CudaDeviceGuard guard(device_.device_id());
  if (owned_event_) {
    if (auto status = cudaEventDestroy(event_); status != cudaSuccess) {
      // TODO: signal error
    }
    owned_event_ = false;
  }
  external_.reset();
}
// Creates an owned event on this object's device; `flags` is currently
// ignored (cudaEventCreateWithFlags is always called with 0).
Result<void> CudaEventImpl::Init(uint64_t flags) {
  CudaDeviceGuard guard(device_);
  if (auto status = cudaEventCreateWithFlags(&event_, 0); status != cudaSuccess) {
    return Status(eFail);
  }
  owned_event_ = true;
  return success();
}
// Wraps an externally created cudaEvent_t; unlike streams, a null event
// handle is meaningless and therefore rejected.
Result<void> CudaEventImpl::Init(std::shared_ptr<void> native, uint64_t flags) {
  if (native == nullptr) {
    return Status(eInvalidArgument);
  }
  external_ = std::move(native);
  event_ = static_cast<cudaEvent_t>(external_.get());
  owned_event_ = false;
  return success();
}
// Non-blocking readiness check of the recorded completion.
Result<void> CudaEventImpl::Query() {
  if (cudaEventQuery(event_) != cudaSuccess) {
    return Status(eFail);
  }
  return success();
}
// Records this event on `stream` (must be on the same device), marking the
// completion of all work queued on the stream so far.
// NOTE(review): the cudaEventRecord return code is ignored — confirm whether
// a failure here should surface as eFail.
Result<void> CudaEventImpl::Record(Stream& stream) {
  if (stream.GetDevice() != device_) {
    return Status(eInvalidArgument);
  }
  CudaDeviceGuard guard(device_);
  auto native_stream = ::mmdeploy::framework::GetNative<cudaStream_t>(stream);
  cudaEventRecord(event_, native_stream);
  return success();
}
// Blocks until the event has been signalled.
Result<void> CudaEventImpl::Wait() {
  CudaDeviceGuard guard(device_);
  if (cudaEventSynchronize(event_) != cudaSuccess) {
    return Status(eFail);
  }
  return success();
}
// Exposes the raw cudaEvent_t handle.
void* CudaEventImpl::GetNative(ErrorCode* ec) {
  if (ec) *ec = ErrorCode::eSuccess;
  return event_;
}
////////////////////////////////////////////////////////////////////////////////
/// CudaBufferImpl
CudaBufferImpl::CudaBufferImpl(Device device) : BufferImpl(device) {}
// Allocates `size` bytes of device memory; falls back to the platform's
// default caching allocator when none is supplied.
Result<void> CudaBufferImpl::Init(size_t size, Allocator allocator, size_t alignment,
                                  uint64_t flags) {
  memory_ = std::make_shared<CudaDeviceMemory>(device_.device_id());
  if (!allocator) {
    allocator = gCudaPlatform().GetDefaultAllocator(device_.device_id());
  }
  OUTCOME_TRY(memory_->Init(size, std::move(allocator), alignment, flags));
  size_ = size;
  return success();
}
// Wraps externally managed device memory; ownership stays with `native`.
Result<void> CudaBufferImpl::Init(size_t size, std::shared_ptr<void> native, uint64_t flags) {
  memory_ = std::make_shared<CudaDeviceMemory>(device_.device_id());
  OUTCOME_TRY(memory_->Init(size, std::move(native), flags));
  size_ = size;
  return success();
}
// Creates a view into this buffer sharing the same backing device memory.
// `offset` is relative to this buffer's own offset.
Result<BufferImplPtr> CudaBufferImpl::SubBuffer(size_t offset, size_t size, uint64_t flags) {
  // reject sub-buffers of an uninitialized buffer instead of dereferencing null
  if (!memory_) {
    return Status(eInvalidArgument);
  }
  if (offset_ + offset + size > memory_->size()) {
    return Status(eInvalidArgument);
  }
  auto impl = std::make_shared<CudaBufferImpl>(device_);
  impl->memory_ = memory_;  // shared backing storage keeps memory alive
  impl->offset_ = offset_ + offset;
  impl->size_ = size;
  return impl;
}
size_t CudaBufferImpl::GetSize(ErrorCode* ec) { return size_; }
// Returns a device pointer to the start of this (sub-)buffer, or nullptr when
// the buffer has not been initialized.
void* CudaBufferImpl::GetNative(ErrorCode* ec) {
  if (memory_) {
    if (ec) *ec = ErrorCode::eSuccess;
    return OffsetPtr(memory_->data(), offset_);
  }
  if (ec) *ec = eInvalidArgument;
  return nullptr;
}
Allocator CudaBufferImpl::GetAllocator() const { return memory_->allocator(); }
////////////////////////////////////////////////////////////////////////////////
/// CudaKernelImpl
// Returns a pointer to the stored CudaTask; CudaStreamImpl::Submit invokes it.
void* CudaKernelImpl::GetNative(ErrorCode* ec) {
  if (ec) *ec = ErrorCode::eSuccess;
  return &task_;
}
// Wraps a stream-parameterized callable as a kernel for `device`.
CudaKernelImpl::CudaKernelImpl(Device device, CudaTask task)
    : KernelImpl(device), task_(std::move(task)) {}
////////////////////////////////////////////////////////////////////////////////
/// CudaPlatformRegisterer
// Registers the CUDA platform with the global registry at static-init time.
class CudaPlatformRegisterer {
 public:
  CudaPlatformRegisterer() {
    gPlatformRegistry().Register([] { return std::make_shared<CudaPlatformImpl>(); });
  }
};
CudaPlatformRegisterer g_cuda_platform_registerer;
// Singleton accessor for the CUDA platform implementation.
CudaPlatformImpl& gCudaPlatform() {
  static Platform platform("cuda");
  return Access::get<CudaPlatformImpl>(platform);
}
} // namespace mmdeploy::framework
// Copyright (c) OpenMMLab. All rights reserved.
#include <any>
#include <mutex>
#include "cuda.h"
#include "cuda_runtime.h"
#include "mmdeploy/core/device_impl.h"
#include "mmdeploy/core/types.h"
namespace mmdeploy::framework {
using CudaTask = std::function<void(cudaStream_t)>;
// CUDA implementation of the platform interface. Keeps lazily-initialized
// per-device state (default stream + caching allocator).
class CudaPlatformImpl : public PlatformImpl {
 public:
  CudaPlatformImpl();
  ~CudaPlatformImpl() override {
    // The CUDA driver may have already shutdown before the platform dtor is called.
    // As a workaround, simply leak per device resources and let the driver handle it
    // FIXME: maybe a pair of global mmdeploy_init/deinit function would be a
    // better solution
    for (auto& data : per_device_data_storage_) {
      data.release();
    }
  }
  const char* GetPlatformName() const noexcept override { return "cuda"; }
  Result<void> BindDevice(Device device, Device* prev) override;
  shared_ptr<BufferImpl> CreateBuffer(Device device) override;
  shared_ptr<StreamImpl> CreateStream(Device device) override;
  shared_ptr<EventImpl> CreateEvent(Device device) override;
  Result<void> Copy(const void* host_ptr, Buffer dst, size_t size, size_t dst_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
                    Stream stream) override;
  Result<void> Copy(Buffer src, Buffer dst, size_t size, size_t src_offset, size_t dst_offset,
                    Stream stream) override;
  Result<Stream> GetDefaultStream(int32_t device_id) override;
  // Default caching allocator for the given device.
  Allocator GetDefaultAllocator(int32_t device_id);
  // Convenience: a device handle on this platform.
  Device GetDevice(int device_id) { return Device(platform_id_, device_id); }

 private:
  // Validates that the copy range fits in both buffers.
  static bool CheckCopyParam(size_t src_size, size_t dst_size, size_t src_offset, size_t dst_offset,
                             size_t copy_size);
  // Checks the src/dst/stream device combination is a legal transfer.
  static bool CheckCopyDevice(const Device& src, const Device& dst, const Device& st);
  // Shared memcpy implementation; async when `st` is non-null.
  static Result<void> CopyImpl(Device device, const void* src, void* dst, size_t src_size,
                               size_t dst_size, size_t src_offset, size_t dst_offset, size_t size,
                               Stream st);
  // Lazily-initialized per-device default stream and allocator.
  class PerDeviceData {
   public:
    explicit PerDeviceData(int device_id) : device_id_(device_id) {}
    // One-time initialization; safe to call repeatedly.
    void init();
    Stream& default_stream() {
      init();
      return default_stream_;
    }
    Allocator& default_allocator() {
      init();
      return default_allocator_;
    }

   private:
    int device_id_;
    std::once_flag init_flag_;
    Stream default_stream_;
    Allocator default_allocator_;
  };
  // storage owns the objects; the raw-pointer vector is what gets leaked in
  // the destructor workaround above
  std::vector<std::unique_ptr<PerDeviceData>> per_device_data_storage_;
  std::vector<PerDeviceData*> per_device_data_;
};
CudaPlatformImpl& gCudaPlatform();
class CudaDeviceMemory;
CudaPlatformImpl& gCudaPlatform();
class CudaDeviceMemory;
// Device-memory buffer. Storage is held in a ref-counted CudaDeviceMemory so
// sub-buffers can share the same allocation at different offsets.
class CudaBufferImpl : public BufferImpl {
 public:
  explicit CudaBufferImpl(Device device);
  // Allocate `size` bytes via `allocator` with the requested alignment.
  Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) override;
  // Adopt externally owned native memory instead of allocating.
  Result<void> Init(size_t size, std::shared_ptr<void> native, uint64_t flags) override;
  Result<BufferImplPtr> SubBuffer(size_t offset, size_t size, uint64_t flags) override;
  void* GetNative(ErrorCode* ec) override;
  Allocator GetAllocator() const override;
  size_t GetSize(ErrorCode* ec) override;

 private:
  std::shared_ptr<CudaDeviceMemory> memory_;  // shared with sub-buffers
  size_t offset_{0};                          // byte offset into memory_
  size_t size_{0};                            // visible size in bytes
};
// Stream backed by a cudaStream_t, either created by this object (owned) or
// adopted from an external native handle.
class CudaStreamImpl : public StreamImpl {
 public:
  explicit CudaStreamImpl(Device device);
  ~CudaStreamImpl() override;
  Result<void> Init(uint64_t flags) override;
  // Adopt an external cudaStream_t; `external_` keeps the handle alive.
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;
  Result<void> DependsOn(Event& event) override;
  Result<void> Query() override;
  Result<void> Wait() override;
  Result<void> Submit(Kernel& kernel) override;
  void* GetNative(ErrorCode* ec) override;

 private:
  cudaStream_t stream_;
  bool owned_stream_;               // true when stream_ was created here
  std::shared_ptr<void> external_;  // keeps an adopted native handle alive
};
// Event backed by a cudaEvent_t, either created by this object (owned) or
// adopted from an external native handle.
class CudaEventImpl : public EventImpl {
 public:
  explicit CudaEventImpl(Device device);
  ~CudaEventImpl() override;
  Result<void> Init(uint64_t flags) override;
  // Adopt an external cudaEvent_t; `external_` keeps the handle alive.
  Result<void> Init(std::shared_ptr<void> native, uint64_t flags) override;
  Result<void> Query() override;
  Result<void> Record(Stream& stream) override;
  Result<void> Wait() override;
  void* GetNative(ErrorCode* ec) override;

 private:
  cudaEvent_t event_;
  bool owned_event_;                // true when event_ was created here
  std::shared_ptr<void> external_;  // keeps an adopted native handle alive
};
// Kernel wrapper around a host callback (CudaTask is
// std::function<void(cudaStream_t)>) to be invoked with a stream handle.
class CudaKernelImpl : public KernelImpl {
 public:
  explicit CudaKernelImpl(Device device, CudaTask task);
  void* GetNative(ErrorCode* ec) override;

 private:
  CudaTask task_;  // the wrapped callback
};
// RAII guard that switches the calling thread to the given CUDA device and
// restores the previous device on destruction.
class CudaDeviceGuard {
 public:
  explicit CudaDeviceGuard(Device device) : CudaDeviceGuard(device.device_id()) {}
  explicit CudaDeviceGuard(int device_id) : device_id_(device_id), prev_device_id_(-1) {
    CUcontext ctx{};
    cuCtxGetCurrent(&ctx);
    // Only query the current device when a driver context already exists --
    // presumably to avoid cudaGetDevice creating a context as a side effect;
    // confirm against CUDA runtime semantics.
    if (ctx) {
      cudaGetDevice(&prev_device_id_);
    }
    if (prev_device_id_ != device_id_) {
      cudaSetDevice(device_id_);
    }
  }
  ~CudaDeviceGuard() {
    // prev_device_id_ stays -1 when no context existed on entry; nothing to
    // restore in that case.
    if (prev_device_id_ >= 0 && prev_device_id_ != device_id_) {
      cudaSetDevice(prev_device_id_);
    }
  }

 private:
  int device_id_;       // device selected for the guarded scope
  int prev_device_id_;  // device active before the guard, or -1
};
} // namespace mmdeploy::framework
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_
#define MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_
#include <cuda_runtime.h>
#include <atomic>
#include <chrono>
#include "mmdeploy/core/logger.h"
namespace mmdeploy::cuda {
// Thin CUDA device-memory allocator backed directly by cudaMalloc/cudaFree.
// Keeps running counters so aggregate usage can be reported at shutdown.
class DefaultAllocator {
 public:
  DefaultAllocator() = default;
  ~DefaultAllocator() {
    // NOTE(review): stats are logged at ERROR level, presumably so they show
    // regardless of the configured log level -- confirm this is intended.
    MMDEPLOY_ERROR("=== CUDA Default Allocator ===");
    MMDEPLOY_ERROR("  Allocation: count={}, size={}MB, time={}ms", alloc_count_,
                   alloc_size_ / (1024 * 1024.f), alloc_time_ / 1000000.f);
    MMDEPLOY_ERROR("Deallocation: count={}, size={}MB, time={}ms", dealloc_count_,
                   dealloc_size_ / (1024 * 1024.f), dealloc_time_ / 1000000.f);
  }
  // Allocates n bytes of device memory; returns nullptr on failure.
  [[nodiscard]] void* Allocate(std::size_t n) {
    void* p{};
    auto t0 = std::chrono::high_resolution_clock::now();
    auto ret = cudaMalloc(&p, n);
    auto t1 = std::chrono::high_resolution_clock::now();
    alloc_time_ += (int64_t)std::chrono::duration<double, std::nano>(t1 - t0).count();
    if (ret != cudaSuccess) {
      MMDEPLOY_ERROR("error allocating cuda memory: {}", cudaGetErrorString(ret));
      return nullptr;
    }
    alloc_count_ += 1;
    alloc_size_ += n;
    return p;
  }
  // Frees device memory; n is only used for the statistics.
  void Deallocate(void* p, std::size_t n) {
    auto t0 = std::chrono::high_resolution_clock::now();
    auto ret = cudaFree(p);
    auto t1 = std::chrono::high_resolution_clock::now();
    dealloc_time_ += (int64_t)std::chrono::duration<double, std::nano>(t1 - t0).count();
    if (ret != cudaSuccess) {
      MMDEPLOY_ERROR("error deallocating cuda memory: {}", cudaGetErrorString(ret));
      return;
    }
    dealloc_count_ += 1;
    dealloc_size_ += n;
  }

 private:
  // Value-initialize all counters: std::atomic's default constructor leaves
  // the contained value uninitialized prior to C++20, which would make the
  // statistics reported in the destructor garbage.
  std::atomic<std::size_t> alloc_count_{0};
  std::atomic<std::size_t> alloc_size_{0};
  std::atomic<std::size_t> alloc_time_{0};
  std::atomic<std::size_t> dealloc_count_{0};
  std::atomic<std::size_t> dealloc_size_{0};
  std::atomic<std::size_t> dealloc_time_{0};
};
// Process-wide DefaultAllocator instance, constructed on first use.
inline DefaultAllocator& gDefaultAllocator() {
  static DefaultAllocator instance;
  return instance;
}
} // namespace mmdeploy::cuda
#endif // MMDEPLOY_SRC_DEVICE_CUDA_DEFAULT_ALLOCATOR_H_
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_DEVICE_CUDA_LINEARALLOCATOR_H_
#define MMDEPLOY_SRC_DEVICE_CUDA_LINEARALLOCATOR_H_
#include <cassert>
#include <cstdint>
#include <memory>
#include <mutex>
#include <optional>

#include "default_allocator.h"
namespace mmdeploy::cuda {
class LinearAllocator {
public:
explicit LinearAllocator(std::size_t size) : size_(size) {
base_ = static_cast<uint8_t*>(gDefaultAllocator().Allocate(size));
ptr_ = base_;
}
~LinearAllocator() { gDefaultAllocator().Deallocate(base_, size_); }
[[nodiscard]] void* Allocate(std::size_t n) {
std::optional<std::lock_guard<std::mutex> > lock;
if (mutex_) {
lock.emplace(*mutex_);
}
++count_;
total_ += n;
auto ptr = static_cast<void*>(ptr_);
std::size_t space = base_ + size_ - ptr_;
if (std::align(16, n, ptr, space)) {
MMDEPLOY_ERROR("success n={}, total={}, count={}", n, total_, count_);
ptr_ = static_cast<uint8_t*>(ptr) + n;
return ptr;
}
MMDEPLOY_ERROR("fallback {}, total={}, count={}", n, total_, count_);
return gDefaultAllocator().Allocate(n);
}
void Deallocate(void* _p, std::size_t n) {
std::optional<std::lock_guard<std::mutex> > lock;
if (mutex_) {
lock.emplace(*mutex_);
}
auto p = static_cast<uint8_t*>(_p);
if (!(base_ <= p && p < ptr_)) {
gDefaultAllocator().Deallocate(_p, n);
}
total_ -= n;
--count_;
MMDEPLOY_ERROR("deallocate total={}, count={}", total_, count_);
if (total_ == 0) {
assert(count_ == 0);
ptr_ = base_;
}
}
private:
std::size_t size_;
uint8_t* base_;
uint8_t* ptr_;
std::size_t total_{};
std::size_t count_{};
std::optional<std::mutex> mutex_;
};
// Process-wide LinearAllocator backed by a single 1 GiB slab.
inline LinearAllocator& gLinearAllocator() {
  static LinearAllocator instance{1U << 30};
  return instance;
}
} // namespace mmdeploy::cuda
#endif // MMDEPLOY_SRC_DEVICE_CUDA_LINEARALLOCATOR_H_
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CORE_DEVICE_ALLOCATOR_H_
#define MMDEPLOY_SRC_CORE_DEVICE_ALLOCATOR_H_
#include <chrono>
#include <iostream>
#include <map>
#include <mutex>
#include <numeric>
#include <stack>
#include "mmdeploy/core/device_impl.h"
#include "mmdeploy/core/logger.h"
namespace mmdeploy::framework::device_allocator {
// Composes two allocators: requests go to `primary_` first and fall through
// to `fallback_` when the primary cannot satisfy them.
class Fallback : public AllocatorImpl {
 public:
  Fallback(AllocatorImplPtr primary, AllocatorImplPtr fallback)
      : primary_(std::move(primary)), fallback_(std::move(fallback)) {}

  Block Allocate(size_t size) noexcept override {
    auto block = primary_->Allocate(size);
    if (!block.handle) {
      block = fallback_->Allocate(size);
    }
    return block;
  }

  void Deallocate(Block& block) noexcept override {
    // Route the block back to whichever allocator owns it.
    (primary_->Owns(block) ? primary_ : fallback_)->Deallocate(block);
  }

  bool Owns(const Block& block) const noexcept override {
    return primary_->Owns(block) || fallback_->Owns(block);
  }

 private:
  AllocatorImplPtr primary_;
  AllocatorImplPtr fallback_;
};
// TODO: batch allocation
// Fixed-capacity free-list pool for blocks in [min_size_, max_size_]. Every
// physical allocation is max_size_ bytes, so any cached block can serve any
// in-range request.
class Pool : public AllocatorImpl {
 public:
  explicit Pool(AllocatorImplPtr allocator, size_t min_size, size_t max_size, unsigned pool_size)
      : allocator_(std::move(allocator)),
        min_size_(min_size),
        max_size_(max_size),
        pool_size_(pool_size) {
    free_.reserve(pool_size);
  }
  ~Pool() override {
    // Return every cached block to the upstream allocator.
    for (auto handle : free_) {
      Block block(handle, max_size_);
      allocator_->Deallocate(block);
    }
    free_.clear();
  }
  Block Allocate(size_t size) noexcept override {
    if (size < min_size_ || max_size_ < size) {
      return Block{};  // out of range: signal "not handled"
    }
    if (free_.empty()) {
      return allocator_->Allocate(max_size_);  // always full bucket size
    }
    Block block{free_.back(), max_size_};
    free_.pop_back();
    return block;
  }
  void Deallocate(Block& block) noexcept override {
    if (!Owns(block)) {
      return;
    }
    if (free_.size() >= pool_size_) {
      allocator_->Deallocate(block);  // pool full: really free it
      return;
    }
    // Cache the handle and neutralize the block.
    free_.push_back(block.handle);
    block.handle = nullptr;
    block.size = 0;
  }
  bool Owns(const Block& block) const noexcept override {
    return block.handle && min_size_ <= block.size && block.size <= max_size_;
  }

 private:
  AllocatorImplPtr allocator_;
  size_t min_size_;
  size_t max_size_;
  unsigned pool_size_;       // maximum number of cached blocks
  std::vector<void*> free_;  // cached handles, each max_size_ bytes
};
// Caches freed blocks in a size-ordered multimap and reuses the smallest
// cached block that is "close enough" to a request: a block of size S serves
// a request of size N when N * den >= S * num, where num/den ~ threshold.
// The cache holds fewer than `max_bytes` in total.
class Tree : public AllocatorImpl {
  // Resolution used to turn the float threshold into an integer ratio.
  static constexpr auto kQuantizer = 100;

 public:
  Tree(AllocatorImplPtr allocator, size_t max_bytes, float threshold)
      : allocator_(std::move(allocator)), max_tree_bytes_(max_bytes) {
    if (threshold) {
      // Store the threshold as a reduced integer fraction so the reuse test
      // in Allocate stays in integer arithmetic.
      thresh_numerator_ = static_cast<int>(threshold * kQuantizer);
      thresh_denominator_ = kQuantizer;
      auto divisor = std::gcd(thresh_numerator_, thresh_denominator_);
      thresh_numerator_ /= divisor;
      thresh_denominator_ /= divisor;
    }
    // NOTE(review): with threshold == 0 both ratio terms stay 0, the reuse
    // test degenerates to 0 >= 0 and ANY cached block >= the request is
    // reused -- confirm this is the intended meaning of threshold 0.
  }
  ~Tree() override {
    // Flush all cached blocks back to the upstream allocator.
    for (const auto& [size, handle] : tree_) {
      Block block(handle, size);
      allocator_->Deallocate(block);
    }
  }
  Block Allocate(size_t size) noexcept override {
    // Smallest cached block that can hold `size`.
    if (auto it = tree_.lower_bound(size); it != tree_.end()) {
      if (size * thresh_denominator_ >= it->first * thresh_numerator_) {
        Block block(it->second, it->first);
        tree_bytes_ -= it->first;
        tree_.erase(it);
        return block;
      }
    }
    return allocator_->Allocate(size);
  }
  void Deallocate(Block& block) noexcept override {
    auto bytes = tree_bytes_ + block.size;
    if (bytes < max_tree_bytes_) {
      // Cache instead of freeing; ownership moves into the tree and the
      // caller's block is neutralized.
      tree_.insert({block.size, block.handle});
      tree_bytes_ = bytes;
      block.size = 0;
      block.handle = nullptr;
    } else {
      allocator_->Deallocate(block);
    }
  }
  // Claims every block, so in a Fallback chain deallocations route here.
  bool Owns(const Block& block) const noexcept override { return true; }

 private:
  AllocatorImplPtr allocator_;
  // threshold ~ thresh_numerator_ / thresh_denominator_
  int thresh_numerator_{};
  int thresh_denominator_{};
  std::multimap<size_t, void*> tree_;  // size -> handle, ordered by size
  size_t max_tree_bytes_;              // cache capacity in bytes
  size_t tree_bytes_{};                // bytes currently cached
};
// Decorator recording allocation/deallocation counts, byte totals, peak
// outstanding bytes and cumulative wall-clock time; reports on destruction.
class Stats : public AllocatorImpl {
 public:
  explicit Stats(AllocatorImplPtr allocator, std::string name)
      : allocator_(std::move(allocator)), name_(std::move(name)) {}
  ~Stats() override {
    MMDEPLOY_INFO("=== {} ===", name_);
    MMDEPLOY_INFO("  Allocation: count={}, size={}MB, time={}ms", data_.allocation_count,
                  data_.allocated_bytes / (1024 * 1024.f),
                  static_cast<float>(data_.allocation_time));
    MMDEPLOY_INFO("Deallocation: count={}, size={}MB, time={}ms", data_.deallocation_count,
                  data_.deallocated_bytes / (1024 * 1024.f),
                  static_cast<float>(data_.deallocation_time));
    MMDEPLOY_INFO("Peak memory usage: size={}MB", data_.peak / (1024 * 1024.f));
  }
  Block Allocate(size_t size) noexcept override {
    const auto start = std::chrono::high_resolution_clock::now();
    auto block = allocator_->Allocate(size);
    const auto stop = std::chrono::high_resolution_clock::now();
    data_.allocation_time += std::chrono::duration<double, std::milli>(stop - start).count();
    ++data_.allocation_count;
    data_.allocated_bytes += block.size;
    // Peak is the high-water mark of outstanding (allocated - freed) bytes.
    data_.peak = std::max(data_.peak, data_.allocated_bytes - data_.deallocated_bytes);
    return block;
  }
  void Deallocate(Block& block) noexcept override {
    ++data_.deallocation_count;
    data_.deallocated_bytes += block.size;
    const auto start = std::chrono::high_resolution_clock::now();
    allocator_->Deallocate(block);
    const auto stop = std::chrono::high_resolution_clock::now();
    data_.deallocation_time += std::chrono::duration<double, std::milli>(stop - start).count();
  }
  bool Owns(const Block& block) const noexcept override { return allocator_->Owns(block); }
  const char* Name() const noexcept override { return name_.c_str(); }

 private:
  // Aggregated counters; times are in milliseconds.
  struct Data {
    size_t allocation_count{};
    size_t deallocation_count{};
    size_t allocated_bytes{};
    size_t deallocated_bytes{};
    size_t peak{};
    double allocation_time{};
    double deallocation_time{};
  };
  Data data_;
  AllocatorImplPtr allocator_;
  std::string name_;
};
class Locked : public AllocatorImpl {
public:
explicit Locked(AllocatorImplPtr allocator) : allocator_(std::move(allocator)) {}
Block Allocate(size_t size) noexcept override {
std::lock_guard lock(mutex_);
return allocator_->Allocate(size);
}
void Deallocate(Block& block) noexcept override {
std::lock_guard lock(mutex_);
allocator_->Deallocate(block);
}
bool Owns(const Block& block) const noexcept override {
std::lock_guard lock(mutex_);
return allocator_->Owns(block);
}
private:
AllocatorImplPtr allocator_;
mutable std::mutex mutex_;
};
class Segregator : public AllocatorImpl {
public:
Segregator(size_t threshold, AllocatorImplPtr small, AllocatorImplPtr large)
: threshold_(threshold), small_(std::move(small)), large_(std::move(large)) {}
Block Allocate(size_t size) noexcept override {
if (size <= threshold_) {
return small_->Allocate(size);
}
return large_->Allocate(size);
}
void Deallocate(Block& block) noexcept override {
if (block.size <= threshold_) {
return small_->Deallocate(block);
}
return large_->Deallocate(block);
}
bool Owns(const Block& block) const noexcept override {
if (block.size <= threshold_) {
return small_->Owns(block);
}
return large_->Owns(block);
}
private:
size_t threshold_;
AllocatorImplPtr small_;
AllocatorImplPtr large_;
};
// Adapts a value-type allocator exposing Allocate/Deallocate/Owns member
// functions to the AllocatorImpl interface.
template <typename Allocator>
class AllocatorAdapter : public AllocatorImpl {
 public:
  Block Allocate(size_t size) noexcept override { return allocator_.Allocate(size); }
  void Deallocate(Block& block) noexcept override { return allocator_.Deallocate(block); }
  bool Owns(const Block& block) const noexcept override { return allocator_.Owns(block); }

 private:
  Allocator allocator_;  // owned by value; Allocator must be default-constructible
};
// Partitions [min_size, max_size) into buckets of width step_size and
// delegates each request to the allocator created for its bucket.
class Bucketizer : public AllocatorImpl {
 public:
  // Creator receives the inclusive [lo, hi] size range of one bucket.
  using AllocatorCreator = std::function<AllocatorImplPtr(size_t, size_t)>;
  Bucketizer(const AllocatorCreator& creator, size_t min_size, size_t max_size, size_t step_size)
      : min_size_(min_size), max_size_(max_size), step_size_(step_size) {
    for (auto base = min_size_; base < max_size_; base += step_size_) {
      allocator_.push_back(creator(base, base + step_size_ - 1));
    }
  }
  Block Allocate(size_t size) noexcept override {
    // Guard against unsigned underflow of (size - min_size_); the previous
    // `0 <= index` check was a tautology for an unsigned index.
    if (size < min_size_) {
      return Block{};
    }
    auto index = (size - min_size_) / step_size_;
    if (index < allocator_.size()) {
      return allocator_[index]->Allocate(size);
    }
    return Block{};
  }
  void Deallocate(Block& block) noexcept override {
    // Same underflow guard as in Allocate; out-of-range blocks are ignored.
    if (block.size < min_size_) {
      return;
    }
    auto index = (block.size - min_size_) / step_size_;
    if (index < allocator_.size()) {
      allocator_[index]->Deallocate(block);
    }
  }
  bool Owns(const Block& block) const noexcept override {
    return min_size_ <= block.size && block.size < max_size_;
  }

 private:
  std::vector<AllocatorImplPtr> allocator_;  // one allocator per bucket
  size_t min_size_;
  size_t max_size_;
  size_t step_size_;
};
// Builds a Fallback chain: try `primary` first, then `fallback`.
inline AllocatorImplPtr CreateFallback(AllocatorImplPtr primary, AllocatorImplPtr fallback) {
  auto impl = std::make_shared<Fallback>(std::move(primary), std::move(fallback));
  return impl;
}
// Wraps `allocator` in a Stats decorator reporting under `name`.
inline AllocatorImplPtr CreateStats(const std::string& name, AllocatorImplPtr allocator) {
  auto impl = std::make_shared<Stats>(std::move(allocator), name);
  return impl;
}
// Creates a Pool caching up to `pool_size` blocks in [min_size, max_size].
inline AllocatorImplPtr CreatePool(size_t min_size, size_t max_size, unsigned int pool_size,
                                   AllocatorImplPtr allocator) {
  auto impl = std::make_shared<Pool>(std::move(allocator), min_size, max_size, pool_size);
  return impl;
}
// Creates a Segregator splitting requests at `threshold` bytes.
inline AllocatorImplPtr CreateSegregator(size_t threshold, AllocatorImplPtr small,
                                         AllocatorImplPtr large) {
  auto impl = std::make_shared<Segregator>(threshold, std::move(small), std::move(large));
  return impl;
}
// Creates a Bucketizer over [min_size, max_size) with buckets of `step_size`.
inline AllocatorImplPtr CreateBucketizer(size_t min_size, size_t max_size, size_t step_size,
                                         const Bucketizer::AllocatorCreator& creator) {
  auto impl = std::make_shared<Bucketizer>(creator, min_size, max_size, step_size);
  return impl;
}
// Convenience factory: a Bucketizer whose buckets are mutex-guarded Pools
// drawing from a shared upstream allocator.
inline AllocatorImplPtr CreatePoolBucketizer(size_t min_size, size_t max_size, size_t step_size,
                                             unsigned pool_size,
                                             const AllocatorImplPtr& allocator) {
  // The creator is invoked synchronously inside the Bucketizer constructor,
  // so capturing locals here is safe.
  auto creator = [pool_size, &allocator](size_t lo, size_t hi) {
    return std::make_shared<Locked>(CreatePool(lo, hi, pool_size, allocator));
  };
  return CreateBucketizer(min_size, max_size, step_size, creator);
}
} // namespace mmdeploy::framework::device_allocator
#endif // MMDEPLOY_SRC_CORE_DEVICE_ALLOCATOR_H_
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_execution)

# Build the execution module from the scheduler sources and expose it to
# other targets under the mmdeploy::execution alias.
set(SRCS schedulers/schedulers.cpp)
mmdeploy_add_module(${PROJECT_NAME} LIBRARY "${SRCS}")
add_library(mmdeploy::execution ALIAS ${PROJECT_NAME})
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/brycelelbach/wg21_p2300_std_execution/blob/main/include/execution.hpp
#ifndef MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_BULK_H_
#define MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_BULK_H_
#include "closure.h"
#include "concepts.h"
#include "mmdeploy/core/logger.h"
#include "utility.h"
namespace mmdeploy {
namespace __bulk {
// Operation state produced by connecting the fallback bulk sender.
template <typename CvrefSender, typename Shape, typename Func, typename Receiver>
struct _Operation {
  struct type;
};
template <typename CvrefSender, typename Shape, typename Func, typename Receiver>
using Operation = typename _Operation<CvrefSender, Shape, Func, remove_cvref_t<Receiver>>::type;

// Receiver wrapping the downstream receiver; runs func_ over [0, shape_).
template <typename Receiver, typename Shape, typename Func>
struct _Receiver {
  struct type;
};
template <typename Receiver, typename Shape, typename Func>
using receiver_t = typename _Receiver<Receiver, Shape, Func>::type;

template <typename Receiver, typename Shape, typename Func>
struct _Receiver<Receiver, Shape, Func>::type {
  Receiver receiver_;
  Shape shape_;
  Func func_;
  // On upstream completion: invoke func_(i, values...) serially for each i in
  // [0, shape_), then forward the values unchanged to the wrapped receiver.
  template <class... As>
  friend void tag_invoke(set_value_t, type&& self, As&&... as) noexcept {
    MMDEPLOY_DEBUG("fallback Bulk implementation");
    for (Shape i = 0; i < self.shape_; ++i) {
      self.func_(i, as...);
    }
    SetValue(std::move(self.receiver_), (As &&) as...);
  }
};
template <typename CvrefSender, typename Shape, typename Func, typename Receiver>
struct _Operation<CvrefSender, Shape, Func, Receiver>::type {
  // Operation state of the upstream sender connected to the bulk receiver.
  connect_result_t<CvrefSender, receiver_t<Receiver, Shape, Func>> op_state2_;
  friend void tag_invoke(start_t, type& self) { Start(self.op_state2_); }
};

// Sender returned by the fallback Bulk implementation.
template <typename Sender, typename Shape, typename Func>
struct _Sender {
  struct type;
};
template <typename Sender, typename Shape, typename Func>
using sender_t = typename _Sender<remove_cvref_t<Sender>, remove_cvref_t<Shape>, Func>::type;

template <typename Sender, typename Shape, typename Func>
struct _Sender<Sender, Shape, Func>::type {
  // Bulk forwards upstream values unchanged, so the completion signatures are
  // those of the wrapped sender.
  using value_types = completion_signatures_of_t<Sender>;
  template <typename Receiver>
  using _receiver_t = receiver_t<Receiver, Shape, Func>;
  Sender sender_;
  Shape shape_;
  Func func_;
  // Connect by wrapping the downstream receiver, preserving this sender's
  // value category via _copy_cvref_t.
  template <typename Self, typename Receiver, _decays_to<Self, type, int> = 0>
  friend auto tag_invoke(connect_t, Self&& self, Receiver&& receiver)
      -> Operation<_copy_cvref_t<Self, Sender>, Shape, Func, Receiver> {
    return {Connect(((Self &&) self).sender_,
                    _receiver_t<Receiver>{(Receiver &&) receiver, ((Self &&) self).shape_,
                                          ((Self &&) self).func_})};
  }
};
// Customization point object for Bulk. Dispatch preference:
//   1. tag_invoke keyed on the sender's completion scheduler
//   2. tag_invoke on the sender itself
//   3. the generic fallback sender above (serial loop over the shape)
// The former file-scope `using std::enable_if_t;` leaked a name into this
// header's namespace; uses are qualified instead.
struct bulk_t {
  template <typename Sender, typename Shape, typename Func,
            std::enable_if_t<
                _is_sender<Sender> &&
                    _tag_invocable_with_completion_scheduler<bulk_t, Sender, Shape, Func>,
                int> = 0>
  auto operator()(Sender&& sender, Shape&& shape, Func func) const {
    // Delegate to the scheduler-specific implementation.
    auto scheduler = GetCompletionScheduler(sender);
    return tag_invoke(bulk_t{}, std::move(scheduler), (Sender &&) sender, (Shape &&) shape,
                      (Func &&) func);
  }
  template <typename Sender, typename Shape, typename Func,
            std::enable_if_t<
                _is_sender<Sender> &&
                    !_tag_invocable_with_completion_scheduler<bulk_t, Sender, Shape, Func> &&
                    tag_invocable<bulk_t, Sender, Shape, Func>,
                int> = 0>
  auto operator()(Sender&& sender, Shape&& shape, Func func) const {
    return tag_invoke(bulk_t{}, (Sender &&) sender, (Shape &&) shape, (Func &&) func);
  }
  template <typename Sender, typename Shape, typename Func,
            std::enable_if_t<
                _is_sender<Sender> &&
                    !_tag_invocable_with_completion_scheduler<bulk_t, Sender, Shape, Func> &&
                    !tag_invocable<bulk_t, Sender, Shape, Func>,
                int> = 0>
  auto operator()(Sender&& sender, Shape&& shape, Func func) const
      -> sender_t<Sender, Shape, Func> {
    return {(Sender &&) sender, (Shape &&) shape, std::move(func)};
  }
  // Pipeable form: sender | Bulk(shape, func).
  template <typename Shape, typename Func>
  _BinderBack<bulk_t, Shape, Func> operator()(Shape shape, Func fun) const {
    return {{}, {}, {shape, std::move(fun)}};
  }
};
} // namespace __bulk
using __bulk::bulk_t;
inline constexpr bulk_t Bulk{};
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_BULK_H_
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/brycelelbach/wg21_p2300_std_execution/blob/main/include/execution.hpp
#include <utility>
#include "concepts.h"
#include "utility.h"
#ifndef MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CLOSURE_H_
#define MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CLOSURE_H_
namespace mmdeploy {
namespace __closure {
template <class D>
struct SenderAdaptorClosure;
} // namespace __closure
using __closure::SenderAdaptorClosure;
namespace __closure {
// Composition of two sender adaptor closures: (sender | compose) applies t0_
// first, then t1_.
template <typename T0, typename T1>
struct _Compose : SenderAdaptorClosure<_Compose<T0, T1>> {
  T0 t0_;
  T1 t1_;
  // Rvalue overload: moves the stored closures into the calls.
  template <typename Sender, std::enable_if_t<_is_sender<Sender>, int> = 0>
  std::invoke_result_t<T1, std::invoke_result_t<T0, Sender>> operator()(Sender&& sender) && {
    return ((T1 &&) t1_)(((T0 &&) t0_)((Sender &&) sender));
  }
  // Const-lvalue overload: invokes the stored closures as const lvalues.
  template <typename Sender, std::enable_if_t<_is_sender<Sender>, int> = 0>
  std::invoke_result_t<T1, std::invoke_result_t<T0, Sender>> operator()(Sender&& sender) const& {
    return t1_(t0_((Sender &&) sender));
  }
};
// CRTP opt-in tag: deriving from SenderAdaptorClosure<D> enables the
// operator| overloads below for D.
template <typename D>
struct SenderAdaptorClosure {};

// closure | closure -> composed closure (left applied first).
template <typename T0, typename T1,
          typename = std::enable_if_t<
              std::is_base_of_v<SenderAdaptorClosure<remove_cvref_t<T0>>, remove_cvref_t<T0>> &&
              std::is_base_of_v<SenderAdaptorClosure<remove_cvref_t<T1>>, remove_cvref_t<T1>>>>
_Compose<remove_cvref_t<T0>, remove_cvref_t<T1>> operator|(T0&& t0, T1&& t1) {
  return {(T0 &&) t0, (T1 &&) t1};
}

// sender | closure -> applies the closure to the sender.
template <typename Sender, typename Closure,
          typename = std::enable_if_t<
              _is_sender<Sender> && std::is_base_of_v<SenderAdaptorClosure<remove_cvref_t<Closure>>,
                                                      remove_cvref_t<Closure>>>>
std::invoke_result_t<Closure, Sender> operator|(Sender&& sender, Closure&& closure) {
  return ((Closure &&) closure)((Sender &&) sender);
}
// Partial application of func_ with trailing arguments as_; calling it with a
// sender yields func_(sender, as...). This is what makes `sender | X(args)`
// work for pipeable adaptors.
template <typename Func, typename... As>
struct _BinderBack : SenderAdaptorClosure<_BinderBack<Func, As...>> {
  Func func_;
  std::tuple<As...> as_;
  // Rvalue overload: moves the bound arguments out of the tuple.
  template <typename Sender, std::enable_if_t<_is_sender<Sender>, int> = 0>
  std::invoke_result_t<Func, Sender, As...> operator()(Sender&& sender) && {
    return std::apply(
        [&sender, this](As&... as) { return ((Func &&) func_)((Sender &&) sender, (As &&) as...); },
        as_);
  }
  // Const-lvalue overload: passes the bound arguments by const reference.
  template <typename Sender, std::enable_if_t<_is_sender<Sender>, int> = 0>
  std::invoke_result_t<Func, Sender, As...> operator()(Sender&& sender) const& {
    return std::apply([&sender, this](const As&... as) { return func_((Sender &&) sender, as...); },
                      as_);
  }
};
} // namespace __closure
using __closure::_BinderBack;
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CLOSURE_H_
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CONCEPTS_H_
#define MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CONCEPTS_H_
#include "tag_invoke.h"
namespace mmdeploy {
namespace _get_completion_signatures {
// Compile-time probe: returns identity<typename Sender::value_types>. Only
// the type matters; the returned object is empty.
struct get_completion_signatures_t {
  template <typename Sender, typename ValueTypes = typename remove_cvref_t<Sender>::value_types>
  constexpr identity<ValueTypes> operator()(Sender&& sender) const noexcept {
    return {};
  }
};
}  // namespace _get_completion_signatures
using _get_completion_signatures::get_completion_signatures_t;
inline constexpr get_completion_signatures_t GetCompletionSignatures{};

// A sender is anything exposing value_types that is move-constructible.
template <typename Sender>
inline constexpr bool _is_sender = std::is_invocable_v<get_completion_signatures_t, Sender>&&
    std::is_move_constructible_v<remove_cvref_t<Sender>>;

// GetCompletionSignatures is expected to return identity<std::tuple<Types...>>;
template <typename Sender>
using completion_signatures_of_t =
    typename std::invoke_result_t<get_completion_signatures_t, Sender>::type;
namespace _set_value {
// CPO delivering a value completion to a receiver; the receiver's tag_invoke
// overload is required (by the static_assert) to be noexcept.
struct set_value_t {
  template <typename Receiver, typename... Args,
            std::enable_if_t<is_tag_invocable_v<set_value_t, Receiver, Args...>, int> = 0>
  void operator()(Receiver&& receiver, Args&&... args) const noexcept {
    static_assert(is_nothrow_tag_invocable_v<set_value_t, Receiver, Args...>);
    (void)tag_invoke(set_value_t{}, (Receiver &&) receiver, (Args &&) args...);
  }
};
}  // namespace _set_value
using _set_value::set_value_t;
inline constexpr set_value_t SetValue{};
namespace _start {
// CPO that starts an operation state via tag_invoke(start_t, op_state).
struct start_t {
  template <typename Operation, std::enable_if_t<tag_invocable<start_t, Operation&>, int> = 0>
  void operator()(Operation& op_state) const
      noexcept(is_nothrow_tag_invocable_v<start_t, Operation&>) {
    (void)tag_invoke(start_t{}, op_state);
  }
};
}  // namespace _start
using _start::start_t;
inline constexpr start_t Start{};
namespace _connect {
// CPO connecting a sender to a receiver, yielding an operation state.
struct connect_t {
  template <typename Sender, typename Receiver,
            std::enable_if_t<is_tag_invocable_v<connect_t, Sender, Receiver>, int> = 0>
  auto operator()(Sender&& sender, Receiver&& receiver) const
      -> tag_invoke_result_t<connect_t, Sender, Receiver> {
    return tag_invoke(connect_t{}, (Sender &&) sender, (Receiver &&) receiver);
  }
};
}  // namespace _connect
using _connect::connect_t;
inline constexpr connect_t Connect{};
namespace _get_completion_scheduler {
// CPO returning the scheduler a sender completes on, for senders that
// advertise one via tag_invoke.
struct get_completion_scheduler_t {
  template <
      typename Sender,
      std::enable_if_t<is_tag_invocable_v<get_completion_scheduler_t, const Sender&>, int> = 0>
  auto operator()(const Sender& sender) const noexcept
      -> tag_invoke_result_t<get_completion_scheduler_t, const Sender&> {
    return tag_invoke(get_completion_scheduler_t{}, sender);
  }
};
}  // namespace _get_completion_scheduler
using _get_completion_scheduler::get_completion_scheduler_t;
inline constexpr get_completion_scheduler_t GetCompletionScheduler{};
// True when GetCompletionScheduler(sender) is well-formed.
template <typename Sender>
inline constexpr bool _has_completion_scheduler_v =
    std::is_invocable_v<get_completion_scheduler_t, Sender>;
template <typename Sender>
struct _has_completion_scheduler : std::bool_constant<_has_completion_scheduler_v<Sender>> {};
template <typename Sender>
using _completion_scheduler_for = std::invoke_result_t<get_completion_scheduler_t, Sender>;

namespace impl {
// Detects tag_invoke(Func, completion_scheduler, sender, Args...); false
// whenever the sender has no completion scheduler at all.
template <typename Func, typename Sender, typename TArgs, typename SFINAE = void>
struct _tag_invocable_with_completion_scheduler : std::false_type {};
template <typename Func, typename Sender, typename... Args>
struct _tag_invocable_with_completion_scheduler<
    Func, Sender, std::tuple<Args...>, std::enable_if_t<_has_completion_scheduler_v<Sender>>>
    : is_tag_invocable<Func, _completion_scheduler_for<Sender>, Sender, Args...> {};
}  // namespace impl
template <typename Func, typename Sender, typename... Args>
inline constexpr bool _tag_invocable_with_completion_scheduler =
    impl::_tag_invocable_with_completion_scheduler<Func, Sender, std::tuple<Args...>>::value;
// Detects whether T supports std::begin/std::end (i.e. is usable in a
// range-based for loop).
template <typename T, typename = void>
struct _is_range : std::false_type {};

// Specialization chosen when both std::begin(t) and std::end(t) are
// well-formed expressions.
template <typename T>
struct _is_range<
    T, std::void_t<decltype(std::begin(std::declval<T>())), decltype(std::end(std::declval<T>()))>>
    : std::true_type {};

template <typename T>
inline constexpr bool _is_range_v = _is_range<T>::value;
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_EXPERIMENTAL_EXECUTION_CONCEPTS_H_
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_EXECUTION_DYNAMIC_BATCH_H_
#define MMDEPLOY_CSRC_EXECUTION_DYNAMIC_BATCH_H_
#include <atomic>
#include "mmdeploy/execution/then.h"
#include "mmdeploy/execution/utility.h"
namespace mmdeploy {
namespace _dynamic_batch {
// CPO for batch-aware execution of a function over a sender. Schedulers may
// customize it via tag_invoke (preferably keyed on their completion
// scheduler); without a customization it degrades to Then(sender, func).
struct dynamic_batch_t {
  // Type-erased per-context state; destroy_ acts as a virtual destructor.
  struct context_base_t {
    void (*destroy_)(context_base_t*);
  };
  // Holder for scheduler-specific batching state, installed atomically and
  // torn down through destroy_ when the context dies.
  struct context_t {
    std::atomic<context_base_t*> base{};
    ~context_t() {
      if (auto p = base.load()) {
        p->destroy_(p);
      }
    }
  };
  // 1) Customization registered on the sender's completion scheduler.
  template <typename Sender, typename Func,
            std::enable_if_t<
                _tag_invocable_with_completion_scheduler<dynamic_batch_t, Sender, context_t&, Func>,
                int> = 0>
  auto operator()(Sender&& sender, context_t& context, Func func) const {
    auto scheduler = GetCompletionScheduler(sender);
    return tag_invoke(*this, std::move(scheduler), (Sender &&) sender, context, std::move(func));
  }
  // 2) Customization on the sender itself.
  template <typename Sender, typename Func,
            std::enable_if_t<!_tag_invocable_with_completion_scheduler<dynamic_batch_t, Sender,
                                                                       context_t&, Func> &&
                                 tag_invocable<dynamic_batch_t, Sender, context_t&, Func>,
                             int> = 0>
  auto operator()(Sender&& sender, context_t& context, Func func) const {
    return tag_invoke(*this, (Sender &&) sender, context, std::move(func));
  }
  // 3) Fallback: no batching support -- ignore the context, just run func.
  template <typename Sender, typename Context, typename Func,
            std::enable_if_t<
                !_tag_invocable_with_completion_scheduler<dynamic_batch_t, Sender, Context, Func> &&
                    !tag_invocable<dynamic_batch_t, Sender, Context, Func>,
                int> = 0>
  auto operator()(Sender&& sender, Context&&, Func func) const {
    return Then((Sender &&) sender, std::move(func));
  }
};
} // namespace _dynamic_batch
using _dynamic_batch::dynamic_batch_t;
inline constexpr dynamic_batch_t DynamicBatch{};
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_EXECUTION_DYNAMIC_BATCH_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment