Commit a715222c authored by yuguo

0.9.1-rocm

parent f262efc9
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_CASTER_SIZE_H_
#define ONEFLOW_API_PYTHON_CASTER_SIZE_H_
#include <type_traits>
#include <Python.h>
#include <pybind11/pybind11.h>
#include "oneflow/api/python/framework/size.h"
#include "oneflow/core/common/shape.h"
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
class shape : public object {
public:
PYBIND11_OBJECT_CVT(shape, object, oneflow::TensorSize_Check, raw_shape)
explicit shape(size_t size = 0) : object(oneflow::TensorSize_New((ssize_t)size), stolen_t{}) {
if (!m_ptr) pybind11_fail("Could not allocate tensor size object!");
}
size_t size() const { return (size_t)PyTuple_Size(m_ptr); }
bool empty() const { return size() == 0; }
detail::tuple_accessor operator[](size_t index) const { return {*this, index}; }
detail::item_accessor operator[](handle h) const { return object::operator[](h); }
detail::tuple_iterator begin() const { return {*this, 0}; }
detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; }
private:
static PyObject* raw_shape(PyObject* op) {
if (oneflow::TensorSize_Check(op)) return handle(op).inc_ref().ptr();
return PyObject_CallFunctionObjArgs((PyObject*)&oneflow::TensorSize_Type, op, NULL);
}
};
PYBIND11_NAMESPACE_BEGIN(detail)
template<typename T>
struct shape_type_caster {
public:
bool load(handle src, bool convert) {
value_ = nullptr;
if (src && src.is_none()) { return true; }
if (!oneflow::TensorSize_Check(src.ptr())) { return false; }
value_ = std::make_shared<T>(oneflow::TensorSize_AsShape(src.ptr()));
return true;
}
template<typename U>
static handle cast(U&& src, return_value_policy /*policy*/, handle /*parent*/) {
return cast_impl(std::forward<U>(src));
}
template<typename U>
static handle cast(U* src, return_value_policy policy, handle parent) {
if (!src) { return none().release(); }
return cast(*src, policy, parent);
}
operator T*() { return value_.get(); }
operator T&() { return *value_; }
operator T&&() && { return std::move(*value_); }
operator std::shared_ptr<T>*() { return &value_; }
operator std::shared_ptr<T>&() { return value_; }
operator std::shared_ptr<T>&&() && { return std::move(value_); }
static constexpr auto name = _("shape");
template<typename U>
using cast_op_type = pybind11::detail::cast_op_type<std::shared_ptr<T>>;
private:
static handle cast_impl(const oneflow::Shape& src) {
return reinterpret_steal<shape>(oneflow::TensorSize_NewFromShape(src)).release();
}
static handle cast_impl(const std::shared_ptr<const oneflow::Shape>& src) {
return reinterpret_steal<shape>(oneflow::TensorSize_NewFromShape(*src)).release();
}
protected:
std::shared_ptr<T> value_;
};
template<>
struct type_caster<oneflow::Shape> : public shape_type_caster<oneflow::Shape> {};
template<>
struct type_caster<std::shared_ptr<oneflow::Shape>> : public shape_type_caster<oneflow::Shape> {};
template<>
struct type_caster<std::shared_ptr<const oneflow::Shape>>
: public shape_type_caster<const oneflow::Shape> {};
PYBIND11_NAMESPACE_END(detail)
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
#endif // ONEFLOW_API_PYTHON_CASTER_SIZE_H_
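With these caster specializations registered, any pybind11 binding that accepts or returns an oneflow::Shape (or a shared_ptr to one) converts transparently to and from the Python-side oneflow.Size object. A minimal sketch of such a binding, assuming a pybind11 module m and the header above; the function names are illustrative and not part of this commit:
// Hypothetical bindings: the caster turns an incoming oneflow.Size into an
// oneflow::Shape, and turns a returned Shape back into an oneflow.Size.
m.def("shape_elem_cnt", [](const oneflow::Shape& shape) { return shape.elem_cnt(); });
m.def("shape_identity", [](const oneflow::Shape& shape) { return shape; });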
......@@ -13,6 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_CASTER_TENSOR_H_
#define ONEFLOW_API_PYTHON_CASTER_TENSOR_H_
#include <pybind11/pybind11.h>
#include "oneflow/api/python/caster/common.h"
......@@ -100,3 +103,5 @@ struct type_caster<std::shared_ptr<const oneflow::one::Parameter>>
} // namespace detail
} // namespace pybind11
#endif // ONEFLOW_API_PYTHON_CASTER_TENSOR_H_
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/api/python/dlpack/dlpack.h"
#include "oneflow/api/python/exception/exception.h"
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/data_type.h"
#include "oneflow/core/eager/eager_blob_object.h"
#include "oneflow/core/framework/tensor.h"
#include "oneflow/core/framework/device.h"
#include "oneflow/core/framework/tensor_util.h"
namespace oneflow {
Maybe<Symbol<Device>> ToOneFlowDevice(const DLDevice& ctx) {
switch (ctx.device_type) {
case DLDeviceType::kDLCPU: return JUST(Device::New("cpu"));
#if defined(WITH_CUDA) || defined(WITH_ROCM)
case DLDeviceType::kDLCUDA: return JUST(Device::New("cuda", ctx.device_id));
#endif
default: UNIMPLEMENTED_THEN_RETURN() << "Unsupported device type: " << ctx.device_type;
}
}
Maybe<DataType> ToOneFlowDataType(const DLDataType& dtype) {
DataType ofdtype = DataType::kInvalidDataType;
CHECK_EQ_OR_RETURN(dtype.lanes, 1) << "OneFlow does not support lanes != 1";
switch (dtype.code) {
case DLDataTypeCode::kDLUInt:
switch (dtype.bits) {
case 8: ofdtype = DataType::kUInt8; break;
default:
UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: " << dtype.code << dtype.bits;
}
break;
case DLDataTypeCode::kDLInt:
switch (dtype.bits) {
case 8: ofdtype = DataType::kInt8; break;
case 16: ofdtype = DataType::kInt16; break;
case 32: ofdtype = DataType::kInt32; break;
case 64: ofdtype = DataType::kInt64; break;
default:
UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: " << dtype.code << dtype.bits;
}
break;
case DLDataTypeCode::kDLFloat:
switch (dtype.bits) {
case 16: ofdtype = DataType::kFloat16; break;
case 32: ofdtype = DataType::kFloat; break;
case 64: ofdtype = DataType::kDouble; break;
default:
UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: " << dtype.code << dtype.bits;
}
break;
case DLDataTypeCode::kDLBfloat:
switch (dtype.bits) {
case 16: ofdtype = DataType::kBFloat16; break;
default: UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: bfloat" << dtype.bits;
}
break;
case DLDataTypeCode::kDLComplex:
UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: complex" << dtype.bits;
break;
default: UNIMPLEMENTED_THEN_RETURN() << "Unsupported code " << dtype.code;
}
CHECK_NE_OR_RETURN(ofdtype, DataType::kInvalidDataType);
return ofdtype;
}
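// Example mapping (illustrative): DLDataType{kDLFloat, 32, 1} resolves to
// DataType::kFloat above, and DLDataType{kDLInt, 64, 1} to DataType::kInt64.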
Maybe<one::Tensor> fromDLPack(const DLManagedTensor* src) {
using namespace one;
const auto& dl_tensor = src->dl_tensor;
Symbol<Device> device = JUST(ToOneFlowDevice(dl_tensor.device));
DataType dtype = JUST(ToOneFlowDataType(dl_tensor.dtype));
// Build TensorMeta
const Shape shape(dl_tensor.shape, dl_tensor.shape + dl_tensor.ndim);
Symbol<LocalTensorMeta> tensor_meta;
if (dl_tensor.strides) {
const auto stride = Stride(dl_tensor.strides, dl_tensor.strides + dl_tensor.ndim);
tensor_meta = SymbolOf(LocalTensorMeta(shape, stride, dtype, device));
} else {
tensor_meta = SymbolOf(LocalTensorMeta(shape, dtype, device));
}
// Build TensorBuffer
const auto& Free = [src](char* dptr) {
if (src->deleter) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
src->deleter(const_cast<DLManagedTensor*>(src));
}
};
size_t array_size_in_bytes = shape.elem_cnt() * GetSizeOfDataType(dtype);
auto tensor_data = std::make_shared<vm::OutsideVmTensorStorage>();
tensor_data->set_blob_dptr(
std::unique_ptr<char, std::function<void(char*)>>(static_cast<char*>(dl_tensor.data), Free),
array_size_in_bytes);
// Build TensorStorage: the Free callback above runs the producer's deleter when the storage is released
auto tensor_storage = std::make_shared<TensorStorage>(tensor_data);
// Build Tensor
auto tensor_impl = std::make_shared<EagerLocalTensorImpl>(tensor_storage,
/*requires_grad=*/false,
/*is_leaf=*/true);
// Init blob
JUST(tensor_impl->InitEagerBlobObject(tensor_meta, NewLocalDepObject()));
const auto& stream = JUST(GetDefaultStreamByDevice(device));
const auto& eager_blob_object = JUST(tensor_impl->eager_blob_object());
JUST(eager_blob_object->init_producer_stream(stream));
eager_blob_object->set_last_used_stream(stream);
return std::static_pointer_cast<Tensor>(std::make_shared<LocalTensor>(tensor_impl));
}
Maybe<DLDevice> ToDLDevice(Symbol<Device> ofdevice) {
DLDevice ctx;
ctx.device_id = ofdevice->device_id();
switch (ofdevice->enum_type()) {
case DeviceType::kCPU: ctx.device_type = DLDeviceType::kDLCPU; break;
#if defined(WITH_CUDA) || defined(WITH_ROCM)
case DeviceType::kCUDA: ctx.device_type = DLDeviceType::kDLCUDA; break;
#endif
default: UNIMPLEMENTED_THEN_RETURN() << "Unsupported device type: " << ofdevice->type();
}
return ctx;
}
Maybe<DLDataType> ToDLDataType(DataType ofdtype) {
DLDataType dtype;
dtype.lanes = 1;
dtype.bits = GetSizeOfDataType(ofdtype) * 8;
switch (ofdtype) {
case DataType::kUInt8: dtype.code = DLDataTypeCode::kDLUInt; break;
case DataType::kInt8: dtype.code = DLDataTypeCode::kDLInt; break;
case DataType::kInt16: dtype.code = DLDataTypeCode::kDLInt; break;
case DataType::kInt32: dtype.code = DLDataTypeCode::kDLInt; break;
case DataType::kInt64: dtype.code = DLDataTypeCode::kDLInt; break;
case DataType::kFloat16: dtype.code = DLDataTypeCode::kDLFloat; break;
case DataType::kFloat: dtype.code = DLDataTypeCode::kDLFloat; break;
case DataType::kDouble: dtype.code = DLDataTypeCode::kDLFloat; break;
case DataType::kBFloat16: dtype.code = DLDataTypeCode::kDLBfloat; break;
default: UNIMPLEMENTED_THEN_RETURN() << "Unsupported data type: " << DataType_Name(ofdtype);
}
return dtype;
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
struct ATenDLMTensor {
std::shared_ptr<one::Tensor> handle;
DLManagedTensor tensor;
};
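// Ownership note: handle keeps the exported OneFlow tensor alive for as long as the
// consumer holds the DLManagedTensor; manager_ctx points back at this struct so the
// deleter below can free it once the consumer is done.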
void deleter(DLManagedTensor* arg) { delete static_cast<ATenDLMTensor*>(arg->manager_ctx); }
Maybe<DLManagedTensor*> toDLPack(const std::shared_ptr<one::Tensor>& src) {
auto shape = *src->shape();
auto strides = *JUST(src->stride());
// create a new tensor with possibly normalized strides
// Reference:
// https://github.com/pytorch/pytorch/issues/83069
// https://github.com/pytorch/pytorch/issues/82610
for (int i = 0; i < src->ndim(); i++) {
if (shape[i] <= 1) { strides[i] = 1; }
}
ATenDLMTensor* atDLMTensor(new ATenDLMTensor);
atDLMTensor->handle = src;
atDLMTensor->tensor.manager_ctx = atDLMTensor;
atDLMTensor->tensor.deleter = &deleter;
JUST(one::SyncAccessTensorWithTimeOut(
src,
[&](ep::Stream*, const std::shared_ptr<vm::EagerBlobObject>& tensor) {
atDLMTensor->tensor.dl_tensor.data = tensor->mut_raw_dptr();
},
"const"));
auto dldevice = JUST(ToDLDevice(JUST(src->device())));
auto dldtype = JUST(ToDLDataType(src->dtype()->data_type()));
atDLMTensor->tensor.dl_tensor.device = *dldevice;
atDLMTensor->tensor.dl_tensor.ndim = src->ndim();
atDLMTensor->tensor.dl_tensor.dtype = *dldtype;
atDLMTensor->tensor.dl_tensor.shape =
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
const_cast<int64_t*>(src->shape()->data());
atDLMTensor->tensor.dl_tensor.strides =
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
const_cast<int64_t*>(JUST(src->stride())->data());
atDLMTensor->tensor.dl_tensor.byte_offset = 0;
return &(atDLMTensor->tensor);
}
// This function is mostly copied from PyTorch
void DLPack_Capsule_Destructor(PyObject* data) {
if (likely(!PyCapsule_IsValid(data, "dltensor"))) {
// early out, see DLPack spec: if a consuming library sets the capsule
// name to something else, they own it and we don't need to do anything
return;
}
HANDLE_ERRORS
// Causes overheads for validity checks again, but this case is rare
// since consuming libraries should rename the capsule according to spec.
// Note that this cannot set a python error (we checked validity above),
// so we don't need to handle python error state here.
DLManagedTensor* dlMTensor = (DLManagedTensor*)PyCapsule_GetPointer(data, "dltensor");
// the dlMTensor has not been consumed, call deleter ourselves.
// DLPack spec mentions that deleter may be NULL, but deleter from
// `flow.to_dlpack` is never NULL, so no need for an additional check here.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
dlMTensor->deleter(const_cast<DLManagedTensor*>(dlMTensor));
END_HANDLE_ERRORS_RET()
}
namespace py = pybind11;
ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("to_dlpack", [](const std::shared_ptr<one::Tensor>& tensor) -> Maybe<py::capsule> {
DLManagedTensor* dlMTensor = JUST(toDLPack(tensor));
return py::capsule(dlMTensor, "dltensor", DLPack_Capsule_Destructor);
});
// from_dlpack is exported in tensor_api.yaml
}
} // namespace oneflow
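Taken together, toDLPack and fromDLPack give a zero-copy round trip for an eager local tensor. A rough sketch of how the two entry points compose, assuming the code sits inside namespace oneflow with the includes above; RoundTripExample is illustrative and not part of this commit:
// Export a tensor and re-import the resulting DLManagedTensor; both tensors
// alias the same storage, and the Free callback installed by fromDLPack runs
// the producer's deleter once the re-imported tensor's storage is released.
Maybe<void> RoundTripExample(const std::shared_ptr<one::Tensor>& t) {
  DLManagedTensor* managed = JUST(toDLPack(t));
  std::shared_ptr<one::Tensor> reimported = JUST(fromDLPack(managed));
  return Maybe<void>::Ok();
}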
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/api/python/dlpack/dlpack.h"
#include "oneflow/core/common/maybe.h"
namespace oneflow {
namespace one {
class Tensor;
}
Maybe<one::Tensor> fromDLPack(const DLManagedTensor* src);
Maybe<DLManagedTensor*> toDLPack(const std::shared_ptr<one::Tensor>& src);
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*!
* Copyright (c) 2017 by Contributors
* \file dlpack.h
* \brief The common header of DLPack.
*/
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
/**
* \brief Compatibility with C++
*/
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif
/*! \brief The current version of dlpack */
#define DLPACK_VERSION 70
/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1
/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief The device type in DLDevice.
*/
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
/*! \brief CPU device */
kDLCPU = 1,
/*! \brief CUDA GPU device */
kDLCUDA = 2,
/*!
* \brief Pinned CUDA CPU memory by cudaMallocHost
*/
kDLCUDAHost = 3,
/*! \brief OpenCL devices. */
kDLOpenCL = 4,
/*! \brief Vulkan buffer for next generation graphics. */
kDLVulkan = 7,
/*! \brief Metal for Apple GPU. */
kDLMetal = 8,
/*! \brief Verilog simulator buffer */
kDLVPI = 9,
/*! \brief ROCm GPUs for AMD GPUs */
kDLROCM = 10,
/*!
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
*/
kDLROCMHost = 11,
/*!
* \brief Reserved extension device type,
* used to quickly test extension devices.
* The semantics can differ depending on the implementation.
*/
kDLExtDev = 12,
/*!
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
*/
kDLCUDAManaged = 13,
/*!
* \brief Unified shared memory allocated on a oneAPI non-partitioned
* device. Call to oneAPI runtime is required to determine the device
* type, the USM allocation type and the sycl context it is bound to.
*
*/
kDLOneAPI = 14,
/*! \brief GPU support for next generation WebGPU standard. */
kDLWebGPU = 15,
/*! \brief Qualcomm Hexagon DSP */
kDLHexagon = 16,
} DLDeviceType;
/*!
* \brief A Device for Tensor and operator.
*/
typedef struct {
/*! \brief The device type used in the device. */
DLDeviceType device_type;
/*!
* \brief The device index.
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
*/
int32_t device_id;
} DLDevice;
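/*
 * Example (illustrative): the second CUDA GPU is described as
 *   DLDevice dev = {kDLCUDA, 1};
 * and plain host memory as {kDLCPU, 0}.
 */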
/*!
* \brief The type code options DLDataType.
*/
typedef enum {
/*! \brief signed integer */
kDLInt = 0U,
/*! \brief unsigned integer */
kDLUInt = 1U,
/*! \brief IEEE floating point */
kDLFloat = 2U,
/*!
* \brief Opaque handle type, reserved for testing purposes.
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
*/
kDLOpaqueHandle = 3U,
/*! \brief bfloat16 */
kDLBfloat = 4U,
/*!
* \brief complex number
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
} DLDataTypeCode;
/*!
* \brief The data type the tensor can hold. The data type is assumed to follow the
* native endian-ness. An explicit error message should be raised when attempting to
* export an array with non-native endianness
*
* Examples
* - float: type_code = 2, bits = 32, lanes=1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
* - int8: type_code = 0, bits = 8, lanes=1
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
*/
typedef struct {
/*!
* \brief Type code of base types.
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
* footprint, but the value should be one of DLDataTypeCode enum values.
*/
uint8_t code;
/*!
* \brief Number of bits, common choices are 8, 16, 32.
*/
uint8_t bits;
/*! \brief Number of lanes in the type, used for vector types. */
uint16_t lanes;
} DLDataType;
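/*
 * Example (illustrative): the scalar float32 from the comment above is
 *   DLDataType f32 = {kDLFloat, 32, 1};
 * and the vectorized float4 is {kDLFloat, 32, 4}.
 */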
/*!
* \brief Plain C Tensor object, does not manage memory.
*/
typedef struct {
/*!
* \brief The data pointer points to the allocated data. This will be CUDA
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
* types. This pointer is always aligned to 256 bytes as in CUDA. The
* `byte_offset` field should be used to point to the beginning of the data.
*
* Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
* TVM, perhaps others) do not adhere to this 256 byte alignment requirement
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
* (after which this note will be updated); at the moment it is recommended
* to not rely on the data pointer being correctly aligned.
*
* For given DLTensor, the size of memory required to store the contents of
* data is calculated as follows:
*
* \code{.c}
* static inline size_t GetDataSize(const DLTensor* t) {
* size_t size = 1;
* for (tvm_index_t i = 0; i < t->ndim; ++i) {
* size *= t->shape[i];
* }
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
* return size;
* }
* \endcode
*/
void* data;
/*! \brief The device of the tensor */
DLDevice device;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief The data type of the pointer*/
DLDataType dtype;
/*! \brief The shape of the tensor */
int64_t* shape;
/*!
* \brief strides of the tensor (in number of elements, not bytes)
* can be NULL, indicating the tensor is compact and row-major.
*/
int64_t* strides;
/*! \brief The offset in bytes to the beginning pointer to data */
uint64_t byte_offset;
} DLTensor;
/*!
* \brief C Tensor object, manage memory of DLTensor. This data structure is
* intended to facilitate the borrowing of DLTensor by another framework. It is
* not meant to transfer the tensor. When the borrowing framework doesn't need
* the tensor, it should call the deleter to notify the host that the resource
* is no longer needed.
*/
typedef struct DLManagedTensor {
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
/*! \brief the context of the original host framework in which this
* DLManagedTensor is used. It can also be NULL.
*/
void* manager_ctx;
/*! \brief Destructor signature void (*)(void*) - this should be called
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
* if there is no way for the caller to provide a reasonable destructor.
* The destructor deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_
......@@ -22,7 +22,9 @@ ONEFLOW_API_PYBIND11_MODULE("eager", m) {
using namespace oneflow;
namespace py = pybind11;
m.def(
"Sync", []() { return vm::ClusterSync(); }, py::call_guard<py::gil_scoped_release>());
"Sync", []() { return vm::CurrentRankSync(); }, py::call_guard<py::gil_scoped_release>());
m.def(
"ClusterSync", []() { return vm::ClusterSync(); }, py::call_guard<py::gil_scoped_release>());
py::class_<one::DevVmDepObjectConsumeModeGuard,
std::shared_ptr<one::DevVmDepObjectConsumeModeGuard>>(
......
......@@ -18,30 +18,85 @@ limitations under the License.
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/job/env_global_objects_scope.h"
#include "oneflow/core/common/singleton.h"
#include "oneflow/core/job/graph_scope_vars.h"
#include "oneflow/core/vm/vm_util.h"
#include "oneflow/core/vm/virtual_machine.h"
#include "oneflow/core/framework/shut_down_util.h"
#include "oneflow/core/device/cuda_util.h"
#ifdef WITH_CUDA
#include <cuda.h>
#endif // WITH_CUDA
#ifdef WITH_ROCM
#include <hip/hip_runtime.h>
#endif // WITH_ROCM
namespace py = pybind11;
namespace oneflow {
#ifdef WITH_CUDA
void RegisterCudaDeviceProperties(py::module& m) {
py::class_<cudaDeviceProp>(m, "_CudaDeviceProperties", py::module_local())
.def(py::init<>())
.def_readonly("name", &cudaDeviceProp::name)
.def_readonly("major", &cudaDeviceProp::major)
.def_readonly("minor", &cudaDeviceProp::minor)
.def_readonly("is_multi_gpu_board", &cudaDeviceProp::isMultiGpuBoard)
.def_readonly("is_integrated", &cudaDeviceProp::integrated)
.def_readonly("multi_processor_count", &cudaDeviceProp::multiProcessorCount)
.def_readonly("total_memory", &cudaDeviceProp::totalGlobalMem)
.def("__repr__", [](const cudaDeviceProp& prop) {
std::ostringstream stream;
stream << "_CudaDeviceProperties(name='" << prop.name << "', major=" << prop.major
<< ", minor=" << prop.minor
<< ", total_memory=" << prop.totalGlobalMem / (1024 * 1024)
<< "MB, multi_processor_count=" << prop.multiProcessorCount << ")";
return stream.str();
});
}
#endif // WITH_CUDA
#ifdef WITH_ROCM
void RegisterCudaDeviceProperties(py::module& m) {
py::class_<hipDeviceProp_t>(m, "_CudaDeviceProperties", py::module_local())
.def(py::init<>())
.def_readonly("name", &hipDeviceProp_t::name)
.def_readonly("major", &hipDeviceProp_t::major)
.def_readonly("minor", &hipDeviceProp_t::minor)
.def_readonly("is_multi_gpu_board", &hipDeviceProp_t::isMultiGpuBoard)
.def_readonly("is_integrated", &hipDeviceProp_t::integrated)
.def_readonly("multi_processor_count", &hipDeviceProp_t::multiProcessorCount)
.def_readonly("total_memory", &hipDeviceProp_t::totalGlobalMem)
.def("__repr__", [](const hipDeviceProp_t& prop) {
std::ostringstream stream;
stream << "_CudaDeviceProperties(name='" << prop.name << "', major=" << prop.major
<< ", minor=" << prop.minor
<< ", total_memory=" << prop.totalGlobalMem / (1024 * 1024)
<< "MB, multi_processor_count=" << prop.multiProcessorCount << ")";
return stream.str();
});
}
#endif // WITH_ROCM
Maybe<void> SwitchToShuttingDownPhase(EnvGlobalObjectsScope* env, bool is_normal_exit) {
JUST(env->init_is_normal_exit(is_normal_exit));
SetShuttingDown(true);
if (is_normal_exit) {
JUST(vm::ClusterSync());
auto* vm = JUST(SingletonMaybe<VirtualMachine>());
JUST(vm->CloseVMThreads());
}
return Maybe<void>::Ok();
}
ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("CurrentResource", &CurrentResource);
m.def("EnvResource", &EnvResource);
m.def("EnableEagerEnvironment", &EnableEagerEnvironment);
py::class_<oneflow::EnvGlobalObjectsScope, std::shared_ptr<oneflow::EnvGlobalObjectsScope>>(
m, "EnvContext")
......@@ -57,19 +112,30 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("GetLocalRank", &GetLocalRank);
m.def("InitRDMA", &InitRDMA);
m.def("RDMAIsInitialized", &RDMAIsInitialized);
m.def("DestoryRDMA", &DestoryRDMA);
m.def("CudaGetDeviceCount", &CudaGetDeviceCount);
m.def("EmptyCache", &EmptyCache);
#ifdef WITH_CUDA
RegisterCudaDeviceProperties(m);
m.def("GetCudaDeviceIndex", &GetCudaDeviceIndex);
m.def("SetCudaDeviceIndex", &SetCudaDeviceIndex);
m.def("CudaSynchronize", &CudaSynchronize);
m.def("GetCUDAMemoryUsed", &GetCUDAMemoryUsed);
m.def(
"_get_device_properties",
[](int device) -> cudaDeviceProp* { return GetDeviceProperties(device); },
py::return_value_policy::reference);
#endif // WITH_CUDA
#ifdef WITH_ROCM
RegisterCudaDeviceProperties(m);
m.def("GetCudaDeviceIndex", &GetCudaDeviceIndex);
m.def("SetCudaDeviceIndex", &SetCudaDeviceIndex);
m.def("CudaSynchronize", &CudaSynchronize);
m.def("GetCUDAMemoryUsed", &GetCUDAMemoryUsed);
m.def(
"_get_device_properties",
[](int device) -> hipDeviceProp_t* { return GetDeviceProperties(device); },
py::return_value_policy::reference);
#endif // WITH_ROCM
m.def("SetFLAGS_alsologtostderr", &SetFLAGS_alsologtostderr);
m.def("GetFLAGS_alsologtostderr", &GetFLAGS_alsologtostderr);
......@@ -81,6 +147,10 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("GetGraphDebugMaxPyStackDepth", &GetGraphDebugMaxPyStackDepth);
m.def("SetGraphDebugMode", &SetGraphDebugMode);
m.def("GetGraphDebugMode", &GetGraphDebugMode);
m.def("SetGraphDebugOnlyUserPyStack", &SetGraphDebugOnlyUserPyStack);
m.def("GetGraphDebugOnlyUserPyStack", &GetGraphDebugOnlyUserPyStack);
m.def("InitPythonPathsToBeKeptAndFilteredForDebugging",
&InitPythonPathsToBeKeptAndFilteredForDebugging);
}
} // namespace oneflow
......@@ -20,7 +20,6 @@ limitations under the License.
#include <google/protobuf/text_format.h>
#include "oneflow/core/common/protobuf.h"
#include "oneflow/core/common/singleton.h"
#include "oneflow/core/job/cluster.h"
#include "oneflow/core/job/cluster_instruction.h"
#include "oneflow/core/job/env_global_objects_scope.h"
#include "oneflow/core/job/global_for.h"
......@@ -44,12 +43,6 @@ inline Maybe<std::string> EnvResource() {
return PbMessage2TxtString(Singleton<ResourceDesc, ForEnv>::Get()->resource());
}
inline Maybe<void> EnableEagerEnvironment(bool enable_eager_execution) {
CHECK_NOTNULL_OR_RETURN((Singleton<bool, EagerExecution>::Get()));
*Singleton<bool, EagerExecution>::Get() = enable_eager_execution;
return Maybe<void>::Ok();
}
inline Maybe<long long> CurrentMachineId() { return GlobalProcessCtx::Rank(); }
inline Maybe<int64_t> GetRank() { return GlobalProcessCtx::Rank(); }
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <pybind11/pybind11.h>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/throw.h"
#include "oneflow/core/framework/autocast.h"
namespace py = pybind11;
namespace oneflow {
class AutoCastMode {
public:
OF_DISALLOW_COPY_AND_MOVE(AutoCastMode);
AutoCastMode(const std::string& device_type, Symbol<DType> dtype, bool enabled,
bool cache_enabled)
: prev_enabled_(autocast::is_enabled()),
prev_cache_enabled_(autocast::is_autocast_cache_enabled()),
prev_device_type_(autocast::get_autocast_device_type()),
prev_dtype_(autocast::get_autocast_dtype()),
prev_gpu_dtype_(autocast::get_autocast_gpu_dtype()),
prev_cpu_dtype_(autocast::get_autocast_cpu_dtype()) {
// update autocast state
autocast::set_enabled(enabled);
autocast::set_autocast_cache_enabled(cache_enabled);
if (device_type == "cpu") {
autocast::set_autocast_device_type(kCPU);
autocast::set_autocast_dtype(dtype);
autocast::set_autocast_cpu_dtype(dtype);
} else if (device_type == "cuda") {
autocast::set_autocast_device_type(kCUDA);
autocast::set_autocast_dtype(dtype);
autocast::set_autocast_gpu_dtype(dtype);
} else {
THROW(RuntimeError) << "User specified autocast device_type must be 'cuda' or 'cpu'";
}
}
~AutoCastMode() {
autocast::set_enabled(prev_enabled_);
autocast::set_autocast_cache_enabled(prev_cache_enabled_);
autocast::set_autocast_device_type(prev_device_type_);
autocast::set_autocast_dtype(prev_dtype_);
autocast::set_autocast_gpu_dtype(prev_gpu_dtype_);
autocast::set_autocast_cpu_dtype(prev_cpu_dtype_);
}
private:
bool prev_enabled_;
bool prev_cache_enabled_;
DeviceType prev_device_type_;
Symbol<DType> prev_dtype_;
Symbol<DType> prev_gpu_dtype_;
Symbol<DType> prev_cpu_dtype_;
};
ONEFLOW_API_PYBIND11_MODULE("", m) {
py::class_<AutoCastMode, std::shared_ptr<AutoCastMode>>(m, "AutoCastMode")
.def(py::init([](const std::string& device_type, Symbol<DType> dtype, bool enabled,
bool cache_enabled) {
return std::make_shared<AutoCastMode>(device_type, dtype, enabled, cache_enabled);
}));
m.def("is_autocast_enabled", autocast::is_enabled);
m.def("set_autocast_enabled", autocast::set_enabled);
m.def("get_autocast_gpu_dtype", autocast::get_autocast_gpu_dtype);
m.def("get_autocast_cpu_dtype", autocast::get_autocast_cpu_dtype);
m.def("set_autocast_gpu_dtype", autocast::set_autocast_gpu_dtype);
m.def("set_autocast_cpu_dtype", autocast::set_autocast_cpu_dtype);
m.def("is_autocast_cache_enabled", autocast::is_autocast_cache_enabled);
m.def("set_autocast_cache_enabled", autocast::set_autocast_cache_enabled);
m.def("clear_autocast_cache", autocast::clear_cache);
}
} // namespace oneflow
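AutoCastMode is a pure RAII guard: the constructor records the current autocast state and installs the requested one, and the destructor restores whatever was active before, so guards nest naturally. A minimal C++ sketch of that behaviour (illustrative, not part of this commit; assumes the headers above):
{
  // Inside this scope autocast is enabled for CUDA with float16 as the target dtype.
  AutoCastMode guard("cuda", CHECK_JUST(DType::Get(DataType::kFloat16)),
                     /*enabled=*/true, /*cache_enabled=*/true);
  // ... run mixed-precision work here ...
}  // The destructor restores the previous enabled flags, device type and dtypes.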
......@@ -20,6 +20,7 @@ limitations under the License.
#include "oneflow/core/framework/device.h"
#include "oneflow/core/common/str_util.h"
#include "oneflow/core/control/global_process_ctx.h"
#include "oneflow/core/ep/include/device.h"
namespace py = pybind11;
......@@ -39,6 +40,10 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
.def("__repr__", [](const Symbol<Device>& d) { return d->ToRepr(); })
.def(py::self == py::self)
.def(py::hash(py::self));
m.def(
"max_alignment_size", []() { return ep::kMaxAlignmentRequirement; },
py::return_value_policy::copy);
}
} // namespace oneflow
......@@ -16,7 +16,10 @@ limitations under the License.
#include <pybind11/pybind11.h>
#include <pybind11/operators.h>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/api/python/framework/tensortype.h"
#include "oneflow/api/python/functional/common.h"
#include "oneflow/core/framework/dtype.h"
namespace py = pybind11;
namespace oneflow {
......@@ -66,6 +69,19 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
m.attr("complex32") = &CHECK_JUST(DType::Get(DataType::kComplex32));
m.attr("complex64") = &CHECK_JUST(DType::Get(DataType::kComplex64));
m.attr("complex128") = &CHECK_JUST(DType::Get(DataType::kComplex128));
py::options options;
options.disable_function_signatures();
m.def("get_default_dtype", []() { return GetDefaultDType(); });
m.def("set_default_dtype",
[](const Symbol<DType>& dtype) { SetDefaultDType(dtype).GetOrThrow(); });
m.def("set_default_tensor_type", [](const py::object& tensor_type) {
if (one::PyTensorType_Check(tensor_type.ptr())) {
CHECK_JUST(SetDefaultDType(one::PyTensorType_UnpackDType(tensor_type.ptr())));
} else {
throw py::type_error("invalid type object");
}
});
}
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <pybind11/pybind11.h>
#include <string>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/foreign_callback.h"
namespace py = pybind11;
namespace oneflow {
class PyForeignCallback : public ForeignCallback {
public:
// Inherit the constructors
using ForeignCallback::ForeignCallback;
// Trampoline (need one for each virtual function)
void OfBlobCall(int64_t unique_id, int64_t ofblob_ptr) const override {
PYBIND11_OVERRIDE(void, /* Return type */
ForeignCallback, /* Parent class */
OfBlobCall, /* Name of function in C++ (must match Python name) */
unique_id, ofblob_ptr /* Argument(s) */
);
}
void RemoveForeignCallback(int64_t unique_id) const override {
PYBIND11_OVERRIDE(void, ForeignCallback, RemoveForeignCallback, unique_id);
}
};
} // namespace oneflow
ONEFLOW_API_PYBIND11_MODULE("", m) {
using namespace oneflow;
py::class_<ForeignCallback, PyForeignCallback, std::shared_ptr<ForeignCallback>>(
m, "ForeignCallback")
.def(py::init<>())
.def("OfBlobCall", &ForeignCallback::OfBlobCall)
.def("RemoveForeignCallback", &ForeignCallback::RemoveForeignCallback);
}
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <pybind11/pybind11.h>
#include <string>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/foreign_watcher.h"
namespace py = pybind11;
namespace oneflow {
class PyForeignWatcher : public ForeignWatcher {
public:
using ForeignWatcher::ForeignWatcher;
void Call(const std::string& handler_uuid, int64_t ofblob_ptr) const override {
PYBIND11_OVERRIDE(void, ForeignWatcher, Call, handler_uuid, ofblob_ptr);
}
};
} // namespace oneflow
ONEFLOW_API_PYBIND11_MODULE("", m) {
using namespace oneflow;
py::class_<ForeignWatcher, PyForeignWatcher, std::shared_ptr<ForeignWatcher>>(m, "ForeignWatcher")
.def(py::init<>())
.def("Call", &ForeignWatcher::Call);
}
......@@ -25,30 +25,11 @@ namespace py = pybind11;
namespace oneflow {
ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("RegisterGlobalForeignCallback", &RegisterGlobalForeignCallback);
m.def("DestroyGlobalForeignCallback", &DestroyGlobalForeignCallback);
m.def("RegisterGlobalWatcher", &RegisterGlobalWatcher);
m.def("LaunchJob", &LaunchJob, py::call_guard<py::gil_scoped_release>());
m.def("GetSerializedInterUserJobInfo",
[]() -> Maybe<py::bytes> { return py::bytes(*JUST(GetSerializedInterUserJobInfo())); });
m.def("GetSerializedJobSet",
[]() -> Maybe<py::bytes> { return py::bytes(*JUST(GetSerializedJobSet())); });
m.def("GetSerializedStructureGraph", &GetSerializedStructureGraph /* a prototxt saved to file*/);
m.def("GetSerializedCurrentJob",
[]() -> Maybe<py::bytes> { return py::bytes(*JUST(GetSerializedCurrentJob())); });
m.def("GetFunctionConfigDef", &GetFunctionConfigDef);
m.def("GetScopeConfigDef", &GetScopeConfigDef);
m.def("GetMachine2DeviceIdListOFRecordFromParallelConf",
&GetSerializedMachineId2DeviceIdListOFRecord);
m.def("LoadSavedModel",
[](const std::string& saved_model_meta_file, bool is_prototxt_file) -> Maybe<py::bytes> {
return py::bytes(*JUST(LoadSavedModel(saved_model_meta_file, is_prototxt_file)));
});
m.def("EagerExecutionEnabled", EagerExecutionEnabled);
m.def("LoadLibrary", &LoadLibrary);
}
......
......@@ -25,82 +25,14 @@ limitations under the License.
#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
#include "oneflow/core/job/job_desc.h"
#include "oneflow/core/job/inter_user_job_info.pb.h"
#include "oneflow/core/job/foreign_callback.h"
#include "oneflow/core/job/foreign_watcher.h"
#include "oneflow/core/job/job_instance.h"
#include "oneflow/core/job/oneflow.h"
#include "oneflow/core/job/placement.pb.h"
#include "oneflow/core/framework/config_def.h"
#include "oneflow/core/framework/load_library.h"
#include "oneflow/core/serving/saved_model.pb.h"
namespace oneflow {
inline Maybe<void> RegisterGlobalForeignCallback(const std::shared_ptr<ForeignCallback>& callback) {
CHECK_ISNULL_OR_RETURN(Singleton<std::shared_ptr<ForeignCallback>>::Get())
<< "foreign callback registered";
// Singleton<T>::SetAllocated is preferred since Singleton<T>::New will output logs but
// glog is not constructed yet.
Singleton<std::shared_ptr<ForeignCallback>>::SetAllocated(
new std::shared_ptr<ForeignCallback>(callback));
return Maybe<void>::Ok();
}
inline Maybe<void> DestroyGlobalForeignCallback() {
if (Singleton<std::shared_ptr<ForeignCallback>>::Get()) {
Singleton<std::shared_ptr<ForeignCallback>>::Delete();
}
return Maybe<void>::Ok();
}
inline Maybe<void> RegisterGlobalWatcher(const std::shared_ptr<ForeignWatcher>& watcher) {
CHECK_ISNULL_OR_RETURN(Singleton<std::shared_ptr<ForeignWatcher>>::Get())
<< "foreign watcher registered";
// Singleton<T>::SetAllocated is preferred since Singleton<T>::New will output logs but
// glog is not constructed yet.
Singleton<std::shared_ptr<ForeignWatcher>>::SetAllocated(
new std::shared_ptr<ForeignWatcher>(watcher));
return Maybe<void>::Ok();
}
inline Maybe<void> LaunchJob(const std::shared_ptr<oneflow::JobInstance>& cb) {
CHECK_OR_RETURN(GlobalProcessCtx::IsThisProcessMaster());
CHECK_NOTNULL_OR_RETURN(Singleton<Oneflow>::Get());
const auto& job_name = cb->job_name();
auto* buffer_mgr = Singleton<BufferMgr<std::shared_ptr<JobInstance>>>::Get();
int64_t job_id = Singleton<JobName2JobId>::Get()->at(job_name);
if (IsPullJob(job_name, *Singleton<InterUserJobInfo>::Get())) {
buffer_mgr->Get(GetForeignOutputBufferName(job_name))->Push(cb);
}
if (IsPushJob(job_name, *Singleton<InterUserJobInfo>::Get())) {
buffer_mgr->Get(GetForeignInputBufferName(job_name))->Push(cb);
}
buffer_mgr->Get(GetCallbackNotifierBufferName(job_name))->Push(cb);
Singleton<BufferMgr<int64_t>>::Get()->Get(kBufferNameGlobalWaitJobId)->Push(job_id);
return Maybe<void>::Ok();
}
inline Maybe<std::string> GetSerializedStructureGraph() {
const auto* job_ctx_mgr = Singleton<LazyJobBuildAndInferCtxMgr>::Get();
CHECK_NOTNULL_OR_RETURN(job_ctx_mgr);
return job_ctx_mgr->structure_graph();
}
inline Maybe<std::string> GetSerializedInterUserJobInfo() {
CHECK_OR_RETURN(GlobalProcessCtx::IsThisProcessMaster());
CHECK_NOTNULL_OR_RETURN(Singleton<Oneflow>::Get());
CHECK_NOTNULL_OR_RETURN(Singleton<InterUserJobInfo>::Get());
return Singleton<InterUserJobInfo>::Get()->SerializeAsString();
}
inline Maybe<const JobSet&> GetJobSet() {
auto* job_ctx_mgr = JUST(GlobalJobBuildAndInferCtxMgr());
CHECK_NOTNULL_OR_RETURN(job_ctx_mgr);
return job_ctx_mgr->job_set();
}
inline Maybe<std::string> GetSerializedJobSet() { return JUST(GetJobSet()).SerializeAsString(); }
inline Maybe<std::string> GetSerializedCurrentJob() {
auto* job_ctx_mgr = Singleton<LazyJobBuildAndInferCtxMgr>::Get();
CHECK_NOTNULL_OR_RETURN(job_ctx_mgr);
......@@ -130,17 +62,6 @@ inline Maybe<std::string> GetSerializedMachineId2DeviceIdListOFRecord(
return PbMessage2TxtString(*JUST(ParseMachineAndDeviceIdList(parallel_conf)));
}
inline Maybe<std::string> LoadSavedModel(const std::string& saved_model_meta_file,
bool is_prototxt_file) {
SavedModel saved_model_proto;
if (is_prototxt_file) {
CHECK_OR_RETURN(TryParseProtoFromTextFile(saved_model_meta_file, &saved_model_proto));
} else {
CHECK_OR_RETURN(TryParseProtoFromPbFile(saved_model_meta_file, &saved_model_proto));
}
return saved_model_proto.SerializeAsString();
}
inline Maybe<void> LoadLibraryNow(const std::string& lib_path) { return LoadLibrary(lib_path); }
} // namespace oneflow
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/symbol.h"
#include "oneflow/core/common/throw.h"
#include "oneflow/core/framework/nd_sbp.h"
#include "oneflow/core/job/global_mode.h"
namespace py = pybind11;
namespace oneflow {
ONEFLOW_API_PYBIND11_MODULE("global_view", m) {
py::class_<GlobalMode::Guard, std::shared_ptr<GlobalMode::Guard>>(m, "global_mode")
.def(py::init([](const bool enabled) {
if (enabled) {
THROW(RuntimeError) << "To enable global mode, placement and sbp must be provided.";
}
return std::make_shared<GlobalMode::Guard>(enabled);
}))
.def(py::init([](const bool enabled, const Symbol<ParallelDesc>& placement,
const std::vector<Symbol<SbpParallel>>& sbp) {
if (!enabled) {
THROW(RuntimeError)
<< "To disable global mode, placement and sbp must not be provided.";
}
return std::make_shared<GlobalMode::Guard>(enabled, CHECK_JUST(GetNdSbp(sbp)),
placement);
}),
py::arg("enabled").none(false), py::arg("placement").none(false),
py::arg("sbp").none(false))
.def(py::init([](const bool enabled, const Symbol<ParallelDesc>& placement,
const Symbol<SbpParallel>& sbp) {
return std::make_shared<GlobalMode::Guard>(enabled, CHECK_JUST(SbpToNdSbp(sbp)),
placement);
}),
py::arg("enabled").none(false), py::arg("placement").none(false),
py::arg("sbp").none(false))
.def("__enter__", [](const GlobalMode::Guard& guard_obj) {})
.def("__exit__", [](const GlobalMode::Guard& guard_obj, const py::object& type,
const py::object& value, const py::object& traceback) {});
py::class_<GlobalMode, std::shared_ptr<GlobalMode>>(m, "current_global_mode")
.def(py::init([]() { return std::make_shared<GlobalMode>(); }))
.def_property_readonly("is_enabled", [](const GlobalMode& gm) { return gm.is_enabled(); })
.def_property_readonly("sbp",
[](const GlobalMode& gm) {
if (!gm.is_enabled()) {
THROW(RuntimeError)
<< "Current global mode is disabled, there is no sbp.";
}
const auto& nd_sbp = gm.nd_sbp();
auto tuple = py::tuple(nd_sbp->sbp_parallel_size());
for (int i = 0; i < nd_sbp->sbp_parallel_size(); ++i) {
tuple[i] = SymbolOf(nd_sbp->sbp_parallel(i));
}
return tuple;
})
.def_property_readonly("placement", [](const GlobalMode& gm) {
if (!gm.is_enabled()) {
THROW(RuntimeError) << "Current global mode is disabled, there is no placement.";
}
return gm.parallel_desc();
});
}
} // namespace oneflow
......@@ -44,29 +44,29 @@ ONEFLOW_API_PYBIND11_MODULE("deprecated", m) {
[](const std::shared_ptr<InstructionsBuilder>& builder, int64_t session_id,
const std::string& job_conf_str, const std::string& device_tag,
const std::vector<std::string>& machine_device_ids,
const std::shared_ptr<Shape>& hierarchy, bool is_local) -> Maybe<Scope> {
JobConfigProto job_conf;
CHECK_OR_RETURN(TxtString2PbMessage(job_conf_str, &job_conf))
<< Error::RuntimeError() << "job conf parse failed";
return builder->BuildInitialScope(session_id, job_conf, device_tag, machine_device_ids,
hierarchy, is_local);
},
py::arg("session_id").none(false), py::arg("job_conf_str").none(false),
py::arg("device_tag").none(false), py::arg("machine_device_ids").none(false),
py::arg("hierarchy").none(true), py::arg("is_mirrored").none(false))
py::arg("hierarchy").none(true), py::arg("is_local").none(false))
.def(
"BuildInitialScopeWithPlacement",
[](const std::shared_ptr<InstructionsBuilder>& builder, int64_t session_id,
const std::string& job_conf_str, Symbol<ParallelDesc> placement,
bool is_local) -> Maybe<Scope> {
JobConfigProto job_conf;
CHECK_OR_RETURN(TxtString2PbMessage(job_conf_str, &job_conf))
<< Error::RuntimeError() << "job conf parse failed";
return builder->BuildInitialScopeWithPlacement(session_id, job_conf, placement,
is_local);
},
py::arg("session_id").none(false), py::arg("job_conf_str").none(false),
py::arg("placement").none(false), py::arg("is_mirrored").none(false))
py::arg("placement").none(false), py::arg("is_local").none(false))
.def("BuildScopeWithNewParallelDesc", &InstructionsBuilder::BuildScopeWithNewParallelDesc,
py::arg("scope").none(false), py::arg("device_tag").none(false),
py::arg("machine_device_ids").none(false), py::arg("hierarchy").none(true))
......@@ -79,7 +79,7 @@ ONEFLOW_API_PYBIND11_MODULE("deprecated", m) {
<< Error::RuntimeError() << "parallel conf parse failed";
return builder->BuildScopeWithNewParallelConf(scope, parallel_conf);
})
.def("BuildScopeWithNewIsMirrored", &InstructionsBuilder::BuildScopeWithNewIsMirrored)
.def("BuildScopeWithNewIsLocal", &InstructionsBuilder::BuildScopeWithNewIsLocal)
.def("BuildScopeWithNewScopeName", &InstructionsBuilder::BuildScopeWithNewScopeName)
.def("BuildScopeByProtoStrSetter", &InstructionsBuilder::BuildScopeByProtoStrSetter);
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <pybind11/pybind11.h>
#include <string>
#include <memory>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/job_instance.h"
namespace py = pybind11;
namespace oneflow {
class PyJobInstance : public JobInstance {
public:
// Inherit the constructors
using JobInstance::JobInstance;
// Trampoline (need one for each virtual function)
std::string job_name() const override {
PYBIND11_OVERRIDE(std::string, /* Return type */
JobInstance, /* Parent class */
job_name, /* Name of function in C++ (must match Python name) */
);
}
std::string sole_input_op_name_in_user_job() const override {
PYBIND11_OVERRIDE(std::string, JobInstance, sole_input_op_name_in_user_job, );
}
std::string sole_output_op_name_in_user_job() const override {
PYBIND11_OVERRIDE(std::string, JobInstance, sole_output_op_name_in_user_job, );
}
void PushBlob(uint64_t ofblob_ptr) const override {
PYBIND11_OVERRIDE(void, JobInstance, PushBlob, ofblob_ptr);
}
void PullBlob(uint64_t ofblob_ptr) const override {
PYBIND11_OVERRIDE(void, JobInstance, PullBlob, ofblob_ptr);
}
void Finish() const override { PYBIND11_OVERRIDE(void, JobInstance, Finish, ); }
};
} // namespace oneflow
ONEFLOW_API_PYBIND11_MODULE("", m) {
using namespace oneflow;
py::class_<JobInstance, PyJobInstance, std::shared_ptr<JobInstance>>(m, "JobInstance")
.def(py::init<>())
.def("job_name", &JobInstance::job_name)
.def("sole_input_op_name_in_user_job", &JobInstance::sole_input_op_name_in_user_job)
.def("sole_output_op_name_in_user_job", &JobInstance::sole_output_op_name_in_user_job)
.def("PushBlob", &JobInstance::PushBlob)
.def("PullBlob", &JobInstance::PullBlob)
.def("Finish", &JobInstance::Finish);
}
......@@ -86,19 +86,30 @@ ONEFLOW_API_PYBIND11_MODULE("nn.graph.", m) {
m.def("RunLazyNNGraph", &RunLazyNNGraph);
m.def("SoftSyncNNGraphBuffers", &SoftSyncNNGraphBuffers);
m.def("AddTensorAsGraphLoss", &AddTensorAsGraphLoss);
m.def("MarkVariableGradients", [](const std::vector<std::shared_ptr<one::Tensor>>& variables,
const std::vector<std::shared_ptr<one::Tensor>>& gradients) {
one::TensorTuple variable_tuple(variables.size());
one::TensorTuple gradient_tuple(gradients.size());
for (int i = 0; i < variables.size(); ++i) { variable_tuple[i] = variables[i]; }
for (int i = 0; i < gradients.size(); ++i) { gradient_tuple[i] = gradients[i]; }
return MarkVariableGradients(variable_tuple, gradient_tuple);
});
m.def("ConvertJobToTosaIR", [](const std::string& serialized_job) -> Maybe<std::string> {
Job job;
CHECK_OR_RETURN(job.ParseFromString(serialized_job)) << "serialized job conversion failed.";
return ConvertJobToTosaIR(&job);
});
m.def("SaveJobToIR",
[](const std::string& serialized_job, const std::string& path) -> Maybe<void> {
Job job;
CHECK_OR_RETURN(TxtString2PbMessage(serialized_job, &job))
<< "serialized job conversion failed.";
return SaveJobToIR(&job, path);
});
m.def(
"SaveJobToIR", [](const std::string& serialized_job, const std::string& path) -> Maybe<void> {
Job job;
CHECK_OR_RETURN(job.ParseFromString(serialized_job)) << "serialized job conversion failed.";
return SaveJobToIR(&job, path);
});
m.def("ConvertJobToIR", [](const std::string& serialized_job) -> Maybe<std::string> {
Job job;
CHECK_OR_RETURN(job.ParseFromString(serialized_job)) << "serialized job conversion failed.";
return ConvertJobToIR(&job);
});
m.def("LoadSerializedJobFromIR", [](const std::string& path) -> Maybe<py::bytes> {
Job job;
JUST(LoadJobFromIR(&job, path));
......
......@@ -222,6 +222,7 @@ class PersistentTableReaderImpl : public PersistentTableReader {
options.value_size = storage_dim * sizeof(Value);
options.target_chunk_size_mb = target_chunk_size_mb;
options.physical_block_size = physical_block_size;
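// The reader never mutates the table, so open the underlying storage read-only.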
options.read_only = true;
tables_[i] = NewPersistentTable(options);
iterators_[i] =
std::unique_ptr<PersistentTable::Iterator>(tables_[i]->ReadSnapshot(snapshot_name));
......