Commit a715222c authored by yuguo's avatar yuguo
Browse files

0.9.1-rocm

parent f262efc9
......@@ -28,9 +28,10 @@ inline Maybe<void> FillVariableTensorMgr(
auto mgr = Singleton<VariableTensorMgr>::Get();
return mgr->Fill(variable_op_names, variable_tensors);
}
inline void ClearVariableTensorMgr() {
inline void ResetVariableTensorMgr() {
auto mgr = Singleton<VariableTensorMgr>::Get();
mgr->Clear();
mgr->Reset();
}
inline std::tuple<std::vector<std::string>, std::vector<std::shared_ptr<one::Tensor>>>
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/api/cpp/embedding/embedding.h"
#include "oneflow/core/embedding/embedding_manager.h"
namespace oneflow_api {
namespace embedding {
// Registers a OneEmbedding key-value store described by the serialized-JSON
// `key_value_store_options` and returns the embedding name recorded in the
// options. Only usable in CUDA/ROCm builds; otherwise aborts via
// UNIMPLEMENTED().
std::string CreateKeyValueStore(const std::string& key_value_store_options, int64_t local_rank_id,
                                int64_t rank_id, int64_t world_size) {
  oneflow::embedding::KeyValueStoreOptions store_options(key_value_store_options);
#if defined(WITH_CUDA) || defined(WITH_ROCM)
  auto* embedding_mgr = oneflow::Singleton<oneflow::embedding::EmbeddingManager>::Get();
  embedding_mgr->CreateKeyValueStore(store_options, local_rank_id, rank_id, world_size);
  return store_options.Name();
#else
  UNIMPLEMENTED() << "OneEmbedding Only Support with CUDA";
#endif
  // Unreachable in GPU builds; satisfies the return requirement otherwise.
  return "";
}
// Loads a previously saved snapshot `snapshot_name` into the key-value store
// registered under `embedding_name` for the given rank. Only usable in
// CUDA/ROCm builds; otherwise aborts via UNIMPLEMENTED().
void LoadSnapshot(const std::string& snapshot_name, const std::string& embedding_name,
                  int64_t local_rank_id, int64_t rank_id) {
#if defined(WITH_CUDA) || defined(WITH_ROCM)
  // NOTE: EmbeddingManager::LoadSnapshot takes the embedding name first and
  // the snapshot name last — argument order differs from this wrapper's.
  oneflow::Singleton<oneflow::embedding::EmbeddingManager>::Get()->LoadSnapshot(
      embedding_name, local_rank_id, rank_id, snapshot_name);
#else
  UNIMPLEMENTED() << "OneEmbedding Only Support with CUDA";
#endif
}
} // namespace embedding
} // namespace oneflow_api
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_CPP_ONE_EMBEDDING_ONE_EMBEDDING_H_
#define ONEFLOW_API_CPP_ONE_EMBEDDING_ONE_EMBEDDING_H_
#include <string>
namespace oneflow_api {
namespace embedding {
// CreateKeyValueStore returns embedding name in the options.
std::string CreateKeyValueStore(const std::string& key_value_store_options, int64_t local_rank_id,
int64_t rank_id,
int64_t world_size); // key_value_store_options is
// a serialized json string.
void LoadSnapshot(const std::string& snapshot_name, const std::string& embedding_name,
int64_t local_rank_id, int64_t rank_id);
} // namespace embedding
} // namespace oneflow_api
#endif // ONEFLOW_API_CPP_ONE_EMBEDDING_ONE_EMBEDDING_H_
......@@ -18,7 +18,7 @@ limitations under the License.
#include "oneflow/api/cpp/env.h"
#include "oneflow/api/cpp/env_impl.h"
#include "oneflow/core/framework/shut_down_util.h"
#include "oneflow/core/thread/thread_consistent_id.h"
#include "oneflow/core/thread/thread_global_id.h"
namespace oneflow_api {
void initialize() {
......@@ -29,7 +29,7 @@ void initialize() {
void release() {
if (of::Singleton<OneFlowEnv>::Get() != nullptr) { of::Singleton<OneFlowEnv>::Delete(); }
of::SetShuttingDown();
of::ResetThisThreadUniqueConsistentId().GetOrThrow();
of::ResetThisThreadUniqueGlobalId().GetOrThrow();
}
} // namespace oneflow_api
......@@ -107,6 +107,9 @@ void CompleteEnvProto(of::EnvProto& env_proto) {
if (HasEnvVar("GLOG_logbuflevel")) {
cpp_logging_conf->set_logbuflevel(GetEnvVar("GLOG_logbuflevel", -1));
}
if (HasEnvVar("GLOG_minloglevel")) {
cpp_logging_conf->set_minloglevel(GetEnvVar("GLOG_minloglevel", -1));
}
}
} // namespace
......@@ -119,15 +122,15 @@ OneFlowEnv::OneFlowEnv() {
of::ConfigProto config_proto;
config_proto.mutable_resource()->set_cpu_device_num(1); // useless, will be set in TryInit
const int64_t session_id = of::NewSessionId();
CHECK_JUST(of::RegsiterSession(session_id));
config_proto.set_session_id(session_id);
CHECK(of::RegsterSessionId(session_id));
session_ctx_ = std::make_shared<of::MultiClientSessionContext>(env_ctx_);
CHECK_JUST(session_ctx_->TryInit(config_proto));
}
OneFlowEnv::~OneFlowEnv() {
session_ctx_.reset();
CHECK(of::ClearSessionId(CHECK_JUST(of::GetDefaultSessionId())));
env_ctx_.reset();
}
......
......@@ -13,8 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/api/common/ofblob.h"
#include "nlohmann/json.hpp"
#include "oneflow/api/common/variable_tensor_mgr.h"
#include "oneflow/api/cpp/env_impl.h"
#include "oneflow/api/cpp/framework/device.h"
......@@ -23,6 +22,7 @@ limitations under the License.
#include "oneflow/api/cpp/framework/ivalue.h"
#include "oneflow/api/cpp/framework/shape.h"
#include "oneflow/api/cpp/framework/tensor.h"
#include "oneflow/api/cpp/embedding/embedding.h"
#include "oneflow/api/common/job_build_and_infer_ctx.h"
#include "oneflow/api/python/job_build/job_build_and_infer.h"
#include "oneflow/core/common/data_type.pb.h"
......@@ -32,6 +32,8 @@ limitations under the License.
#include "oneflow/core/common/shape.h"
#include "oneflow/core/common/symbol.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/embedding/posix_file.h"
#include "oneflow/core/eager/eager_blob_object.h"
#include "oneflow/core/framework/device.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/multi_client_session_context.h"
......@@ -52,6 +54,8 @@ limitations under the License.
#include "oneflow/core/job/parallel_desc.h"
#include "oneflow/core/job/scope.h"
#include "oneflow/core/job/session.h"
#include "oneflow/core/kernel/kernel_util.h"
#include "oneflow/core/memory/memory_case_util.h"
#include "oneflow/core/operator/interface_blob_conf.pb.h"
#include "oneflow/core/operator/op_conf.pb.h"
#include "oneflow/core/register/logical_blob_id.pb.h"
......@@ -108,6 +112,30 @@ Shape OfShapeToOfApiShape(const of::Shape& of_shape) {
return Shape(dims);
}
#ifdef __linux__
// Restores OneEmbedding key-value stores for a saved model when the model
// directory contains an "one_embedding_options.json" description file.
// Single-process defaults are used (rank 0, world size 1).
// NOTE(review): `device` is currently unused — kept for interface stability.
void LoadOneEmbedding(const std::string& model_path, const Device& device) {
  const std::string options_file_name("one_embedding_options.json");
  const std::string options_file_path(oneflow::JoinPath(model_path, options_file_name));
  if (!oneflow::embedding::PosixFile::FileExists(options_file_path)) { return; }
  std::ifstream options_stream(options_file_path);
  auto options_json = nlohmann::json::parse(options_stream);
  for (auto& entry : options_json["embedding"]) {
    const std::string snapshot_path = entry["snapshot"];
    auto kv_options = entry["kv_options"];
    const std::string embedding_name = embedding::CreateKeyValueStore(kv_options.dump(),
                                                                      /*local_rank_id=*/0,
                                                                      /*rank_id=*/0,
                                                                      /*world_size=*/1);
    embedding::LoadSnapshot(snapshot_path, embedding_name, /*local_rank_id=*/0,
                            /*rank_id=*/0);
  }
}
#endif  // __linux__
} // namespace
class Graph::GraphImpl final {
......@@ -202,6 +230,9 @@ IValue Graph::Forward(const IValue& inputs) {
void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); }
// Loads a saved graph from `model_path` onto `device`. On Linux this first
// restores any OneEmbedding key-value stores saved alongside the model.
Graph Graph::Load(const std::string& model_path, const Device& device) {
#ifdef __linux__
  LoadOneEmbedding(model_path, device);
#endif  // __linux__
  return Graph(model_path, device);
}
......@@ -306,7 +337,7 @@ of::Maybe<void> Graph::GraphImpl::AddOp(of::OperatorConf op_conf) {
0, batch_size_);
}
auto* ctx = JUST(of::GetCurInferCtx());
JUST(ctx->AddAndInferConsistentOp(op_conf));
JUST(ctx->AddAndInferGlobalOp(op_conf));
return of::Maybe<void>::Ok();
}
......@@ -374,11 +405,12 @@ of::Maybe<void> Graph::GraphImpl::LoadCheckpoint() {
ss << variable_file.rdbuf();
return ss.str();
}();
const auto& callback = [&](uint64_t of_blob_ptr) {
CHECK_JUST(of::BlobBufferCopyUtil<void>::From(
of_blob_ptr, buffer.data(),
variable_tensor->shape()->elem_cnt()
* of::GetSizeOfDataType(variable_tensor->dtype()->data_type())));
const auto& callback = [&](of::ep::Stream* stream,
const std::shared_ptr<of::vm::EagerBlobObject>& eager_blob_object) {
of::AutoMemcpy(stream, eager_blob_object->mut_dptr(), buffer.data(),
variable_tensor->shape()->elem_cnt()
* of::GetSizeOfDataType(variable_tensor->dtype()->data_type()),
eager_blob_object->mem_case(), of::memory::MakeHostMemCase());
};
JUST(of::one::SyncAccessTensorWithTimeOut(variable_tensor, callback, "mut"));
}
......
......@@ -21,9 +21,8 @@ limitations under the License.
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/job/lazy_mode.h"
#include "oneflow/core/kernel/kernel_util.h"
#include "oneflow/core/framework/instructions_builder.h"
#include "oneflow/core/register/ofblob.h"
#include "oneflow/api/common/ofblob.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/vm/virtual_machine.h"
......@@ -68,14 +67,14 @@ Device Tensor::device() const {
DType Tensor::dtype() const { return static_cast<DType>(tensor_->dtype()->data_type()); }
void Tensor::zeros_() {
std::shared_ptr<of::one::MirroredTensor> local_tensor =
tensor_->AsMirroredTensor().GetPtrOrThrow();
std::shared_ptr<of::one::LocalTensor> local_tensor = tensor_->AsLocalTensor().GetPtrOrThrow();
of::PhysicalRun([&](of::InstructionsBuilder* builder) -> of::Maybe<void> {
JUST(builder->AccessBlobByCallback(
local_tensor,
[](uint64_t of_blob_ptr) {
auto* of_blob = reinterpret_cast<of::OfBlob*>(of_blob_ptr);
of_blob->AsyncAutoMemset(0);
[](of::ep::Stream* stream,
const std::shared_ptr<of::vm::EagerBlobObject>& eager_blob_object) {
of::AutoMemset(stream, eager_blob_object->mut_dptr(), 0,
eager_blob_object->ByteSizeOfBlobBody(), eager_blob_object->mem_case());
},
"mut"));
return of::Maybe<void>::Ok();
......@@ -85,14 +84,16 @@ void Tensor::zeros_() {
Tensor Tensor::from_buffer(const void* buffer, const Shape& shape, const Device& device,
const DType& dtype) {
Tensor tensor(shape, device, dtype);
std::shared_ptr<of::one::MirroredTensor> local_tensor =
tensor.tensor_->AsMirroredTensor().GetPtrOrThrow();
std::shared_ptr<of::one::LocalTensor> local_tensor =
tensor.tensor_->AsLocalTensor().GetPtrOrThrow();
of::PhysicalRun([&](of::InstructionsBuilder* builder) -> of::Maybe<void> {
return builder->AccessBlobByCallback(
local_tensor,
[buffer, shape, dtype](uint64_t ofblob_ptr) {
CHECK_JUST(of::BlobBufferCopyUtil<void>::From(ofblob_ptr, buffer,
shape.Count(0) * GetDTypeSize(dtype)));
[buffer, shape, dtype](of::ep::Stream* stream,
const std::shared_ptr<of::vm::EagerBlobObject>& eager_blob_object) {
of::AutoMemcpy(stream, eager_blob_object->mut_dptr(), buffer,
shape.Count(0) * GetDTypeSize(dtype), eager_blob_object->mem_case(),
of::memory::MakeHostMemCase());
},
"mut");
}).GetOrThrow();
......@@ -101,14 +102,16 @@ Tensor Tensor::from_buffer(const void* buffer, const Shape& shape, const Device&
template<typename T>
void Tensor::copy_to(T* buffer) const {
std::shared_ptr<of::one::MirroredTensor> local_tensor =
tensor_->AsMirroredTensor().GetPtrOrThrow();
std::shared_ptr<of::one::LocalTensor> local_tensor = tensor_->AsLocalTensor().GetPtrOrThrow();
const auto shape = this->shape();
const auto& Callback = [buffer, shape](uint64_t ofblob_ptr) {
CHECK_JUST(of::BlobBufferCopyUtil<T>::To(ofblob_ptr, buffer, shape.Count(0)));
const auto& Callback = [buffer, shape](
of::ep::Stream* stream,
const std::shared_ptr<of::vm::EagerBlobObject>& eager_blob_object) {
of::AutoMemcpy(stream, buffer, eager_blob_object->mut_dptr(), shape.Count(0) * sizeof(T),
of::memory::MakeHostMemCase(), eager_blob_object->mem_case());
};
auto btb = std::make_shared<of::BlockingThenBusy>(1);
auto btb = std::make_shared<of::BlockingThenBusy>();
CHECK_JUST(of::PhysicalRun([&](of::InstructionsBuilder* builder) -> of::Maybe<void> {
return builder->SyncAccessBlobByCallback(local_tensor, btb, Callback, "const");
}));
......
......@@ -30,13 +30,8 @@ limitations under the License.
namespace oneflow_api {
namespace {
std::mt19937 rng(std::random_device{}());
}
Shape RandomShape() {
thread_local static std::mt19937 rng(std::random_device{}());
std::uniform_int_distribution<> dist_ndim(1, 4), dist_dims(16, 64);
std::vector<std::int64_t> dims(dist_ndim(rng), 0);
for (auto& x : dims) { x = dist_dims(rng); }
......@@ -45,6 +40,7 @@ Shape RandomShape() {
template<typename T>
std::vector<T> RandomData(size_t size) {
thread_local static std::mt19937 rng(std::random_device{}());
std::uniform_int_distribution<> dist(-100, 100);
std::vector<T> data(size);
for (auto& x : data) { x = static_cast<T>(dist(rng)); }
......
......@@ -64,27 +64,7 @@ TEST(Api, graph_cpu_test) {
Forward(graph, device, 1);
}
#ifdef WITH_CUDA
TEST(Api, graph_gpu_test) {
EnvScope scope;
Device device("cuda", 0);
Graph graph = LoadGraph(device);
Forward(graph, device);
}
TEST(Api, graph_multi_gpu_test) {
EnvScope scope;
Device device("cuda", 0);
Graph graph = LoadGraph(device);
Forward(graph, device);
Device device1("cuda", 1);
Graph graph1 = LoadGraph(device1);
Forward(graph1, device1);
}
#endif
#ifdef WITH_ROCM
#if defined(WITH_CUDA) || defined(WITH_ROCM)
TEST(Api, graph_gpu_test) {
EnvScope scope;
Device device("cuda", 0);
......@@ -112,7 +92,7 @@ TEST(Api, graph_cpu_batching_test) {
Forward(graph, device, 10);
}
#ifdef WITH_CUDA
#if defined(WITH_CUDA) || defined(WITH_ROCM)
TEST(Api, graph_gpu_batching_test) {
EnvScope scope;
Device device("cuda", 0);
......
module {
oneflow.job @MyGraph_0(%arg0: tensor<1x3xf32>) -> tensor<1x4xf32> {
%output = "oneflow.input"(%arg0) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-input_0", output_lbns = ["_MyGraph_0-input_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 3 : si64]} : (tensor<1x3xf32>) -> tensor<1x3xf32>
%output_0 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.a", output_lbns = ["model.a/out"], scope_symbol_id = 4611686018427482111 : i64, shape = [3 : si64, 4 : si64]} : () -> tensor<3x4xf32>
%output_1 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.b", output_lbns = ["model.b/out"], scope_symbol_id = 4611686018427494399 : i64, shape = [4 : si64]} : () -> tensor<4xf32>
%output_0 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], parallel = #sbp.parallel<[] -> [#sbp.B]>, op_name = "model.a", output_lbns = ["model.a/out"], scope_symbol_id = 4611686018427482111 : i64, shape = [3 : si64, 4 : si64]} : () -> tensor<3x4xf32>
%output_1 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], parallel = #sbp.parallel<[] -> [#sbp.B]>, op_name = "model.b", output_lbns = ["model.b/out"], scope_symbol_id = 4611686018427494399 : i64, shape = [4 : si64]} : () -> tensor<4xf32>
%0 = "oneflow.matmul"(%output, %output_0) {alpha = 1.000000e+00 : f64, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-matmul_0", output_lbns = ["model-matmul_0/out_0"], scope_symbol_id = 4611686018427486207 : i64, transpose_a = false, transpose_b = false} : (tensor<1x3xf32>, tensor<3x4xf32>) -> tensor<1x4xf32>
%1 = "oneflow.broadcast_add"(%0, %output_1) {device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-broadcast_add_1", output_lbns = ["model-broadcast_add_1/z_0"], scope_symbol_id = 4611686018427486207 : i64} : (tensor<1x4xf32>, tensor<4xf32>) -> tensor<1x4xf32>
%output_2 = "oneflow.output"(%1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-output_0", output_lbns = ["_MyGraph_0-output_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 4 : si64]} : (tensor<1x4xf32>) -> tensor<1x4xf32>
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <gtest/gtest.h>
#include "oneflow/api/cpp/tests/api_test.h"
namespace oneflow_api {
#if defined(WITH_CUDA) || defined(WITH_ROCM)
TEST(Api, embedding_test) {
  EnvScope scope;
  Device device("cuda");
  Graph graph = Graph::Load("/path/to/embedding", device);
  // 39 feature slots per sample, every id set to 1.
  const int64_t batch_size = 10000;
  const int64_t num_features = 39;
  std::vector<int64_t> data(batch_size * num_features, 1);
  std::vector<Tensor> inputs;
  inputs.emplace_back(
      Tensor::from_buffer(data.data(), Shape({batch_size, num_features}), device, DType::kInt64));
  const auto& value = graph.Forward(inputs);
  ASSERT_TRUE(value.IsTensor());
  Tensor output = value.ToTensor();
  // Expect one logit per sample.
  const Shape shape = output.shape();
  ASSERT_EQ(shape.At(0), batch_size);
  ASSERT_EQ(shape.At(1), 1);
  std::vector<float> result(batch_size);
  output.copy_to(result.data());
}
#endif
} // namespace oneflow_api
......@@ -25,16 +25,7 @@ TEST(Api, device) {
auto device = Device("cpu");
ASSERT_EQ(device.type(), "cpu");
#ifdef WITH_CUDA
device = Device("cuda:0");
ASSERT_EQ(device.type(), "cuda");
ASSERT_EQ(device.device_id(), 0);
device = Device("cuda", 1);
ASSERT_EQ(device.type(), "cuda");
ASSERT_EQ(device.device_id(), 1);
#endif
#ifdef WITH_ROCM
#if defined(WITH_CUDA) || defined(WITH_ROCM)
device = Device("cuda:0");
ASSERT_EQ(device.type(), "cuda");
ASSERT_EQ(device.device_id(), 0);
......
......@@ -16,9 +16,13 @@ limitations under the License.
#include <pybind11/pybind11.h>
#include <memory>
#include <utility>
#include <vector>
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/api/python/job_build/job_build_and_infer.h"
#include "oneflow/core/common/throw.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/scope_util.h"
#include "oneflow/core/framework/tensor.h"
#include "oneflow/core/framework/tensor_tuple.h"
#include "oneflow/core/autograd/autograd_engine.h"
......@@ -26,6 +30,7 @@ limitations under the License.
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/common/container_util.h"
#include "oneflow/core/framework/saved_tensor_hooks.h"
namespace oneflow {
namespace autograd {
......@@ -50,7 +55,8 @@ Maybe<one::TensorTuple> CheckAndInitOutGrads(const one::TensorTuple& outputs,
<< " gradients";
for (int i = 0; i < outputs.size(); ++i) {
CHECK_OR_RETURN(outputs.at(i)->requires_grad())
<< "All output tensors `.requires_grad` should be true";
<< "\nRuntimeError: element " << i
<< " of tensors does not require grad and does not have a grad_fn";
if (!outputs.at(i)->grad_fn_node()) {
CHECK_OR_RETURN(outputs.at(i)->is_leaf())
<< "output[" << i << "] doesn't have grad_fn and it is not leaf tensor!\n"
......@@ -66,7 +72,6 @@ Maybe<one::TensorTuple> CheckAndInitOutGrads(const one::TensorTuple& outputs,
CHECK_OR_RETURN(*(outputs.at(i)->shape()) == *(out_grads.at(i)->shape()))
<< "out_grad's shape must be same as output's (" << outputs.at(i)->shape()->ToString()
<< " vs " << out_grads.at(i)->shape()->ToString() << ")";
// if (outputs.at(i)->dtype() != out_grads.at(i)->dtype()) {
if (JUST(oneflow::VectorAt(outputs, i))->dtype()
!= JUST(oneflow::VectorAt(out_grads, i))->dtype()) {
JUST(oneflow::VectorAt(*gradients, i)) =
......@@ -76,6 +81,7 @@ Maybe<one::TensorTuple> CheckAndInitOutGrads(const one::TensorTuple& outputs,
}
}
}
if (LazyMode::is_enabled()) { JUST(MarkOutputGradients(outputs, *gradients)); }
return gradients;
}
......@@ -83,6 +89,7 @@ Maybe<one::TensorTuple> CheckAndInitOutGrads(const one::TensorTuple& outputs,
Maybe<one::TensorTuple> Backward(const one::TensorTuple& outputs, const one::TensorTuple& out_grads,
bool retain_graph, bool create_graph) {
BackwardPassScopeGuard backward_guard;
if (create_graph) { retain_graph = true; }
std::shared_ptr<one::TensorTuple> gradients = JUST(CheckAndInitOutGrads(outputs, out_grads));
JUST(one::GetThreadLocalAutogradEngine()->RunBackwardAndSaveGrads4LeafTensorIf(
......@@ -93,6 +100,7 @@ Maybe<one::TensorTuple> Backward(const one::TensorTuple& outputs, const one::Ten
Maybe<one::TensorTuple> Grad(const one::TensorTuple& outputs, const one::TensorTuple& inputs,
const one::TensorTuple& out_grads, bool retain_graph,
bool create_graph) {
BackwardPassScopeGuard backward_guard;
if (create_graph) { retain_graph = true; }
if (inputs.empty()) { return Backward(outputs, out_grads, retain_graph, create_graph); }
CHECK_OR_RETURN(std::all_of(
......@@ -104,9 +112,80 @@ Maybe<one::TensorTuple> Grad(const one::TensorTuple& outputs, const one::TensorT
outputs, inputs, *gradients, retain_graph, create_graph);
}
namespace py = pybind11;
class PySavedTensorHook final : public one::SavedTensorHook {
public:
PySavedTensorHook(const py::function& pack_hook, const py::function& unpack_hook)
: pack_hook_(pack_hook), unpack_hook_(unpack_hook) {}
void pack(const std::shared_ptr<one::Tensor>& tensor) {
py::gil_scoped_acquire acquire;
py::object packed = pack_hook_(tensor);
data_ = packed.release().ptr();
}
std::shared_ptr<one::Tensor> unpack() {
py::gil_scoped_acquire acquire;
py::object obj = py::cast<py::object>(data_);
py::object x = unpack_hook_(obj);
std::shared_ptr<one::Tensor> tensor;
try {
tensor = py::cast<std::shared_ptr<one::Tensor>>(x);
} catch (const py::cast_error& e) {
THROW(RuntimeError) << "unpack_hook should return a Tensor, but got `"
<< py::str(x.get_type()).cast<std::string>() << "` instead";
}
return tensor;
}
private:
PyObject* data_ = nullptr;
py::function pack_hook_;
py::function unpack_hook_;
};
// Factory that hands out PySavedTensorHook instances built from the most
// recently registered (pack_hook, unpack_hook) pair. Pairs are pushed/popped
// like a stack by the Python-side context manager.
class PySavedTensorHookCreator final : public one::SavedTensorHookCreator {
 public:
  // Returns a hook wrapping the newest registered pair, or nullptr when no
  // hooks are registered.
  std::unique_ptr<one::SavedTensorHook> new_saved_tensor_hook() const override {
    if (hooks_.empty()) { return nullptr; }
    const auto& newest = hooks_.back();
    return std::make_unique<PySavedTensorHook>(newest.first, newest.second);
  }

  void append_new_hooks(const py::function& pack_hook, const py::function& unpack_hook) {
    hooks_.emplace_back(pack_hook, unpack_hook);
  }

  void pop_hooks() {
    CHECK_OR_THROW(!hooks_.empty()) << "pop_hooks should not be called when there are no hooks";
    hooks_.pop_back();
  }

 private:
  // Stack of (pack_hook, unpack_hook) pairs; the newest pair wins.
  small_vector<std::pair<py::function, py::function>, 1> hooks_;
};
// Python bindings for oneflow.autograd: backward/grad entry points plus the
// `graph` submodule that manages saved-tensor hooks (mirroring
// torch.autograd.graph.saved_tensors_hooks).
ONEFLOW_API_PYBIND11_MODULE("autograd", m) {
  m.def("backward", &Backward);
  m.def("grad", &Grad);
  m.def_submodule("graph")
      // Installs the Python-aware hook creator as the process-wide singleton.
      // Must be called once before append_new_hooks/pop_hooks.
      .def("register_saved_tensors_hook_manager",
           []() {
             Singleton<one::SavedTensorHookCreator>::SetAllocated(new PySavedTensorHookCreator());
           })
      // Pushes a (pack, unpack) pair; the newest pair is used for new hooks.
      .def("append_new_hooks",
           [](const py::function& pack_hook, const py::function& unpack_hook) {
             PySavedTensorHookCreator* creator = dynamic_cast<PySavedTensorHookCreator*>(
                 Singleton<one::SavedTensorHookCreator>::Get());
             CHECK_NOTNULL_OR_THROW(creator)
                 << "`register_saved_tensors_hook_manager` should be called "
                    "before calling `append_new_hooks`";
             creator->append_new_hooks(pack_hook, unpack_hook);
           })
      // Pops the most recently pushed pair (context-manager exit path).
      .def("pop_hooks", []() {
        PySavedTensorHookCreator* creator =
            dynamic_cast<PySavedTensorHookCreator*>(Singleton<one::SavedTensorHookCreator>::Get());
        CHECK_NOTNULL_OR_THROW(creator) << "`register_saved_tensors_hook_manager` should be called "
                                           "before calling `pop_hooks`";
        creator->pop_hooks();
      });
}
} // namespace autograd
......
......@@ -38,17 +38,22 @@ Maybe<one::TensorTuple> UnpackTensorTuple(const py::object& input) {
tp.emplace_back(input.cast<std::shared_ptr<one::Tensor>>());
} else if (py::isinstance<py::tuple>(input)) {
auto tuple = input.cast<py::tuple>();
tp.resize(tuple.size());
for (int i = 0; i < tuple.size(); ++i) {
PyObject* obj = tuple[i].ptr();
if (!one::PyTensor_Check(obj)) {
if (obj == Py_None) {
// do nothing
} else if (one::PyTensor_Check(obj)) {
tp[i] = one::PyTensor_Unpack(obj);
} else {
return Error::RuntimeError()
<< "expected Tensor as element " << i << ", but got "
<< "expected Tensor or None as element " << i << ", but got "
<< one::functional::PyStringAsString(PyObject_Str((PyObject*)Py_TYPE(obj)));
}
tp.emplace_back(one::PyTensor_Unpack(obj));
}
} else {
return Error::RuntimeError() << "Only support tensor or list of tensors";
return Error::RuntimeError()
<< "autograd.Function's output only support tensor or list of tensors";
}
return tp;
}
......@@ -90,22 +95,6 @@ ONEFLOW_API_PYBIND11_MODULE("autograd", m) {
*input_tensor_tuple));
return PackTensorTuple(*res);
});
py::class_<FunctionAutoGradCaptureState, std::shared_ptr<FunctionAutoGradCaptureState>>(
m, "FunctionAutoGradCaptureState")
.def(py::init([]() { return std::make_shared<FunctionAutoGradCaptureState>(); }))
.def("save_for_backward",
[](FunctionAutoGradCaptureState& ctx, const py::args& input) {
const auto& tensors = UnpackTensorTuple(input).GetOrThrow();
for (const auto& tensor : tensors) { ctx.SaveTensorForBackward(tensor); }
})
.def_property_readonly(
"saved_tensors",
[](const FunctionAutoGradCaptureState& ctx) { return py::cast(ctx.SavedTensors()); })
.def("mark_non_differentiable", [](FunctionAutoGradCaptureState& ctx, const py::args& input) {
const auto& tensors = UnpackTensorTuple(input).GetOrThrow();
for (const auto& tensor : tensors) { ctx.MarkNonDifferentiable(tensor); }
});
}
} // namespace one
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/api/python/autograd/autograd_function_state.h"

#include <memory>
#include <new>

#include <pybind11/pybind11.h>

#include "oneflow/api/python/exception/exception.h"
#include "oneflow/api/python/functional/common.h"
#include "oneflow/api/python/of_api_registry.h"
namespace py = pybind11;
namespace oneflow {
namespace one {
namespace {
// Returns the raw capture-state pointer behind `state`, or nullptr (with a
// Python RuntimeError set) when the weak reference has expired. Callers MUST
// check for nullptr before dereferencing; the returned raw pointer does not
// extend the object's lifetime.
inline FunctionAutoGradCaptureState* CheckAndGetStateData(PyAutogradFunctionState* state) {
  // Lock the weak_ptr exactly once so the aliveness check and the pointer we
  // hand back come from the same observation (the original locked twice).
  const std::shared_ptr<FunctionAutoGradCaptureState> data = state->data.lock();
  if (!data) {
    PyErr_Format(PyExc_RuntimeError, "Data is deallocated. Please don't hold context outside "
                                     "autograd.Function.forward or autograd.Function.backward");
    return nullptr;
  }
  return data.get();
}
} // namespace
#if PY_VERSION_HEX < 0x03070000
#define PYGETSET_NAME(name) const_cast<char*>(name)
#else
#define PYGETSET_NAME(name) (name)
#endif
#define PY_XINCREF(p) (({ Py_XINCREF(p); }), (p))
// tp_new: allocates the instance and prepares its members. tp_alloc zero-fills
// the storage but does NOT run C++ constructors, so the non-trivial weak_ptr
// member is constructed explicitly with placement new before it is ever
// assigned to (assigning to never-constructed storage is UB).
static PyObject* PyAutogradFunctionState_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
  PyAutogradFunctionState* self = (PyAutogradFunctionState*)type->tp_alloc(type, 0);
  if (self == NULL) { return NULL; }
  new (&self->data) std::weak_ptr<FunctionAutoGradCaptureState>();
  self->dynamic_attr_dict = PyDict_New();
  if (self->dynamic_attr_dict == NULL) {
    Py_DECREF(self);
    return NULL;
  }
  return (PyObject*)self;
}
// tp_dealloc: releases the attribute dict, runs the C++ destructor of the
// weak_ptr member (tp_free only releases raw memory and would otherwise leak
// the weak count on its control block), then frees the object storage.
static void PyAutogradFunctionState_dealloc(PyAutogradFunctionState* self) {
  Py_XDECREF(self->dynamic_attr_dict);
  self->data.~weak_ptr();
  Py_TYPE(self)->tp_free((PyObject*)self);
}
// PyMethodDef start
// ctx.save_for_backward(*tensors): stashes tensors on the capture state so the
// backward pass can retrieve them via the `saved_tensors` property.
static PyObject* PyAutogradFunctionState_save_for_backward(PyObject* self, PyObject* args) {
  HANDLE_ERRORS
  auto* _self = (PyAutogradFunctionState*)self;
  if (!functional::PyTensorSequenceCheck(args)) {
    return PyErr_Format(PyExc_TypeError, "save_for_backward() only support Tensor or Tensors");
  }
  // Resolve the capture state once (not per tensor) and propagate the
  // RuntimeError set by CheckAndGetStateData instead of dereferencing nullptr.
  FunctionAutoGradCaptureState* state_data = CheckAndGetStateData(_self);
  if (state_data == nullptr) { return NULL; }
  const std::vector<std::shared_ptr<Tensor>>& tensor_list =
      functional::PyUnpackTensorSequence(args);
  for (const auto& tensor : tensor_list) { state_data->SaveTensorForBackward(tensor); }
  Py_RETURN_NONE;
  END_HANDLE_ERRORS
}
// ctx.mark_non_differentiable(*tensors): flags outputs that should not
// receive gradients during the backward pass.
static PyObject* PyAutogradFunctionState_mark_non_differentiable(PyObject* self, PyObject* args) {
  HANDLE_ERRORS
  auto* _self = (PyAutogradFunctionState*)self;
  if (!functional::PyTensorSequenceCheck(args)) {
    // Fixed copy-pasted message that previously named save_for_backward().
    return PyErr_Format(PyExc_TypeError,
                        "mark_non_differentiable() only support Tensor or Tensors");
  }
  // Resolve the capture state once and propagate the RuntimeError set by
  // CheckAndGetStateData instead of dereferencing nullptr.
  FunctionAutoGradCaptureState* state_data = CheckAndGetStateData(_self);
  if (state_data == nullptr) { return NULL; }
  const std::vector<std::shared_ptr<Tensor>>& tensor_list =
      functional::PyUnpackTensorSequence(args);
  for (const auto& tensor : tensor_list) { state_data->MarkNonDifferentiable(tensor); }
  Py_RETURN_NONE;
  END_HANDLE_ERRORS
}
// ctx._is_data_valid(): True while the underlying FunctionAutoGradCaptureState
// is still alive (i.e. the weak reference has not expired).
static PyObject* PyAutogradFunctionState_is_data_valid(PyObject* self) {
  auto* _self = (PyAutogradFunctionState*)self;
  const bool alive = static_cast<bool>(_self->data.lock());
  return functional::CastToPyObject(alive);
}
// Method table for FunctionCtx. save_for_backward/mark_non_differentiable
// take *args tensor sequences (METH_VARARGS); _is_data_valid takes none.
static PyMethodDef PyAutogradFunctionState_methods[] = {
    {"save_for_backward", (PyCFunction)PyAutogradFunctionState_save_for_backward, METH_VARARGS,
     NULL},
    {"mark_non_differentiable", (PyCFunction)PyAutogradFunctionState_mark_non_differentiable,
     METH_VARARGS, NULL},
    {"_is_data_valid", (PyCFunction)PyAutogradFunctionState_is_data_valid, METH_NOARGS, NULL},
    {NULL} /* Sentinel */
};
// PyMethodDef end
// PyAutogradFunctionState_getset start
// Getter for ctx.saved_tensors: returns the tensors previously stored via
// save_for_backward(), or raises RuntimeError when the state has expired.
static PyObject* PyAutogradFunctionState_saved_tensors(PyObject* self, void*) {
  auto* _self = (PyAutogradFunctionState*)self;
  // Propagate the RuntimeError set by CheckAndGetStateData instead of
  // dereferencing a nullptr when the capture state is gone.
  FunctionAutoGradCaptureState* state_data = CheckAndGetStateData(_self);
  if (state_data == nullptr) { return NULL; }
  return functional::CastToPyObject<Maybe<TensorTuple>>(state_data->SavedTensors());
}
// Getter for ctx.__dict__: exposes the per-instance attribute dict.
static PyObject* PyAutogradFunctionState_get_dict(PyObject* self, PyObject* args) {
  HANDLE_ERRORS
  auto* _self = (PyAutogradFunctionState*)self;
  // A tp_getset getter must return a NEW reference; the previous code handed
  // out a borrowed one, causing a refcount underflow when the caller DECREFs.
  Py_INCREF(_self->dynamic_attr_dict);
  return _self->dynamic_attr_dict;
  END_HANDLE_ERRORS
}
// Property table: read-only `saved_tensors` and the `__dict__` view of the
// per-instance attribute dict (no setters are installed).
static PyGetSetDef PyAutogradFunctionState_properties[] = {
    {PYGETSET_NAME("saved_tensors"), (getter)PyAutogradFunctionState_saved_tensors, NULL, NULL,
     NULL},
    {PYGETSET_NAME("__dict__"), (getter)PyAutogradFunctionState_get_dict, NULL, NULL, NULL},
    {NULL} /* Sentinel */
};
// PyAutogradFunctionState_getset end
// tp_getattro: looks an attribute up in the instance's dynamic_attr_dict
// first, then falls back to generic attribute lookup (methods, properties).
PyObject* PyAutogradFunctionState_getattro(PyObject* self, PyObject* attr) {
  PyObject* res = PyDict_GetItem(((PyAutogradFunctionState*)self)->dynamic_attr_dict, attr);
  if (res != NULL) {
    // PyDict_GetItem returns a BORROWED reference; tp_getattro must return a
    // new one or the caller's DECREF corrupts the dict entry's refcount.
    Py_INCREF(res);
    return res;
  }
  // Not found in dynamic_attr_dict, try to find it in tp_dict.
  res = PyObject_GenericGetAttr(self, attr);
  if (res == NULL) {
    return PyErr_Format(PyExc_AttributeError, "attribute %s not found", PyUnicode_AsUTF8(attr));
  }
  return res;
}
// tp_setattro: stores every attribute in the instance's dynamic_attr_dict.
// `value == NULL` means attribute deletion (`del ctx.x`); PyDict_SetItem does
// not accept NULL values, so route deletion through PyDict_DelItem.
int PyAutogradFunctionState_setattro(PyObject* self, PyObject* attr, PyObject* value) {
  auto* _self = (PyAutogradFunctionState*)self;
  if (value == NULL) { return PyDict_DelItem(_self->dynamic_attr_dict, attr); }
  return PyDict_SetItem(_self->dynamic_attr_dict, attr, value);
}
// Type object for oneflow.autograd.Function.FunctionCtx. Attribute access is
// routed through the custom getattro/setattro pair backed by
// dynamic_attr_dict, which tp_dictoffset also exposes as __dict__.
PyTypeObject PyAutogradFunctionState_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) "oneflow.autograd.Function.FunctionCtx", /* tp_name */
    sizeof(PyAutogradFunctionState),                                        /* tp_basicsize */
    0,                                                                      /* tp_itemsize */
    (destructor)PyAutogradFunctionState_dealloc,                            /* tp_dealloc */
    0,                                                                      /* tp_vectorcall_offset */
    NULL,                                                                   /* tp_getattr */
    NULL,                                                                   /* tp_setattr */
    NULL,                                                                   /* tp_reserved */
    NULL,                                                                   /* tp_repr */
    NULL,                                                                   /* tp_as_number */
    NULL,                                                                   /* tp_as_sequence */
    NULL,                                                                   /* tp_as_mapping */
    NULL,                                                                   /* tp_hash */
    NULL,                                                                   /* tp_call */
    NULL,                                                                   /* tp_str */
    PyAutogradFunctionState_getattro,                                       /* tp_getattro */
    PyAutogradFunctionState_setattro,                                       /* tp_setattro */
    NULL,                                                                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,                               /* tp_flags */
    NULL,                                                                   /* tp_doc */
    NULL,                                                                   /* tp_traverse */
    NULL,                                                                   /* tp_clear */
    NULL,                                                                   /* tp_richcompare */
    0,                                                                      /* tp_weaklistoffset */
    NULL,                                                                   /* tp_iter */
    NULL,                                                                   /* tp_iternext */
    PyAutogradFunctionState_methods,                                        /* tp_methods */
    NULL,                                                                   /* tp_members */
    PyAutogradFunctionState_properties,                                     /* tp_getset */
    0,                                                                      /* tp_base */
    NULL,                                                                   /* tp_dict */
    NULL,                                                                   /* tp_descr_get */
    NULL,                                                                   /* tp_descr_set */
    offsetof(PyAutogradFunctionState, dynamic_attr_dict),                   /* tp_dictoffset */
    NULL,                                                                   /* tp_init */
    NULL,                                                                   /* tp_alloc */
    PyAutogradFunctionState_new,                                            /* tp_new */
    NULL,                                                                   /* tp_free */
};
// Wraps `data` in a PyAutogradFunctionState, caching the wrapper on the
// capture state so repeated calls return the same Python object. Returns a
// new reference (or None when `data` is null).
PyObject* PyAutogradFunctionState_NewFromPtr(
    const std::shared_ptr<FunctionAutoGradCaptureState>& data) {
  if (!data) { Py_RETURN_NONE; }
  // Cached wrapper exists: hand out a fresh strong reference to it.
  if (data->pyobject()) { return PY_XINCREF((PyObject*)data->pyobject()); }
  auto* self = (PyAutogradFunctionState*)(PyObject_CallObject(
      (PyObject*)&PyAutogradFunctionState_Type, NULL));
  if (self) {
    // Two references are alive on success: the one from PyObject_CallObject
    // (returned to the caller) and the extra PY_XINCREF'd one owned by the
    // capture state, released by the unique_ptr deleter installed below.
    PY_XINCREF(self);
    self->data = data;
    CheckAndGetStateData(self)->set_pyobject_ptr(
        std::unique_ptr<void, void (*)(void*)>(self, [](void* ptr) { Py_DECREF((PyObject*)ptr); }));
  }
  return (PyObject*)self;
}
// Module init: finalizes the FunctionCtx type and registers it on the
// "autograd.Function" submodule.
ONEFLOW_API_PYBIND11_MODULE("autograd.Function", m) {
  if (PyType_Ready(&PyAutogradFunctionState_Type) < 0) { return; }
  Py_INCREF(&PyAutogradFunctionState_Type);
  if (PyModule_AddObject(m.ptr(), "FunctionCtx", (PyObject*)&PyAutogradFunctionState_Type) < 0) {
    // FIX: PyModule_AddObject steals the reference only on SUCCESS; on
    // failure we must drop the reference taken above or it leaks.
    Py_DECREF(&PyAutogradFunctionState_Type);
    return;
  }
}
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_AUTOGRAD_AUTOGRAD_FUNCTION_STATE_H_
#define ONEFLOW_API_PYTHON_AUTOGRAD_AUTOGRAD_FUNCTION_STATE_H_
#include <Python.h>
#include <pybind11/pybind11.h>
#include "oneflow/core/framework/op_expr_grad_function.h"
namespace oneflow {
namespace one {
// Python object layout backing oneflow.autograd.Function.FunctionCtx.
typedef struct {
  PyObject_HEAD;
  // Per-instance dict holding user-set attributes; consulted first by the
  // custom tp_getattro and exposed as __dict__ via tp_dictoffset.
  PyObject* dynamic_attr_dict;
  // Non-owning handle to the C++ capture state (weak, so the Python wrapper
  // does not keep the autograd state alive).  NOTE(review): a non-trivial
  // C++ member inside a PyObject requires tp_new/tp_dealloc to construct and
  // destroy it explicitly — confirm the implementation does so.
  std::weak_ptr<FunctionAutoGradCaptureState> data;
} PyAutogradFunctionState;
extern PyTypeObject PyAutogradFunctionState_Type;
// True iff `state` is a FunctionCtx instance (subclasses included).
inline bool PyAutogradFunctionState_Check(PyObject* state) {
  const int type_matches = PyObject_TypeCheck(state, &PyAutogradFunctionState_Type);
  return type_matches != 0;
}
PyObject* PyAutogradFunctionState_NewFromPtr(
const std::shared_ptr<FunctionAutoGradCaptureState>& data);
} // namespace one
} // namespace oneflow
#endif // ONEFLOW_API_PYTHON_AUTOGRAD_AUTOGRAD_FUNCTION_STATE_H_
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_CASTER_AUTOGRAD_FUNCTION_STATE_H_
#define ONEFLOW_API_PYTHON_CASTER_AUTOGRAD_FUNCTION_STATE_H_
#include <pybind11/pybind11.h>
#include "oneflow/api/python/caster/common.h"
#include "oneflow/api/python/autograd/autograd_function_state.h"
namespace py = pybind11;
namespace pybind11 {
namespace detail {
// pybind11 type caster bridging std::shared_ptr<FunctionAutoGradCaptureState>
// and the Python-side FunctionCtx object, bypassing pybind11's default
// holder machinery.
template<typename T>
struct autograd_function_state_type_caster {
 public:
  // Python -> C++: accepts None (leaves value_ null but reports success) and
  // FunctionCtx instances; anything else is rejected so pybind11 can try
  // other overloads.
  bool load(handle src, bool convert) {
    using namespace oneflow::one;
    value_ = nullptr;
    if (!src) { return false; }
    if (src.is_none()) { return true; }
    if (!PyAutogradFunctionState_Check(src.ptr())) { return false; }
    // NOTE(review): this assigns the struct's `data` member into a
    // shared_ptr; if `data` is declared as a weak_ptr (as the header in this
    // chunk shows) the assignment needs `.lock()` — confirm against the
    // actual struct definition.
    value_ = ((PyAutogradFunctionState*)src.ptr())->data;
    return true;
  }

  // C++ -> Python: wrap (or re-use the cached wrapper of) the state.
  // NewFromPtr returns a new reference, which reinterpret_steal + release
  // hands over to pybind11 without an extra refcount bump.
  template<typename U>
  static handle cast(U&& src, return_value_policy policy, handle parent) {
    using namespace oneflow::one;
    return reinterpret_steal<object>(
               PyAutogradFunctionState_NewFromPtr(
                   std::const_pointer_cast<FunctionAutoGradCaptureState>(src)))
        .release();
  }

  // Conversion operators pybind11 uses to extract the loaded value.
  operator std::shared_ptr<T>*() { return &value_; }
  operator std::shared_ptr<T>&() { return value_; }
  operator std::shared_ptr<T>&&() && { return std::move(value_); }
  static constexpr auto name = _("autograd_function_state");

 protected:
  std::shared_ptr<T> value_;
};
// Route shared_ptr<FunctionAutoGradCaptureState> (both const and non-const)
// through the custom caster above instead of pybind11's default holder
// caster.
template<>
struct type_caster<std::shared_ptr<oneflow::one::FunctionAutoGradCaptureState>>
    : public autograd_function_state_type_caster<oneflow::one::FunctionAutoGradCaptureState> {};
template<>
struct type_caster<std::shared_ptr<const oneflow::one::FunctionAutoGradCaptureState>>
    : public autograd_function_state_type_caster<const oneflow::one::FunctionAutoGradCaptureState> {
};
} // namespace detail
} // namespace pybind11
#endif // ONEFLOW_API_PYTHON_CASTER_AUTOGRAD_FUNCTION_STATE_H_
......@@ -13,8 +13,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <type_traits>
#ifndef ONEFLOW_API_PYTHON_CASTER_COMMON_H_
#define ONEFLOW_API_PYTHON_CASTER_COMMON_H_
#include <type_traits>
#include <pybind11/pybind11.h>
namespace pybind11 {
......@@ -49,3 +51,5 @@ using IsSupportedByPybind11WhenInsideSharedPtr =
} // namespace detail
} // namespace pybind11
#endif // ONEFLOW_API_PYTHON_CASTER_COMMON_H_
......@@ -13,6 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_CASTER_MAYBE_H_
#define ONEFLOW_API_PYTHON_CASTER_MAYBE_H_
#include <pybind11/pybind11.h>
#include "oneflow/api/python/caster/common.h"
......@@ -84,7 +86,7 @@ template<>
struct maybe_caster<Maybe<void>> {
template<typename T>
static handle cast(T&& src, return_value_policy policy, handle parent) {
if (!src.IsOk()) { oneflow::ThrowError(src.error()); }
if (!src.IsOk()) { oneflow::ThrowError(src.stacked_error()); }
return none().inc_ref();
}
......@@ -104,3 +106,5 @@ struct type_caster<Maybe<T>> : public maybe_caster<Maybe<T>> {};
} // namespace detail
} // namespace pybind11
#endif // ONEFLOW_API_PYTHON_CASTER_MAYBE_H_
......@@ -13,6 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_API_PYTHON_CASTER_OPTIONAL_H_
#define ONEFLOW_API_PYTHON_CASTER_OPTIONAL_H_
#include <pybind11/pybind11.h>
#include "oneflow/api/python/caster/common.h"
......@@ -109,3 +112,5 @@ struct type_caster<Optional<T>> : public oneflow_optional_caster<Optional<T>> {}
} // namespace detail
} // namespace pybind11
#endif // ONEFLOW_API_PYTHON_CASTER_OPTIONAL_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment