Commit 21d47d0e authored by yuguo

Oneflow 0.8 for DCU
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
namespace oneflow {
namespace one {
struct IdentityCaptureState : public AutoGradCaptureState {
bool requires_grad;
};
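// Backward sketch (for orientation only): identity is a pass-through op, so when the input
// requires grad its gradient is simply dx = dy, which is what Apply() below forwards.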
class Identity : public OpExprGradFunction<IdentityCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(IdentityCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
return Maybe<void>::Ok();
}
Maybe<void> Apply(const IdentityCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(1);
if (ctx->requires_grad) { in_grads->at(0) = out_grads.at(0); }
return Maybe<void>::Ok();
}
};
REGISTER_OP_EXPR_GRAD_FUNCTION("identity", Identity);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct KLDivLossCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
bool log_target = false;
};
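// Gradient sketch (assuming the standard KL-divergence loss definition): with
// log_target=false the input gradient is -target * dy, and with log_target=true it is
// -exp(target) * dy; functional::KLDivLossGrad below is expected to compute this. The
// target input receives no gradient here.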
class KLDivLoss : public OpExprGradFunction<KLDivLossCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(KLDivLossCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const KLDivLossCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> KLDivLoss::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> KLDivLoss::Capture(KLDivLossCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->log_target = JUST(composed_attrs.GetAttr<bool>("log_target"));
ctx->SaveTensorForBackward(inputs.at(0)); // input
ctx->SaveTensorForBackward(inputs.at(1)); // target
return Maybe<void>::Ok();
}
Maybe<void> KLDivLoss::Apply(const KLDivLossCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
const auto& dy = out_grads.at(0);
const auto& input = ctx->SavedTensors().at(0);
const auto& target = ctx->SavedTensors().at(1);
in_grads->resize(ctx->SavedTensors().size());
in_grads->at(0) = JUST(functional::KLDivLossGrad(dy, input, target, ctx->log_target));
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("kl_div_loss", KLDivLoss);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct L2NormalizeCaptureState : public AutoGradCaptureState {
int64_t axis;
float epsilon;
bool requires_grad;
};
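// Gradient sketch (assuming the forward computes y = x * rsqrt(max(sum(x^2, axis), epsilon))):
// dx = (dy - y * sum(dy * y, axis)) * rsqrt(max(square_x_sum, epsilon)), which is what
// functional::L2NormalizeGrad is expected to compute from the saved y and square_x_sum.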
class L2Normalize : public OpExprGradFunction<L2NormalizeCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(L2NormalizeCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const L2NormalizeCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> L2Normalize::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> L2Normalize::Capture(L2NormalizeCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ctx->SaveTensorForBackward(outputs.at(0)); // y
ctx->SaveTensorForBackward(outputs.at(1)); // square_x_sum
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
return Maybe<void>::Ok();
}
Maybe<void> L2Normalize::Apply(const L2NormalizeCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
in_grads->resize(1);
CHECK_EQ_OR_RETURN(out_grads.size(), 2); // NOLINT(maybe-need-error-msg)
const auto& y = ctx->SavedTensors().at(0);
const auto& square_x_sum = ctx->SavedTensors().at(1);
in_grads->at(0) =
JUST(functional::L2NormalizeGrad(out_grads.at(0), y, square_x_sum, ctx->axis, ctx->epsilon));
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("l2_normalize", L2Normalize);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct LayerNormCaptureState : public AutoGradCaptureState {
bool center = true;
bool scale = true;
int64_t begin_norm_axis = 1;
int64_t begin_params_axis = 1;
double epsilon = 1e-5;
bool x_requires_grad = true;
bool has_affine = true;
size_t gamma_index = 0;
size_t x_index = 1;
size_t mean_index = 2;
size_t inv_variance_index = 3;
};
// y, mean, inv_variance =
// layer_norm(x, [gamma], [beta], center=False, scale=False, begin_norm_axis=1,
// begin_params_axis=-1, epsilon=1e-5)
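// Backward sketch (a reading of Apply() below, not a new contract): gamma/beta gradients come
// from LayerNormParamGrad, which reduces dy (and dy times the normalized x) over the leading
// [0, begin_params_axis) dimensions; the x gradient comes from LayerNormGrad, or from
// LayerNormAffineGrad with the saved gamma folded in when scale is enabled.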
class LayerNorm : public OpExprGradFunction<LayerNormCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(LayerNormCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const LayerNormCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
std::string op_name_;
};
Maybe<void> LayerNorm::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
op_name_ = fw_op_expr->op_name();
return Maybe<void>::Ok();
}
Maybe<void> LayerNorm::Capture(LayerNormCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->center = JUST(composed_attrs.GetAttr<bool>("center"));
ctx->scale = JUST(composed_attrs.GetAttr<bool>("scale"));
ctx->begin_norm_axis = JUST(composed_attrs.GetAttr<int64_t>("begin_norm_axis"));
ctx->begin_params_axis = JUST(composed_attrs.GetAttr<int64_t>("begin_params_axis"));
ctx->epsilon = JUST(composed_attrs.GetAttr<double>("epsilon"));
CHECK_EQ_OR_RETURN(inputs.size(), ctx->center + ctx->scale + 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 3); // NOLINT(maybe-need-error-msg)
bool has_gamma_diff = ctx->scale && inputs.at(1)->requires_grad();
bool has_beta_diff = ctx->center && inputs.at(2)->requires_grad();
ctx->has_affine = has_gamma_diff && has_beta_diff;
ctx->x_requires_grad = inputs.at(0)->requires_grad();
if (ctx->x_requires_grad || ctx->has_affine) {
ctx->x_index = ctx->SaveTensorForBackward(inputs.at(0));
ctx->mean_index = ctx->SaveTensorForBackward(outputs.at(1));
ctx->inv_variance_index = ctx->SaveTensorForBackward(outputs.at(2));
if (ctx->x_requires_grad && ctx->scale) {
ctx->gamma_index = ctx->SaveTensorForBackward(inputs.at(1)); // save gamma.
}
}
return Maybe<void>::Ok();
}
Maybe<void> LayerNorm::Apply(const LayerNormCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
const auto& saved_tensors = ctx->SavedTensors();
in_grads->resize(ctx->center + ctx->scale + 1);
std::shared_ptr<Tensor> dy = out_grads.at(0);
int64_t begin_params_axis = ctx->begin_params_axis;
if (begin_params_axis < 0) { begin_params_axis += dy->shape()->NumAxes(); }
int64_t begin_norm_axis = ctx->begin_norm_axis;
if (begin_norm_axis < 0) { begin_norm_axis += dy->shape()->NumAxes(); }
std::shared_ptr<Tensor> x = saved_tensors.at(ctx->x_index);
std::shared_ptr<Tensor> mean = saved_tensors.at(ctx->mean_index);
std::shared_ptr<Tensor> inv_variance = saved_tensors.at(ctx->inv_variance_index);
if (ctx->has_affine) {
// Use LayerNormParamGrad(Tensor dy, Tensor x, Tensor mean, Tensor inv_variance, Int64
// begin_params_axis, Double epsilon).
const auto& results = JUST(
functional::LayerNormParamGrad(dy, x, mean, inv_variance, begin_params_axis, ctx->epsilon));
in_grads->at(1) = results->at(0); // For gamma.
in_grads->at(2) = results->at(1); // For beta.
}
if (ctx->x_requires_grad) {
if (ctx->scale) {
std::shared_ptr<Tensor> gamma = saved_tensors.at(ctx->gamma_index);
in_grads->at(0) = JUST(functional::LayerNormAffineGrad(dy, x, mean, inv_variance, gamma,
begin_norm_axis, ctx->epsilon));
} else {
in_grads->at(0) =
JUST(functional::LayerNormGrad(dy, x, mean, inv_variance, begin_norm_axis, ctx->epsilon));
}
}
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("layer_norm", LayerNorm);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
namespace oneflow {
namespace one {
struct LogSoftmaxCaptureState : public AutoGradCaptureState {
bool requires_grad;
};
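// Gradient sketch: with prob = log_softmax(x) saved from the forward pass, the backward is
// dx = dy - exp(prob) * sum(dy) along the softmax dimension, which the "log_softmax_grad" op
// dispatched below is expected to implement.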
class LogSoftmax : public OpExprGradFunction<LogSoftmaxCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(LogSoftmaxCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const LogSoftmaxCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
std::shared_ptr<OpExpr> grad_op_;
};
Maybe<void> LogSoftmax::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
const std::string& op_name = fw_op_expr->op_name();
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
grad_op_ = JUST(one::OpBuilder("log_softmax_grad", GradientOpName(op_name))
.Input("prob")
.Input("dy")
.Output("dx")
.Build());
return Maybe<void>::Ok();
}
Maybe<void> LogSoftmax::Capture(LogSoftmaxCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ComposedAttrMap composed_attrs(attrs, base_attrs_);
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) return Maybe<void>::Ok();
ctx->SaveTensorForBackward(outputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> LogSoftmax::Apply(const LogSoftmaxCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) return Maybe<void>::Ok();
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
const auto& dy = out_grads.at(0);
const auto& prob = ctx->SavedTensors().at(0);
in_grads->resize(1);
in_grads->at(0) = JUST(OpInterpUtil::Dispatch<Tensor>(*grad_op_, {prob, dy}));
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("log_softmax", LogSoftmax);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct MaskedFillCaptureState : public AutoGradCaptureState {
bool requires_grad = true;
};
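// Gradient sketch: masked_fill writes a constant into the masked positions, so the input
// gradient zeroes exactly those positions, i.e. dx = where(mask, 0, dy); the mask itself
// gets no gradient (in_grads[1] is intentionally left empty below).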
class MaskedFill : public OpExprGradFunction<MaskedFillCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(MaskedFillCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ctx->SaveTensorForBackward(inputs.at(0));
ctx->SaveTensorForBackward(inputs.at(1));
return Maybe<void>::Ok();
}
Maybe<void> Apply(const MaskedFillCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
const std::shared_ptr<oneflow::one::Tensor>& x = ctx->SavedTensors().at(0);
const std::shared_ptr<oneflow::one::Tensor>& mask = ctx->SavedTensors().at(1);
std::shared_ptr<oneflow::one::Tensor> zero_out = JUST(functional::ZerosLike(x));
in_grads->resize(2);
in_grads->at(0) = JUST(functional::Where(mask, zero_out, out_grads.at(0)));
return Maybe<void>::Ok();
}
};
REGISTER_OP_EXPR_GRAD_FUNCTION("masked_fill", MaskedFill);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/user/ops/math_binary_elementwise_seq.h"
namespace oneflow {
namespace one {
struct BinaryMathCaptureState : public AutoGradCaptureState {
bool x_requires_grad;
bool y_requires_grad;
};
typedef Maybe<one::Tensor> (*BinaryBwFunc)(const std::shared_ptr<one::Tensor>&,
const std::shared_ptr<one::Tensor>&,
const std::shared_ptr<one::Tensor>&);
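// Each registered binary elementwise op supplies two backward functionals with this
// signature, one per input: BwXFunc(x, y, dz) -> dx and BwYFunc(x, y, dz) -> dy.
// For example (assuming "pow" appears in MATH_BINARY_ELEMENTWISE_FUNC_SEQ), PowXGrad would
// compute y * pow(x, y - 1) * dz and PowYGrad would compute pow(x, y) * log(x) * dz.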
template<BinaryBwFunc BwXFunc, BinaryBwFunc BwYFunc>
class BinaryMathOp : public OpExprGradFunction<BinaryMathCaptureState> {
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(BinaryMathCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->x_requires_grad = inputs.at(0)->requires_grad();
ctx->y_requires_grad = inputs.at(1)->requires_grad();
ctx->SaveTensorForBackward(inputs.at(0));
ctx->SaveTensorForBackward(inputs.at(1));
return Maybe<void>::Ok();
}
Maybe<void> Apply(const BinaryMathCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (!(ctx->x_requires_grad || ctx->y_requires_grad)) { return Maybe<void>::Ok(); }
in_grads->resize(2);
const std::shared_ptr<one::Tensor>& x = ctx->SavedTensors().at(0);
const std::shared_ptr<one::Tensor>& y = ctx->SavedTensors().at(1);
if (ctx->x_requires_grad) { in_grads->at(0) = JUST(BwXFunc(x, y, out_grads.at(0))); }
if (ctx->y_requires_grad) { in_grads->at(1) = JUST(BwYFunc(x, y, out_grads.at(0))); }
return Maybe<void>::Ok();
}
};
#define INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS(op_type_name, op_cls) \
class op_cls##Cls final \
: public BinaryMathOp<functional::op_cls##XGrad, functional::op_cls##YGrad> {}; \
REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS, MATH_BINARY_ELEMENTWISE_FUNC_SEQ);
#undef INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/user/ops/math_unary_elementwise_seq.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct UnaryMathCaptureState : public AutoGradCaptureState {
bool x_requires_grad;
};
typedef Maybe<one::Tensor> (*UnaryBwFunc)(const std::shared_ptr<one::Tensor>&,
const std::shared_ptr<one::Tensor>&);
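// Each registered unary elementwise op supplies one backward functional with this signature:
// BwFunc(x, dy) -> dx. For example (assuming "sin" appears in MATH_UNARY_ELEMENTWISE_FUNC_SEQ),
// SinGrad would compute cos(x) * dy.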
template<UnaryBwFunc BwFunc>
class UnaryMathOp : public OpExprGradFunction<UnaryMathCaptureState> {
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->x_requires_grad = inputs.at(0)->requires_grad();
ctx->SaveTensorForBackward(inputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> Apply(const UnaryMathCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
const auto& x = ctx->SavedTensors().at(0);
in_grads->resize(1);  // ensure in_grads has a slot for the single input before indexing
in_grads->at(0) = JUST(BwFunc(x, out_grads.at(0)));
return Maybe<void>::Ok();
}
protected:
std::shared_ptr<OpExpr> grad_op_;
};
#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS(op_type_name, op_cls) \
class op_cls##Cls final : public UnaryMathOp<functional::op_cls##Grad> {}; \
REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, MATH_UNARY_ELEMENTWISE_FUNC_SEQ);
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS,
OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh));
// higher order derivative
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS,
OF_PP_MAKE_TUPLE_SEQ("sin_grad", SinGrad));
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS,
OF_PP_MAKE_TUPLE_SEQ("cos_grad", CosGrad));
#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct MatmulCaptureState : public AutoGradCaptureState {
bool transpose_a;
bool transpose_b;
double alpha;
bool requires_grad_a;
bool requires_grad_b;
size_t a_index;
size_t b_index;
};
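// Backward sketch (standard matmul gradients for out = alpha * op_a(a) * op_b(b)): with no
// transposes, da = alpha * dout * b^T and db = alpha * a^T * dout. The branches in Apply()
// below fold transpose_a/transpose_b into the transpose flags of functional::MatMul, and
// Capture() saves only the operand actually needed for each requested gradient.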
class Matmul : public OpExprGradFunction<MatmulCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(MatmulCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
protected:
AttrMap base_attrs_;
};
Maybe<void> Matmul::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Matmul::Capture(MatmulCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ctx->requires_grad_a = inputs.at(0)->requires_grad();
ctx->requires_grad_b = inputs.at(1)->requires_grad();
if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->transpose_a = JUST(composed_attrs.GetAttr<bool>("transpose_a"));
ctx->transpose_b = JUST(composed_attrs.GetAttr<bool>("transpose_b"));
ctx->alpha = JUST(composed_attrs.GetAttr<double>("alpha"));
if (ctx->requires_grad_a) {
ctx->b_index = ctx->SaveTensorForBackward(inputs.at(1)); // input b
}
if (ctx->requires_grad_b) {
ctx->a_index = ctx->SaveTensorForBackward(inputs.at(0)); // input a
}
return Maybe<void>::Ok();
}
Maybe<void> Matmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(2);
if (ctx->requires_grad_a) {
const auto& input_b = ctx->SavedTensors().at(ctx->b_index);
if (ctx->transpose_a) {
in_grads->at(0) =
JUST(functional::MatMul(input_b, out_grads.at(0), ctx->transpose_b, true, ctx->alpha));
} else {
in_grads->at(0) = JUST(
functional::MatMul(out_grads.at(0), input_b, false, !(ctx->transpose_b), ctx->alpha));
}
}
if (ctx->requires_grad_b) {
const auto& input_a = ctx->SavedTensors().at(ctx->a_index);
if (ctx->transpose_b) {
in_grads->at(1) =
JUST(functional::MatMul(out_grads.at(0), input_a, true, ctx->transpose_a, ctx->alpha));
} else {
in_grads->at(1) = JUST(
functional::MatMul(input_a, out_grads.at(0), !(ctx->transpose_a), false, ctx->alpha));
}
}
return Maybe<void>::Ok();
}
class BroadcastMatmul : public Matmul {
public:
Maybe<void> Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
};
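// broadcast_matmul reuses the Matmul gradient for a; the gradient for b additionally has to
// reduce over the broadcast batch dimensions, which functional::BroadcastMatmulGradB is
// expected to handle in the override below.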
Maybe<void> BroadcastMatmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(2);
if (ctx->requires_grad_a) {
const auto& input_b = ctx->SavedTensors().at(ctx->b_index);
if (ctx->transpose_a) {
in_grads->at(0) =
JUST(functional::MatMul(input_b, out_grads.at(0), ctx->transpose_b, true, ctx->alpha));
} else {
in_grads->at(0) = JUST(
functional::MatMul(out_grads.at(0), input_b, false, !(ctx->transpose_b), ctx->alpha));
}
}
if (ctx->requires_grad_b) {
const auto& input_a = ctx->SavedTensors().at(ctx->a_index);
if (ctx->transpose_b) {
in_grads->at(1) =
JUST(functional::BroadcastMatmulGradB(out_grads.at(0), input_a, ctx->alpha));
} else {
in_grads->at(1) =
JUST(functional::BroadcastMatmulGradB(input_a, out_grads.at(0), ctx->alpha));
}
}
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("matmul", Matmul);
REGISTER_OP_EXPR_GRAD_FUNCTION("batch_matmul", Matmul);
REGISTER_OP_EXPR_GRAD_FUNCTION("broadcast_matmul", BroadcastMatmul);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
namespace {
struct MaxPoolCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
size_t input_index = 0;
size_t indice_index = 0;
std::string data_format;
std::vector<int32_t> padding;
std::vector<int32_t> kernel_size;
std::vector<int32_t> stride;
std::vector<int32_t> dilation;
bool return_indices = false;
bool ceil_mode = false;
};
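// Backward sketch: the forward max pooling records, per output element, the index of the
// selected input element (the saved "indice" tensor); the backward scatters dy back to those
// positions and leaves every other input position with zero gradient, which
// functional::MaxPoolNdGrad is expected to implement.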
class MaxPoolNdGrad : public OpExprGradFunction<MaxPoolCaptureState> {
public:
virtual ~MaxPoolNdGrad() = default;
using OpExprGradFunction<MaxPoolCaptureState>::Init;
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(MaxPoolCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const MaxPoolCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> MaxPoolNdGrad::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> MaxPoolNdGrad::Capture(MaxPoolCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ctx->input_index = ctx->SaveTensorForBackward(inputs.at(0));
ctx->indice_index = ctx->SaveTensorForBackward(outputs.at(1));
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->data_format = JUST(composed_attrs.GetAttr<std::string>("data_format"));
ctx->padding = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("padding"));
ctx->kernel_size = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("kernel_size"));
ctx->stride = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("stride"));
ctx->dilation = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("dilation"));
ctx->return_indices = JUST(composed_attrs.GetAttr<bool>("return_indices"));
ctx->ceil_mode = JUST(composed_attrs.GetAttr<bool>("ceil_mode"));
return Maybe<void>::Ok();
}
Maybe<void> MaxPoolNdGrad::Apply(const MaxPoolCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
CHECK_LE_OR_RETURN(out_grads.size(), 2); // NOLINT(maybe-need-error-msg)
int32_t ndims = ctx->kernel_size.size();
const auto& input = ctx->SavedTensors().at(ctx->input_index);
const auto& indice = ctx->SavedTensors().at(ctx->indice_index);
in_grads->resize(1);
(*in_grads)[0] = JUST(functional::MaxPoolNdGrad(
input, indice, out_grads[0], ndims, ctx->data_format, ctx->padding, ctx->kernel_size,
ctx->stride, ctx->dilation, ctx->return_indices, ctx->ceil_mode));
return Maybe<void>::Ok();
}
} // namespace
REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_1d", MaxPoolNdGrad);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_2d", MaxPoolNdGrad);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_3d", MaxPoolNdGrad);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/sequence_function.h"
#include "oneflow/core/common/container_util.h"
namespace oneflow {
namespace one {
struct MedianCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
};
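// Backward sketch for the global median (a reading of the Apply() chain below): broadcast the
// median back to the input shape, build a 0/1 mask of entries equal to it (cast_like), divide
// dy by the number of such entries, and broadcast the quotient back, so the gradient is shared
// evenly among all elements attaining the median value.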
class Median : public OpExprGradFunction<MedianCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(MedianCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
if (ctx->requires_grad) {
ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 0)));
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const MedianCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (ctx->requires_grad) {
const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
const auto& output = JUST(VectorAt(ctx->SavedTensors(), 1));
const auto& dy = JUST(VectorAt(out_grads, 0));
std::vector<int32_t> axis(input->ndim());
std::iota(axis.begin(), axis.end(), 0);
const auto cast_like =
JUST(functional::SequenceFunction<Maybe<Tensor>()>(
[&]() { return functional::BroadcastLike(output, input, axis); })
.then(std::bind(functional::BroadcastEqual, input, std::placeholders::_1))
.then(std::bind(functional::CastLike, std::placeholders::_1, input))
.call());
const auto bcast_like_div =
JUST(functional::SequenceFunction<Maybe<Tensor>()>(
[&]() { return functional::ReduceSum(cast_like, axis, false); })
.then(std::bind(functional::Div, dy, std::placeholders::_1))
.then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, axis))
.call());
in_grads->resize(1);
JUST(VectorAt(*in_grads, 0)) = JUST(functional::Mul(bcast_like_div, cast_like));
}
return Maybe<void>::Ok();
}
};
struct MedianWithIndicesCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
};
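// Backward sketch: median_with_indices also returns the index of the chosen element along the
// last dimension, so the backward scatters dy into a zero tensor at those indices via
// functional::DimScatter.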
class MedianWithIndices : public OpExprGradFunction<MedianWithIndicesCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(MedianWithIndicesCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
if (ctx->requires_grad) {
ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 1)));
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const MedianWithIndicesCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (ctx->requires_grad) {
in_grads->resize(1);
const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
const auto& indices = JUST(functional::Unsqueeze(JUST(VectorAt(ctx->SavedTensors(), 1)), -1));
const auto& dout = JUST(functional::Unsqueeze(JUST(VectorAt(out_grads, 0)), -1));
JUST(VectorAt(*in_grads, 0)) = JUST(
functional::DimScatter(JUST(functional::Constant(*(input->shape()), Scalar(0),
*dout->dtype(), JUST(dout->device()))),
-1, indices, dout));
}
return Maybe<void>::Ok();
}
};
REGISTER_OP_EXPR_GRAD_FUNCTION("median", Median);
REGISTER_OP_EXPR_GRAD_FUNCTION("median_with_indices", MedianWithIndices);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/job/lazy_mode.h"
#include "oneflow/core/framework/nd_sbp.h"
namespace oneflow {
namespace one {
struct NarrowCaptureState : public AutoGradCaptureState {
bool requires_grad;
Shape shape;
int64_t dim;
int64_t start;
int64_t length;
};
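// Backward sketch: narrow slices `length` elements starting at `start` along `dim`, so the
// backward writes dy back into a tensor shaped like the original input, zero outside the
// slice; functional::NarrowGrad only needs the "like" tensor for its shape/meta, which is why
// eager mode can pass an uninitialized Empty tensor while lazy mode saves the input itself.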
class Narrow : public OpExprGradFunction<NarrowCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(NarrowCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->dim = JUST(composed_attrs.GetAttr<int64_t>("dim"));
ctx->start = JUST(composed_attrs.GetAttr<int64_t>("start"));
ctx->length = JUST(composed_attrs.GetAttr<int64_t>("length"));
if (LazyMode::is_enabled()) {
ctx->SaveTensorForBackward(inputs.at(0));
} else {
ctx->shape = *(inputs.at(0)->shape());
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const NarrowCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
const auto& dy = out_grads.at(0);
if (ctx->requires_grad) {
std::shared_ptr<Tensor> like;
if (LazyMode::is_enabled()) {
like = ctx->SavedTensors().at(0);
} else if (dy->is_local()) {
like = JUST(
functional::Empty(ctx->shape, dy->dtype(), JUST(dy->device()), /*pin_memory=*/false));
} else {
like = JUST(
functional::ConsistentEmpty(ctx->shape, dy->dtype(), JUST(dy->parallel_desc()),
*JUST(private_details::RawGetSbpList(JUST(dy->nd_sbp())))));
}
in_grads->resize(1);
in_grads->at(0) = JUST(functional::NarrowGrad(dy, like, ctx->dim, ctx->start, ctx->length));
}
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
REGISTER_OP_EXPR_GRAD_FUNCTION("narrow", Narrow);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"
namespace oneflow {
namespace one {
struct NLLCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
int64_t ignore_index = -100;
};
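// Backward sketch (assuming the usual negative log-likelihood definition): per sample, the
// input gradient is -dy at the target class (scaled by the class weight when given) and zero
// elsewhere, with samples equal to ignore_index contributing nothing; functional::NLLGrad is
// expected to implement this. Only the input receives a gradient.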
class NLLGradFunction : public OpExprGradFunction<NLLCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(NLLCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
const AttrMap& attrs) const override;
Maybe<void> Apply(const NLLCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> NLLGradFunction::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> NLLGradFunction::Capture(NLLCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
auto input = JUST(VectorAt(inputs, 0));
ctx->requires_grad = input->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->ignore_index = JUST(composed_attrs.GetAttr<int64_t>("ignore_index"));
ctx->SaveTensorForBackward(input); // input
ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1))); // target
if (inputs.size() == 3) {
ctx->SaveTensorForBackward(inputs[2]); // weight
}
return Maybe<void>::Ok();
}
Maybe<void> NLLGradFunction::Apply(const NLLCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
CHECK_EQ_OR_RETURN(out_grads.size(), 2); // NOLINT(maybe-need-error-msg)
CHECK_GE_OR_RETURN(ctx->SavedTensors().size(), 2)
<< Error::RuntimeError()
<< "The number of saved tensors is expected to be greater than or equal to 2, but got "
<< ctx->SavedTensors().size();
const auto& out_grad = out_grads[0];
const auto& input = ctx->SavedTensors()[0];
const auto& target = ctx->SavedTensors()[1];
in_grads->resize(ctx->SavedTensors().size());
if (ctx->SavedTensors().size() == 2) {
JUST(VectorAt(*in_grads, 0)) =
JUST(functional::NLLGrad(out_grad, input, target, NullOpt, ctx->ignore_index));
} else {
// has weight
auto weight = JUST(VectorAt(ctx->SavedTensors(), 2));
JUST(VectorAt(*in_grads, 0)) =
JUST(functional::NLLGrad(out_grad, input, target, weight, ctx->ignore_index));
}
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("nll", NLLGradFunction);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct NormalizationGradCaptureState : public AutoGradCaptureState {
int32_t axis;
float epsilon;
bool track_running_stats;
bool is_training;
bool x_requires_grad;
bool gamma_requires_grad;
bool beta_requires_grad;
};
// training:
// y, mean, inv_variance = normalization(x, moving_mean, moving_variance, gamma, beta,
// axis=1, epsilon=0.01, momentum=0.9)
// y, mean, inv_variance = normalization(x, gamma, beta, axis=1, epsilon=0.01, momentum=0.9)
// inference:
// y = normalization(x, moving_mean, moving_variance, gamma, beta, axis=1, epsilon=0.01,
// momentum=0.9)
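// Backward sketch for the eval-mode x gradient computed inline below (training mode takes
// dx directly from functional::NormalizationGrad): dx = dy * gamma * inv_variance with
// inv_variance = rsqrt(moving_variance + epsilon), where gamma and inv_variance are reshaped
// to broadcast along `axis`; fp16 gradients are temporarily cast to fp32 for this computation.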
class NormalizationGrad : public OpExprGradFunction<NormalizationGradCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(NormalizationGradCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
// input_size may be 3 or 5, as inputs may be
// (x, gamma, beta) or (x, moving_mean, moving_variance, gamma, beta),
// depending on whether track_running_stats is false or true.
// output_size may be 1 or 3, as outputs may be
// (y,) or (y, mean, inv_variance),
// depending on whether is_training is false or true.
ctx->x_requires_grad = inputs.at(0)->requires_grad();
std::shared_ptr<Tensor> gamma, beta;
if (inputs.size() == 3) {
gamma = inputs.at(1);
beta = inputs.at(2);
ctx->track_running_stats = false;
} else {
CHECK_EQ_OR_RETURN(inputs.size(), 5); // NOLINT(maybe-need-error-msg)
gamma = inputs.at(3);
beta = inputs.at(4);
ctx->track_running_stats = true;
}
ctx->gamma_requires_grad = gamma->requires_grad();
ctx->beta_requires_grad = beta->requires_grad();
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
ctx->is_training = JUST(composed_attrs.GetAttr<bool>("training"));
ctx->SaveTensorForBackward(inputs.at(0)); // x
ctx->SaveTensorForBackward(gamma); // gamma
if (ctx->is_training || !ctx->track_running_stats) {
ctx->SaveTensorForBackward(outputs.at(1)); // mean
ctx->SaveTensorForBackward(outputs.at(2)); // inv_variance
} else {
ctx->SaveTensorForBackward(inputs.at(1)); // moving_mean
ctx->SaveTensorForBackward(inputs.at(2)); // moving_variance
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const NormalizationGradCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
const auto& x = ctx->SavedTensors().at(0); // x
const auto& gamma = ctx->SavedTensors().at(1); // gamma
const auto& y_grad = out_grads.at(0);
std::shared_ptr<Tensor> mean, inv_variance;
if (ctx->is_training || !ctx->track_running_stats) {
mean = ctx->SavedTensors().at(2); // mean
inv_variance = ctx->SavedTensors().at(3); // inv_variance
} else {
const auto& moving_mean = ctx->SavedTensors().at(2); // moving_mean
const auto& moving_variance = ctx->SavedTensors().at(3); // moving_variance
const auto& add_eps = JUST(
functional::ScalarAdd(moving_variance, ctx->epsilon, /*alpha=*/1, /*inplace=*/false));
mean = moving_mean;
inv_variance = JUST(functional::Rsqrt(add_eps));
}
const auto& results = JUST(functional::NormalizationGrad(y_grad, x, mean, inv_variance, gamma,
ctx->epsilon, ctx->axis));
CHECK_EQ_OR_RETURN(results->size(), 3)
<< Error::RuntimeError() << "The number of results is expected to be 3, but got "
<< results->size();
if (ctx->track_running_stats) {
// The normalization op has 5 inputs which are x, moving_mean, moving_variance, gamma and
// beta.
in_grads->resize(5);
if (ctx->gamma_requires_grad) {
in_grads->at(3) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(4) = results->at(2); // beta_diff
}
} else {
// The normalization op has 3 inputs which are x, gamma and beta.
in_grads->resize(3);
if (ctx->gamma_requires_grad) {
in_grads->at(1) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(2) = results->at(2); // beta_diff
}
}
if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
if (ctx->is_training) {
in_grads->at(0) = results->at(0);
return Maybe<void>::Ok();
}
Shape shape;
for (int i = 0; i < x->shape()->NumAxes(); ++i) {
if (i != ctx->axis) {
shape.emplace_back(1);
} else {
shape.emplace_back(x->shape()->At(ctx->axis));
}
}
const auto& reshaped_gamma = JUST(functional::Reshape(gamma, shape));
const auto& reshaped_inv_variance = JUST(functional::Reshape(inv_variance, shape));
std::shared_ptr<Tensor> y_grad_fp32 = y_grad;
bool is_fp16 = y_grad->dtype()->data_type() == DataType::kFloat16;
if (is_fp16) {
y_grad_fp32 = JUST(functional::Cast(y_grad, DType::Float(), /*pin_memory=*/false));
}
const auto& dy_mul_gamma = JUST(functional::Mul(reshaped_gamma, y_grad_fp32));
const auto& dy_mul_inv_var = JUST(functional::Mul(dy_mul_gamma, reshaped_inv_variance));
if (is_fp16) {
(*in_grads)[0] =
JUST(functional::Cast(dy_mul_inv_var, DType::Float16(), /*pin_memory=*/false));
} else {
(*in_grads)[0] = dy_mul_inv_var;
}
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
REGISTER_OP_EXPR_GRAD_FUNCTION("normalization", NormalizationGrad);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct NormalizationAddReluGradCaptureState : public AutoGradCaptureState {
int32_t axis = 1;
float epsilon = 1e-5;
bool track_running_stats = true;
bool is_training = true;
bool has_addend = false;
bool x_requires_grad = true;
bool addend_requires_grad = true;
bool gamma_requires_grad = true;
bool beta_requires_grad = true;
};
// training:
// y, mean, inv_variance = normalization_add_relu(x, Optional(add_end), moving_mean,
//     moving_variance, gamma, beta, axis=1, epsilon=0.01, momentum=0.9)
// y, mean, inv_variance = normalization_add_relu(x, Optional(add_end), gamma, beta,
//     axis=1, epsilon=0.01, momentum=0.9)
// inference:
// y = normalization_add_relu(x, Optional(add_end), moving_mean, moving_variance, gamma, beta,
//     axis=1, epsilon=0.01, momentum=0.9)
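// Backward sketch: on top of the usual batch-norm saves, y and reserve_space are kept so the
// fused backward can undo the ReLU (and split off the optional add_end) before computing the
// normalization gradients; functional::NormalizationAddReluGrad is expected to return
// (dx, gamma_diff, beta_diff[, addend_diff]).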
class NormalizationAddReluGrad : public OpExprGradFunction<NormalizationAddReluGradCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(NormalizationAddReluGradCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
// input_size may be 3/4/5/6, as inputs may be
// (x, gamma, beta) or (x, moving_mean, moving_variance, gamma, beta), or
// (x, addend, gamma, beta) or (x, addend, moving_mean, moving_variance, gamma, beta),
// depending on whether track_running_stats is false or true.
// output_size may be 2 or 4, as outputs may be
// (y, reserve_space) or (y, reserve_space, mean, inv_variance),
// depending on whether is_training is false or true.
ctx->x_requires_grad = inputs.at(0)->requires_grad();
std::shared_ptr<Tensor> add_end, gamma, beta;
if (inputs.size() == 3 || inputs.size() == 5) {
add_end = nullptr;
if (inputs.size() == 3) {
gamma = inputs.at(1);
beta = inputs.at(2);
ctx->track_running_stats = false;
} else {
gamma = inputs.at(3);
beta = inputs.at(4);
ctx->track_running_stats = true;
}
ctx->has_addend = false;
} else if (inputs.size() == 4 || inputs.size() == 6) {
add_end = inputs.at(1);
if (inputs.size() == 4) {
gamma = inputs.at(2);
beta = inputs.at(3);
ctx->track_running_stats = false;
} else {
gamma = inputs.at(4);
beta = inputs.at(5);
ctx->track_running_stats = true;
}
ctx->has_addend = true;
ctx->addend_requires_grad = inputs.at(1)->requires_grad();
}
ctx->gamma_requires_grad = gamma->requires_grad();
ctx->beta_requires_grad = beta->requires_grad();
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
ctx->is_training = JUST(composed_attrs.GetAttr<bool>("training"));
ctx->SaveTensorForBackward(inputs.at(0)); // x 0
ctx->SaveTensorForBackward(gamma); // gamma 1
ctx->SaveTensorForBackward(beta); // beta 2
if (ctx->is_training || !ctx->track_running_stats) {
ctx->SaveTensorForBackward(outputs.at(2)); // mean 3
ctx->SaveTensorForBackward(outputs.at(3)); // inv_variance 4
} else {
if (inputs.size() == 5) {
// without add_end
ctx->SaveTensorForBackward(inputs.at(1)); // moving_mean 3
ctx->SaveTensorForBackward(inputs.at(2)); // moving_variance 4
} else {
CHECK_EQ_OR_RETURN(inputs.size(), 6); // NOLINT(maybe-need-error-msg)
// with add_end
ctx->SaveTensorForBackward(inputs.at(2)); // moving_mean 3
ctx->SaveTensorForBackward(inputs.at(3)); // moving_variance 4
}
}
ctx->SaveTensorForBackward(outputs.at(0)); // y 5
ctx->SaveTensorForBackward(outputs.at(1)); // reserve space 6
return Maybe<void>::Ok();
}
Maybe<void> Apply(const NormalizationAddReluGradCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
const auto& x = ctx->SavedTensors().at(0); // x
const auto& gamma = ctx->SavedTensors().at(1); // gamma
const auto& beta = ctx->SavedTensors().at(2); // beta
const auto& y_grad = out_grads.at(0);
std::shared_ptr<Tensor> mean, inv_variance;
if (ctx->is_training || !ctx->track_running_stats) {
mean = ctx->SavedTensors().at(3); // mean
inv_variance = ctx->SavedTensors().at(4); // inv_variance
} else {
const auto& moving_mean = ctx->SavedTensors().at(3); // moving_mean
const auto& moving_variance = ctx->SavedTensors().at(4); // moving_variance
const auto& add_eps = JUST(
functional::ScalarAdd(moving_variance, ctx->epsilon, /*alpha=*/1, /*inplace=*/false));
mean = moving_mean;
inv_variance = JUST(functional::Rsqrt(add_eps));
}
const auto& y = ctx->SavedTensors().at(5);
const auto& reserve_space = ctx->SavedTensors().at(6);
const auto& results = JUST(functional::NormalizationAddReluGrad(
x, y_grad, mean, inv_variance, gamma, beta, reserve_space, y, ctx->axis, ctx->epsilon,
ctx->has_addend));
CHECK_EQ_OR_RETURN(results->size(), (ctx->has_addend ? 4 : 3))
<< Error::RuntimeError() << "The number of results is expected to be "
<< (ctx->has_addend ? 4 : 3) << ", but got "
<< results->size(); // here output includes "gamma_diff" "beta_diff" "dx" "addend_diff"
if (ctx->track_running_stats) {
// With track_running_stats, the op has 5 inputs (x, moving_mean, moving_variance, gamma, beta),
// or 6 inputs (x, add_end, moving_mean, moving_variance, gamma, beta) when add_end is present.
if (ctx->has_addend) {
in_grads->resize(6);
if (ctx->gamma_requires_grad) {
in_grads->at(4) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(5) = results->at(2); // beta_diff
}
if (ctx->addend_requires_grad) {
in_grads->at(1) = results->at(3); // add_end_diff
}
} else {
in_grads->resize(5);
if (ctx->gamma_requires_grad) {
in_grads->at(3) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(4) = results->at(2); // beta_diff
}
}
} else {
// Without track_running_stats, the op has 3 inputs (x, gamma, beta),
// or 4 inputs (x, addend, gamma, beta) when add_end is present.
if (ctx->has_addend) {
in_grads->resize(4);
if (ctx->addend_requires_grad) {
in_grads->at(1) = results->at(3); // addend_diff
}
if (ctx->gamma_requires_grad) {
in_grads->at(2) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(3) = results->at(2); // beta_diff
}
} else {
in_grads->resize(3);
if (ctx->gamma_requires_grad) {
in_grads->at(1) = results->at(1); // gamma_diff;
}
if (ctx->beta_requires_grad) {
in_grads->at(2) = results->at(2); // beta_diff
}
}
}
if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
if (ctx->is_training) {
in_grads->at(0) = results->at(0);
return Maybe<void>::Ok();
}
// todo(zzk): add eval mode.
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
REGISTER_OP_EXPR_GRAD_FUNCTION("normalization_add_relu", NormalizationAddReluGrad);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"
namespace oneflow {
namespace one {
struct Pad2dCaptureState : public AutoGradCaptureState {
bool requires_grad;
std::vector<int64_t> paddings;
};
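// Backward sketch: reflection/replication padding copy existing input elements into the
// border, so the backward accumulates dy from the padded positions back onto the source
// positions; functional::PadGrad with mode "reflect"/"replicate" is expected to do this
// folding in the subclasses below.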
class Pad2d : public OpExprGradFunction<Pad2dCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(Pad2dCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
const AttrMap& attrs) const override {
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 1); // NOLINT(maybe-need-error-msg)
ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->paddings = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("padding"));
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
class ReflectionPad2d : public Pad2d {
public:
Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(1);
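    // PadGrad folds the gradient contributions from the reflected border positions back onto the
    // corresponding interior elements of the unpadded input.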
if (ctx->requires_grad) {
(*in_grads)[0] =
JUST(functional::PadGrad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "reflect", 0));
}
return Maybe<void>::Ok();
}
};
class ReplicationPad2d : public Pad2d {
public:
Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(1);
if (ctx->requires_grad) {
(*in_grads)[0] =
JUST(functional::PadGrad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "replicate", 0));
}
return Maybe<void>::Ok();
}
};
struct ConstantPadNdCaptureState : public AutoGradCaptureState {
bool requires_grad;
std::vector<int64_t> paddings;
};
class ConstantPadNd : public OpExprGradFunction<ConstantPadNdCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(ConstantPadNdCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg)
CHECK_EQ_OR_RETURN(outputs.size(), 1); // NOLINT(maybe-need-error-msg)
const std::shared_ptr<Tensor>& input_0 = JUST(VectorAt(inputs, 0));
ctx->requires_grad = input_0->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->paddings = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("padding"));
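    // Negate the paddings so that the backward constant pad crops the padded borders from dy
    // instead of adding them.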
for (int i = 0; i < ctx->paddings.size(); i++) { ctx->paddings[i] = -ctx->paddings[i]; }
return Maybe<void>::Ok();
}
Maybe<void> Apply(const ConstantPadNdCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg)
in_grads->resize(1);
if (ctx->requires_grad) {
(*in_grads)[0] =
JUST(functional::Pad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "constant", Scalar(0)));
}
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
REGISTER_OP_EXPR_GRAD_FUNCTION("pad", ConstantPadNd);
REGISTER_OP_EXPR_GRAD_FUNCTION("reflection_pad2d", ReflectionPad2d);
REGISTER_OP_EXPR_GRAD_FUNCTION("replication_pad2d", ReplicationPad2d);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct PartialFCSampleState : public AutoGradCaptureState {
bool requires_grad = false;
int32_t index_sampled_label = -1;
int32_t index_weight = -1;
};
class PartialFCSample : public OpExprGradFunction<PartialFCSampleState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(PartialFCSampleState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const PartialFCSampleState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> PartialFCSample::Init(const OpExpr& op) {
const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> PartialFCSample::Capture(PartialFCSampleState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ctx->index_sampled_label = ctx->SaveTensorForBackward(outputs.at(1)); // sampled_label
ctx->index_weight = ctx->SaveTensorForBackward(inputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> PartialFCSample::Apply(const PartialFCSampleState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
CHECK_EQ_OR_RETURN(out_grads.size(), 3); // NOLINT(maybe-need-error-msg)
in_grads->resize(2);
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
const auto& diff_sampled_weight = out_grads.at(2); // diff of sampled_weight
const auto& sampled_tensor = ctx->SavedTensors().at(ctx->index_sampled_label);
const auto& weight = ctx->SavedTensors().at(ctx->index_weight);
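  // Scatter the sampled-weight gradient rows back into a full-size weight gradient, using the
  // sampled labels as segment ids.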
const auto& out_tensors_of_op0 = JUST(
functional::DistributedPariticalFCSampleDisableBoxing(diff_sampled_weight, sampled_tensor));
const auto& out_tensors_of_op1 = JUST(functional::UnsortedSegmentSumLike(
out_tensors_of_op0->at(0), out_tensors_of_op0->at(1), weight, 0));
in_grads->at(0) = out_tensors_of_op1;
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("distributed_partial_fc_sample", PartialFCSample);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/sequence_function.h"
namespace oneflow {
namespace one {
struct ReduceSumCaptureState : public AutoGradCaptureState {
std::vector<int32_t> axis;
};
class ReduceSum : public OpExprGradFunction<ReduceSumCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(ReduceSumCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const ReduceSumCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> ReduceSum::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> ReduceSum::Capture(ReduceSumCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
ctx->SaveTensorForBackward(inputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> ReduceSum::Apply(const ReduceSumCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
const auto& input = ctx->SavedTensors().at(0);
const auto& dy = out_grads.at(0);
in_grads->resize(1);
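  // Summation passes dy through unchanged to every reduced element, so the gradient is dy
  // broadcast back to the input shape along the reduced axes.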
in_grads->at(0) = JUST(functional::BroadcastLike(dy, input, ctx->axis));
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_sum", ReduceSum);
struct ReduceProdOpInterpState : public AutoGradCaptureState {
std::vector<int32_t> axis;
bool requires_grad;
};
class ReduceProdOp : public OpExprGradFunction<ReduceProdOpInterpState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(ReduceProdOpInterpState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const ReduceProdOpInterpState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> ReduceProdOp::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> ReduceProdOp::Capture(ReduceProdOpInterpState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
ctx->requires_grad = inputs.at(0)->requires_grad();
ctx->SaveTensorForBackward(inputs.at(0));
ctx->SaveTensorForBackward(outputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> ReduceProdOp::Apply(const ReduceProdOpInterpState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
const auto& input = ctx->SavedTensors().at(0);
const auto& output = ctx->SavedTensors().at(1);
const auto& dy = out_grads.at(0);
in_grads->resize(1);
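  // Since y = prod(x) over the reduced axes, d y / d x_i = y / x_i,
  // so dx = broadcast_like(dy * y, x) / x.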
in_grads->at(0) = JUST(
functional::SequenceFunction<Maybe<Tensor>()>([&]() { return functional::Mul(dy, output); })
.then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, ctx->axis))
.then(std::bind(functional::Div, std::placeholders::_1, input))
.call());
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_prod", ReduceProdOp);
struct ReduceMaxOrMinCaptureState : public AutoGradCaptureState {
std::vector<int32_t> axis;
bool keepdims;
};
class ReduceMaxOrMin : public OpExprGradFunction<ReduceMaxOrMinCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override;
Maybe<void> Capture(ReduceMaxOrMinCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override;
Maybe<void> Apply(const ReduceMaxOrMinCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override;
private:
AttrMap base_attrs_;
};
Maybe<void> ReduceMaxOrMin::Init(const OpExpr& op) {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> ReduceMaxOrMin::Capture(ReduceMaxOrMinCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const {
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
ctx->keepdims = JUST(composed_attrs.GetAttr<bool>("keepdims"));
ctx->SaveTensorForBackward(inputs.at(0));
ctx->SaveTensorForBackward(outputs.at(0));
return Maybe<void>::Ok();
}
Maybe<void> ReduceMaxOrMin::Apply(const ReduceMaxOrMinCaptureState* ctx,
const TensorTuple& out_grads, TensorTuple* in_grads) const {
const auto& input = ctx->SavedTensors().at(0);
const auto& output = ctx->SavedTensors().at(1);
const auto& dy = out_grads.at(0);
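  // Build a 0/1 mask of the positions that attain the reduced max/min: broadcast the reduced
  // value back to the input shape, compare for equality, and cast to the input dtype.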
const auto cast_like =
JUST(functional::SequenceFunction<Maybe<Tensor>()>(
[&]() { return functional::BroadcastLike(output, input, ctx->axis); })
.then(std::bind(functional::BroadcastEqual, input, std::placeholders::_1))
.then(std::bind(functional::CastLike, std::placeholders::_1, input))
.call());
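  // Split dy evenly among tied positions: divide dy by the per-reduction count of matches,
  // then broadcast back to the input shape.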
const auto& bcast_like_div =
JUST(functional::SequenceFunction<Maybe<Tensor>()>(
[&]() { return functional::ReduceSum(cast_like, ctx->axis, ctx->keepdims); })
.then(std::bind(functional::Div, dy, std::placeholders::_1))
.then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, ctx->axis))
.call());
in_grads->resize(1);
in_grads->at(0) = JUST(functional::Mul(bcast_like_div, cast_like));
return Maybe<void>::Ok();
}
REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_min", ReduceMaxOrMin);
REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_max", ReduceMaxOrMin);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct ReshapeCaptureState : public AutoGradCaptureState {
DimVector input_shape_vec;
};
class ReshapeOpExprGrad : public OpExprGradFunction<ReshapeCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
return Maybe<void>::Ok();
}
Maybe<void> Capture(ReshapeCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->input_shape_vec = inputs.at(0)->shape()->dim_vec();
return Maybe<void>::Ok();
}
Maybe<void> Apply(const ReshapeCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
in_grads->resize(1);
Shape shape(ctx->input_shape_vec);
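    // The gradient of reshape is just dy reshaped back to the original input shape.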
in_grads->at(0) = JUST(functional::Reshape(out_grads.at(0), shape));
return Maybe<void>::Ok();
}
};
REGISTER_OP_EXPR_GRAD_FUNCTION("reshape", ReshapeOpExprGrad);
} // namespace one
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {
struct RoiAlignCaptureState : public AutoGradCaptureState {
float spatial_scale = 1.0;
int32_t pooled_h = 0;
int32_t pooled_w = 0;
int32_t sampling_ratio = -1;
bool aligned = false;
bool requires_grad = false;
};
class RoiAlign : public OpExprGradFunction<RoiAlignCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override {
const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
CHECK_NOTNULL_OR_RETURN(fw_op_expr); // NOLINT(maybe-need-error-msg)
base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
return Maybe<void>::Ok();
}
Maybe<void> Capture(RoiAlignCaptureState* ctx, const TensorTuple& inputs,
const TensorTuple& outputs, const AttrMap& attrs) const override {
ctx->requires_grad = inputs.at(0)->requires_grad();
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
ctx->SaveTensorForBackward(inputs.at(0));
ctx->SaveTensorForBackward(inputs.at(1));
ComposedAttrMap composed_attrs(attrs, base_attrs_);
ctx->spatial_scale = JUST(composed_attrs.GetAttr<float>("spatial_scale"));
ctx->pooled_h = JUST(composed_attrs.GetAttr<int32_t>("pooled_h"));
ctx->pooled_w = JUST(composed_attrs.GetAttr<int32_t>("pooled_w"));
ctx->sampling_ratio = JUST(composed_attrs.GetAttr<int32_t>("sampling_ratio"));
ctx->aligned = JUST(composed_attrs.GetAttr<bool>("aligned"));
return Maybe<void>::Ok();
}
Maybe<void> Apply(const RoiAlignCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
const auto& x_like = ctx->SavedTensors().at(0);
const auto& rois = ctx->SavedTensors().at(1);
    in_grads->resize(2);  // inputs are x and rois; only x receives a gradient
    in_grads->at(0) = JUST(
        functional::RoiAlignGrad(out_grads.at(0), x_like, rois, ctx->spatial_scale, ctx->pooled_h,
                                 ctx->pooled_w, ctx->sampling_ratio, ctx->aligned));
return Maybe<void>::Ok();
}
private:
AttrMap base_attrs_;
};
REGISTER_OP_EXPR_GRAD_FUNCTION("roi_align", RoiAlign);
} // namespace one
} // namespace oneflow