OpenDAS / Oneflow · Commits

Commit 21d47d0e, authored Oct 24, 2022 by yuguo

    Oneflow 0.8 for DCU

Changes: 556. Showing 20 changed files with 2081 additions and 0 deletions (+2081, −0).
oneflow/core/autograd/gradient_funcs/identity.cpp                  +48   −0
oneflow/core/autograd/gradient_funcs/kl_div.cpp                    +72   −0
oneflow/core/autograd/gradient_funcs/l2_normalize.cpp              +79   −0
oneflow/core/autograd/gradient_funcs/layer_norm.cpp                +134  −0
oneflow/core/autograd/gradient_funcs/log_softmax.cpp               +80   −0
oneflow/core/autograd/gradient_funcs/masked_fill.cpp               +57   −0
oneflow/core/autograd/gradient_funcs/math_binary_op.cpp            +68   −0
oneflow/core/autograd/gradient_funcs/math_unary_op.cpp             +71   −0
oneflow/core/autograd/gradient_funcs/matmul.cpp                    +147  −0
oneflow/core/autograd/gradient_funcs/max_pool.cpp                  +109  −0
oneflow/core/autograd/gradient_funcs/median.cpp                    +108  −0
oneflow/core/autograd/gradient_funcs/narrow.cpp                    +90   −0
oneflow/core/autograd/gradient_funcs/nll.cpp                       +96   −0
oneflow/core/autograd/gradient_funcs/normalization.cpp             +175  −0
oneflow/core/autograd/gradient_funcs/normalization_add_relu.cpp    +223  −0
oneflow/core/autograd/gradient_funcs/padding.cpp                   +128  −0
oneflow/core/autograd/gradient_funcs/partial_fc_sample.cpp         +78   −0
oneflow/core/autograd/gradient_funcs/reduce_ops.cpp                +185  −0
oneflow/core/autograd/gradient_funcs/reshape.cpp                   +57   −0
oneflow/core/autograd/gradient_funcs/roi_align.cpp                 +76   −0
Too many changes to show. To preserve performance only 556 of 556+ files are displayed.
oneflow/core/autograd/gradient_funcs/identity.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"

namespace oneflow {
namespace one {

struct IdentityCaptureState : public AutoGradCaptureState {
  bool requires_grad;
};

class Identity : public OpExprGradFunction<IdentityCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(IdentityCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    ctx->requires_grad = inputs.at(0)->requires_grad();
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const IdentityCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    in_grads->resize(1);
    if (ctx->requires_grad) { in_grads->at(0) = out_grads.at(0); }
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("identity", Identity);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/kl_div.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct KLDivLossCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
  bool log_target = false;
};

class KLDivLoss : public OpExprGradFunction<KLDivLossCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(KLDivLossCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const KLDivLossCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> KLDivLoss::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> KLDivLoss::Capture(KLDivLossCaptureState* ctx, const TensorTuple& inputs,
                               const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->log_target = JUST(composed_attrs.GetAttr<bool>("log_target"));
  ctx->SaveTensorForBackward(inputs.at(0));  // input
  ctx->SaveTensorForBackward(inputs.at(1));  // target
  return Maybe<void>::Ok();
}

Maybe<void> KLDivLoss::Apply(const KLDivLossCaptureState* ctx, const TensorTuple& out_grads,
                             TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  const auto& dy = out_grads.at(0);
  const auto& input = ctx->SavedTensors().at(0);
  const auto& target = ctx->SavedTensors().at(1);
  in_grads->resize(ctx->SavedTensors().size());
  in_grads->at(0) = JUST(functional::KLDivLossGrad(dy, input, target, ctx->log_target));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("kl_div_loss", KLDivLoss);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/l2_normalize.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct L2NormalizeCaptureState : public AutoGradCaptureState {
  int64_t axis;
  float epsilon;
  bool requires_grad;
};

class L2Normalize : public OpExprGradFunction<L2NormalizeCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(L2NormalizeCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const L2NormalizeCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> L2Normalize::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> L2Normalize::Capture(L2NormalizeCaptureState* ctx, const TensorTuple& inputs,
                                 const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  ctx->SaveTensorForBackward(outputs.at(0));  // y
  ctx->SaveTensorForBackward(outputs.at(1));  // square_x_sum
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
  ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
  return Maybe<void>::Ok();
}

Maybe<void> L2Normalize::Apply(const L2NormalizeCaptureState* ctx, const TensorTuple& out_grads,
                               TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  in_grads->resize(1);
  CHECK_EQ_OR_RETURN(out_grads.size(), 2);  // NOLINT(maybe-need-error-msg)
  const auto& y = ctx->SavedTensors().at(0);
  const auto& square_x_sum = ctx->SavedTensors().at(1);
  in_grads->at(0) = JUST(
      functional::L2NormalizeGrad(out_grads.at(0), y, square_x_sum, ctx->axis, ctx->epsilon));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("l2_normalize", L2Normalize);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/layer_norm.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct LayerNormCaptureState : public AutoGradCaptureState {
  bool center = true;
  bool scale = true;
  int64_t begin_norm_axis = 1;
  int64_t begin_params_axis = 1;
  double epsilon = 1e-5;
  bool x_requires_grad = true;
  bool has_affine = true;
  size_t gamma_index = 0;
  size_t x_index = 1;
  size_t mean_index = 2;
  size_t inv_variance_index = 3;
};

// y, mean, inv_variance =
//   layer_norm(x, [gamma], [beta], center=False, scale=False, begin_norm_axis=1,
//              begin_params_axis=-1, epsilon=1e-5)
class LayerNorm : public OpExprGradFunction<LayerNormCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(LayerNormCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const LayerNormCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
  std::string op_name_;
};

Maybe<void> LayerNorm::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  op_name_ = fw_op_expr->op_name();
  return Maybe<void>::Ok();
}

Maybe<void> LayerNorm::Capture(LayerNormCaptureState* ctx, const TensorTuple& inputs,
                               const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->center = JUST(composed_attrs.GetAttr<bool>("center"));
  ctx->scale = JUST(composed_attrs.GetAttr<bool>("scale"));
  ctx->begin_norm_axis = JUST(composed_attrs.GetAttr<int64_t>("begin_norm_axis"));
  ctx->begin_params_axis = JUST(composed_attrs.GetAttr<int64_t>("begin_params_axis"));
  ctx->epsilon = JUST(composed_attrs.GetAttr<double>("epsilon"));
  CHECK_EQ_OR_RETURN(inputs.size(), ctx->center + ctx->scale + 1);  // NOLINT(maybe-need-error-msg)
  CHECK_EQ_OR_RETURN(outputs.size(), 3);                            // NOLINT(maybe-need-error-msg)

  bool has_gamma_diff = ctx->scale && inputs.at(1)->requires_grad();
  bool has_beta_diff = ctx->center && inputs.at(2)->requires_grad();
  ctx->has_affine = has_gamma_diff && has_beta_diff;
  ctx->x_requires_grad = inputs.at(0)->requires_grad();
  if (ctx->x_requires_grad || ctx->has_affine) {
    ctx->x_index = ctx->SaveTensorForBackward(inputs.at(0));
    ctx->mean_index = ctx->SaveTensorForBackward(outputs.at(1));
    ctx->inv_variance_index = ctx->SaveTensorForBackward(outputs.at(2));
    if (ctx->x_requires_grad && ctx->scale) {
      ctx->gamma_index = ctx->SaveTensorForBackward(inputs.at(1));  // save gamma.
    }
  }
  return Maybe<void>::Ok();
}

Maybe<void> LayerNorm::Apply(const LayerNormCaptureState* ctx, const TensorTuple& out_grads,
                             TensorTuple* in_grads) const {
  const auto& saved_tensors = ctx->SavedTensors();
  in_grads->resize(ctx->center + ctx->scale + 1);
  std::shared_ptr<Tensor> dy = out_grads.at(0);
  int64_t begin_params_axis = ctx->begin_params_axis;
  if (begin_params_axis < 0) { begin_params_axis += dy->shape()->NumAxes(); }
  int64_t begin_norm_axis = ctx->begin_norm_axis;
  if (begin_norm_axis < 0) { begin_norm_axis += dy->shape()->NumAxes(); }

  std::shared_ptr<Tensor> x = saved_tensors.at(ctx->x_index);
  std::shared_ptr<Tensor> mean = saved_tensors.at(ctx->mean_index);
  std::shared_ptr<Tensor> inv_variance = saved_tensors.at(ctx->inv_variance_index);

  if (ctx->has_affine) {
    // Use LayerNormParamGrad(Tensor dy, Tensor x, Tensor mean, Tensor inv_variance, Int64
    // begin_params_axis, Double epsilon).
    const auto& results = JUST(
        functional::LayerNormParamGrad(dy, x, mean, inv_variance, begin_params_axis, ctx->epsilon));
    in_grads->at(1) = results->at(0);  // For gamma.
    in_grads->at(2) = results->at(1);  // For beta.
  }
  if (ctx->x_requires_grad) {
    if (ctx->scale) {
      std::shared_ptr<Tensor> gamma = saved_tensors.at(ctx->gamma_index);
      in_grads->at(0) = JUST(functional::LayerNormAffineGrad(dy, x, mean, inv_variance, gamma,
                                                             begin_norm_axis, ctx->epsilon));
    } else {
      in_grads->at(0) =
          JUST(functional::LayerNormGrad(dy, x, mean, inv_variance, begin_norm_axis, ctx->epsilon));
    }
  }
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("layer_norm", LayerNorm);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/log_softmax.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"

namespace oneflow {
namespace one {

struct LogSoftmaxCaptureState : public AutoGradCaptureState {
  bool requires_grad;
};

class LogSoftmax : public OpExprGradFunction<LogSoftmaxCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(LogSoftmaxCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const LogSoftmaxCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
  std::shared_ptr<OpExpr> grad_op_;
};

Maybe<void> LogSoftmax::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  const std::string& op_name = fw_op_expr->op_name();
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  grad_op_ = JUST(one::OpBuilder("log_softmax_grad", GradientOpName(op_name))
                      .Input("prob")
                      .Input("dy")
                      .Output("dx")
                      .Build());
  return Maybe<void>::Ok();
}

Maybe<void> LogSoftmax::Capture(LogSoftmaxCaptureState* ctx, const TensorTuple& inputs,
                                const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) return Maybe<void>::Ok();
  ctx->SaveTensorForBackward(outputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> LogSoftmax::Apply(const LogSoftmaxCaptureState* ctx, const TensorTuple& out_grads,
                              TensorTuple* in_grads) const {
  if (!ctx->requires_grad) return Maybe<void>::Ok();
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  const auto& dy = out_grads.at(0);
  const auto& prob = ctx->SavedTensors().at(0);
  in_grads->resize(1);
  in_grads->at(0) = JUST(OpInterpUtil::Dispatch<Tensor>(*grad_op_, {prob, dy}));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("log_softmax", LogSoftmax);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/masked_fill.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct MaskedFillCaptureState : public AutoGradCaptureState {
  bool requires_grad = true;
};

class MaskedFill : public OpExprGradFunction<MaskedFillCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(MaskedFillCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->requires_grad = inputs.at(0)->requires_grad();
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    ctx->SaveTensorForBackward(inputs.at(0));
    ctx->SaveTensorForBackward(inputs.at(1));
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const MaskedFillCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    const std::shared_ptr<oneflow::one::Tensor>& x = ctx->SavedTensors().at(0);
    const std::shared_ptr<oneflow::one::Tensor>& mask = ctx->SavedTensors().at(1);
    std::shared_ptr<oneflow::one::Tensor> zero_out = JUST(functional::ZerosLike(x));
    in_grads->resize(2);
    in_grads->at(0) = JUST(functional::Where(mask, zero_out, out_grads.at(0)));
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("masked_fill", MaskedFill);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/math_binary_op.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/user/ops/math_binary_elementwise_seq.h"

namespace oneflow {
namespace one {

struct BinaryMathCaptureState : public AutoGradCaptureState {
  bool x_requires_grad;
  bool y_requires_grad;
};

typedef Maybe<one::Tensor> (*BinaryBwFunc)(const std::shared_ptr<one::Tensor>&,
                                           const std::shared_ptr<one::Tensor>&,
                                           const std::shared_ptr<one::Tensor>&);

template<BinaryBwFunc BwXFunc, BinaryBwFunc BwYFunc>
class BinaryMathOp : public OpExprGradFunction<BinaryMathCaptureState> {
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(BinaryMathCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->x_requires_grad = inputs.at(0)->requires_grad();
    ctx->y_requires_grad = inputs.at(1)->requires_grad();
    ctx->SaveTensorForBackward(inputs.at(0));
    ctx->SaveTensorForBackward(inputs.at(1));
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const BinaryMathCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (!(ctx->x_requires_grad || ctx->y_requires_grad)) { return Maybe<void>::Ok(); }
    in_grads->resize(2);
    const std::shared_ptr<one::Tensor>& x = ctx->SavedTensors().at(0);
    const std::shared_ptr<one::Tensor>& y = ctx->SavedTensors().at(1);
    if (ctx->x_requires_grad) { in_grads->at(0) = JUST(BwXFunc(x, y, out_grads.at(0))); }
    if (ctx->y_requires_grad) { in_grads->at(1) = JUST(BwYFunc(x, y, out_grads.at(0))); }
    return Maybe<void>::Ok();
  }
};

#define INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS(op_type_name, op_cls)                \
  class op_cls##Cls final                                                                \
      : public BinaryMathOp<functional::op_cls##XGrad, functional::op_cls##YGrad> {};    \
  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);

OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS,
                     MATH_BINARY_ELEMENTWISE_FUNC_SEQ);

#undef INSTANTIAT_AND_REGISTER_BINARY_MATHOP_CLASS

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/math_unary_op.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/user/ops/math_unary_elementwise_seq.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct UnaryMathCaptureState : public AutoGradCaptureState {
  bool x_requires_grad;
};

typedef Maybe<one::Tensor> (*UnaryBwFunc)(const std::shared_ptr<one::Tensor>&,
                                          const std::shared_ptr<one::Tensor>&);

template<UnaryBwFunc BwFunc>
class UnaryMathOp : public OpExprGradFunction<UnaryMathCaptureState> {
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->x_requires_grad = inputs.at(0)->requires_grad();
    ctx->SaveTensorForBackward(inputs.at(0));
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const UnaryMathCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
    const auto& x = ctx->SavedTensors().at(0);
    in_grads->at(0) = JUST(BwFunc(x, out_grads.at(0)));
    return Maybe<void>::Ok();
  }

 protected:
  std::shared_ptr<OpExpr> grad_op_;
};

#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS(op_type_name, op_cls)      \
  class op_cls##Cls final : public UnaryMathOp<functional::op_cls##Grad> {};  \
  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);

OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, MATH_UNARY_ELEMENTWISE_FUNC_SEQ);
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh));

// higher order derivative
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS,
                     OF_PP_MAKE_TUPLE_SEQ("sin_grad", SinGrad));
OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS,
                     OF_PP_MAKE_TUPLE_SEQ("cos_grad", CosGrad));

#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/matmul.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct MatmulCaptureState : public AutoGradCaptureState {
  bool transpose_a;
  bool transpose_b;
  double alpha;
  bool requires_grad_a;
  bool requires_grad_b;
  size_t a_index;
  size_t b_index;
};

class Matmul : public OpExprGradFunction<MatmulCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(MatmulCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 protected:
  AttrMap base_attrs_;
};

Maybe<void> Matmul::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> Matmul::Capture(MatmulCaptureState* ctx, const TensorTuple& inputs,
                            const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad_a = inputs.at(0)->requires_grad();
  ctx->requires_grad_b = inputs.at(1)->requires_grad();
  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }

  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->transpose_a = JUST(composed_attrs.GetAttr<bool>("transpose_a"));
  ctx->transpose_b = JUST(composed_attrs.GetAttr<bool>("transpose_b"));
  ctx->alpha = JUST(composed_attrs.GetAttr<double>("alpha"));
  if (ctx->requires_grad_a) {
    ctx->b_index = ctx->SaveTensorForBackward(inputs.at(1));  // input b
  }
  if (ctx->requires_grad_b) {
    ctx->a_index = ctx->SaveTensorForBackward(inputs.at(0));  // input a
  }
  return Maybe<void>::Ok();
}

Maybe<void> Matmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
                          TensorTuple* in_grads) const {
  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(2);
  if (ctx->requires_grad_a) {
    const auto& input_b = ctx->SavedTensors().at(ctx->b_index);
    if (ctx->transpose_a) {
      in_grads->at(0) =
          JUST(functional::MatMul(input_b, out_grads.at(0), ctx->transpose_b, true, ctx->alpha));
    } else {
      in_grads->at(0) = JUST(
          functional::MatMul(out_grads.at(0), input_b, false, !(ctx->transpose_b), ctx->alpha));
    }
  }
  if (ctx->requires_grad_b) {
    const auto& input_a = ctx->SavedTensors().at(ctx->a_index);
    if (ctx->transpose_b) {
      in_grads->at(1) =
          JUST(functional::MatMul(out_grads.at(0), input_a, true, ctx->transpose_a, ctx->alpha));
    } else {
      in_grads->at(1) = JUST(
          functional::MatMul(input_a, out_grads.at(0), !(ctx->transpose_a), false, ctx->alpha));
    }
  }
  return Maybe<void>::Ok();
}

class BroadcastMatmul : public Matmul {
 public:
  Maybe<void> Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;
};

Maybe<void> BroadcastMatmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
                                   TensorTuple* in_grads) const {
  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(2);
  if (ctx->requires_grad_a) {
    const auto& input_b = ctx->SavedTensors().at(ctx->b_index);
    if (ctx->transpose_a) {
      in_grads->at(0) =
          JUST(functional::MatMul(input_b, out_grads.at(0), ctx->transpose_b, true, ctx->alpha));
    } else {
      in_grads->at(0) = JUST(
          functional::MatMul(out_grads.at(0), input_b, false, !(ctx->transpose_b), ctx->alpha));
    }
  }
  if (ctx->requires_grad_b) {
    const auto& input_a = ctx->SavedTensors().at(ctx->a_index);
    if (ctx->transpose_b) {
      in_grads->at(1) = JUST(functional::BroadcastMatmulGradB(out_grads.at(0), input_a, ctx->alpha));
    } else {
      in_grads->at(1) = JUST(functional::BroadcastMatmulGradB(input_a, out_grads.at(0), ctx->alpha));
    }
  }
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("matmul", Matmul);
REGISTER_OP_EXPR_GRAD_FUNCTION("batch_matmul", Matmul);
REGISTER_OP_EXPR_GRAD_FUNCTION("broadcast_matmul", BroadcastMatmul);

}  // namespace one
}  // namespace oneflow
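For reference, the two Apply overloads above specialize the standard matrix-calculus rule for y = α · op_a(a) · op_b(b), where op_x transposes its argument when the corresponding transpose flag is set. In the plain (non-transposed) case the captured operands are combined as

    \frac{\partial L}{\partial a} = \alpha \, \frac{\partial L}{\partial y} \, b^{\top},
    \qquad
    \frac{\partial L}{\partial b} = \alpha \, a^{\top} \frac{\partial L}{\partial y},

and the transposed branches are the corresponding mirror images. BroadcastMatmul differs only in routing the b-side gradient through functional::BroadcastMatmulGradB.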
oneflow/core/autograd/gradient_funcs/max_pool.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {
namespace {

struct MaxPoolCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
  size_t input_index = 0;
  size_t indice_index = 0;
  std::string data_format;
  std::vector<int32_t> padding;
  std::vector<int32_t> kernel_size;
  std::vector<int32_t> stride;
  std::vector<int32_t> dilation;
  bool return_indices = false;
  bool ceil_mode = false;
};

class MaxPoolNdGrad : public OpExprGradFunction<MaxPoolCaptureState> {
 public:
  virtual ~MaxPoolNdGrad() = default;

  using OpExprGradFunction<MaxPoolCaptureState>::Init;

  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(MaxPoolCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const MaxPoolCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> MaxPoolNdGrad::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> MaxPoolNdGrad::Capture(MaxPoolCaptureState* ctx, const TensorTuple& inputs,
                                   const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }

  ctx->input_index = ctx->SaveTensorForBackward(inputs.at(0));
  ctx->indice_index = ctx->SaveTensorForBackward(outputs.at(1));

  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->data_format = JUST(composed_attrs.GetAttr<std::string>("data_format"));
  ctx->padding = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("padding"));
  ctx->kernel_size = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("kernel_size"));
  ctx->stride = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("stride"));
  ctx->dilation = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("dilation"));
  ctx->return_indices = JUST(composed_attrs.GetAttr<bool>("return_indices"));
  ctx->ceil_mode = JUST(composed_attrs.GetAttr<bool>("ceil_mode"));
  return Maybe<void>::Ok();
}

Maybe<void> MaxPoolNdGrad::Apply(const MaxPoolCaptureState* ctx, const TensorTuple& out_grads,
                                 TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  CHECK_LE_OR_RETURN(out_grads.size(), 2);  // NOLINT(maybe-need-error-msg)

  int32_t ndims = ctx->kernel_size.size();
  const auto& input = ctx->SavedTensors().at(ctx->input_index);
  const auto& indice = ctx->SavedTensors().at(ctx->indice_index);

  in_grads->resize(1);
  (*in_grads)[0] = JUST(functional::MaxPoolNdGrad(
      input, indice, out_grads[0], ndims, ctx->data_format, ctx->padding, ctx->kernel_size,
      ctx->stride, ctx->dilation, ctx->return_indices, ctx->ceil_mode));

  return Maybe<void>::Ok();
}

}  // namespace

REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_1d", MaxPoolNdGrad);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_2d", MaxPoolNdGrad);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_pool_3d", MaxPoolNdGrad);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/median.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/sequence_function.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct MedianCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
};

class Median : public OpExprGradFunction<MedianCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(MedianCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
    if (ctx->requires_grad) {
      ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
      ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 0)));
    }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const MedianCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (ctx->requires_grad) {
      const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
      const auto& output = JUST(VectorAt(ctx->SavedTensors(), 1));
      const auto& dy = JUST(VectorAt(out_grads, 0));
      std::vector<int32_t> axis(input->ndim());
      std::iota(axis.begin(), axis.end(), 0);
      const auto cast_like = JUST(
          functional::SequenceFunction<Maybe<Tensor>()>(
              [&]() { return functional::BroadcastLike(output, input, axis); })
              .then(std::bind(functional::BroadcastEqual, input, std::placeholders::_1))
              .then(std::bind(functional::CastLike, std::placeholders::_1, input))
              .call());
      const auto bcast_like_div = JUST(
          functional::SequenceFunction<Maybe<Tensor>()>(
              [&]() { return functional::ReduceSum(cast_like, axis, false); })
              .then(std::bind(functional::Div, dy, std::placeholders::_1))
              .then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, axis))
              .call());
      in_grads->resize(1);
      JUST(VectorAt(*in_grads, 0)) = JUST(functional::Mul(bcast_like_div, cast_like));
    }
    return Maybe<void>::Ok();
  }
};

struct MedianWithIndicesCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
};

class MedianWithIndices : public OpExprGradFunction<MedianWithIndicesCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(MedianWithIndicesCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
    if (ctx->requires_grad) {
      ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
      ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 1)));
    }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const MedianWithIndicesCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (ctx->requires_grad) {
      in_grads->resize(1);
      const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
      const auto& indices = JUST(functional::Unsqueeze(JUST(VectorAt(ctx->SavedTensors(), 1)), -1));
      const auto& dout = JUST(functional::Unsqueeze(JUST(VectorAt(out_grads, 0)), -1));
      JUST(VectorAt(*in_grads, 0)) = JUST(functional::DimScatter(
          JUST(functional::Constant(*(input->shape()), Scalar(0), *dout->dtype(),
                                    JUST(dout->device()))),
          -1, indices, dout));
    }
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("median", Median);
REGISTER_OP_EXPR_GRAD_FUNCTION("median_with_indices", MedianWithIndices);

}  // namespace one
}  // namespace oneflow
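As a reading aid, the SequenceFunction chain in Median::Apply routes the incoming gradient evenly to every element equal to the global median m of the input: cast_like is the 0/1 mask of those elements and bcast_like_div divides dy by their count, so the product amounts to

    \frac{\partial L}{\partial x_i} =
    \begin{cases}
      \dfrac{1}{\lvert\{\,j : x_j = m\,\}\rvert}\,\dfrac{\partial L}{\partial y}, & x_i = m,\\
      0, & \text{otherwise.}
    \end{cases}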
oneflow/core/autograd/gradient_funcs/narrow.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/job/lazy_mode.h"
#include "oneflow/core/framework/nd_sbp.h"

namespace oneflow {
namespace one {

struct NarrowCaptureState : public AutoGradCaptureState {
  bool requires_grad;
  Shape shape;
  int64_t dim;
  int64_t start;
  int64_t length;
};

class Narrow : public OpExprGradFunction<NarrowCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(NarrowCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);   // NOLINT(maybe-need-error-msg)
    CHECK_EQ_OR_RETURN(outputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    ctx->requires_grad = inputs.at(0)->requires_grad();
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }

    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->dim = JUST(composed_attrs.GetAttr<int64_t>("dim"));
    ctx->start = JUST(composed_attrs.GetAttr<int64_t>("start"));
    ctx->length = JUST(composed_attrs.GetAttr<int64_t>("length"));
    if (LazyMode::is_enabled()) {
      ctx->SaveTensorForBackward(inputs.at(0));
    } else {
      ctx->shape = *(inputs.at(0)->shape());
    }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const NarrowCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    const auto& dy = out_grads.at(0);
    if (ctx->requires_grad) {
      std::shared_ptr<Tensor> like;
      if (LazyMode::is_enabled()) {
        like = ctx->SavedTensors().at(0);
      } else if (dy->is_local()) {
        like = JUST(functional::Empty(ctx->shape, dy->dtype(), JUST(dy->device()),
                                      /*pin_memory=*/false));
      } else {
        like = JUST(functional::ConsistentEmpty(
            ctx->shape, dy->dtype(), JUST(dy->parallel_desc()),
            *JUST(private_details::RawGetSbpList(JUST(dy->nd_sbp())))));
      }
      in_grads->resize(1);
      in_grads->at(0) = JUST(functional::NarrowGrad(dy, like, ctx->dim, ctx->start, ctx->length));
    }
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("narrow", Narrow);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/nll.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct NLLCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
  int64_t ignore_index = -100;
};

class NLLGradFunction : public OpExprGradFunction<NLLCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(NLLCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const NLLCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> NLLGradFunction::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> NLLGradFunction::Capture(NLLCaptureState* ctx, const TensorTuple& inputs,
                                     const TensorTuple& outputs, const AttrMap& attrs) const {
  auto input = JUST(VectorAt(inputs, 0));
  ctx->requires_grad = input->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }

  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->ignore_index = JUST(composed_attrs.GetAttr<int64_t>("ignore_index"));
  ctx->SaveTensorForBackward(input);                        // input
  ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1)));    // target
  if (inputs.size() == 3) {
    ctx->SaveTensorForBackward(inputs[2]);                  // weight
  }
  return Maybe<void>::Ok();
}

Maybe<void> NLLGradFunction::Apply(const NLLCaptureState* ctx, const TensorTuple& out_grads,
                                   TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  CHECK_EQ_OR_RETURN(out_grads.size(), 2);  // NOLINT(maybe-need-error-msg)
  CHECK_GE_OR_RETURN(ctx->SavedTensors().size(), 2)
      << Error::RuntimeError()
      << "The number of saved tensors is expected to be greater than or equal to 2, but got "
      << ctx->SavedTensors().size();
  const auto& out_grad = out_grads[0];
  const auto& input = ctx->SavedTensors()[0];
  const auto& target = ctx->SavedTensors()[1];

  in_grads->resize(ctx->SavedTensors().size());

  if (ctx->SavedTensors().size() == 2) {
    JUST(VectorAt(*in_grads, 0)) =
        JUST(functional::NLLGrad(out_grad, input, target, NullOpt, ctx->ignore_index));
  } else {
    // has weight
    auto weight = JUST(VectorAt(ctx->SavedTensors(), 2));
    JUST(VectorAt(*in_grads, 0)) =
        JUST(functional::NLLGrad(out_grad, input, target, weight, ctx->ignore_index));
  }

  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("nll", NLLGradFunction);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/normalization.cpp  (new file, mode 100644)

/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct NormalizationGradCaptureState : public AutoGradCaptureState {
  int32_t axis;
  float epsilon;
  bool track_running_stats;
  bool is_training;
  bool x_requires_grad;
  bool gamma_requires_grad;
  bool beta_requires_grad;
};

// training:
//   y, mean, inv_variance = normalization(x, moving_mean, moving_variance, gamma, beta,
//                                         axis=1, epsilon=0.01, momentum=0.9)
//   y, mean, inv_variance = normalization(x, gamma, beta, axis=1, epsilon=0.01, momentum=0.9)
// inference:
//   y = normalization(x, moving_mean, moving_variance, gamma, beta, axis=1, epsilon=0.01,
//                     momentum=0.9)
class NormalizationGrad : public OpExprGradFunction<NormalizationGradCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(NormalizationGradCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    // input_size may be 3 or 5, as inputs may be
    //   (x, gamma, beta) or (x, moving_mean, moving_variance, gamma, beta)
    //   ref to track_running_stats false/true
    // output_size may be 1 or 3, as outputs may be
    //   (x, ) or (x, mean, inv_variance)
    //   ref to is_training false/true
    ctx->x_requires_grad = inputs.at(0)->requires_grad();
    std::shared_ptr<Tensor> gamma, beta;
    if (inputs.size() == 3) {
      gamma = inputs.at(1);
      beta = inputs.at(2);
      ctx->track_running_stats = false;
    } else {
      CHECK_EQ_OR_RETURN(inputs.size(), 5);  // NOLINT(maybe-need-error-msg)
      gamma = inputs.at(3);
      beta = inputs.at(4);
      ctx->track_running_stats = true;
    }
    ctx->gamma_requires_grad = gamma->requires_grad();
    ctx->beta_requires_grad = beta->requires_grad();

    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
    ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
    ctx->is_training = JUST(composed_attrs.GetAttr<bool>("training"));
    ctx->SaveTensorForBackward(inputs.at(0));  // x
    ctx->SaveTensorForBackward(gamma);         // gamma
    if (ctx->is_training || !ctx->track_running_stats) {
      ctx->SaveTensorForBackward(outputs.at(1));  // mean
      ctx->SaveTensorForBackward(outputs.at(2));  // inv_variance
    } else {
      ctx->SaveTensorForBackward(inputs.at(1));  // moving_mean
      ctx->SaveTensorForBackward(inputs.at(2));  // moving_variance
    }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const NormalizationGradCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    const auto& x = ctx->SavedTensors().at(0);      // x
    const auto& gamma = ctx->SavedTensors().at(1);  // gamma
    const auto& y_grad = out_grads.at(0);

    std::shared_ptr<Tensor> mean, inv_variance;
    if (ctx->is_training || !ctx->track_running_stats) {
      mean = ctx->SavedTensors().at(2);          // mean
      inv_variance = ctx->SavedTensors().at(3);  // inv_variance
    } else {
      const auto& moving_mean = ctx->SavedTensors().at(2);      // moving_mean
      const auto& moving_variance = ctx->SavedTensors().at(3);  // moving_variance
      const auto& add_eps = JUST(
          functional::ScalarAdd(moving_variance, ctx->epsilon, /*alpha=*/1, /*inplace=*/false));
      mean = moving_mean;
      inv_variance = JUST(functional::Rsqrt(add_eps));
    }
    const auto& results = JUST(functional::NormalizationGrad(y_grad, x, mean, inv_variance, gamma,
                                                             ctx->epsilon, ctx->axis));
    CHECK_EQ_OR_RETURN(results->size(), 3)
        << Error::RuntimeError() << "The number of results is expected to be 3, but got "
        << results->size();

    if (ctx->track_running_stats) {
      // The normalization op has 5 inputs which are x, moving_mean, moving_variance, gamma and
      // beta.
      in_grads->resize(5);
      if (ctx->gamma_requires_grad) {
        in_grads->at(3) = results->at(1);  // gamma_diff;
      }
      if (ctx->beta_requires_grad) {
        in_grads->at(4) = results->at(2);  // beta_diff
      }
    } else {
      // The normalization op has 3 inputs which are x, gamma and beta.
      in_grads->resize(3);
      if (ctx->gamma_requires_grad) {
        in_grads->at(1) = results->at(1);  // gamma_diff;
      }
      if (ctx->beta_requires_grad) {
        in_grads->at(2) = results->at(2);  // beta_diff
      }
    }

    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
    if (ctx->is_training) {
      in_grads->at(0) = results->at(0);
      return Maybe<void>::Ok();
    }

    Shape shape;
    for (int i = 0; i < x->shape()->NumAxes(); ++i) {
      if (i != ctx->axis) {
        shape.emplace_back(1);
      } else {
        shape.emplace_back(x->shape()->At(ctx->axis));
      }
    }
    const auto& reshaped_gamma = JUST(functional::Reshape(gamma, shape));
    const auto& reshaped_inv_variance = JUST(functional::Reshape(inv_variance, shape));

    std::shared_ptr<Tensor> y_grad_fp32 = y_grad;
    bool is_fp16 = y_grad->dtype()->data_type() == DataType::kFloat16;
    if (is_fp16) {
      y_grad_fp32 = JUST(functional::Cast(y_grad, DType::Float(), /*pin_memory=*/false));
    }
    const auto& dy_mul_gamma = JUST(functional::Mul(reshaped_gamma, y_grad_fp32));
    const auto& dy_mul_inv_var = JUST(functional::Mul(dy_mul_gamma, reshaped_inv_variance));
    if (is_fp16) {
      (*in_grads)[0] = JUST(functional::Cast(dy_mul_inv_var, DType::Float16(),
                                             /*pin_memory=*/false));
    } else {
      (*in_grads)[0] = dy_mul_inv_var;
    }
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("normalization", NormalizationGrad);

}  // namespace one
}  // namespace oneflow
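In the inference branch of Apply above, the saved moving statistics stand in for the per-batch statistics and the x-gradient reduces to an elementwise rescale,

    \hat{\sigma}^{-1} = \frac{1}{\sqrt{\text{moving\_variance} + \epsilon}},
    \qquad
    \frac{\partial L}{\partial x} = \gamma \cdot \frac{\partial L}{\partial y} \cdot \hat{\sigma}^{-1},

with gamma and inv_variance reshaped so they broadcast along the channel axis, and an fp32 round trip when the incoming gradient is fp16.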
oneflow/core/autograd/gradient_funcs/normalization_add_relu.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/dtype.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {

struct NormalizationAddReluGradCaptureState : public AutoGradCaptureState {
  int32_t axis = 1;
  float epsilon = 1e-5;
  bool track_running_stats = true;
  bool is_training = true;
  bool has_addend = false;
  bool x_requires_grad = true;
  bool addend_requires_grad = true;
  bool gamma_requires_grad = true;
  bool beta_requires_grad = true;
};

// training:
//   y, mean, inv_variance = normalization_add_relu(x, Optional(add_end), moving_mean,
//       moving_variance, gamma, beta, axis=1, epsilon=0.01, momentum=0.9)
//   y, mean, inv_variance = normalization_add_relu(x, Optional(add_end), gamma, beta,
//       axis=1, epsilon=0.01, momentum=0.9)
// inference:
//   y = normalization_add_relu(x, Optional(add_end), moving_mean, moving_variance, gamma, beta,
//       axis=1, epsilon=0.01, momentum=0.9)
class NormalizationAddReluGrad : public OpExprGradFunction<NormalizationAddReluGradCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(NormalizationAddReluGradCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    // input_size may be 3/4/5/6, as inputs may be
    //   (x, gamma, beta) or (x, moving_mean, moving_variance, gamma, beta)
    //   (x, addend, gamma, beta) or (x, addend, moving_mean, moving_variance, gamma, beta)
    //   ref to track_running_stats false/true
    // output_size may be 2 or 4, as outputs may be
    //   (x, reserve_space) or (x, reserve_space, mean, inv_variance)
    //   ref to is_training false/true
    ctx->x_requires_grad = inputs.at(0)->requires_grad();
    std::shared_ptr<Tensor> add_end, gamma, beta;
    if (inputs.size() == 3 || inputs.size() == 5) {
      add_end = nullptr;
      if (inputs.size() == 3) {
        gamma = inputs.at(1);
        beta = inputs.at(2);
        ctx->track_running_stats = false;
      } else {
        gamma = inputs.at(3);
        beta = inputs.at(4);
        ctx->track_running_stats = true;
      }
      ctx->has_addend = false;
    } else if (inputs.size() == 4 || inputs.size() == 6) {
      add_end = inputs.at(1);
      if (inputs.size() == 4) {
        gamma = inputs.at(2);
        beta = inputs.at(3);
        ctx->track_running_stats = false;
      } else {
        gamma = inputs.at(4);
        beta = inputs.at(5);
        ctx->track_running_stats = true;
      }
      ctx->has_addend = true;
      ctx->addend_requires_grad = inputs.at(1)->requires_grad();
    }
    ctx->gamma_requires_grad = gamma->requires_grad();
    ctx->beta_requires_grad = beta->requires_grad();

    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->axis = JUST(composed_attrs.GetAttr<int32_t>("axis"));
    ctx->epsilon = JUST(composed_attrs.GetAttr<float>("epsilon"));
    ctx->is_training = JUST(composed_attrs.GetAttr<bool>("training"));

    ctx->SaveTensorForBackward(inputs.at(0));  // x 0
    ctx->SaveTensorForBackward(gamma);         // gamma 1
    ctx->SaveTensorForBackward(beta);          // beta 2
    if (ctx->is_training || !ctx->track_running_stats) {
      ctx->SaveTensorForBackward(outputs.at(2));  // mean 3
      ctx->SaveTensorForBackward(outputs.at(3));  // inv_variance 4
    } else {
      if (inputs.size() == 5) {
        // without add_end
        ctx->SaveTensorForBackward(inputs.at(1));  // moving_mean 3
        ctx->SaveTensorForBackward(inputs.at(2));  // moving_variance 4
      } else {
        CHECK_EQ_OR_RETURN(inputs.size(), 6);  // NOLINT(maybe-need-error-msg)
        // with add_end
        ctx->SaveTensorForBackward(inputs.at(2));  // moving_mean 3
        ctx->SaveTensorForBackward(inputs.at(3));  // moving_variance 4
      }
    }
    ctx->SaveTensorForBackward(outputs.at(0));  // y 5
    ctx->SaveTensorForBackward(outputs.at(1));  // reserve space 6
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const NormalizationAddReluGradCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    const auto& x = ctx->SavedTensors().at(0);      // x
    const auto& gamma = ctx->SavedTensors().at(1);  // gamma
    const auto& beta = ctx->SavedTensors().at(2);   // beta
    const auto& y_grad = out_grads.at(0);

    std::shared_ptr<Tensor> mean, inv_variance;
    if (ctx->is_training || !ctx->track_running_stats) {
      mean = ctx->SavedTensors().at(3);          // mean
      inv_variance = ctx->SavedTensors().at(4);  // inv_variance
    } else {
      const auto& moving_mean = ctx->SavedTensors().at(3);      // moving_mean
      const auto& moving_variance = ctx->SavedTensors().at(4);  // moving_variance
      const auto& add_eps =
          JUST(functional::ScalarAdd(moving_variance, ctx->epsilon, /*alpha=*/1, /*inplace=*/false));
      mean = moving_mean;
      inv_variance = JUST(functional::Rsqrt(add_eps));
    }
    const auto& y = ctx->SavedTensors().at(5);
    const auto& reserve_space = ctx->SavedTensors().at(6);

    const auto& results = JUST(functional::NormalizationAddReluGrad(
        x, y_grad, mean, inv_variance, gamma, beta, reserve_space, y, ctx->axis, ctx->epsilon,
        ctx->has_addend));
    CHECK_EQ_OR_RETURN(results->size(), (ctx->has_addend ? 4 : 3))
        << Error::RuntimeError() << "The number of results is expected to be "
        << (ctx->has_addend ? 4 : 3) << ", but got " << results->size();
    // here output includes "gamma_diff" "beta_diff" "dx" "addend_diff"

    if (ctx->track_running_stats) {
      // The normalization op has 5 inputs which are x, moving_mean, moving_variance, gamma and
      // beta, or 6 inputs: x, add_end, moving_mean, moving_variance, gamma and beta.
      if (ctx->has_addend) {
        in_grads->resize(6);
        if (ctx->gamma_requires_grad) {
          in_grads->at(4) = results->at(1);  // gamma_diff
        }
        if (ctx->beta_requires_grad) {
          in_grads->at(5) = results->at(2);  // beta_diff
        }
        if (ctx->addend_requires_grad) {
          in_grads->at(1) = results->at(3);  // add_end_diff
        }
      } else {
        in_grads->resize(5);
        if (ctx->gamma_requires_grad) {
          in_grads->at(3) = results->at(1);  // gamma_diff
        }
        if (ctx->beta_requires_grad) {
          in_grads->at(4) = results->at(2);  // beta_diff
        }
      }
    } else {
      // The normalization op has 3 inputs which are x, gamma and beta,
      // or 4 inputs which are x, addend, gamma and beta.
      if (ctx->has_addend) {
        in_grads->resize(4);
        if (ctx->addend_requires_grad) {
          in_grads->at(1) = results->at(3);  // addend_diff
        }
        if (ctx->gamma_requires_grad) {
          in_grads->at(2) = results->at(1);  // gamma_diff
        }
        if (ctx->beta_requires_grad) {
          in_grads->at(3) = results->at(2);  // beta_diff
        }
      } else {
        in_grads->resize(3);
        if (ctx->gamma_requires_grad) {
          in_grads->at(1) = results->at(1);  // gamma_diff
        }
        if (ctx->beta_requires_grad) {
          in_grads->at(2) = results->at(2);  // beta_diff
        }
      }
    }

    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
    if (ctx->is_training) {
      in_grads->at(0) = results->at(0);
      return Maybe<void>::Ok();
    }
    // todo(zzk): add eval mode.
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("normalization_add_relu", NormalizationAddReluGrad);

}  // namespace one
}  // namespace oneflow
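In the fused op y = relu(bn(x) + addend), the addend branch receives the upstream gradient masked by the ReLU. The sketch below shows only that masking step under the usual assumption that the mask is "y > 0" (inside the op the mask actually comes from reserve_space); names are illustrative, not the OneFlow functional API.

#include <cstddef>
#include <vector>

// d_addend[i] = (y[i] > 0) ? dy[i] : 0 -- the ReLU-masked upstream gradient that the
// addend input of normalization_add_relu is assumed to receive.
std::vector<float> ReluMaskedGrad(const std::vector<float>& dy, const std::vector<float>& y) {
  std::vector<float> d_addend(dy.size());
  for (std::size_t i = 0; i < dy.size(); ++i) { d_addend[i] = (y[i] > 0.0f) ? dy[i] : 0.0f; }
  return d_addend;
}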
oneflow/core/autograd/gradient_funcs/padding.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"
namespace oneflow {
namespace one {

struct Pad2dCaptureState : public AutoGradCaptureState {
  bool requires_grad;
  std::vector<int64_t> paddings;
};

class Pad2d : public OpExprGradFunction<Pad2dCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(Pad2dCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);   // NOLINT(maybe-need-error-msg)
    CHECK_EQ_OR_RETURN(outputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->paddings = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("padding"));
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

class ReflectionPad2d : public Pad2d {
 public:
  Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    in_grads->resize(1);
    if (ctx->requires_grad) {
      (*in_grads)[0] =
          JUST(functional::PadGrad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "reflect", 0));
    }
    return Maybe<void>::Ok();
  }
};

class ReplicationPad2d : public Pad2d {
 public:
  Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    in_grads->resize(1);
    if (ctx->requires_grad) {
      (*in_grads)[0] =
          JUST(functional::PadGrad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "replicate", 0));
    }
    return Maybe<void>::Ok();
  }
};

struct ConstantPadNdCaptureState : public AutoGradCaptureState {
  bool requires_grad;
  std::vector<int64_t> paddings;
};

class ConstantPadNd : public OpExprGradFunction<ConstantPadNdCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(ConstantPadNdCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);   // NOLINT(maybe-need-error-msg)
    CHECK_EQ_OR_RETURN(outputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    const std::shared_ptr<Tensor>& input_0 = JUST(VectorAt(inputs, 0));
    ctx->requires_grad = input_0->requires_grad();
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->paddings = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("padding"));
    for (int i = 0; i < ctx->paddings.size(); i++) { ctx->paddings[i] = -ctx->paddings[i]; }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const ConstantPadNdCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    in_grads->resize(1);
    if (ctx->requires_grad) {
      (*in_grads)[0] = JUST(
          functional::Pad(JUST(VectorAt(out_grads, 0)), ctx->paddings, "constant", Scalar(0)));
    }
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("pad", ConstantPadNd);
REGISTER_OP_EXPR_GRAD_FUNCTION("reflection_pad2d", ReflectionPad2d);
REGISTER_OP_EXPR_GRAD_FUNCTION("replication_pad2d", ReplicationPad2d);

}  // namespace one
}  // namespace oneflow
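ConstantPadNd::Capture negates every padding amount, so the backward pass is a crop: padding dy by the negated amounts drops exactly the rows and columns the forward pass added. A 1-D standalone sketch of that idea (illustrative names, not the OneFlow functional API; it assumes left + right <= dy.size()):

#include <vector>

// Backward of constant padding in 1-D: keep dy[left .. dy.size() - right), i.e. apply a
// "pad" of (-left, -right), where left/right are the forward padding amounts.
std::vector<float> ConstantPad1dGrad(const std::vector<float>& dy, int left, int right) {
  return std::vector<float>(dy.begin() + left, dy.end() - right);
}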
oneflow/core/autograd/gradient_funcs/partial_fc_sample.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {

struct PartialFCSampleState : public AutoGradCaptureState {
  bool requires_grad = false;
  int32_t index_sampled_label = -1;
  int32_t index_weight = -1;
};

class PartialFCSample : public OpExprGradFunction<PartialFCSampleState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;

  Maybe<void> Capture(PartialFCSampleState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;

  Maybe<void> Apply(const PartialFCSampleState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> PartialFCSample::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> PartialFCSample::Capture(PartialFCSampleState* ctx, const TensorTuple& inputs,
                                     const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  ctx->index_sampled_label = ctx->SaveTensorForBackward(outputs.at(1));  // sampled_label
  ctx->index_weight = ctx->SaveTensorForBackward(inputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> PartialFCSample::Apply(const PartialFCSampleState* ctx, const TensorTuple& out_grads,
                                   TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(out_grads.size(), 3);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(2);
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  const auto& diff_sampled_weight = out_grads.at(2);  // diff of sampled_weight
  const auto& sampled_tensor = ctx->SavedTensors().at(ctx->index_sampled_label);
  const auto& weight = ctx->SavedTensors().at(ctx->index_weight);
  const auto& out_tensors_of_op0 = JUST(functional::DistributedPariticalFCSampleDisableBoxing(
      diff_sampled_weight, sampled_tensor));
  const auto& out_tensors_of_op1 = JUST(functional::UnsortedSegmentSumLike(
      out_tensors_of_op0->at(0), out_tensors_of_op0->at(1), weight, 0));
  in_grads->at(0) = out_tensors_of_op1;
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("distributed_partial_fc_sample", PartialFCSample);

}  // namespace one
}  // namespace oneflow
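The weight gradient above is produced by an unsorted segment sum: each row of the sampled-weight gradient is added back into the full weight gradient at the row selected during sampling. A dense standalone sketch of that scatter-add (illustrative names, not the OneFlow functional API; it assumes every row_index entry is < num_rows):

#include <cstddef>
#include <vector>

// Accumulate each sampled-gradient row into the full weight gradient at its target row.
std::vector<std::vector<float>> SegmentSumRows(const std::vector<std::vector<float>>& sampled_grad,
                                               const std::vector<int>& row_index,
                                               std::size_t num_rows, std::size_t num_cols) {
  std::vector<std::vector<float>> weight_grad(num_rows, std::vector<float>(num_cols, 0.0f));
  for (std::size_t i = 0; i < sampled_grad.size(); ++i) {
    for (std::size_t j = 0; j < num_cols; ++j) {
      weight_grad[row_index[i]][j] += sampled_grad[i][j];
    }
  }
  return weight_grad;
}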
oneflow/core/autograd/gradient_funcs/reduce_ops.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/sequence_function.h"
namespace oneflow {
namespace one {

struct ReduceSumCaptureState : public AutoGradCaptureState {
  std::vector<int32_t> axis;
};

class ReduceSum : public OpExprGradFunction<ReduceSumCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(ReduceSumCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const ReduceSumCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> ReduceSum::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> ReduceSum::Capture(ReduceSumCaptureState* ctx, const TensorTuple& inputs,
                               const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
  ctx->SaveTensorForBackward(inputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> ReduceSum::Apply(const ReduceSumCaptureState* ctx, const TensorTuple& out_grads,
                             TensorTuple* in_grads) const {
  const auto& input = ctx->SavedTensors().at(0);
  const auto& dy = out_grads.at(0);
  in_grads->resize(1);
  in_grads->at(0) = JUST(functional::BroadcastLike(dy, input, ctx->axis));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_sum", ReduceSum);

struct ReduceProdOpInterpState : public AutoGradCaptureState {
  std::vector<int32_t> axis;
  bool requires_grad;
};

class ReduceProdOp : public OpExprGradFunction<ReduceProdOpInterpState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(ReduceProdOpInterpState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const ReduceProdOpInterpState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> ReduceProdOp::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> ReduceProdOp::Capture(ReduceProdOpInterpState* ctx, const TensorTuple& inputs,
                                  const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
  ctx->requires_grad = inputs.at(0)->requires_grad();
  ctx->SaveTensorForBackward(inputs.at(0));
  ctx->SaveTensorForBackward(outputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> ReduceProdOp::Apply(const ReduceProdOpInterpState* ctx, const TensorTuple& out_grads,
                                TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  const auto& input = ctx->SavedTensors().at(0);
  const auto& output = ctx->SavedTensors().at(1);
  const auto& dy = out_grads.at(0);
  in_grads->resize(1);
  in_grads->at(0) = JUST(
      functional::SequenceFunction<Maybe<Tensor>()>([&]() { return functional::Mul(dy, output); })
          .then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, ctx->axis))
          .then(std::bind(functional::Div, std::placeholders::_1, input))
          .call());
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_prod", ReduceProdOp);

struct ReduceMaxOrMinCaptureState : public AutoGradCaptureState {
  std::vector<int32_t> axis;
  bool keepdims;
};

class ReduceMaxOrMin : public OpExprGradFunction<ReduceMaxOrMinCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(ReduceMaxOrMinCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const ReduceMaxOrMinCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> ReduceMaxOrMin::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> ReduceMaxOrMin::Capture(ReduceMaxOrMinCaptureState* ctx, const TensorTuple& inputs,
                                    const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
  ctx->keepdims = JUST(composed_attrs.GetAttr<bool>("keepdims"));
  ctx->SaveTensorForBackward(inputs.at(0));
  ctx->SaveTensorForBackward(outputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> ReduceMaxOrMin::Apply(const ReduceMaxOrMinCaptureState* ctx,
                                  const TensorTuple& out_grads, TensorTuple* in_grads) const {
  const auto& input = ctx->SavedTensors().at(0);
  const auto& output = ctx->SavedTensors().at(1);
  const auto& dy = out_grads.at(0);

  const auto cast_like =
      JUST(functional::SequenceFunction<Maybe<Tensor>()>(
               [&]() { return functional::BroadcastLike(output, input, ctx->axis); })
               .then(std::bind(functional::BroadcastEqual, input, std::placeholders::_1))
               .then(std::bind(functional::CastLike, std::placeholders::_1, input))
               .call());

  const auto& bcast_like_div =
      JUST(functional::SequenceFunction<Maybe<Tensor>()>(
               [&]() { return functional::ReduceSum(cast_like, ctx->axis, ctx->keepdims); })
               .then(std::bind(functional::Div, dy, std::placeholders::_1))
               .then(std::bind(functional::BroadcastLike, std::placeholders::_1, input, ctx->axis))
               .call());

  in_grads->resize(1);
  in_grads->at(0) = JUST(functional::Mul(bcast_like_div, cast_like));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_min", ReduceMaxOrMin);
REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_max", ReduceMaxOrMin);

}  // namespace one
}  // namespace oneflow
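ReduceMaxOrMin::Apply builds a 0/1 mask where the input equals the broadcast result, then divides dy by the number of tied extrema so the gradient is shared evenly among them. A 1-D standalone sketch of the same rule, for a full reduction over one axis (illustrative names, not the OneFlow functional API):

#include <algorithm>
#include <cstddef>
#include <vector>

// reduce_max backward in 1-D: every element equal to the maximum receives dy / (number of
// ties); all other elements receive zero. Exact equality mirrors BroadcastEqual above.
std::vector<float> ReduceMax1dGrad(const std::vector<float>& x, float dy) {
  const float m = *std::max_element(x.begin(), x.end());
  int count = 0;
  for (float v : x) { count += (v == m) ? 1 : 0; }
  std::vector<float> dx(x.size(), 0.0f);
  for (std::size_t i = 0; i < x.size(); ++i) { dx[i] = (x[i] == m) ? dy / count : 0.0f; }
  return dx;
}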
oneflow/core/autograd/gradient_funcs/reshape.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {

struct ReshapeCaptureState : public AutoGradCaptureState {
  DimVector input_shape_vec;
};

class ReshapeOpExprGrad : public OpExprGradFunction<ReshapeCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(ReshapeCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->input_shape_vec = inputs.at(0)->shape()->dim_vec();
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const ReshapeCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    in_grads->resize(1);
    Shape shape(ctx->input_shape_vec);
    in_grads->at(0) = JUST(functional::Reshape(out_grads.at(0), shape));
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("reshape", ReshapeOpExprGrad);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/roi_align.cpp
0 → 100644
View file @
21d47d0e
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
namespace oneflow {
namespace one {

struct RoiAlignCaptureState : public AutoGradCaptureState {
  float spatial_scale = 1.0;
  int32_t pooled_h = 0;
  int32_t pooled_w = 0;
  int32_t sampling_ratio = -1;
  bool aligned = false;
  bool requires_grad = false;
};

class RoiAlign : public OpExprGradFunction<RoiAlignCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(RoiAlignCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    ctx->requires_grad = inputs.at(0)->requires_grad();
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    ctx->SaveTensorForBackward(inputs.at(0));
    ctx->SaveTensorForBackward(inputs.at(1));
    ComposedAttrMap composed_attrs(attrs, base_attrs_);
    ctx->spatial_scale = JUST(composed_attrs.GetAttr<float>("spatial_scale"));
    ctx->pooled_h = JUST(composed_attrs.GetAttr<int32_t>("pooled_h"));
    ctx->pooled_w = JUST(composed_attrs.GetAttr<int32_t>("pooled_w"));
    ctx->sampling_ratio = JUST(composed_attrs.GetAttr<int32_t>("sampling_ratio"));
    ctx->aligned = JUST(composed_attrs.GetAttr<bool>("aligned"));
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const RoiAlignCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
    const auto& x_like = ctx->SavedTensors().at(0);
    const auto& rois = ctx->SavedTensors().at(1);
    in_grads->at(0) = JUST(functional::RoiAlignGrad(
        out_grads.at(0), x_like, rois, ctx->spatial_scale, ctx->pooled_h, ctx->pooled_w,
        ctx->sampling_ratio, ctx->aligned));
    return Maybe<void>::Ok();
  }

 private:
  AttrMap base_attrs_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("roi_align", RoiAlign);

}  // namespace one
}  // namespace oneflow
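RoiAlignGrad redistributes each pooled-bin gradient to the feature map: every bilinear sampling point scatters its share of the gradient onto its four nearest cells with bilinear weights. A single-channel standalone sketch of that per-point scatter (illustrative names, not the OneFlow functional API; it assumes (y, x) lies strictly inside the map so all four neighbours exist, and omits the averaging over sampling_ratio points and the spatial_scale/aligned handling):

#include <vector>

// Scatter one sampling point's gradient g at continuous location (y, x) onto the four
// surrounding feature-map cells with bilinear weights.
void ScatterBilinear(std::vector<std::vector<float>>& dx, float y, float x, float g) {
  const int y0 = static_cast<int>(y), x0 = static_cast<int>(x);
  const float ly = y - y0, lx = x - x0;
  dx[y0][x0] += g * (1 - ly) * (1 - lx);
  dx[y0][x0 + 1] += g * (1 - ly) * lx;
  dx[y0 + 1][x0] += g * ly * (1 - lx);
  dx[y0 + 1][x0 + 1] += g * ly * lx;
}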