OpenDAS / Oneflow / Commits

Commit a715222c, authored Feb 28, 2023 by yuguo

    0.9.1-rocm

parent f262efc9

Changes: 469 files. Showing 20 changed files with 1207 additions and 79 deletions (+1207 / -79).
oneflow/core/autograd/gradient_funcs/global_to_global.cpp            +81   -0
oneflow/core/autograd/gradient_funcs/gradient_accumulation.cpp       +184  -0
oneflow/core/autograd/gradient_funcs/graph_feed_and_fetch.cpp        +49   -0
oneflow/core/autograd/gradient_funcs/group_norm.cpp                  +122  -0
oneflow/core/autograd/gradient_funcs/identity.cpp                    +10   -1
oneflow/core/autograd/gradient_funcs/inv.cpp                         +51   -0
oneflow/core/autograd/gradient_funcs/kl_div.cpp                      +19   -12
oneflow/core/autograd/gradient_funcs/layer_norm.cpp                  +4    -4
oneflow/core/autograd/gradient_funcs/linalg_cross.cpp                +80   -0
oneflow/core/autograd/gradient_funcs/log_softmax.cpp                 +3    -19
oneflow/core/autograd/gradient_funcs/math_unary_op.cpp               +87   -11
oneflow/core/autograd/gradient_funcs/matmul.cpp                      +170  -15
oneflow/core/autograd/gradient_funcs/matrix_vector_product.cpp       +94   -0
oneflow/core/autograd/gradient_funcs/max_unpool.cpp                  +89   -0
oneflow/core/autograd/gradient_funcs/median.cpp                      +4    -4
oneflow/core/autograd/gradient_funcs/narrow.cpp                      +2    -2
oneflow/core/autograd/gradient_funcs/one_embedding_fused_lookup.cpp  +68   -0
oneflow/core/autograd/gradient_funcs/padding.cpp                     +13   -11
oneflow/core/autograd/gradient_funcs/reduce_ops.cpp                  +1    -0
oneflow/core/autograd/gradient_funcs/reduce_sum_like.cpp             +76   -0
oneflow/core/autograd/gradient_funcs/global_to_global.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/id_util.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/nd_sbp.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/optional.h"

namespace oneflow {
namespace one {

struct GlobalToGlobalState : public AutoGradCaptureState {
  Symbol<ParallelDesc> parallel_desc;
  Symbol<NdSbp> nd_sbp;
};

class GlobalToGlobalGradFunction : public OpExprGradFunction<GlobalToGlobalState> {
 public:
  Maybe<void> Init(const OpExpr& op) override {
    const auto* fw_op_expr = dynamic_cast<const GlobalToGlobalOpExpr*>(&op);
    CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
    grad_nd_sbp_ = fw_op_expr->grad_nd_sbp();
    return Maybe<void>::Ok();
  }

  Maybe<void> Capture(GlobalToGlobalState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs,
                      const OpExprInterpContext& interp_ctx) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    ctx->parallel_desc = JUST(inputs.at(0)->parallel_desc());
    ctx->nd_sbp = JUST(inputs.at(0)->nd_sbp());
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const GlobalToGlobalState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    const auto& out_grad = out_grads.at(0);
    CHECK_OR_RETURN(out_grad->is_global())
        << Error::RuntimeError()
        << "Expected global tensor for global_to_global but got local tensor";
    in_grads->resize(1);
    const auto& grad_nd_sbp = grad_nd_sbp_.value_or(JUST(out_grad->nd_sbp()));
    const auto& grad_sbp_list = JUST(GetSbpList(grad_nd_sbp));
    if (LazyMode::is_enabled()) {
      (*in_grads)[0] = JUST(one::functional::ToGlobal(out_grad, ctx->parallel_desc, *grad_sbp_list,
                                                      {}, /* check_meta */ false, /*copy=*/false));
    } else {
      const auto& grad_grad_sbp_list = JUST(GetSbpList(ctx->nd_sbp));
      (*in_grads)[0] = JUST(one::functional::ToGlobal(out_grad, ctx->parallel_desc, *grad_sbp_list,
                                                      *grad_grad_sbp_list, /* check_meta */ false,
                                                      /*copy=*/false));
    }
    return Maybe<void>::Ok();
  }

 private:
  Optional<Symbol<NdSbp>> grad_nd_sbp_;
};

REGISTER_OP_EXPR_GRAD_FUNCTION("global_to_global", GlobalToGlobalGradFunction);

}  // namespace one
}  // namespace oneflow
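
Note (illustrative, not part of the commit): every gradient function in this commit follows the same OpExprGradFunction lifecycle — Init reads the forward op's static attributes once, Capture stashes whatever the backward pass will need into a per-call capture state, and Apply maps out_grads to in_grads. A minimal self-contained sketch of that control flow, using toy stand-in types (Tensor, IdentityLikeState, IdentityLikeGradFn are hypothetical, not OneFlow's real classes):

#include <iostream>
#include <vector>

// Toy stand-ins for TensorTuple / AutoGradCaptureState; the real OneFlow types are richer.
using Tensor = std::vector<double>;
using TensorTuple = std::vector<Tensor>;

struct IdentityLikeState {   // plays the role of an AutoGradCaptureState subclass
  bool requires_grad = false;
};

struct IdentityLikeGradFn {  // plays the role of an OpExprGradFunction specialization
  IdentityLikeState state;
  void Capture(const TensorTuple& inputs) { state.requires_grad = !inputs.empty(); }
  void Apply(const TensorTuple& out_grads, TensorTuple* in_grads) const {
    in_grads->resize(1);
    if (state.requires_grad) { (*in_grads)[0] = out_grads[0]; }  // pass-through gradient
  }
};

int main() {
  IdentityLikeGradFn fn;
  fn.Capture({{1.0, 2.0}});            // forward time: record what backward needs
  TensorTuple in_grads;
  fn.Apply({{0.1, 0.2}}, &in_grads);   // backward time: map out_grads to in_grads
  std::cout << in_grads[0][1] << "\n"; // prints 0.2
  return 0;
}
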
oneflow/core/autograd/gradient_funcs/gradient_accumulation.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct GradAccRepeatCaptureState : public AutoGradCaptureState {
  int32_t repeat_num = 1;
};

class GradAccRepeat : public OpExprGradFunction<GradAccRepeatCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(GradAccRepeatCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const GradAccRepeatCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> GradAccRepeat::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> GradAccRepeat::Capture(GradAccRepeatCaptureState* ctx, const TensorTuple& inputs,
                                   const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->repeat_num = JUST(composed_attrs.GetAttr<int32_t>("repeat_num"));
  return Maybe<void>::Ok();
}

Maybe<void> GradAccRepeat::Apply(const GradAccRepeatCaptureState* ctx,
                                 const TensorTuple& out_grads, TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(1);
  (*in_grads)[0] = JUST(functional::GradAccCollect(out_grads[0], ctx->repeat_num));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("repeat", GradAccRepeat);

struct GradAccCollectCaptureState : public AutoGradCaptureState {
  int32_t max_acc_num = 1;
};

class GradAccCollect : public OpExprGradFunction<GradAccCollectCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(GradAccCollectCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const GradAccCollectCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> GradAccCollect::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> GradAccCollect::Capture(GradAccCollectCaptureState* ctx, const TensorTuple& inputs,
                                    const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->max_acc_num = JUST(composed_attrs.GetAttr<int32_t>("max_acc_num"));
  return Maybe<void>::Ok();
}

Maybe<void> GradAccCollect::Apply(const GradAccCollectCaptureState* ctx,
                                  const TensorTuple& out_grads, TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(1);
  (*in_grads)[0] = JUST(functional::GradAccRepeat(out_grads[0], ctx->max_acc_num));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("acc", GradAccCollect);

struct GradAccPackCaptureState : public AutoGradCaptureState {
  int32_t pack_num = 1;
};

class GradAccPack : public OpExprGradFunction<GradAccPackCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(GradAccPackCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const GradAccPackCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> GradAccPack::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> GradAccPack::Capture(GradAccPackCaptureState* ctx, const TensorTuple& inputs,
                                 const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->pack_num = JUST(composed_attrs.GetAttr<int32_t>("pack_num"));
  return Maybe<void>::Ok();
}

Maybe<void> GradAccPack::Apply(const GradAccPackCaptureState* ctx, const TensorTuple& out_grads,
                               TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(1);
  (*in_grads)[0] = JUST(functional::GradAccUnpack(out_grads[0], ctx->pack_num));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("pack", GradAccPack);

struct GradAccUnpackCaptureState : public AutoGradCaptureState {
  int32_t unpack_num = 1;
};

class GradAccUnpack : public OpExprGradFunction<GradAccUnpackCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(GradAccUnpackCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const GradAccUnpackCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> GradAccUnpack::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> GradAccUnpack::Capture(GradAccUnpackCaptureState* ctx, const TensorTuple& inputs,
                                   const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->unpack_num = JUST(composed_attrs.GetAttr<int32_t>("unpack_num"));
  return Maybe<void>::Ok();
}

Maybe<void> GradAccUnpack::Apply(const GradAccUnpackCaptureState* ctx,
                                 const TensorTuple& out_grads, TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  in_grads->resize(1);
  (*in_grads)[0] = JUST(functional::GradAccPack(out_grads[0], ctx->unpack_num));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("unpack", GradAccUnpack);

}  // namespace one
}  // namespace oneflow
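
Note (illustrative, not part of the commit): the four ops registered above form dual pairs — the backward of "repeat" calls the collect/accumulate functional and the backward of "acc" calls the repeat functional, and likewise for "pack"/"unpack". Semantically, replaying a tensor N times in the forward pass means the N incoming gradients must be summed on the way back. A self-contained toy showing that accumulation rule (plain C++, no OneFlow types):

#include <array>
#include <iostream>

int main() {
  const int repeat_num = 4;
  const double x = 2.5;  // a scalar "tensor" replayed repeat_num times in the forward pass
  std::array<double, repeat_num> out_grads{0.1, 0.2, 0.3, 0.4};  // one grad per replica

  // Backward of repeat: accumulate (sum) the gradients of every replica,
  // which is the role GradAccCollect plays for the "repeat" op above.
  double in_grad = 0.0;
  for (double g : out_grads) { in_grad += g; }

  std::cout << "grad w.r.t. x = " << in_grad << "\n";  // prints 1.0
  (void)x;
  return 0;
}
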
oneflow/core/autograd/gradient_funcs/graph_feed_and_fetch.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/job/lazy_mode.h"

namespace oneflow {
namespace one {

struct GraphFeedAndFetchCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
};

class GraphFeedAndFetch : public OpExprGradFunction<GraphFeedAndFetchCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(GraphFeedAndFetchCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
    ctx->requires_grad = inputs.at(0)->requires_grad();
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const GraphFeedAndFetchCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    in_grads->resize(1);
    if (ctx->requires_grad) { in_grads->at(0) = out_grads.at(0); }
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("graph_feed_and_fetch", GraphFeedAndFetch);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/group_norm.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"

namespace oneflow {
namespace one {

struct GroupNormCaptureState : public AutoGradCaptureState {
  double epsilon = 1e-5;
  bool x_requires_grad = true;
  bool affine = true;
  int32_t num_groups = 1;
  size_t x_index = 0;
  size_t mean_index = 1;
  size_t inv_variance_index = 2;
  size_t gamma_index = 3;
  std::string data_format;
  std::string activation;
};

class GroupNorm : public OpExprGradFunction<GroupNormCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(GroupNormCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const GroupNormCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
  std::string op_name_;
};

Maybe<void> GroupNorm::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  op_name_ = fw_op_expr->op_name();
  return Maybe<void>::Ok();
}

Maybe<void> GroupNorm::Capture(GroupNormCaptureState* ctx, const TensorTuple& inputs,
                               const TensorTuple& outputs, const AttrMap& attrs) const {
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->affine = JUST(composed_attrs.GetAttr<bool>("affine"));
  ctx->epsilon = JUST(composed_attrs.GetAttr<double>("epsilon"));
  ctx->num_groups = JUST(composed_attrs.GetAttr<int32_t>("num_groups"));
  ctx->data_format = JUST(composed_attrs.GetAttr<std::string>("data_format"));
  ctx->activation = JUST(composed_attrs.GetAttr<std::string>("activation"));
  if (ctx->affine) {
    CHECK_EQ_OR_RETURN(inputs.size(), 3);  // NOLINT(maybe-need-error-msg)
  } else {
    CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
  }
  CHECK_EQ_OR_RETURN(outputs.size(), 3);  // NOLINT(maybe-need-error-msg)
  ctx->x_requires_grad = inputs.at(0)->requires_grad();
  if (ctx->x_requires_grad || ctx->affine) {
    ctx->x_index = ctx->SaveTensorForBackward(inputs.at(0));
    ctx->mean_index = ctx->SaveTensorForBackward(outputs.at(1));
    ctx->inv_variance_index = ctx->SaveTensorForBackward(outputs.at(2));
    if (ctx->affine) {
      ctx->gamma_index = ctx->SaveTensorForBackward(inputs.at(1));  // save gamma.
    }
  }
  return Maybe<void>::Ok();
}

Maybe<void> GroupNorm::Apply(const GroupNormCaptureState* ctx, const TensorTuple& out_grads,
                             TensorTuple* in_grads) const {
  CHECK_EQ_OR_RETURN(ctx->data_format, "channels_first");
  CHECK_EQ_OR_RETURN(ctx->activation, "none");
  const auto& saved_tensors = ctx->SavedTensors();
  if (ctx->affine) {
    in_grads->resize(3);
  } else {
    in_grads->resize(1);
  }
  const auto& dy = out_grads.at(0);
  const auto& x = saved_tensors.at(ctx->x_index);
  const auto& mean = saved_tensors.at(ctx->mean_index);
  const auto& inv_variance = saved_tensors.at(ctx->inv_variance_index);
  if (ctx->affine) {
    const auto& results = JUST(functional::GroupNormParamGrad(dy, x, mean, inv_variance));
    in_grads->at(1) = results->at(0);  // For gamma.
    in_grads->at(2) = results->at(1);  // For beta.
  }
  if (ctx->x_requires_grad) {
    if (ctx->affine) {
      std::shared_ptr<Tensor> gamma = saved_tensors.at(ctx->gamma_index);
      in_grads->at(0) = JUST(functional::GroupNormGrad(dy, x, mean, inv_variance, gamma,
                                                       ctx->num_groups, ctx->epsilon));
    } else {
      in_grads->at(0) = JUST(functional::GroupNormGrad(dy, x, mean, inv_variance, NullOpt,
                                                       ctx->num_groups, ctx->epsilon));
    }
  }
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("group_norm", GroupNorm);

}  // namespace one
}  // namespace oneflow
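
Note (background, not from the commit): GroupNormParamGrad above produces the affine-parameter gradients. With x_hat the group-normalized input, the standard per-channel formulas are dgamma = sum(dy * x_hat) and dbeta = sum(dy), reduced over every axis except the channel axis. A self-contained reference computation under those standard formulas (toy N=1, C=2, one group covering both channels):

#include <cmath>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<double>> x = {{1.0, 3.0}, {2.0, 6.0}};     // per-channel values
  std::vector<std::vector<double>> dy = {{0.5, -0.5}, {1.0, 0.25}};  // upstream gradients
  // One group over both channels: normalize with the group mean and inverse std-dev.
  double mean = 0.0, var = 0.0, n = 4.0;
  for (const auto& c : x) { for (double v : c) { mean += v / n; } }
  for (const auto& c : x) { for (double v : c) { var += (v - mean) * (v - mean) / n; } }
  const double inv_std = 1.0 / std::sqrt(var + 1e-5);
  for (size_t c = 0; c < x.size(); ++c) {
    double dgamma = 0.0, dbeta = 0.0;
    for (size_t i = 0; i < x[c].size(); ++i) {
      dgamma += dy[c][i] * (x[c][i] - mean) * inv_std;  // sum(dy * x_hat) per channel
      dbeta += dy[c][i];                                // sum(dy) per channel
    }
    std::cout << "channel " << c << ": dgamma=" << dgamma << " dbeta=" << dbeta << "\n";
  }
  return 0;
}
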
oneflow/core/autograd/gradient_funcs/identity.cpp

@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 #include "oneflow/core/framework/op_expr_grad_function.h"
+#include "oneflow/core/job/lazy_mode.h"
 
 namespace oneflow {
 namespace one {
@@ -37,7 +38,15 @@ class Identity : public OpExprGradFunction<IdentityCaptureState> {
                     TensorTuple* in_grads) const override {
     CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
     in_grads->resize(1);
-    if (ctx->requires_grad) { in_grads->at(0) = out_grads.at(0); }
+    if (ctx->requires_grad) {
+      if (LazyMode::is_enabled()) {
+        // requires an intermediate node to avoid redundant memory copy or commnet
+        // communication in lazy mode
+        in_grads->at(0) = JUST(functional::Identity(out_grads.at(0)));
+      } else {
+        in_grads->at(0) = out_grads.at(0);
+      }
+    }
     return Maybe<void>::Ok();
   }
 };
oneflow/core/autograd/gradient_funcs/inv.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct InvCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
};

class Inv : public OpExprGradFunction<InvCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(InvCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
                      const AttrMap& attrs) const override {
    ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
    if (ctx->requires_grad) { ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 0))); }
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const InvCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    if (ctx->requires_grad) {
      const auto& output = JUST(VectorAt(ctx->SavedTensors(), 0));
      const auto& dy = JUST(VectorAt(out_grads, 0));
      JUST(VectorAt(*in_grads, 0)) = JUST(functional::Negative(JUST(functional::MatMul(
          output, JUST(functional::MatMul(dy, output, false, true, 1.0)), true, false, 1.0))));
    }
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("inv", Inv);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/kl_div.cpp

@@ -20,7 +20,8 @@ namespace oneflow {
 namespace one {
 
 struct KLDivLossCaptureState : public AutoGradCaptureState {
-  bool requires_grad = false;
+  bool input_requires_grad = false;
+  bool target_requires_grad = false;
   bool log_target = false;
 };
 
@@ -44,25 +45,31 @@ Maybe<void> KLDivLoss::Init(const OpExpr& op) {
 }
 
 Maybe<void> KLDivLoss::Capture(KLDivLossCaptureState* ctx, const TensorTuple& inputs,
                                const TensorTuple& outputs, const AttrMap& attrs) const {
-  ctx->requires_grad = inputs.at(0)->requires_grad();
-  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
+  CHECK_EQ_OR_RETURN(inputs.size(), 2);  // NOLINT(maybe-need-error-msg)
+  ctx->input_requires_grad = inputs[0]->requires_grad();
+  ctx->target_requires_grad = inputs[1]->requires_grad();
   ComposedAttrMap composed_attrs(attrs, base_attrs_);
   ctx->log_target = JUST(composed_attrs.GetAttr<bool>("log_target"));
-  ctx->SaveTensorForBackward(inputs.at(0));  // input
-  ctx->SaveTensorForBackward(inputs.at(1));  // target
+  ctx->SaveTensorForBackward(inputs[0]);  // input
+  ctx->SaveTensorForBackward(inputs[1]);  // target
   return Maybe<void>::Ok();
 }
 
 Maybe<void> KLDivLoss::Apply(const KLDivLossCaptureState* ctx, const TensorTuple& out_grads,
                              TensorTuple* in_grads) const {
-  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
-  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
-  const auto& dy = out_grads.at(0);
-  const auto& input = ctx->SavedTensors().at(0);
-  const auto& target = ctx->SavedTensors().at(1);
-  in_grads->resize(ctx->SavedTensors().size());
-  in_grads->at(0) = JUST(functional::KLDivLossGrad(dy, input, target, ctx->log_target));
+  CHECK_EQ_OR_RETURN(out_grads.size(), 1);            // NOLINT(maybe-need-error-msg)
+  CHECK_EQ_OR_RETURN(ctx->SavedTensors().size(), 2);  // NOLINT(maybe-need-error-msg)
+  const auto& dy = out_grads[0];
+  const auto& input = ctx->SavedTensors()[0];
+  const auto& target = ctx->SavedTensors()[1];
+  in_grads->resize(2);
+  if (ctx->input_requires_grad) {
+    (*in_grads)[0] = JUST(functional::KLDivLossGrad(dy, input, target, ctx->log_target));
+  }
+  if (ctx->target_requires_grad) {
+    (*in_grads)[1] = JUST(functional::KLDivLossTargetGrad(dy, input, target, ctx->log_target));
+  }
   return Maybe<void>::Ok();
 }
oneflow/core/autograd/gradient_funcs/layer_norm.cpp

@@ -108,10 +108,10 @@ Maybe<void> LayerNorm::Apply(const LayerNormCaptureState* ctx, const TensorTuple
   std::shared_ptr<Tensor> inv_variance = saved_tensors.at(ctx->inv_variance_index);
 
   if (ctx->has_affine) {
-    // Use LayerNormParamGrad(Tensor dy, Tensor x, Tensor mean, Tensor inv_variance, Int64
-    // begin_params_axis, Double epsilon).
-    const auto& results = JUST(functional::LayerNormParamGrad(dy, x, mean, inv_variance,
-                                                              begin_params_axis, ctx->epsilon));
+    // Use LayerNormParamGrad(Tensor dy, Tensor x, Tensor mean, Tensor inv_variance,
+    //                        Int64 begin_params_axis)
+    const auto& results =
+        JUST(functional::LayerNormParamGrad(dy, x, mean, inv_variance, begin_params_axis));
     in_grads->at(1) = results->at(0);  // For gamma.
     in_grads->at(2) = results->at(1);  // For beta.
   }
oneflow/core/autograd/gradient_funcs/linalg_cross.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/common/just.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional_api.yaml.h"

namespace oneflow {
namespace one {

struct LinalgCrossCaptureState : public AutoGradCaptureState {
  int64_t dim = -1;
  bool input_requires_grad = false;
  bool other_requires_grad = false;
};

class LinalgCross : public OpExprGradFunction<LinalgCrossCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(LinalgCrossCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const LinalgCrossCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> LinalgCross::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> LinalgCross::Capture(LinalgCrossCaptureState* ctx, const TensorTuple& inputs,
                                 const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->input_requires_grad = inputs.at(0)->requires_grad();
  ctx->other_requires_grad = inputs.at(1)->requires_grad();
  if (ctx->input_requires_grad) { ctx->SaveTensorForBackward(inputs.at(1)); }
  if (ctx->other_requires_grad) { ctx->SaveTensorForBackward(inputs.at(0)); }
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->dim = JUST(composed_attrs.GetAttr<int64_t>("dim"));
  return Maybe<void>::Ok();
}

Maybe<void> LinalgCross::Apply(const LinalgCrossCaptureState* ctx, const TensorTuple& out_grads,
                               TensorTuple* in_grads) const {
  in_grads->resize(ctx->SavedTensors().size());
  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
  if (ctx->input_requires_grad) {
    in_grads->at(0) =
        JUST(functional::LinalgCross(ctx->SavedTensors().at(0), out_grads.at(0), ctx->dim));
  }
  if (ctx->other_requires_grad) {
    in_grads->at(1) = JUST(functional::LinalgCross(
        out_grads.at(0), ctx->SavedTensors().at(ctx->input_requires_grad ? 1 : 0), ctx->dim));
  }
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("linalg_cross", LinalgCross);

}  // namespace one
}  // namespace oneflow
\ No newline at end of file
oneflow/core/autograd/gradient_funcs/log_softmax.cpp

@@ -38,39 +38,23 @@ class LogSoftmax : public OpExprGradFunction<LogSoftmaxCaptureState> {
-  std::shared_ptr<OpExpr> grad_op_;
 };
 
-Maybe<void> LogSoftmax::Init(const OpExpr& op) {
-  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
-  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
-  const std::string& op_name = fw_op_expr->op_name();
-  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
-  grad_op_ = JUST(one::OpBuilder("log_softmax_grad", GradientOpName(op_name))
-                      .Input("prob")
-                      .Input("dy")
-                      .Output("dx")
-                      .Build());
-  return Maybe<void>::Ok();
-}
+Maybe<void> LogSoftmax::Init(const OpExpr& op) { return Maybe<void>::Ok(); }
 
 Maybe<void> LogSoftmax::Capture(LogSoftmaxCaptureState* ctx, const TensorTuple& inputs,
                                 const TensorTuple& outputs, const AttrMap& attrs) const {
-  ComposedAttrMap composed_attrs(attrs, base_attrs_);
   CHECK_EQ_OR_RETURN(inputs.size(), 1);  // NOLINT(maybe-need-error-msg)
   ctx->requires_grad = inputs.at(0)->requires_grad();
   if (!ctx->requires_grad) return Maybe<void>::Ok();
   ctx->SaveTensorForBackward(outputs.at(0));
   return Maybe<void>::Ok();
 }
 
 Maybe<void> LogSoftmax::Apply(const LogSoftmaxCaptureState* ctx, const TensorTuple& out_grads,
                               TensorTuple* in_grads) const {
   if (!ctx->requires_grad) return Maybe<void>::Ok();
   CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
   const auto& dy = out_grads.at(0);
-  const auto& prob = ctx->SavedTensors().at(0);
+  const auto& y = ctx->SavedTensors().at(0);
   in_grads->resize(1);
-  in_grads->at(0) = JUST(OpInterpUtil::Dispatch<Tensor>(*grad_op_, {prob, dy}));
+  in_grads->at(0) = JUST(functional::LogSoftmaxGrad(dy, y));
   return Maybe<void>::Ok();
 }
oneflow/core/autograd/gradient_funcs/math_unary_op.cpp

@@ -30,7 +30,7 @@ typedef Maybe<one::Tensor> (*UnaryBwFunc)(const std::shared_ptr<one::Tensor>&,
                                           const std::shared_ptr<one::Tensor>&);
 
 template<UnaryBwFunc BwFunc>
-class UnaryMathOp : public OpExprGradFunction<UnaryMathCaptureState> {
+class UnaryMathBwdWithDyXOp : public OpExprGradFunction<UnaryMathCaptureState> {
   Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
   Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
@@ -52,20 +52,96 @@ class UnaryMathOp : public OpExprGradFunction<UnaryMathCaptureState> {
   std::shared_ptr<OpExpr> grad_op_;
 };
 
-#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS(op_type_name, op_cls)      \
-  class op_cls##Cls final : public UnaryMathOp<functional::op_cls##Grad> {};  \
-  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);
-
-OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, MATH_UNARY_ELEMENTWISE_FUNC_SEQ);
-OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh));
-// higher order derivative
-OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, OF_PP_MAKE_TUPLE_SEQ("sin_grad", SinGrad));
-OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS, OF_PP_MAKE_TUPLE_SEQ("cos_grad", CosGrad));
-#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_CLASS
+template<UnaryBwFunc BwFunc>
+class UnaryMathBwdWithDyYOp : public OpExprGradFunction<UnaryMathCaptureState> {
+  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
+  Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
+                      const TensorTuple& outputs, const AttrMap& attrs) const override {
+    ctx->x_requires_grad = inputs.at(0)->requires_grad();
+    ctx->SaveTensorForBackward(outputs.at(0));
+    return Maybe<void>::Ok();
+  }
+  Maybe<void> Apply(const UnaryMathCaptureState* ctx, const TensorTuple& out_grads,
+                    TensorTuple* in_grads) const override {
+    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
+    const auto& y = ctx->SavedTensors().at(0);
+    in_grads->at(0) = JUST(BwFunc(y, out_grads.at(0)));
+    return Maybe<void>::Ok();
+  }
+
+ protected:
+  std::shared_ptr<OpExpr> grad_op_;
+};
+
+class UnaryMathBwdWithFillZeroOp : public OpExprGradFunction<UnaryMathCaptureState> {
+  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
+  Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
+                      const TensorTuple& outputs, const AttrMap& attrs) const override {
+    ctx->x_requires_grad = inputs.at(0)->requires_grad();
+    return Maybe<void>::Ok();
+  }
+  Maybe<void> Apply(const UnaryMathCaptureState* ctx, const TensorTuple& out_grads,
+                    TensorTuple* in_grads) const override {
+    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
+    in_grads->at(0) = JUST(functional::ZerosLike(out_grads[0]));
+    return Maybe<void>::Ok();
+  }
+
+ protected:
+  std::shared_ptr<OpExpr> grad_op_;
+};
+
+#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS(op_type_name, op_cls)          \
+  class op_cls##Cls final : public UnaryMathBwdWithDyXOp<functional::op_cls##Grad> {};      \
+  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);
+
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS,
+                     MATH_UNARY_ELEMENTWISE_PRIMITIVE_FUNC_BWD_WITH_DY_X_SEQ);
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS,
+                     OF_PP_MAKE_TUPLE_SEQ("tanh", Tanh));
+// higher order derivative
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS,
+                     OF_PP_MAKE_TUPLE_SEQ("sin_grad", SinGrad));
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS,
+                     OF_PP_MAKE_TUPLE_SEQ("cos_grad", CosGrad));
+#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_X_CLASS
+
+#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_Y_CLASS(op_type_name, op_cls)          \
+  class op_cls##Cls final : public UnaryMathBwdWithDyYOp<functional::op_cls##Grad> {};      \
+  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, op_cls##Cls);
+
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_Y_CLASS,
+                     MATH_UNARY_ELEMENTWISE_FUNC_BWD_WITH_DY_Y_SEQ);
+#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_DY_Y_CLASS
+
+#define INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_FILL_CLASS(op_type_name, op_cls)          \
+  class op_cls##Cls final : public UnaryMathBwdWithDyYOp<functional::op_cls##Grad> {};      \
+  REGISTER_OP_EXPR_GRAD_FUNCTION(op_type_name, UnaryMathBwdWithFillZeroOp);
+
+OF_PP_FOR_EACH_TUPLE(INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_FILL_CLASS,
+                     MATH_UNARY_ELEMENTWISE_FUNC_BWD_WITH_FILL_SEQ);
+#undef INSTANTIAT_AND_REGISTER_UNARY_MATHOP_WITH_FILL_CLASS
+
+class NegativeOp : public OpExprGradFunction<UnaryMathCaptureState> {
+  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
+  Maybe<void> Capture(UnaryMathCaptureState* ctx, const TensorTuple& inputs,
+                      const TensorTuple& outputs, const AttrMap& attrs) const override {
+    ctx->x_requires_grad = inputs.at(0)->requires_grad();
+    return Maybe<void>::Ok();
+  }
+  Maybe<void> Apply(const UnaryMathCaptureState* ctx, const TensorTuple& out_grads,
+                    TensorTuple* in_grads) const override {
+    if (!ctx->x_requires_grad) { return Maybe<void>::Ok(); }
+    in_grads->at(0) = JUST(functional::Negative(out_grads[0]));
+    return Maybe<void>::Ok();
+  }
+
+ protected:
+  std::shared_ptr<OpExpr> grad_op_;
+};
+REGISTER_OP_EXPR_GRAD_FUNCTION("negative", NegativeOp);
 
 }  // namespace one
 }  // namespace oneflow
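
Note (illustrative, not part of the commit): the split above groups unary backward rules into three families — ones that need the forward input x (dy * f'(x), the "DY_X" bucket), ones that only need the forward output y (dy * g(y), the "DY_Y" bucket), and ones whose gradient is identically zero (the fill-zero bucket). tanh is the classic case where both forms exist, since d/dx tanh(x) = 1 - tanh(x)^2 = 1 - y^2; which bucket it actually lands in here depends on the sequence macros, which are not shown in this hunk. A self-contained comparison of the two forms:

#include <cmath>
#include <iostream>

int main() {
  // Two equivalent ways to write the tanh backward rule, mirroring the
  // "BwdWithDyX" (needs x) vs "BwdWithDyY" (needs only the forward output y) split above.
  const double x = 0.7, dy = 1.5;
  const double y = std::tanh(x);
  const double dx_from_x = dy * (1.0 - std::tanh(x) * std::tanh(x));  // f'(x) form
  const double dx_from_y = dy * (1.0 - y * y);                        // g(y) form, no x needed
  std::cout << dx_from_x << " == " << dx_from_y << "\n";
  return 0;
}
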
oneflow/core/autograd/gradient_funcs/matmul.cpp

@@ -18,6 +18,7 @@ limitations under the License.
 #include "oneflow/core/framework/op_expr.h"
 #include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
 #include "oneflow/core/functional/functional.h"
+#include "oneflow/core/common/container_util.h"
 
 namespace oneflow {
 namespace one {
@@ -102,40 +103,194 @@ Maybe<void> Matmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_
   return Maybe<void>::Ok();
 }
 
-class BroadcastMatmul : public Matmul {
+struct BroadcastMatmulCaptureState : public AutoGradCaptureState {
+  bool transpose_a = false;
+  bool transpose_b = false;
+  double alpha = 1.0;
+  bool requires_grad_a = true;
+  bool requires_grad_b = true;
+  size_t a_index = 0;
+  size_t b_index = 1;
+  bool broadcast_a = false;
+  bool broadcast_b = false;
+  int64_t b_num_axes = 0;
+};
+
+class BroadcastMatmul : public OpExprGradFunction<BroadcastMatmulCaptureState> {
  public:
-  Maybe<void> Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
+  Maybe<void> Init(const OpExpr& op) override;
+  Maybe<void> Capture(BroadcastMatmulCaptureState* ctx, const TensorTuple& inputs,
+                      const TensorTuple& outputs, const AttrMap& attrs) const override;
+  Maybe<void> Apply(const BroadcastMatmulCaptureState* ctx, const TensorTuple& out_grads,
                     TensorTuple* in_grads) const override;
+
+ protected:
+  AttrMap base_attrs_;
 };
 
-Maybe<void> BroadcastMatmul::Apply(const MatmulCaptureState* ctx, const TensorTuple& out_grads,
-                                   TensorTuple* in_grads) const {
+Maybe<void> BroadcastMatmul::Init(const OpExpr& op) {
+  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
+  CHECK_NOTNULL_OR_RETURN(fw_op_expr) << "fw_op_expr should not be null. ";
+  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
+  return Maybe<void>::Ok();
+}
+
+Maybe<void> BroadcastMatmul::Capture(BroadcastMatmulCaptureState* ctx, const TensorTuple& inputs,
+                                     const TensorTuple& outputs, const AttrMap& attrs) const {
+  ctx->requires_grad_a = JUST(VectorAt(inputs, 0))->requires_grad();
+  ctx->requires_grad_b = JUST(VectorAt(inputs, 1))->requires_grad();
   if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
-  CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
-  in_grads->resize(2);
+
+  const auto a_shape = JUST(VectorAt(inputs, 0))->shape();
+  const auto b_shape = JUST(VectorAt(inputs, 1))->shape();
+  const int64_t a_num_axes = a_shape->NumAxes();
+  const int64_t b_num_axes = b_shape->NumAxes();
+  const size_t num_max_batch_dims = std::max(a_num_axes, b_num_axes) - 2;
+  auto MakeGetBatchDim = [num_max_batch_dims](size_t num_dims, const Shape& shape_dim) {
+    const int64_t num_batch_dims = num_dims - 2;
+    const int64_t num_padding_dims = num_max_batch_dims - num_batch_dims;
+    return [num_padding_dims, shape_dim](size_t index) {
+      return index < num_padding_dims ? 1 : shape_dim.At(index - num_padding_dims);
+    };
+  };
+  auto GetABatchDim = MakeGetBatchDim(a_num_axes, *a_shape);
+  auto GetBBatchDim = MakeGetBatchDim(b_num_axes, *b_shape);
+  bool broadcast_a = false;
+  bool broadcast_b = false;
+  for (int32_t i = 0; i < num_max_batch_dims; i++) {
+    if (GetABatchDim(i) < GetBBatchDim(i) || a_num_axes < b_num_axes) {
+      broadcast_a = true;
+      break;
+    }
+  }
+  for (int32_t i = 0; i < num_max_batch_dims; i++) {
+    if (GetBBatchDim(i) < GetABatchDim(i) || b_num_axes < a_num_axes) {
+      broadcast_b = true;
+      break;
+    }
+  }
+  if (b_num_axes == 2 && !ctx->transpose_a) {
+    // In this case, we can directly use `broadcast_matmul_grad_b` OP to generate Grad instead of
+    // broadcast_matmul+reduce_sum_like.
+    broadcast_b = false;
+  }
+  ctx->broadcast_a = broadcast_a;
+  ctx->broadcast_b = broadcast_b;
+
+  ComposedAttrMap composed_attrs(attrs, base_attrs_);
+  ctx->transpose_a = JUST(composed_attrs.GetAttr<bool>("transpose_a"));
+  ctx->transpose_b = JUST(composed_attrs.GetAttr<bool>("transpose_b"));
+  ctx->alpha = JUST(composed_attrs.GetAttr<double>("alpha"));
   if (ctx->requires_grad_a) {
+    ctx->b_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1)));  // input b
+    if (broadcast_a) {
+      ctx->a_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));  // input a
+    }
+  }
+  if (ctx->requires_grad_b) {
+    ctx->b_num_axes = JUST(VectorAt(inputs, 1))->shape()->NumAxes();
+    ctx->a_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));  // input a
+    if (broadcast_b) {
+      ctx->b_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1)));  // input b
+    }
+  }
+  return Maybe<void>::Ok();
+}
+
+Maybe<void> BroadcastMatmul::Apply(const BroadcastMatmulCaptureState* ctx,
+                                   const TensorTuple& out_grads, TensorTuple* in_grads) const {
+  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
+  CHECK_EQ_OR_RETURN(out_grads.size(), 1) << "Out grad size should be equal to 1. ";
+  in_grads->resize(2);
+
+  const auto out_shape = JUST(VectorAt(out_grads, 0))->shape();
+  const int64_t out_num_axes = out_shape->NumAxes();
+  const size_t num_max_batch_dims = out_num_axes - 2;
+  auto MakeGetBatchDim = [num_max_batch_dims](size_t num_dims, const Shape& shape_dim) {
+    const int64_t num_batch_dims = num_dims - 2;
+    const int64_t num_padding_dims = num_max_batch_dims - num_batch_dims;
+    return [num_padding_dims, shape_dim](size_t index) {
+      return index < num_padding_dims ? 1 : shape_dim.At(index - num_padding_dims);
+    };
+  };
+  auto GetOutBatchDim = MakeGetBatchDim(out_num_axes, *out_shape);
+
+  if (ctx->requires_grad_a) {
+    std::shared_ptr<Tensor> broadcast_grad_a;
     const auto& input_b = ctx->SavedTensors().at(ctx->b_index);
     if (ctx->transpose_a) {
-      in_grads->at(0) =
-          JUST(functional::MatMul(input_b, out_grads.at(0), ctx->transpose_b, true, ctx->alpha));
+      broadcast_grad_a = JUST(functional::MatMul(input_b, JUST(VectorAt(out_grads, 0)),
+                                                 ctx->transpose_b, true, ctx->alpha));
     } else {
-      in_grads->at(0) = JUST(
-          functional::MatMul(out_grads.at(0), input_b, false, !(ctx->transpose_b), ctx->alpha));
+      broadcast_grad_a = JUST(functional::MatMul(JUST(VectorAt(out_grads, 0)), input_b, false,
+                                                 !(ctx->transpose_b), ctx->alpha));
     }
+    if (ctx->broadcast_a) {
+      const auto& input_a = JUST(VectorAt(ctx->SavedTensors(), ctx->a_index));
+      const auto a_shape = input_a->shape();
+      const int64_t a_num_axes = a_shape->NumAxes();
+      std::vector<int32_t> a_reduce_vec;
+      auto GetABatchDim = MakeGetBatchDim(a_num_axes, *a_shape);
+      const int64_t a_out_num_dim_differ = out_num_axes - a_num_axes;
+      for (int32_t i = 0; i < out_num_axes - 2; i++) {
+        if (GetOutBatchDim(i) > GetABatchDim(i)
+            || (GetOutBatchDim(i) == 1 && i < a_out_num_dim_differ)) {
+          a_reduce_vec.push_back(i);
+        }
+      }
+      JUST(VectorAt(*in_grads, 0)) =
+          JUST(functional::ReduceSumLike(broadcast_grad_a, input_a, a_reduce_vec));
+    } else {
+      JUST(VectorAt(*in_grads, 0)) = broadcast_grad_a;
+    }
   }
+
   if (ctx->requires_grad_b) {
     const auto& input_a = ctx->SavedTensors().at(ctx->a_index);
-    if (ctx->transpose_b) {
-      in_grads->at(1) =
-          JUST(functional::BroadcastMatmulGradB(out_grads.at(0), input_a, ctx->alpha));
+    if (ctx->b_num_axes == 2 && !ctx->transpose_a) {
+      if (ctx->transpose_b) {
+        JUST(VectorAt(*in_grads, 1)) = JUST(functional::BroadcastMatmulGradB(
+            JUST(VectorAt(out_grads, 0)), input_a, ctx->alpha));
+      } else {
+        JUST(VectorAt(*in_grads, 1)) = JUST(functional::BroadcastMatmulGradB(
+            input_a, JUST(VectorAt(out_grads, 0)), ctx->alpha));
+      }
     } else {
-      in_grads->at(1) =
-          JUST(functional::BroadcastMatmulGradB(input_a, out_grads.at(0), ctx->alpha));
+      std::shared_ptr<Tensor> broadcast_grad_b;
+      if (ctx->transpose_b) {
+        broadcast_grad_b = JUST(functional::MatMul(JUST(VectorAt(out_grads, 0)), input_a, true,
+                                                   ctx->transpose_a, ctx->alpha));
+      } else {
+        broadcast_grad_b = JUST(functional::MatMul(input_a, JUST(VectorAt(out_grads, 0)),
+                                                   !ctx->transpose_a, false, ctx->alpha));
+      }
+      if (ctx->broadcast_b) {
+        const auto& input_b = JUST(VectorAt(ctx->SavedTensors(), ctx->b_index));
+        const auto b_shape = input_b->shape();
+        std::vector<int32_t> b_reduce_vec;
+        auto GetBBatchDim = MakeGetBatchDim(ctx->b_num_axes, *b_shape);
+        const int64_t b_out_num_dim_differ = out_num_axes - ctx->b_num_axes;
+        for (int32_t i = 0; i < out_num_axes - 2; i++) {
+          if (GetOutBatchDim(i) > GetBBatchDim(i)
+              || (GetOutBatchDim(i) == 1 && i < b_out_num_dim_differ)) {
+            b_reduce_vec.push_back(i);
+          }
+        }
+        JUST(VectorAt(*in_grads, 1)) =
+            JUST(functional::ReduceSumLike(broadcast_grad_b, input_b, b_reduce_vec));
+      } else {
+        JUST(VectorAt(*in_grads, 1)) = broadcast_grad_b;
+      }
     }
   }
   return Maybe<void>::Ok();
 }
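
Note (illustrative, not part of the commit): the heart of the BroadcastMatmul backward above is the batch-dimension bookkeeping — leading batch dims of the smaller operand are treated as 1, and every axis where the output's batch dim exceeds the operand's (padded) dim must be summed away with ReduceSumLike. A self-contained rerun of that axis selection on example shapes (plain C++, shapes as vectors):

#include <iostream>
#include <vector>

int main() {
  std::vector<long> out_batch = {4, 5};  // batch dims of out_grad, e.g. output shape (4, 5, m, n)
  std::vector<long> a_batch = {5};       // batch dims of input a, e.g. a shape (5, m, k)
  const long num_max_batch_dims = static_cast<long>(out_batch.size());
  const long num_padding_dims = num_max_batch_dims - static_cast<long>(a_batch.size());
  // Pad missing leading batch dims of a with 1, mirroring MakeGetBatchDim above.
  auto GetABatchDim = [&](long i) {
    return i < num_padding_dims ? 1 : a_batch[i - num_padding_dims];
  };

  std::vector<int> a_reduce_vec;  // axes that ReduceSumLike must sum over
  for (long i = 0; i < num_max_batch_dims; ++i) {
    if (out_batch[i] > GetABatchDim(i) || (out_batch[i] == 1 && i < num_padding_dims)) {
      a_reduce_vec.push_back(static_cast<int>(i));
    }
  }
  for (int axis : a_reduce_vec) { std::cout << "reduce axis " << axis << "\n"; }  // prints 0
  return 0;
}
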
oneflow/core/autograd/gradient_funcs/matrix_vector_product.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct MatrixVectorProductCaptureState : public AutoGradCaptureState {
  bool requires_grad_a = false;
  bool requires_grad_b = false;
  size_t a_index = 0;
  size_t b_index = 1;
};

class MatrixVectorProduct : public OpExprGradFunction<MatrixVectorProductCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(MatrixVectorProductCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const MatrixVectorProductCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 protected:
  AttrMap base_attrs_;
};

Maybe<void> MatrixVectorProduct::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr) << "fw_op_expr should not be null. ";
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> MatrixVectorProduct::Capture(MatrixVectorProductCaptureState* ctx,
                                         const TensorTuple& inputs, const TensorTuple& outputs,
                                         const AttrMap& attrs) const {
  ctx->requires_grad_a = JUST(VectorAt(inputs, 0))->requires_grad();
  ctx->requires_grad_b = JUST(VectorAt(inputs, 1))->requires_grad();
  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  if (ctx->requires_grad_a) {
    ctx->b_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 1)));  // input b
  }
  if (ctx->requires_grad_b) {
    ctx->a_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));  // input a
  }
  return Maybe<void>::Ok();
}

Maybe<void> MatrixVectorProduct::Apply(const MatrixVectorProductCaptureState* ctx,
                                       const TensorTuple& out_grads, TensorTuple* in_grads) const {
  if (!ctx->requires_grad_a && !ctx->requires_grad_b) { return Maybe<void>::Ok(); }
  CHECK_EQ_OR_RETURN(out_grads.size(), 1) << "Out grad size should be equal to 1. ";
  in_grads->resize(2);
  if (ctx->requires_grad_a) {
    const auto& input_b = JUST(VectorAt(ctx->SavedTensors(), ctx->b_index));
    JUST(VectorAt(*in_grads, 0)) =
        JUST(functional::MatrixVectorProductGradA(JUST(VectorAt(out_grads, 0)), input_b));
  }
  if (ctx->requires_grad_b) {
    const auto& input_a = JUST(VectorAt(ctx->SavedTensors(), ctx->a_index));
    JUST(VectorAt(*in_grads, 1)) =
        JUST(functional::MatrixVectorProductGradB(JUST(VectorAt(out_grads, 0)), input_a));
  }
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("matrix_vector_product", MatrixVectorProduct);

}  // namespace one
}  // namespace oneflow
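
Note (background, not from the commit): for y = A * v, the standard gradients that MatrixVectorProductGradA/GradB are expected to realize are dA = outer(dy, v) and dv = A^T * dy. A self-contained reference computation:

#include <iostream>
#include <vector>

int main() {
  // y = A * v;  dA = dy v^T (outer product),  dv = A^T dy.
  std::vector<std::vector<double>> A = {{1, 2}, {3, 4}};  // 2x2 matrix
  std::vector<double> v = {5, 6}, dy = {0.5, -1.0};

  std::vector<std::vector<double>> dA(2, std::vector<double>(2, 0.0));
  std::vector<double> dv(2, 0.0);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      dA[i][j] = dy[i] * v[j];   // grad w.r.t. the matrix
      dv[j] += A[i][j] * dy[i];  // grad w.r.t. the vector
    }
  }
  std::cout << "dA[0][1]=" << dA[0][1] << " dv[0]=" << dv[0] << "\n";
  return 0;
}
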
oneflow/core/autograd/gradient_funcs/max_unpool.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"

namespace oneflow {
namespace one {
namespace {

struct MaxUnpoolCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
  size_t input_index = 0;
  size_t indices_index = 0;
};

using FuncType = decltype(functional::MaxUnpool1dGrad);

template<FuncType F>
class MaxUnpoolNdGrad : public OpExprGradFunction<MaxUnpoolCaptureState> {
 public:
  virtual ~MaxUnpoolNdGrad() = default;

  using OpExprGradFunction<MaxUnpoolCaptureState>::Init;

  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(MaxUnpoolCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const MaxUnpoolCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

template<FuncType F>
Maybe<void> MaxUnpoolNdGrad<F>::Init(const OpExpr& op) {
  const auto* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

template<FuncType F>
Maybe<void> MaxUnpoolNdGrad<F>::Capture(MaxUnpoolCaptureState* ctx, const TensorTuple& inputs,
                                        const TensorTuple& outputs, const AttrMap& attrs) const {
  ctx->requires_grad = inputs.at(0)->requires_grad();
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  ctx->input_index = ctx->SaveTensorForBackward(inputs.at(0));
  ctx->indices_index = ctx->SaveTensorForBackward(inputs.at(1));
  return Maybe<void>::Ok();
}

template<FuncType F>
Maybe<void> MaxUnpoolNdGrad<F>::Apply(const MaxUnpoolCaptureState* ctx,
                                      const TensorTuple& out_grads, TensorTuple* in_grads) const {
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  CHECK_LE_OR_RETURN(out_grads.size(), 2);  // NOLINT(maybe-need-error-msg)
  const auto& input = ctx->SavedTensors().at(ctx->input_index);
  const auto& indices = ctx->SavedTensors().at(ctx->indices_index);
  in_grads->resize(2);
  (*in_grads)[0] = JUST(F(input, indices, out_grads[0]));
  return Maybe<void>::Ok();
}

}  // namespace

REGISTER_OP_EXPR_GRAD_FUNCTION("max_unpool_1d", MaxUnpoolNdGrad<functional::MaxUnpool1dGrad>);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_unpool_2d", MaxUnpoolNdGrad<functional::MaxUnpool2dGrad>);
REGISTER_OP_EXPR_GRAD_FUNCTION("max_unpool_3d", MaxUnpoolNdGrad<functional::MaxUnpool3dGrad>);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/median.cpp

@@ -92,10 +92,10 @@ class MedianWithIndices : public OpExprGradFunction<MedianWithIndicesCaptureStat
     const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
     const auto& indices = JUST(functional::Unsqueeze(JUST(VectorAt(ctx->SavedTensors(), 1)), -1));
     const auto& dout = JUST(functional::Unsqueeze(JUST(VectorAt(out_grads, 0)), -1));
-    JUST(VectorAt(*in_grads, 0)) = JUST(functional::DimScatter(
-        JUST(functional::Constant(*(input->shape()), Scalar(0), *dout->dtype(),
-                                  JUST(dout->device()))),
-        -1, indices, dout));
+    JUST(VectorAt(*in_grads, 0)) = JUST(functional::DimScatterUpdate(
+        JUST(functional::Constant(*(input->shape()), Scalar(0), *dout->dtype(),
+                                  JUST(dout->device()))),
+        -1, indices, dout, /*inplace*/ false));
   }
   return Maybe<void>::Ok();
 }
oneflow/core/autograd/gradient_funcs/narrow.cpp

@@ -71,8 +71,8 @@ class Narrow : public OpExprGradFunction<NarrowCaptureState> {
           functional::Empty(ctx->shape, dy->dtype(), JUST(dy->device()), /*pin_memory=*/false));
     } else {
       like = JUST(
-          functional::ConsistentEmpty(ctx->shape, dy->dtype(), JUST(dy->parallel_desc()),
-                                      *JUST(private_details::RawGetSbpList(JUST(dy->nd_sbp())))));
+          functional::GlobalEmpty(ctx->shape, dy->dtype(), JUST(dy->parallel_desc()),
+                                  *JUST(private_details::RawGetSbpList(JUST(dy->nd_sbp())))));
     }
     in_grads->resize(1);
     in_grads->at(0) =
         JUST(functional::NarrowGrad(dy, like, ctx->dim, ctx->start, ctx->length));
oneflow/core/autograd/gradient_funcs/one_embedding_fused_lookup.cpp  0 → 100644

/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct OneEmbeddingFusedLookupCaptureState : public AutoGradCaptureState {
  bool requires_grad{};
  std::string embedding_name{};
  int64_t line_size{};
  int64_t embedding_size{};
  int shadow_index{};
  int ids_index{};
  int input_num{};
};

class OneEmbeddingFusedLookup : public OpExprGradFunction<OneEmbeddingFusedLookupCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }

  Maybe<void> Capture(OneEmbeddingFusedLookupCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override {
    CHECK_GE_OR_RETURN(inputs.size(), 2);  // NOLINT(maybe-need-error-msg)
    ctx->requires_grad = inputs.at(0)->requires_grad();            // shadow
    ctx->shadow_index = ctx->SaveTensorForBackward(inputs.at(0));  // shadow
    ctx->ids_index = ctx->SaveTensorForBackward(inputs.at(1));     // id
    ctx->embedding_name = JUST(attrs.GetAttr<std::string>("embedding_name"));
    ctx->line_size = JUST(attrs.GetAttr<int64_t>("line_size"));
    ctx->embedding_size = JUST(attrs.GetAttr<int64_t>("embedding_size"));
    ctx->input_num = inputs.size();
    return Maybe<void>::Ok();
  }

  Maybe<void> Apply(const OneEmbeddingFusedLookupCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override {
    in_grads->resize(ctx->input_num);
    const auto& saved_tensors = ctx->SavedTensors();
    CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
    if (ctx->requires_grad) {
      JUST(functional::OneEmbeddingFusedLookupGrad(
          saved_tensors.at(ctx->ids_index), JUST(VectorAt(out_grads, 0)), ctx->embedding_name,
          ctx->line_size, ctx->embedding_size));
      (*in_grads)[0] = JUST(functional::ZerosLike(saved_tensors.at(ctx->shadow_index)));
    }
    return Maybe<void>::Ok();
  }
};

REGISTER_OP_EXPR_GRAD_FUNCTION("one_embedding_fused_lookup", OneEmbeddingFusedLookup);

}  // namespace one
}  // namespace oneflow
oneflow/core/autograd/gradient_funcs/padding.cpp
View file @ a715222c
...
@@ -20,12 +20,12 @@ limitations under the License.
 namespace oneflow {
 namespace one {

-struct Pad2dCaptureState : public AutoGradCaptureState {
-  bool requires_grad;
-  std::vector<int64_t> paddings;
+struct PadNdCaptureState : public AutoGradCaptureState {
+  bool requires_grad = false;
+  std::vector<int64_t> paddings{};
 };

-class Pad2d : public OpExprGradFunction<Pad2dCaptureState> {
+class PadNd : public OpExprGradFunction<PadNdCaptureState> {
  public:
   Maybe<void> Init(const OpExpr& op) override {
     const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
...
@@ -34,7 +34,7 @@ class Pad2d : public OpExprGradFunction<Pad2dCaptureState> {
     return Maybe<void>::Ok();
   }

-  Maybe<void> Capture(Pad2dCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
+  Maybe<void> Capture(PadNdCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
                       const AttrMap& attrs) const override {
     CHECK_EQ_OR_RETURN(inputs.size(), 1);   // NOLINT(maybe-need-error-msg)
     CHECK_EQ_OR_RETURN(outputs.size(), 1);  // NOLINT(maybe-need-error-msg)
...
@@ -50,9 +50,9 @@ class Pad2d : public OpExprGradFunction<Pad2dCaptureState> {
   AttrMap base_attrs_;
 };

-class ReflectionPad2d : public Pad2d {
+class ReflectionPadNd : public PadNd {
  public:
-  Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
+  Maybe<void> Apply(const PadNdCaptureState* ctx, const TensorTuple& out_grads,
                     TensorTuple* in_grads) const override {
     CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
     in_grads->resize(1);
...
@@ -64,9 +64,9 @@ class ReflectionPad2d : public Pad2d {
   }
 };

-class ReplicationPad2d : public Pad2d {
+class ReplicationPadNd : public PadNd {
  public:
-  Maybe<void> Apply(const Pad2dCaptureState* ctx, const TensorTuple& out_grads,
+  Maybe<void> Apply(const PadNdCaptureState* ctx, const TensorTuple& out_grads,
                     TensorTuple* in_grads) const override {
     CHECK_EQ_OR_RETURN(out_grads.size(), 1);  // NOLINT(maybe-need-error-msg)
     in_grads->resize(1);
...
@@ -121,8 +121,10 @@ class ConstantPadNd : public OpExprGradFunction<ConstantPadNdCaptureState> {
 };

 REGISTER_OP_EXPR_GRAD_FUNCTION("pad", ConstantPadNd);
-REGISTER_OP_EXPR_GRAD_FUNCTION("reflection_pad2d", ReflectionPad2d);
-REGISTER_OP_EXPR_GRAD_FUNCTION("replication_pad2d", ReplicationPad2d);
+REGISTER_OP_EXPR_GRAD_FUNCTION("reflection_pad1d", ReflectionPadNd);
+REGISTER_OP_EXPR_GRAD_FUNCTION("reflection_pad2d", ReflectionPadNd);
+REGISTER_OP_EXPR_GRAD_FUNCTION("replication_pad1d", ReplicationPadNd);
+REGISTER_OP_EXPR_GRAD_FUNCTION("replication_pad2d", ReplicationPadNd);

 }  // namespace one
 }  // namespace oneflow
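The padding diff generalizes the 2d-specific capture state and base class into PadNd and registers the 1d reflection/replication pads against the same gradient functions. As a rough illustration of what a replication-pad backward has to accumulate, here is a standalone 1-D sketch in plain C++ (not OneFlow code; ReplicationPad1dBackward is a hypothetical helper):

#include <algorithm>
#include <cstddef>
#include <vector>

// Backward of replication_pad1d: in the forward pass, output position j reads the
// clamped input position clamp(j - pad_left, 0, n_in - 1), so the backward pass
// accumulates grad_out[j] into that position. Border elements therefore collect
// the gradients of all of their replicated copies.
std::vector<float> ReplicationPad1dBackward(const std::vector<float>& grad_out,
                                            std::size_t n_in, std::ptrdiff_t pad_left) {
  std::vector<float> grad_in(n_in, 0.0f);
  for (std::size_t j = 0; j < grad_out.size(); ++j) {
    const std::ptrdiff_t src = std::clamp<std::ptrdiff_t>(
        static_cast<std::ptrdiff_t>(j) - pad_left, 0, static_cast<std::ptrdiff_t>(n_in) - 1);
    grad_in[static_cast<std::size_t>(src)] += grad_out[j];
  }
  return grad_in;
}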
oneflow/core/autograd/gradient_funcs/reduce_ops.cpp
View file @ a715222c
...
@@ -64,6 +64,7 @@ Maybe<void> ReduceSum::Apply(const ReduceSumCaptureState* ctx, const TensorTuple
 }

 REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_sum", ReduceSum);
+REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_nansum", ReduceSum);

 struct ReduceProdOpInterpState : public AutoGradCaptureState {
   std::vector<int32_t> axis;
...
oneflow/core/autograd/gradient_funcs/reduce_sum_like.cpp
0 → 100644
View file @ a715222c
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_expr.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/functional_api.yaml.h"
namespace oneflow {
namespace one {

struct ReduceSumLikeCaptureState : public AutoGradCaptureState {
  bool requires_grad = false;
  std::vector<int32_t> axis;
};

class ReduceSumLike : public OpExprGradFunction<ReduceSumLikeCaptureState> {
 public:
  Maybe<void> Init(const OpExpr& op) override;
  Maybe<void> Capture(ReduceSumLikeCaptureState* ctx, const TensorTuple& inputs,
                      const TensorTuple& outputs, const AttrMap& attrs) const override;
  Maybe<void> Apply(const ReduceSumLikeCaptureState* ctx, const TensorTuple& out_grads,
                    TensorTuple* in_grads) const override;

 private:
  AttrMap base_attrs_;
};

Maybe<void> ReduceSumLike::Init(const OpExpr& op) {
  const UserOpExpr* fw_op_expr = dynamic_cast<const UserOpExpr*>(&op);
  CHECK_NOTNULL_OR_RETURN(fw_op_expr);  // NOLINT(maybe-need-error-msg)
  base_attrs_ = MakeAttrMapFromUserOpConf(fw_op_expr->proto());
  return Maybe<void>::Ok();
}

Maybe<void> ReduceSumLike::Capture(ReduceSumLikeCaptureState* ctx, const TensorTuple& inputs,
                                   const TensorTuple& outputs, const AttrMap& attrs) const {
  CHECK_EQ_OR_RETURN(inputs.size(), 2);   // NOLINT(maybe-need-error-msg)
  CHECK_EQ_OR_RETURN(outputs.size(), 1);  // NOLINT(maybe-need-error-msg)
  ctx->requires_grad = inputs.at(0)->requires_grad();
  CHECK_OR_RETURN(!inputs.at(1)->requires_grad())
      << Error::RuntimeError() << "like tensor does not require grad";
  if (!ctx->requires_grad) { return Maybe<void>::Ok(); }
  ComposedAttrMap composed_attrs(attrs, base_attrs_);
  ctx->axis = JUST(composed_attrs.GetAttr<std::vector<int32_t>>("axis"));
  ctx->SaveTensorForBackward(inputs.at(0));
  return Maybe<void>::Ok();
}

Maybe<void> ReduceSumLike::Apply(const ReduceSumLikeCaptureState* ctx, const TensorTuple& out_grads,
                                 TensorTuple* in_grads) const {
  const auto& x = ctx->SavedTensors().at(0);
  in_grads->resize(2);
  in_grads->at(0) = JUST(functional::BroadcastLike(out_grads.at(0), x, ctx->axis));
  return Maybe<void>::Ok();
}

REGISTER_OP_EXPR_GRAD_FUNCTION("reduce_sum_like", ReduceSumLike);

}  // namespace one
}  // namespace oneflow
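The new ReduceSumLike gradient function reduces to a single call: the backward of reduce_sum_like is BroadcastLike of the upstream gradient along the saved axes, and the "like" input gets no gradient at all. A minimal sketch for one reduced axis in plain C++ (not OneFlow code; BroadcastLikeAxis1 is a hypothetical helper assuming a row-major [rows x cols] input):

#include <cstddef>
#include <vector>

// Backward of a sum over axis 1: every column of a row receives that row's
// upstream gradient value, i.e. the gradient is broadcast across the reduced axis.
std::vector<float> BroadcastLikeAxis1(const std::vector<float>& dout,  // one value per row
                                      std::size_t rows, std::size_t cols) {
  std::vector<float> din(rows * cols);
  for (std::size_t r = 0; r < rows; ++r) {
    for (std::size_t c = 0; c < cols; ++c) { din[r * cols + c] = dout[r]; }
  }
  return din;
}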