"vscode:/vscode.git/clone" did not exist on "5626e20b2bad5d181e9727434681b22ab0f1ac98"
Commit dbe08e9b authored by yuguo960516yuguo's avatar yuguo960516yuguo
Browse files

2.4.2

parent b5499578
......@@ -67,6 +67,9 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<phi::DenseTensor*>& tensors);
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<const phi::SelectedRows*>& tensors);
phi::MetaTensor MakeMetaTensor(
const paddle::optional<phi::SelectedRows>& tensor);
......@@ -79,6 +82,9 @@ phi::MetaTensor MakeMetaTensor(
std::vector<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<std::vector<const phi::DenseTensor*>>& tensors);
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<const phi::TensorBase*>& tensors);
/* ------------------ for output ----------------------- */
phi::DenseTensor* SetKernelOutput(Tensor* out);
......
......@@ -84,6 +84,8 @@ def main(
backward_api_dict = to_named_dict(backward_apis)
for api in apis:
if api['name'][-1] == '_':
api['name'] = api['name'][:-1]
api['op_name'] = SPARSE_OP_PREFIX + api['name']
api['name'] = api['op_name']
if api["backward"] is not None:
......
......@@ -102,10 +102,7 @@
- op : add_n
args : (Tensor[] x)
output : Tensor
infer_meta :
func : AddNInferMeta
kernel :
func : add_n
invoke : add_n_impl(x)
backward : add_n_grad
- op : addmm
......
......@@ -101,7 +101,7 @@
atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_op : batch_norm_grad
forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
forward : batch_norm_ (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
......
......@@ -87,7 +87,7 @@
layout : x
backward : atanh_grad
- op : batch_norm
- op : batch_norm_
args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta :
......@@ -95,7 +95,7 @@
kernel :
func : batch_norm_coo {sparse_coo, dense, dense, dense, dense -> sparse_coo, dense, dense, dense, dense, dense}
data_type : x
view : (mean -> mean_out), (variance -> variance_out)
inplace : (mean -> mean_out), (variance -> variance_out)
backward : batch_norm_grad
- op : cast
......
......@@ -108,8 +108,12 @@ class ArgumentMappingContext {
virtual bool IsDenseTensorInput(const std::string& name) const = 0;
virtual bool IsDenseTensorInputs(const std::string& name) const = 0;
virtual bool IsSelectedRowsInput(const std::string& name) const = 0;
virtual bool IsSparseCooTensorInput(const std::string& name) const = 0;
virtual bool IsSparseCsrTensorInput(const std::string& name) const = 0;
virtual bool IsSelectedRowsInputs(const std::string& name) const = 0;
// For compatibility with LoDTensorArray
virtual bool IsDenseTensorVectorInput(const std::string& name) const = 0;
......
......@@ -100,6 +100,24 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type == std::type_index(typeid(
const std::vector<const SelectedRows*>&))) {
args_def->AppendInput(default_key.backend(),
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type == std::type_index(typeid(
const std::vector<const TensorBase*>&))) {
args_def->AppendInput(default_key.backend(),
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type == std::type_index(typeid(
const std::vector<const TensorArray*>&))) {
args_def->AppendInput(default_key.backend(),
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type == std::type_index(typeid(const SelectedRows&))) {
args_def->AppendInput(default_key.backend(),
default_tensor_layout,
......
......@@ -270,6 +270,8 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor);
PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SelectedRows);
PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor);
PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(TensorBase);
PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SelectedRows);
PD_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRows);
PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_MULTI_INPUT(DenseTensor);
......
......@@ -39,7 +39,11 @@ int64_t MetaTensor::numel() const {
// Returns the dimensions of the wrapped tensor.
//
// ValidCheck(*this) runs first. A SelectedRows tensor reports
// GetCompleteDims() rather than the dims of its value tensor; every other
// tensor kind reports tensor_->dims() directly.
// NOTE(review): GetCompleteDims() presumably combines the SelectedRows height
// with the value dims -- confirm against the SelectedRows class.
DDim MetaTensor::dims() const {
  ValidCheck(*this);
  if (phi::SelectedRows::classof(tensor_)) {
    return static_cast<SelectedRows*>(tensor_)->GetCompleteDims();
  }
  // An unconditional return placed before the check above would make the
  // SelectedRows branch unreachable, so the generic path must come last.
  return tensor_->dims();
}
DataType MetaTensor::dtype() const {
......@@ -61,9 +65,7 @@ void MetaTensor::set_dims(const DDim& dims) {
StringTensorUtils::GetMutableMeta(static_cast<StringTensor*>(tensor_))
->dims = dims;
} else if (phi::SelectedRows::classof(tensor_)) {
DenseTensorUtils::GetMutableMeta(
static_cast<SelectedRows*>(tensor_)->mutable_value())
->dims = dims;
static_cast<SelectedRows*>(tensor_)->set_height(dims[0]);
} else if (phi::SparseCooTensor::classof(tensor_)) {
DenseTensorUtils::GetMutableMeta(static_cast<SparseCooTensor*>(tensor_))
->dims = dims;
......@@ -164,7 +166,13 @@ void MetaTensor::share_meta(const MetaTensor& meta_tensor) {
}
}
// Hands back the TensorBase pointer stored in this MetaTensor.
TensorBase* MetaTensor::tensor() const {
  TensorBase* const held = tensor_;
  return held;
}
// Reports whether DenseTensor::classof accepts the wrapped tensor.
bool MetaTensor::is_dense() const {
  const bool dense = DenseTensor::classof(tensor_);
  return dense;
}
// Reports whether SelectedRows::classof accepts the wrapped tensor.
bool MetaTensor::is_selected_rows() const {
  const bool selected_rows = SelectedRows::classof(tensor_);
  return selected_rows;
}
// Always answers false in this implementation; the wrapped tensor is not
// inspected.
bool MetaTensor::is_tensor_array() const {
  return false;
}
void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
......@@ -174,7 +182,6 @@ void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
bool is_sparse_coo = phi::SparseCooTensor::classof(tensor_);
bool is_sparse_csr = phi::SparseCsrTensor::classof(tensor_);
if (is_dense_tensor || is_selected_rows || is_sparse_coo || is_sparse_csr) {
set_dims(meta_tensor.dims());
if (is_selected_rows) {
const auto in_tensor_base = meta_tensor.tensor();
PADDLE_ENFORCE_EQ(
......@@ -186,6 +193,11 @@ void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
auto* selected_rows_in = static_cast<SelectedRows*>(in_tensor_base);
selected_rows_out->set_rows(selected_rows_in->rows());
selected_rows_out->set_height(selected_rows_in->height());
DenseTensorUtils::GetMutableMeta(
static_cast<SelectedRows*>(tensor_)->mutable_value())
->dims = selected_rows_in->mutable_value()->dims();
} else {
set_dims(meta_tensor.dims());
}
} else {
PADDLE_THROW(phi::errors::Unimplemented(
......@@ -212,6 +224,4 @@ const LoD& MetaTensor::lod() const {
}
}
TensorBase* MetaTensor::tensor() const { return tensor_; }
} // namespace phi
......@@ -68,9 +68,9 @@ class MetaTensor {
virtual bool initialized() const;
virtual bool is_selected_rows() const;
virtual bool is_dense() const;
// TODO(YuanRisheng) This API exists for compatibility with Fluid
// and will be deleted in the future.
virtual bool is_tensor_array() const;
......
......@@ -132,10 +132,7 @@ class SelectedRows : public TensorBase,
/// \brief Returns the dims of the tensor.
/// \return The dims reported by the underlying impl_ object.
const DDim& dims() const noexcept override { return impl_->dims(); }
/// \brief Returns the data type of the tensor.
/// \return The data type of the tensor.
......
......@@ -19,8 +19,6 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
namespace phi {
#define _PhiForEachDataTypeHelper_(callback, cpp_type, data_type) \
......
......@@ -301,6 +301,10 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
phi::DDim in_dim({0});
for (size_t i = 0; i < x.size(); ++i) {
auto x_dim = x[i]->dims();
// x_dim.size() == 1 means the real dim of selected rows is [0]
if (x[i]->is_selected_rows() && x_dim.size() == 1) {
continue;
}
if (phi::product(x_dim) == 0) {
continue;
}
......@@ -355,6 +359,31 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
out->share_lod(*x[0]);
}
// TODO(YuanRisheng) This InferMeta is used in Fluid
// and will be deleted in the future.
//
// Shape inference for add_n when inputs may be tensor arrays.
//   x      : input meta tensors.
//   out    : output meta tensor whose dims may be set here.
//   config : forwarded unchanged to AddNInferMeta on the fallback path.
// If no input reports is_tensor_array(), the work is delegated to
// AddNInferMeta. Otherwise, when `out` is itself a tensor array, its dims are
// set to {largest dims()[0] among the tensor-array inputs}.
void AddNTensorArrayInferMeta(const std::vector<const MetaTensor*>& x,
                              MetaTensor* out,
                              MetaConfig config) {
  bool saw_tensor_array = false;
  int64_t longest = 0;
  for (const MetaTensor* item : x) {
    if (!item->is_tensor_array()) {
      continue;
    }
    saw_tensor_array = true;
    // if input is lod_tensor_array, dims() will return its size (one element)
    const int64_t length = item->dims()[0];
    if (length > longest) {
      longest = length;
    }
  }

  if (!saw_tensor_array) {
    AddNInferMeta(x, out, config);
    return;
  }
  if (out->is_tensor_array()) {
    out->set_dims(make_ddim({longest}));
  }
}
void AucInferMeta(const MetaTensor& input,
const MetaTensor& label,
const MetaTensor& stat_pos,
......@@ -2161,6 +2190,14 @@ void MultiplexInferMeta(const std::vector<const MetaTensor*>& ins,
phi::errors::PreconditionNotMet(
"All the candidate tensors must have the same size."));
}
PADDLE_ENFORCE_GE(
in_dim[0],
ids_dim[0],
phi::errors::InvalidArgument("The 2nd-dim of input cannot be smaller "
"than batchSize of the index tensor."));
in_dim[0] = ids_dim[0];
out->set_dims(in_dim);
out->set_dtype(ins[0]->dtype());
}
......
......@@ -123,6 +123,10 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
MetaTensor* out,
MetaConfig config = MetaConfig());
void AddNTensorArrayInferMeta(const std::vector<const MetaTensor*>& x,
MetaTensor* out,
MetaConfig config);
void AucInferMeta(const MetaTensor& input,
const MetaTensor& label,
const MetaTensor& stat_pos,
......
......@@ -3184,11 +3184,11 @@ void FillSplitOutDims(const MetaTensor& x,
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
(*out)[i]->share_lod(x);
} else {
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
(*out)[i]->share_lod(x);
}
}
}
......@@ -3219,11 +3219,11 @@ void SplitInferMeta(const MetaTensor& x,
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
......@@ -3310,11 +3310,11 @@ void SplitWithNumInferMeta(const MetaTensor& x,
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
......
......@@ -15,12 +15,20 @@
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_array.h"
namespace phi {
// Note(YuanRisheng): std::vector<const TensorBase*> shouldn't be widely used in
// PHI. Here, we use it to be compatible with Fluid.
//
// add_n kernel entry point.
//   dev_ctx : device context the kernel runs on.
//   x       : input tensors, passed as TensorBase pointers so that one list
//             can carry more than one concrete tensor kind.
//   out     : dense tensor that receives the result.
// The input list is declared exactly once; declaring two parameters named `x`
// is ill-formed.
template <typename T, typename Context>
void AddNKernel(const Context& dev_ctx,
                const std::vector<const TensorBase*>& x,
                DenseTensor* out);
template <typename T, typename Context>
void AddNArrayKernel(const Context& dev_ctx,
const std::vector<const TensorArray*>& x,
TensorArray* out);
} // namespace phi
......@@ -12,24 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/add_n_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/impl/add_n_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void AddNKernel(const Context& dev_ctx,
const std::vector<const DenseTensor*>& x,
const std::vector<const TensorBase*>& x,
DenseTensor* out) {
size_t in_num = x.size();
bool in_place = out == x[0];
auto* out_ptr = dev_ctx.template Alloc<T>(out);
if (in_num >= 1 && x[0]->initialized()) {
if (x[0]->numel() > 0) {
in_place = (x[0]->data<T>() == out_ptr);
dev_ctx.template Alloc<T>(out);
bool in_place = false;
if (x.size() > 0 && x[0]->initialized() && DenseTensor::classof(x[0])) {
if ((static_cast<const DenseTensor*>(x[0]))->Holder() == out->Holder()) {
in_place = true;
}
}
......@@ -37,9 +34,11 @@ void AddNKernel(const Context& dev_ctx,
auto& place = *dev_ctx.eigen_device();
int start = in_place ? 1 : 0;
if (!in_place) {
if ((in_num >= 2) && x[0]->initialized() && x[1]->initialized()) {
auto& in_0 = *x[0];
auto& in_1 = *x[1];
if ((in_num >= 2) && DenseTensor::classof(x[0]) &&
DenseTensor::classof(x[1]) && x[0]->initialized() &&
x[1]->initialized()) {
auto& in_0 = *(static_cast<const DenseTensor*>(x[0]));
auto& in_1 = *(static_cast<const DenseTensor*>(x[1]));
if (in_0.numel() && in_1.numel()) {
auto in_0_e = EigenVector<T>::Flatten(in_0);
auto in_1_e = EigenVector<T>::Flatten(in_1);
......@@ -49,20 +48,33 @@ void AddNKernel(const Context& dev_ctx,
}
if (start != 2) {
VLOG(10) << "Fill with constant = 0 in sum kernel.";
funcs::SetConstant<Context, T> constant_functor;
phi::funcs::SetConstant<Context, T> constant_functor;
constant_functor(dev_ctx, out, static_cast<T>(0));
}
}
paddle::operators::math::SelectedRowsAddToTensor<Context, T> functor;
// If in_place, just skip the first tensor
for (size_t i = start; i < in_num; i++) {
auto& in_t = *x[i];
if (!in_t.initialized() || in_t.numel() == 0) {
continue;
if (DenseTensor::classof(x[i])) {
auto& in_t = *(static_cast<const DenseTensor*>(x[i]));
if (!in_t.initialized() || in_t.numel() == 0) {
continue;
}
auto in = EigenVector<T>::Flatten(in_t);
result.device(place) = result + in;
} else if (SelectedRows::classof(x[i])) {
auto& in_t = *(static_cast<const SelectedRows*>(x[i]));
functor(dev_ctx, in_t, out);
} else {
PADDLE_THROW(phi::errors::InvalidArgument(
"Expected type of Input(X) of %d-th must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
x[i]->type_info().name()));
}
auto in = EigenVector<T>::Flatten(in_t);
result.device(place) = result + in;
}
VLOG(10) << "end add_n kernel";
}
} // namespace phi
......@@ -76,3 +88,13 @@ PD_REGISTER_KERNEL(add_n,
int,
phi::dtype::bfloat16,
int64_t) {}
PD_REGISTER_KERNEL(add_n_array,
CPU,
ALL_LAYOUT,
phi::AddNArrayKernel,
float,
double,
int,
phi::dtype::bfloat16,
int64_t) {}
......@@ -37,7 +37,7 @@ void MultiplexKernel(const Context& ctx,
auto rows = ins[0]->dims()[0];
auto cols = ins[0]->numel() / rows;
auto index = ids.data<int32_t>();
for (auto i = 0; i < rows; i++) {
for (auto i = 0; i < ids.dims()[0]; i++) {
int32_t k = index[i];
PADDLE_ENFORCE_GE(
k, 0, errors::PreconditionNotMet("index must be nonnegative."));
......
......@@ -20,6 +20,7 @@
#include "paddle/phi/kernels/funcs/math_function.h"
DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_bool(gemm_use_half_precision_compute_type);
namespace phi {
namespace funcs {
......@@ -720,6 +721,14 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA,
float h_alpha = static_cast<float>(alpha);
float h_beta = static_cast<float>(beta);
rocblas_datatype compute_type = rocblas_datatype_f32_r;
if (FLAGS_gemm_use_half_precision_compute_type == true) {
compute_type = rocblas_datatype_f16_r;
}
VLOG(4) << "use_half_precision_compute_type: "
<< FLAGS_gemm_use_half_precision_compute_type;
auto &cuda_ctx = const_cast<phi::GPUContext &>(context_);
CUBlas<phi::dtype::float16>::GEMM_EX(&cuda_ctx,
cuTransB,
......@@ -738,7 +747,7 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA,
C,
rocblas_datatype_f16_r,
N,
rocblas_datatype_f32_r);
compute_type);
}
template <>
......
......@@ -18,6 +18,8 @@ limitations under the License. */
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace phi {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment