2.4.2

dbe08e9b · yuguo960516yuguo · b5499578 · dbe08e9b · dbe08e9b · dbe08e9b
Commit dbe08e9b authored Jun 12, 2023 by yuguo960516yuguo
20 changed files
--- a/paddle/phi/api/lib/api_gen_utils.h
+++ b/paddle/phi/api/lib/api_gen_utils.h
@@ -67,6 +67,9 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
 std::vector<phi::MetaTensor> MakeMetaTensor(
    const std::vector<phi::DenseTensor*>& tensors);

+std::vector<phi::MetaTensor> MakeMetaTensor(
+    const std::vector<const phi::SelectedRows*>& tensors);
+
 phi::MetaTensor MakeMetaTensor(
    const paddle::optional<phi::SelectedRows>& tensor);

@@ -79,6 +82,9 @@ phi::MetaTensor MakeMetaTensor(
 std::vector<phi::MetaTensor> MakeMetaTensor(
    const paddle::optional<std::vector<const phi::DenseTensor*>>& tensors);

+std::vector<phi::MetaTensor> MakeMetaTensor(
+    const std::vector<const phi::TensorBase*>& tensors);
+
 /* ------------------ for output ----------------------- */

 phi::DenseTensor* SetKernelOutput(Tensor* out);

--- a/paddle/phi/api/yaml/generator/generate_sparse_op.py
+++ b/paddle/phi/api/yaml/generator/generate_sparse_op.py
@@ -84,6 +84,8 @@ def main(
    backward_api_dict = to_named_dict(backward_apis)

    for api in apis:
+        if api['name'][-1] == '_':
+            api['name'] = api['name'][:-1]
        api['op_name'] = SPARSE_OP_PREFIX + api['name']
        api['name'] = api['op_name']
        if api["backward"] is not None:

--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -102,10 +102,7 @@
 - op : add_n
  args : (Tensor[] x)
  output : Tensor
-  infer_meta :
-    func : AddNInferMeta
-  kernel :
-    func : add_n
+  invoke : add_n_impl(x)
  backward : add_n_grad

 - op : addmm

--- a/paddle/phi/api/yaml/sparse_backward.yaml
+++ b/paddle/phi/api/yaml/sparse_backward.yaml
@@ -101,7 +101,7 @@
           atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

 - backward_op : batch_norm_grad
-  forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  forward : batch_norm_ (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
  infer_meta :

--- a/paddle/phi/api/yaml/sparse_ops.yaml
+++ b/paddle/phi/api/yaml/sparse_ops.yaml
@@ -87,7 +87,7 @@
    layout : x
  backward : atanh_grad

- op : batch_norm
+- op : batch_norm_
  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
  output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
  infer_meta :
@@ -95,7 +95,7 @@
  kernel :
    func : batch_norm_coo {sparse_coo, dense, dense, dense, dense -> sparse_coo, dense, dense, dense, dense, dense}
    data_type : x
-  view : (mean -> mean_out), (variance -> variance_out)
+  inplace : (mean -> mean_out), (variance -> variance_out)
  backward : batch_norm_grad

 - op : cast

--- a/paddle/phi/core/compat/arg_map_context.h
+++ b/paddle/phi/core/compat/arg_map_context.h
@@ -108,8 +108,12 @@ class ArgumentMappingContext {
  virtual bool IsDenseTensorInput(const std::string& name) const = 0;
  virtual bool IsDenseTensorInputs(const std::string& name) const = 0;
  virtual bool IsSelectedRowsInput(const std::string& name) const = 0;
+
  virtual bool IsSparseCooTensorInput(const std::string& name) const = 0;
  virtual bool IsSparseCsrTensorInput(const std::string& name) const = 0;
+
+  virtual bool IsSelectedRowsInputs(const std::string& name) const = 0;
+
  // For compatibility with LoDTensorArray
  virtual bool IsDenseTensorVectorInput(const std::string& name) const = 0;


--- a/paddle/phi/core/kernel_registry.h
+++ b/paddle/phi/core/kernel_registry.h
@@ -100,6 +100,24 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
                              default_tensor_layout,
                              default_key.dtype(),
                              arg_type);
+      } else if (arg_type == std::type_index(typeid(
+                                 const std::vector<const SelectedRows*>&))) {
+        args_def->AppendInput(default_key.backend(),
+                              default_tensor_layout,
+                              default_key.dtype(),
+                              arg_type);
+      } else if (arg_type == std::type_index(typeid(
+                                 const std::vector<const TensorBase*>&))) {
+        args_def->AppendInput(default_key.backend(),
+                              default_tensor_layout,
+                              default_key.dtype(),
+                              arg_type);
+      } else if (arg_type == std::type_index(typeid(
+                                 const std::vector<const TensorArray*>&))) {
+        args_def->AppendInput(default_key.backend(),
+                              default_tensor_layout,
+                              default_key.dtype(),
+                              arg_type);
      } else if (arg_type == std::type_index(typeid(const SelectedRows&))) {
        args_def->AppendInput(default_key.backend(),
                              default_tensor_layout,

--- a/paddle/phi/core/kernel_utils.h
+++ b/paddle/phi/core/kernel_utils.h
@@ -270,6 +270,8 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
  PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor);
  PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SelectedRows);
  PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor);
+  PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(TensorBase);
+  PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SelectedRows);
  PD_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRows);
  PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_MULTI_INPUT(DenseTensor);


--- a/paddle/phi/core/meta_tensor.cc
+++ b/paddle/phi/core/meta_tensor.cc
@@ -39,7 +39,11 @@ int64_t MetaTensor::numel() const {

 DDim MetaTensor::dims() const {
  ValidCheck(*this);
-  return tensor_->dims();
+  if (phi::SelectedRows::classof(tensor_)) {
+    return static_cast<SelectedRows*>(tensor_)->GetCompleteDims();
+  } else {
+    return tensor_->dims();
+  }
 }

 DataType MetaTensor::dtype() const {
@@ -61,9 +65,7 @@ void MetaTensor::set_dims(const DDim& dims) {
    StringTensorUtils::GetMutableMeta(static_cast<StringTensor*>(tensor_))
        ->dims = dims;
  } else if (phi::SelectedRows::classof(tensor_)) {
-    DenseTensorUtils::GetMutableMeta(
-        static_cast<SelectedRows*>(tensor_)->mutable_value())
-        ->dims = dims;
+    static_cast<SelectedRows*>(tensor_)->set_height(dims[0]);
  } else if (phi::SparseCooTensor::classof(tensor_)) {
    DenseTensorUtils::GetMutableMeta(static_cast<SparseCooTensor*>(tensor_))
        ->dims = dims;
@@ -164,7 +166,13 @@ void MetaTensor::share_meta(const MetaTensor& meta_tensor) {
  }
 }

+TensorBase* MetaTensor::tensor() const { return tensor_; }
+
 bool MetaTensor::is_dense() const { return DenseTensor::classof(tensor_); }
+bool MetaTensor::is_selected_rows() const {
+  return SelectedRows::classof(tensor_);
+}
+
 bool MetaTensor::is_tensor_array() const { return false; }

 void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
@@ -174,7 +182,6 @@ void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
  bool is_sparse_coo = phi::SparseCooTensor::classof(tensor_);
  bool is_sparse_csr = phi::SparseCsrTensor::classof(tensor_);
  if (is_dense_tensor || is_selected_rows || is_sparse_coo || is_sparse_csr) {
-    set_dims(meta_tensor.dims());
    if (is_selected_rows) {
      const auto in_tensor_base = meta_tensor.tensor();
      PADDLE_ENFORCE_EQ(
@@ -186,6 +193,11 @@ void MetaTensor::share_dims(const MetaTensor& meta_tensor) {
      auto* selected_rows_in = static_cast<SelectedRows*>(in_tensor_base);
      selected_rows_out->set_rows(selected_rows_in->rows());
      selected_rows_out->set_height(selected_rows_in->height());
+      DenseTensorUtils::GetMutableMeta(
+          static_cast<SelectedRows*>(tensor_)->mutable_value())
+          ->dims = selected_rows_in->mutable_value()->dims();
+    } else {
+      set_dims(meta_tensor.dims());
    }
  } else {
    PADDLE_THROW(phi::errors::Unimplemented(
@@ -212,6 +224,4 @@ const LoD& MetaTensor::lod() const {
  }
 }

-TensorBase* MetaTensor::tensor() const { return tensor_; }
-
 }  // namespace phi
--- a/paddle/phi/core/meta_tensor.h
+++ b/paddle/phi/core/meta_tensor.h
@@ -68,9 +68,9 @@ class MetaTensor {

  virtual bool initialized() const;

+  virtual bool is_selected_rows() const;
  virtual bool is_dense() const;
-  // TODO(YuanRisheng) This API is for compatible with
-  // Fluid
+  // TODO(YuanRisheng) This API is for compatibility with Fluid
  //  and it will be deleted in the future.
  virtual bool is_tensor_array() const;


--- a/paddle/phi/core/selected_rows.h
+++ b/paddle/phi/core/selected_rows.h
@@ -132,10 +132,7 @@ class SelectedRows : public TensorBase,

  /// \brief Returns the dims of the tensor.
  /// \return The dims of the tensor.
-  const DDim& dims() const noexcept override {
-    return impl_->dims();
-    // return phi::make_ddim(dims);
-  }
+  const DDim& dims() const noexcept override { return impl_->dims(); }

  /// \brief Returns the data type of the tensor.
  /// \return The data type of the tensor.

--- a/paddle/phi/core/utils/data_type.h
+++ b/paddle/phi/core/utils/data_type.h
@@ -19,8 +19,6 @@ limitations under the License. */

 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/kernels/funcs/eigen/extensions.h"
-
 namespace phi {

 #define _PhiForEachDataTypeHelper_(callback, cpp_type, data_type) \

--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -301,6 +301,10 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
  phi::DDim in_dim({0});
  for (size_t i = 0; i < x.size(); ++i) {
    auto x_dim = x[i]->dims();
+    // x_dim.size() == 1 means the real dim of selected rows is [0]
+    if (x[i]->is_selected_rows() && x_dim.size() == 1) {
+      continue;
+    }
    if (phi::product(x_dim) == 0) {
      continue;
    }
@@ -355,6 +359,31 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
  out->share_lod(*x[0]);
 }

+// TODO(YuanRisheng) This InferMeta is used in Fluid
+//                   and will be deleted in the future.
+// Infers the meta of `out` for add_n when inputs may be tensor arrays.
+// If at least one input is a tensor array and `out` is a tensor array too,
+// `out` receives the 1-D dims {largest dims()[0] among those inputs}.
+// If no input is a tensor array, the work is delegated to AddNInferMeta.
+void AddNTensorArrayInferMeta(const std::vector<const MetaTensor*>& x,
+                              MetaTensor* out,
+                              MetaConfig config) {
+  bool has_tensor_array = false;
+  int64_t max_length = 0;
+  for (const MetaTensor* input : x) {
+    if (!input->is_tensor_array()) {
+      continue;
+    }
+    has_tensor_array = true;
+    // if input is lod_tensor_array, dims() will return its size (one element)
+    const int64_t length = input->dims()[0];
+    if (length > max_length) {
+      max_length = length;
+    }
+  }
+
+  // No tensor array among the inputs: use the regular add_n inference.
+  if (!has_tensor_array) {
+    AddNInferMeta(x, out, config);
+    return;
+  }
+  if (out->is_tensor_array()) {
+    out->set_dims(make_ddim({max_length}));
+  }
+}
+
 void AucInferMeta(const MetaTensor& input,
                  const MetaTensor& label,
                  const MetaTensor& stat_pos,
@@ -2161,6 +2190,14 @@ void MultiplexInferMeta(const std::vector<const MetaTensor*>& ins,
        phi::errors::PreconditionNotMet(
            "All the candidate tensors must have the same size."));
  }
+
+  PADDLE_ENFORCE_GE(
+      in_dim[0],
+      ids_dim[0],
+      phi::errors::InvalidArgument("The 2nd-dim of input cannot be smaller "
+                                   "than batchSize of the index tensor."));
+
+  in_dim[0] = ids_dim[0];
  out->set_dims(in_dim);
  out->set_dtype(ins[0]->dtype());
 }

--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -123,6 +123,10 @@ void AddNInferMeta(const std::vector<const MetaTensor*>& x,
                   MetaTensor* out,
                   MetaConfig config = MetaConfig());

+void AddNTensorArrayInferMeta(const std::vector<const MetaTensor*>& x,
+                              MetaTensor* out,
+                              MetaConfig config);
+
 void AucInferMeta(const MetaTensor& input,
                  const MetaTensor& label,
                  const MetaTensor& stat_pos,

--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -3184,11 +3184,11 @@ void FillSplitOutDims(const MetaTensor& x,
      (*out)[i]->set_dtype(x.dtype());
      (*out)[i]->set_dims(out_dims[i]);
      (*out)[i]->set_layout(x.layout());
+      (*out)[i]->share_lod(x);
    } else {
      (*out)[i]->set_dtype(x.dtype());
      (*out)[i]->set_dims(out_dims[i]);
      (*out)[i]->set_layout(x.layout());
-      (*out)[i]->share_lod(x);
    }
  }
 }
@@ -3219,11 +3219,11 @@ void SplitInferMeta(const MetaTensor& x,
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
+        out[i]->share_lod(x);
      } else {
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
-        out[i]->share_lod(x);
      }
    }
  } else {
@@ -3310,11 +3310,11 @@ void SplitWithNumInferMeta(const MetaTensor& x,
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
+        out[i]->share_lod(x);
      } else {
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
-        out[i]->share_lod(x);
      }
    }
  } else {

--- a/paddle/phi/kernels/add_n_kernel.h
+++ b/paddle/phi/kernels/add_n_kernel.h
@@ -15,12 +15,20 @@
 #pragma once

 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/tensor_array.h"

 namespace phi {

+// Note(YuanRisheng): std::vector<const TensorBase*> shouldn't be widely used in
+// PHI. Here, we use it to be compatible with Fluid.
 template <typename T, typename Context>
 void AddNKernel(const Context& dev_ctx,
-                const std::vector<const DenseTensor*>& x,
+                const std::vector<const TensorBase*>& x,
                DenseTensor* out);

+template <typename T, typename Context>
+void AddNArrayKernel(const Context& dev_ctx,
+                     const std::vector<const TensorArray*>& x,
+                     TensorArray* out);
+
 }  // namespace phi
--- a/paddle/phi/kernels/cpu/add_n_kernel.cc
+++ b/paddle/phi/kernels/cpu/add_n_kernel.cc
@@ -12,24 +12,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/phi/kernels/add_n_kernel.h"
-
-#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/funcs/eigen/common.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/impl/add_n_kernel_impl.h"

 namespace phi {

 template <typename T, typename Context>
 void AddNKernel(const Context& dev_ctx,
-                const std::vector<const DenseTensor*>& x,
+                const std::vector<const TensorBase*>& x,
                 DenseTensor* out) {
+  // Sums every input in `x` into `out`. Each input is dispatched on its
+  // runtime type: DenseTensor inputs are accumulated through Eigen,
+  // SelectedRows inputs go through SelectedRowsAddToTensor, and any other
+  // type raises InvalidArgument.
   size_t in_num = x.size();
-  bool in_place = out == x[0];
-  auto* out_ptr = dev_ctx.template Alloc<T>(out);
-  if (in_num >= 1 && x[0]->initialized()) {
-    if (x[0]->numel() > 0) {
-      in_place = (x[0]->data<T>() == out_ptr);
+  dev_ctx.template Alloc<T>(out);
+
+  // The call is in-place when x[0] is an initialized DenseTensor that shares
+  // its holder with `out`; x[0] is then skipped in the accumulation below.
+  bool in_place = false;
+  if (x.size() > 0 && x[0]->initialized() && DenseTensor::classof(x[0])) {
+    if ((static_cast<const DenseTensor*>(x[0]))->Holder() == out->Holder()) {
+      in_place = true;
     }
   }

@@ -37,9 +34,11 @@ void AddNKernel(const Context& dev_ctx,
  auto& place = *dev_ctx.eigen_device();
  int start = in_place ? 1 : 0;
  if (!in_place) {
-    if ((in_num >= 2) && x[0]->initialized() && x[1]->initialized()) {
-      auto& in_0 = *x[0];
-      auto& in_1 = *x[1];
+    if ((in_num >= 2) && DenseTensor::classof(x[0]) &&
+        DenseTensor::classof(x[1]) && x[0]->initialized() &&
+        x[1]->initialized()) {
+      auto& in_0 = *(static_cast<const DenseTensor*>(x[0]));
+      auto& in_1 = *(static_cast<const DenseTensor*>(x[1]));
      if (in_0.numel() && in_1.numel()) {
        auto in_0_e = EigenVector<T>::Flatten(in_0);
        auto in_1_e = EigenVector<T>::Flatten(in_1);
@@ -49,20 +48,33 @@ void AddNKernel(const Context& dev_ctx,
    }
    if (start != 2) {
      VLOG(10) << "Fill with constant = 0 in sum kernel.";
-      funcs::SetConstant<Context, T> constant_functor;
+      phi::funcs::SetConstant<Context, T> constant_functor;
      constant_functor(dev_ctx, out, static_cast<T>(0));
    }
  }

+  paddle::operators::math::SelectedRowsAddToTensor<Context, T> functor;
  // If in_place, just skip the first tensor
  for (size_t i = start; i < in_num; i++) {
-    auto& in_t = *x[i];
-    if (!in_t.initialized() || in_t.numel() == 0) {
-      continue;
+    if (DenseTensor::classof(x[i])) {
+      auto& in_t = *(static_cast<const DenseTensor*>(x[i]));
+      if (!in_t.initialized() || in_t.numel() == 0) {
+        continue;
+      }
+      auto in = EigenVector<T>::Flatten(in_t);
+      result.device(place) = result + in;
+    } else if (SelectedRows::classof(x[i])) {
+      auto& in_t = *(static_cast<const SelectedRows*>(x[i]));
+      functor(dev_ctx, in_t, out);
+    } else {
+      // The message has two placeholders (%d and %s), so both the input
+      // index and the type name must be supplied, in that order.
+      PADDLE_THROW(phi::errors::InvalidArgument(
+          "Expected type of Input(X) of %d-th must be Tensor, "
+          "SelectedRows. But got "
+          "unsupport type: %s.",
+          i,
+          x[i]->type_info().name()));
    }
-    auto in = EigenVector<T>::Flatten(in_t);
-    result.device(place) = result + in;
  }
+  VLOG(10) << "end add_n kernel";
 }

 }  // namespace phi
@@ -76,3 +88,13 @@ PD_REGISTER_KERNEL(add_n,
                   int,
                   phi::dtype::bfloat16,
                   int64_t) {}
+
+PD_REGISTER_KERNEL(add_n_array,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::AddNArrayKernel,
+                   float,
+                   double,
+                   int,
+                   phi::dtype::bfloat16,
+                   int64_t) {}
--- a/paddle/phi/kernels/cpu/multiplex_kernel.cc
+++ b/paddle/phi/kernels/cpu/multiplex_kernel.cc
@@ -37,7 +37,7 @@ void MultiplexKernel(const Context& ctx,
  auto rows = ins[0]->dims()[0];
  auto cols = ins[0]->numel() / rows;
  auto index = ids.data<int32_t>();
-  for (auto i = 0; i < rows; i++) {
+  for (auto i = 0; i < ids.dims()[0]; i++) {
    int32_t k = index[i];
    PADDLE_ENFORCE_GE(
        k, 0, errors::PreconditionNotMet("index must be nonnegative."));

--- a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h
+++ b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h
@@ -20,6 +20,7 @@
 #include "paddle/phi/kernels/funcs/math_function.h"

 DECLARE_bool(enable_cublas_tensor_op_math);
+DECLARE_bool(gemm_use_half_precision_compute_type);

 namespace phi {
 namespace funcs {
@@ -720,6 +721,14 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA,
  float h_alpha = static_cast<float>(alpha);
  float h_beta = static_cast<float>(beta);

+  rocblas_datatype compute_type = rocblas_datatype_f32_r;
+
+  if (FLAGS_gemm_use_half_precision_compute_type == true) {
+    compute_type = rocblas_datatype_f16_r;
+  }
+  VLOG(4) << "use_half_precision_compute_type: "
+            << FLAGS_gemm_use_half_precision_compute_type;
+
  auto &cuda_ctx = const_cast<phi::GPUContext &>(context_);
  CUBlas<phi::dtype::float16>::GEMM_EX(&cuda_ctx,
                                       cuTransB,
@@ -738,7 +747,7 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA,
                                       C,
                                       rocblas_datatype_f16_r,
                                       N,
-                                       rocblas_datatype_f32_r);
+                                       compute_type);
 }

 template <>

--- a/paddle/phi/kernels/funcs/eigen/eigen_function.h
+++ b/paddle/phi/kernels/funcs/eigen/eigen_function.h
@@ -18,6 +18,8 @@ limitations under the License. */
 #ifndef NOMINMAX
 #define NOMINMAX
 #endif
+
+#include "paddle/phi/kernels/funcs/eigen/extensions.h"
 #include "unsupported/Eigen/CXX11/Tensor"

 namespace phi {