#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/amp_auto_cast.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#pragma GCC diagnostic ignored "-Wunused-variable"

// Eager (dygraph) forward entry point for the `fused_elemwise_add_activation` op.
//
//   X, Y      input tensors, passed to the tracer under the slots "X" and "Y".
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns (Out, IntermediateOut) as read back from the traced op outputs.
// When ComputeRequireGrad reports that a gradient is needed, a
// fused_elemwise_add_activationGradNodeCompat is created and set as the
// history of Out.
std::tuple<paddle::experimental::Tensor, paddle::experimental::Tensor>
fused_elemwise_add_activation_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = fused_elemwise_add_activationGradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "fused_elemwise_add_activation dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: fused_elemwise_add_activation";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X}, {Y} };
    auto cast_dtype =
        egr::GetAmpDestDtype("fused_elemwise_add_activation", amp_inputs);

    auto casted_X =
        egr::AmpAutoCast("X", X, cast_dtype, "fused_elemwise_add_activation");
    auto casted_Y =
        egr::AmpAutoCast("Y", Y, cast_dtype, "fused_elemwise_add_activation");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return fused_elemwise_add_activation_dygraph_function(
        casted_X, casted_Y, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);
  op_inputs["Y"] = egr::EagerUtils::TrySyncToVars(Y);

  // One freshly named output variable per output slot.
  EagerVarMap op_outputs;
  op_outputs["Out"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};
  op_outputs["IntermediateOut"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Autograd bookkeeping for the inputs.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, y_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fused_elemwise_add_activation",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);
  paddle::experimental::Tensor IntermediateOut;
  egr::EagerUtils::GetOutput(op_outputs["IntermediateOut"].front(),
                             &IntermediateOut);

  {
    paddle::platform::RecordEvent node_event(
        "fused_elemwise_add_activation node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* intermediate_out_meta =
        egr::EagerUtils::autograd_meta(&IntermediateOut);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for fused_elemwise_add_activation ";
      egr::EagerUtils::PassStopGradient(
          false, out_meta, intermediate_out_meta);

      // Constructor arguments (2, 2) are presumably the grad-in / grad-out
      // slot counts; they match the slot indices used below.
      std::shared_ptr<GradNode> node(new GradNode(2, 2));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs.
      node->SetTensorWrapperOut(Out);
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(Y);

      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Y, 1);

      // Slot 0: Out (gets the node as its history).
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Slot 1: IntermediateOut (rank and grad-in meta only).
      egr::EagerUtils::SetOutRankWithSlot(intermediate_out_meta, 1);
      node->SetGradInMeta(IntermediateOut, 1);
    }
  }

  return std::make_tuple(Out, IntermediateOut);
}


// Eager (dygraph) forward entry point for the `pull_sparse_v2` op.
//
//   Ids, W    input tensor lists, passed under the slots "Ids" and "W".
//   OutNum    number of output variables created for the "Out" slot.
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns the tensors read back from the "Out" slot. When ComputeRequireGrad
// reports that a gradient is needed, a pull_sparse_v2GradNodeCompat is created
// and set as the history of every output.
std::vector<paddle::experimental::Tensor> pull_sparse_v2_dygraph_function(
    const std::vector<paddle::experimental::Tensor>& Ids,
    const std::vector<paddle::experimental::Tensor>& W,
    size_t OutNum,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = pull_sparse_v2GradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "pull_sparse_v2 dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: pull_sparse_v2";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { Ids, W };
    auto cast_dtype = egr::GetAmpDestDtype("pull_sparse_v2", amp_inputs);

    auto casted_Ids =
        egr::AmpAutoCasts("Ids", Ids, cast_dtype, "pull_sparse_v2");
    auto casted_W = egr::AmpAutoCasts("W", W, cast_dtype, "pull_sparse_v2");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return pull_sparse_v2_dygraph_function(
        casted_Ids, casted_W, OutNum, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["Ids"] = egr::EagerUtils::TrySyncToVars(Ids);
  op_inputs["W"] = egr::EagerUtils::TrySyncToVars(W);

  // OutNum output variables for the single "Out" slot.
  EagerVarMap op_outputs;
  op_outputs["Out"] = egr::EagerUtils::CreateVars(OutNum);

  // Autograd bookkeeping for the inputs.
  std::vector<egr::AutogradMeta*> ids_metas =
      egr::EagerUtils::nullable_autograd_meta(Ids);
  std::vector<egr::AutogradMeta*> w_metas =
      egr::EagerUtils::nullable_autograd_meta(W);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(grad_enabled, &ids_metas, &w_metas);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "pull_sparse_v2",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  std::vector<paddle::experimental::Tensor> Out;
  egr::EagerUtils::GetOutputs(op_outputs["Out"], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "pull_sparse_v2 node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    std::vector<egr::AutogradMeta*> out_metas =
        egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for pull_sparse_v2 ";
      egr::EagerUtils::PassStopGradient(false, &out_metas);

      // Constructor arguments (1, 2) are presumably the grad-in / grad-out
      // slot counts.
      std::shared_ptr<GradNode> node(new GradNode(1, 2));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs.
      node->SetTensorWrapperIds(Ids);
      node->SetTensorWrapperW(W);

      // Slot 0: Out (gets the node as its history).
      egr::EagerUtils::SetOutRankWithSlot(&out_metas, 0);
      egr::EagerUtils::SetHistory(&out_metas, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


std::tuple<paddle::experimental::Tensor,std::vector<paddle::experimental::Tensor>,std::vector<paddle::experimental::Tensor>> einsum_dygraph_function(const std::vector<paddle::experimental::Tensor>& Operands, size_t InnerCacheNum, size_t XShapeNum, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("einsum dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: einsum";
  // Dygraph Forward Pass

  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
  
    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = { Operands };

    auto amp_dst_dtype = egr::GetAmpDestDtype("einsum", amp_tensors_vector);

    auto NEW_Operands = egr::AmpAutoCasts("Operands", Operands, amp_dst_dtype, "einsum");

    {
      paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return einsum_dygraph_function( NEW_Operands, InnerCacheNum, XShapeNum, attr_map );
    }

  }

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "Operands", egr::EagerUtils::TrySyncToVars(Operands) } };

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},{ "InnerCache", egr::EagerUtils::CreateVars(InnerCacheNum) },{ "XShape", egr::EagerUtils::CreateVars(XShapeNum) } };


  // Prepare Autograd Meta 
  std::vector<egr::AutogradMeta*> p_autograd_Operands = egr::EagerUtils::nullable_autograd_meta(Operands);

  bool trace_backward = egr::Controller::Instance().HasGrad();

  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, &p_autograd_Operands);

  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("einsum", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs["Out"][0], &Out);
  std::vector<paddle::experimental::Tensor> InnerCache;
  egr::EagerUtils::GetOutputs(outs["InnerCache"], &InnerCache);
  std::vector<paddle::experimental::Tensor> XShape;
  egr::EagerUtils::GetOutputs(outs["XShape"], &XShape);

  {
    paddle::platform::RecordEvent node_creation_record_event("einsum node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* p_autograd_Out = egr::EagerUtils::autograd_meta(&Out);
    std::vector<egr::AutogradMeta*> p_autograd_InnerCache = egr::EagerUtils::autograd_meta(&InnerCache);
    std::vector<egr::AutogradMeta*> p_autograd_XShape = egr::EagerUtils::autograd_meta(&XShape);
    if(require_any_grad) {
      VLOG(6) << " Construct Grad for einsum "; 
      egr::EagerUtils::PassStopGradient(false, p_autograd_Out, &p_autograd_InnerCache, &p_autograd_XShape);
      // Create GradOpNode
      auto grad_node = std::shared_ptr<einsumGradNodeCompat>(new einsumGradNodeCompat(3, 1));

      // Set Attributes
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers
      grad_node->SetTensorWrapperInnerCache(InnerCache);
      grad_node->SetTensorWrapperXShape(XShape);

      grad_node->SetGradOutMeta(Operands, 0);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 0);
      egr::EagerUtils::SetHistory(p_autograd_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
      egr::EagerUtils::SetOutRankWithSlot(&p_autograd_InnerCache, 1);
      grad_node->SetGradInMeta(InnerCache, 1);
      egr::EagerUtils::SetOutRankWithSlot(&p_autograd_XShape, 2);
      grad_node->SetGradInMeta(XShape, 2);

    }
  }

  return std::make_tuple(Out,InnerCache,XShape);

}


// Eager (dygraph) forward entry point for the `frobenius_norm` op.
//
//   X         input tensor, passed under the slot "X".
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns the tensor read back from the "Out" slot. When ComputeRequireGrad
// reports that a gradient is needed, a frobenius_normGradNodeCompat is created
// and set as the history of Out.
paddle::experimental::Tensor frobenius_norm_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = frobenius_normGradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "frobenius_norm dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: frobenius_norm";

  // AMP branch: cast the input to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("frobenius_norm", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "frobenius_norm");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return frobenius_norm_dygraph_function(casted_X, attr_map);
  }

  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);

  EagerVarMap op_outputs;
  op_outputs["Out"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Autograd bookkeeping for the input.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "frobenius_norm",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event(
        "frobenius_norm node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for frobenius_norm ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 1) are presumably the grad-in / grad-out
      // slot counts.
      std::shared_ptr<GradNode> node(new GradNode(1, 1));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs.
      node->SetTensorWrapperOut(Out);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out (gets the node as its history).
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the `crop` op.
//
//   X         input tensor, always passed under the slot "X".
//   Y         optional input; only forwarded when Y.initialized() is true.
//   Offsets   optional input; only forwarded when Offsets.initialized() is true.
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns the tensor read back from the "Out" slot. When ComputeRequireGrad
// reports that a gradient is needed, a cropGradNodeCompat is created and set
// as the history of Out.
paddle::experimental::Tensor crop_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::experimental::Tensor& Offsets,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = cropGradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "crop dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: crop";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch). Optional
  // inputs take part only when they are initialized.
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X} };
    if (Y.initialized()) {
      amp_inputs.push_back({ Y });
    }
    if (Offsets.initialized()) {
      amp_inputs.push_back({ Offsets });
    }

    auto cast_dtype = egr::GetAmpDestDtype("crop", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "crop");
    auto casted_Y =
        Y.initialized() ? egr::AmpAutoCast("Y", Y, cast_dtype, "crop") : Y;
    auto casted_Offsets =
        Offsets.initialized()
            ? egr::AmpAutoCast("Offsets", Offsets, cast_dtype, "crop")
            : Offsets;

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return crop_dygraph_function(casted_X, casted_Y, casted_Offsets, attr_map);
  }

  // Op inputs; the optional slots are only present when initialized.
  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);
  if (Y.initialized()) {
    op_inputs["Y"] = egr::EagerUtils::TrySyncToVars(Y);
  }
  if (Offsets.initialized()) {
    op_inputs["Offsets"] = egr::EagerUtils::TrySyncToVars(Offsets);
  }

  EagerVarMap op_outputs;
  op_outputs["Out"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Autograd bookkeeping for the inputs.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  egr::AutogradMeta* offsets_meta =
      egr::EagerUtils::nullable_autograd_meta(Offsets);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      grad_enabled, x_meta, y_meta, offsets_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "crop",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event(
        "crop node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for crop ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 3) are presumably the grad-in / grad-out
      // slot counts.
      std::shared_ptr<GradNode> node(new GradNode(1, 3));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs. Offsets is wrapped unconditionally,
      // even though it is treated as optional above.
      node->SetTensorWrapperOffsets(Offsets);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out (gets the node as its history).
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the `cross_entropy2` op.
//
//   X, Label  input tensors, passed under the slots "X" and "Label".
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns (Y, XShape, MatchX) as read back from the traced op outputs. When
// ComputeRequireGrad reports that a gradient is needed, a
// cross_entropy2GradNodeCompat is created and set as the history of all three
// outputs.
std::tuple<paddle::experimental::Tensor,
           paddle::experimental::Tensor,
           paddle::experimental::Tensor>
cross_entropy2_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Label,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = cross_entropy2GradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "cross_entropy2 dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: cross_entropy2";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X}, {Label} };
    auto cast_dtype = egr::GetAmpDestDtype("cross_entropy2", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "cross_entropy2");
    auto casted_Label =
        egr::AmpAutoCast("Label", Label, cast_dtype, "cross_entropy2");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return cross_entropy2_dygraph_function(casted_X, casted_Label, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);
  op_inputs["Label"] = egr::EagerUtils::TrySyncToVars(Label);

  // One freshly named output variable per output slot: Y, XShape, MatchX.
  EagerVarMap op_outputs;
  op_outputs["Y"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};
  op_outputs["XShape"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};
  op_outputs["MatchX"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Autograd bookkeeping for the inputs.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* label_meta =
      egr::EagerUtils::nullable_autograd_meta(Label);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, label_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "cross_entropy2",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Y;
  egr::EagerUtils::GetOutput(op_outputs["Y"].front(), &Y);
  paddle::experimental::Tensor XShape;
  egr::EagerUtils::GetOutput(op_outputs["XShape"].front(), &XShape);
  paddle::experimental::Tensor MatchX;
  egr::EagerUtils::GetOutput(op_outputs["MatchX"].front(), &MatchX);

  {
    paddle::platform::RecordEvent node_event(
        "cross_entropy2 node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    egr::AutogradMeta* y_out_meta = egr::EagerUtils::autograd_meta(&Y);
    egr::AutogradMeta* xshape_meta = egr::EagerUtils::autograd_meta(&XShape);
    egr::AutogradMeta* matchx_meta = egr::EagerUtils::autograd_meta(&MatchX);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for cross_entropy2 ";
      egr::EagerUtils::PassStopGradient(
          false, y_out_meta, xshape_meta, matchx_meta);

      // Constructor arguments (3, 2) are presumably the grad-in / grad-out
      // slot counts.
      std::shared_ptr<GradNode> node(new GradNode(3, 2));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs.
      node->SetTensorWrapperLabel(Label);
      node->SetTensorWrapperMatchX(MatchX);
      node->SetTensorWrapperXShape(XShape);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Y.
      egr::EagerUtils::SetOutRankWithSlot(y_out_meta, 0);
      egr::EagerUtils::SetHistory(y_out_meta, node);
      node->SetGradInMeta(Y, 0);
      egr::EagerUtils::CheckAndRetainGrad(Y);

      // Slot 1: XShape.
      egr::EagerUtils::SetOutRankWithSlot(xshape_meta, 1);
      egr::EagerUtils::SetHistory(xshape_meta, node);
      node->SetGradInMeta(XShape, 1);
      egr::EagerUtils::CheckAndRetainGrad(XShape);

      // Slot 2: MatchX.
      egr::EagerUtils::SetOutRankWithSlot(matchx_meta, 2);
      egr::EagerUtils::SetHistory(matchx_meta, node);
      node->SetGradInMeta(MatchX, 2);
      egr::EagerUtils::CheckAndRetainGrad(MatchX);
    }
  }

  return std::make_tuple(Y, XShape, MatchX);
}


// Eager (dygraph) forward entry point for the `skip_layernorm` op.
//
//   X, Y, Scale, Bias  input tensors, passed under the slots of the same name.
//   attr_map           op attributes; a local copy is handed to the tracer.
//
// Returns the tensor read back from the "Out" slot. This function does no
// autograd bookkeeping: no grad node is created here.
paddle::experimental::Tensor skip_layernorm_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::experimental::Tensor& Scale,
    const paddle::experimental::Tensor& Bias,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "skip_layernorm dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: skip_layernorm";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X}, {Y}, {Scale}, {Bias} };
    auto cast_dtype = egr::GetAmpDestDtype("skip_layernorm", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "skip_layernorm");
    auto casted_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "skip_layernorm");
    auto casted_Scale =
        egr::AmpAutoCast("Scale", Scale, cast_dtype, "skip_layernorm");
    auto casted_Bias =
        egr::AmpAutoCast("Bias", Bias, cast_dtype, "skip_layernorm");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return skip_layernorm_dygraph_function(
        casted_X, casted_Y, casted_Scale, casted_Bias, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);
  op_inputs["Y"] = egr::EagerUtils::TrySyncToVars(Y);
  op_inputs["Scale"] = egr::EagerUtils::TrySyncToVars(Scale);
  op_inputs["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);

  EagerVarMap op_outputs;
  op_outputs["Out"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "skip_layernorm",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  return Out;
}


// Eager (dygraph) forward entry point for the `tdm_child` op.
//
//   X, TreeInfo  input tensors, passed under the slots "X" and "TreeInfo".
//   attr_map     op attributes; a local copy is handed to the tracer.
//
// Returns (Child, LeafMask) as read back from the traced op outputs. This
// function does no autograd bookkeeping: no grad node is created here.
std::tuple<paddle::experimental::Tensor, paddle::experimental::Tensor>
tdm_child_dygraph_function(const paddle::experimental::Tensor& X,
                           const paddle::experimental::Tensor& TreeInfo,
                           const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "tdm_child dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: tdm_child";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X}, {TreeInfo} };
    auto cast_dtype = egr::GetAmpDestDtype("tdm_child", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "tdm_child");
    auto casted_TreeInfo =
        egr::AmpAutoCast("TreeInfo", TreeInfo, cast_dtype, "tdm_child");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return tdm_child_dygraph_function(casted_X, casted_TreeInfo, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["X"] = egr::EagerUtils::TrySyncToVars(X);
  op_inputs["TreeInfo"] = egr::EagerUtils::TrySyncToVars(TreeInfo);

  // One freshly named output variable per output slot.
  EagerVarMap op_outputs;
  op_outputs["Child"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};
  op_outputs["LeafMask"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "tdm_child",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Child;
  egr::EagerUtils::GetOutput(op_outputs["Child"].front(), &Child);
  paddle::experimental::Tensor LeafMask;
  egr::EagerUtils::GetOutput(op_outputs["LeafMask"].front(), &LeafMask);

  return std::make_tuple(Child, LeafMask);
}


// Eager (dygraph) forward entry point for the `fused_embedding_seq_pool` op.
//
//   W, Ids    input tensors, passed under the slots "W" and "Ids".
//   attr_map  op attributes; a local copy is handed to the tracer.
//
// Returns the tensor read back from the "Out" slot. When ComputeRequireGrad
// reports that a gradient is needed, a fused_embedding_seq_poolGradNodeCompat
// is created and set as the history of Out.
paddle::experimental::Tensor fused_embedding_seq_pool_dygraph_function(
    const paddle::experimental::Tensor& W,
    const paddle::experimental::Tensor& Ids,
    const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;
  using GradNode = fused_embedding_seq_poolGradNodeCompat;

  paddle::platform::RecordEvent entrance_event(
      "fused_embedding_seq_pool dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: fused_embedding_seq_pool";

  // AMP branch: cast the inputs to the dtype picked by GetAmpDestDtype, then
  // call this function again while an AutoCastGuard built with AmpLevel::O0
  // is alive (presumably so the nested call skips this branch).
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {W}, {Ids} };
    auto cast_dtype =
        egr::GetAmpDestDtype("fused_embedding_seq_pool", amp_inputs);

    auto casted_W =
        egr::AmpAutoCast("W", W, cast_dtype, "fused_embedding_seq_pool");
    auto casted_Ids =
        egr::AmpAutoCast("Ids", Ids, cast_dtype, "fused_embedding_seq_pool");

    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(),
        paddle::imperative::AmpLevel::O0);
    return fused_embedding_seq_pool_dygraph_function(
        casted_W, casted_Ids, attr_map);
  }

  // Op inputs, filled in slot order.
  EagerVarMap op_inputs;
  op_inputs["W"] = egr::EagerUtils::TrySyncToVars(W);
  op_inputs["Ids"] = egr::EagerUtils::TrySyncToVars(Ids);

  EagerVarMap op_outputs;
  op_outputs["Out"] = {std::make_shared<egr::EagerVariable>(
      egr::Controller::Instance().GenerateUniqueName())};

  // Autograd bookkeeping for the inputs.
  egr::AutogradMeta* w_meta = egr::EagerUtils::nullable_autograd_meta(W);
  egr::AutogradMeta* ids_meta = egr::EagerUtils::nullable_autograd_meta(Ids);
  const bool grad_enabled = egr::Controller::Instance().HasGrad();
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(grad_enabled, w_meta, ids_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fused_embedding_seq_pool",
      op_inputs,
      op_outputs,
      op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event(
        "fused_embedding_seq_pool node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for fused_embedding_seq_pool ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 2) are presumably the grad-in / grad-out
      // slot counts.
      std::shared_ptr<GradNode> node(new GradNode(1, 2));

      // Attributes are moved into the node; the local maps are not used again.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward pass needs.
      node->SetTensorWrapperIds(Ids);
      node->SetTensorWrapperW(W);

      node->SetGradOutMeta(W, 0);

      // Slot 0: Out (gets the node as its history).
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "kthvalue" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : tuple (Out, Indices) read back from the traced op's output slots.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCast with
// the dtype chosen by egr::GetAmpDestDtype, and the function calls itself again
// while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// If ComputeRequireGrad reports true, a kthvalueGradNodeCompat is created and
// set as the history of both outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> kthvalue_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "kthvalue dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: kthvalue";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("kthvalue", amp_inputs);
    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "kthvalue");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return kthvalue_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} },
      { "Indices", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "kthvalue", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);
  paddle::experimental::Tensor Indices;
  egr::EagerUtils::GetOutput(op_outputs["Indices"].front(), &Indices);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "kthvalue node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Output metas are fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* indices_meta = egr::EagerUtils::autograd_meta(&Indices);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for kthvalue "; 
      egr::EagerUtils::PassStopGradient(false, out_meta, indices_meta);

      std::shared_ptr<kthvalueGradNodeCompat> grad_node(new kthvalueGradNodeCompat(2, 1));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      grad_node->SetTensorWrapperIndices(Indices);
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Output "Indices" -> slot 1.
      egr::EagerUtils::SetOutRankWithSlot(indices_meta, 1);
      egr::EagerUtils::SetHistory(indices_meta, grad_node);
      grad_node->SetGradInMeta(Indices, 1);
      egr::EagerUtils::CheckAndRetainGrad(Indices);
    }
  }

  return std::make_tuple(Out, Indices);
}


// Eager (dygraph) forward wrapper for the "erf" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCast and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. If ComputeRequireGrad reports true, an
// erfGradNodeCompat is created and set as the history of Out.
paddle::experimental::Tensor erf_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "erf dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: erf";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("erf", amp_inputs);
    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "erf");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return erf_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "erf", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "erf node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for erf "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<erfGradNodeCompat> grad_node(new erfGradNodeCompat(1, 1));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor saved on the node.
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "yolo_box_post" operator.
//
//   Boxes0, Boxes1, Boxes2, ImageShape, ImageScale : input tensors, each passed
//       to the op under the key of the same name.
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : tuple (Out, NmsRoisNum) read back from the op's output slots.
//
// If the controller's AMP level is not O0, every input goes through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. This wrapper builds no grad node.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> yolo_box_post_dygraph_function(const paddle::experimental::Tensor& Boxes0,const paddle::experimental::Tensor& Boxes1,const paddle::experimental::Tensor& Boxes2,const paddle::experimental::Tensor& ImageShape,const paddle::experimental::Tensor& ImageScale, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "yolo_box_post dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: yolo_box_post";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {Boxes0},{Boxes1},{Boxes2},{ImageShape},{ImageScale} };
    auto cast_dtype = egr::GetAmpDestDtype("yolo_box_post", amp_inputs);

    auto cast_Boxes0 = egr::AmpAutoCast("Boxes0", Boxes0, cast_dtype, "yolo_box_post");
    auto cast_Boxes1 = egr::AmpAutoCast("Boxes1", Boxes1, cast_dtype, "yolo_box_post");
    auto cast_Boxes2 = egr::AmpAutoCast("Boxes2", Boxes2, cast_dtype, "yolo_box_post");
    auto cast_ImageShape = egr::AmpAutoCast("ImageShape", ImageShape, cast_dtype, "yolo_box_post");
    auto cast_ImageScale = egr::AmpAutoCast("ImageScale", ImageScale, cast_dtype, "yolo_box_post");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return yolo_box_post_dygraph_function(
        cast_Boxes0, cast_Boxes1, cast_Boxes2, cast_ImageShape, cast_ImageScale, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = {
      { "Boxes0", egr::EagerUtils::TrySyncToVars(Boxes0) },
      { "Boxes1", egr::EagerUtils::TrySyncToVars(Boxes1) },
      { "Boxes2", egr::EagerUtils::TrySyncToVars(Boxes2) },
      { "ImageShape", egr::EagerUtils::TrySyncToVars(ImageShape) },
      { "ImageScale", egr::EagerUtils::TrySyncToVars(ImageScale) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} },
      { "NmsRoisNum", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "yolo_box_post", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);
  paddle::experimental::Tensor NmsRoisNum;
  egr::EagerUtils::GetOutput(op_outputs["NmsRoisNum"].front(), &NmsRoisNum);

  return std::make_tuple(Out, NmsRoisNum);
}


// Eager (dygraph) forward wrapper for the "logsumexp" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCast and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. If ComputeRequireGrad reports true, a
// logsumexpGradNodeCompat (which keeps both Out and X) is created and set as
// the history of Out.
paddle::experimental::Tensor logsumexp_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "logsumexp dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: logsumexp";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("logsumexp", amp_inputs);
    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "logsumexp");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return logsumexp_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "logsumexp", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "logsumexp node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for logsumexp "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<logsumexpGradNodeCompat> grad_node(new logsumexpGradNodeCompat(1, 1));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      grad_node->SetTensorWrapperOut(Out);
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "trilinear_interp" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   OutSize  : optional input; it is only added to the op inputs (and to the
//              AMP dtype decision) when OutSize.initialized() is true.
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, the inputs go through
// egr::AmpAutoCast (OutSize only when initialized) and the function calls
// itself again while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// If ComputeRequireGrad reports true, a trilinear_interpGradNodeCompat is
// created and set as the history of Out.
paddle::experimental::Tensor trilinear_interp_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& OutSize, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "trilinear_interp dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: trilinear_interp";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    if (OutSize.initialized()) {
      amp_inputs.push_back({ OutSize });
    }

    auto cast_dtype = egr::GetAmpDestDtype("trilinear_interp", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "trilinear_interp");
    // An uninitialized OutSize is passed on without a cast.
    auto cast_OutSize = OutSize.initialized()
        ? egr::AmpAutoCast("OutSize", OutSize, cast_dtype, "trilinear_interp")
        : OutSize;

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return trilinear_interp_dygraph_function(cast_X, cast_OutSize, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  if (OutSize.initialized()) {
    op_inputs["OutSize"] = egr::EagerUtils::TrySyncToVars(OutSize);
  }

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* out_size_meta = egr::EagerUtils::nullable_autograd_meta(OutSize);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, out_size_meta);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "trilinear_interp", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "trilinear_interp node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for trilinear_interp "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<trilinear_interpGradNodeCompat> grad_node(new trilinear_interpGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      grad_node->SetTensorWrapperOutSize(OutSize);
      grad_node->SetTensorWrapperX(X);

      // Only X gets grad-out meta here (slot 0).
      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "fusion_seqpool_concat" operator.
//
//   X        : list of input tensors, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCasts and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. This wrapper builds no grad node.
paddle::experimental::Tensor fusion_seqpool_concat_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "fusion_seqpool_concat dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fusion_seqpool_concat";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { X };
    auto cast_dtype = egr::GetAmpDestDtype("fusion_seqpool_concat", amp_inputs);
    auto cast_X = egr::AmpAutoCasts("X", X, cast_dtype, "fusion_seqpool_concat");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fusion_seqpool_concat_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fusion_seqpool_concat", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  return Out;
}


// Eager (dygraph) forward wrapper for the "alloc_float_status" operator.
//
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "FloatStatus" slot.
//
// The op takes no tensor inputs. If the controller's AMP level is not O0, the
// function just calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. This wrapper builds no grad node.
paddle::experimental::Tensor alloc_float_status_dygraph_function( const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "alloc_float_status dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: alloc_float_status";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling (nothing to cast) ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return alloc_float_status_dygraph_function(attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = {  };

  EagerVarMap op_outputs = {
      { "FloatStatus", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "alloc_float_status", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor FloatStatus;
  egr::EagerUtils::GetOutput(op_outputs["FloatStatus"].front(), &FloatStatus);

  return FloatStatus;
}


// Eager (dygraph) forward wrapper for the "sequence_concat" operator.
//
//   X        : list of input tensors, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCasts and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. If ComputeRequireGrad reports true, a
// sequence_concatGradNodeCompat is created and set as the history of Out.
paddle::experimental::Tensor sequence_concat_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "sequence_concat dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sequence_concat";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { X };
    auto cast_dtype = egr::GetAmpDestDtype("sequence_concat", amp_inputs);
    auto cast_X = egr::AmpAutoCasts("X", X, cast_dtype, "sequence_concat");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sequence_concat_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs (one meta per tensor in X) ----
  std::vector<egr::AutogradMeta*> x_metas = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, &x_metas);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "sequence_concat", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "sequence_concat node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for sequence_concat "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<sequence_concatGradNodeCompat> grad_node(new sequence_concatGradNodeCompat(1, 1));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "fusion_seqpool_cvm_concat" operator.
//
//   X        : list of input tensors, passed to the op under the key "X".
//   CVM      : single input tensor, passed to the op under the key "CVM".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCasts and
// CVM through egr::AmpAutoCast, and the function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. This wrapper builds no
// grad node.
paddle::experimental::Tensor fusion_seqpool_cvm_concat_dygraph_function(const std::vector<paddle::experimental::Tensor>& X,const paddle::experimental::Tensor& CVM, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "fusion_seqpool_cvm_concat dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fusion_seqpool_cvm_concat";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { X,{CVM} };
    auto cast_dtype = egr::GetAmpDestDtype("fusion_seqpool_cvm_concat", amp_inputs);

    auto cast_X = egr::AmpAutoCasts("X", X, cast_dtype, "fusion_seqpool_cvm_concat");
    auto cast_CVM = egr::AmpAutoCast("CVM", CVM, cast_dtype, "fusion_seqpool_cvm_concat");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fusion_seqpool_cvm_concat_dygraph_function(cast_X, cast_CVM, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "CVM", egr::EagerUtils::TrySyncToVars(CVM) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fusion_seqpool_cvm_concat", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  return Out;
}


// Eager (dygraph) forward wrapper for the "unpool3d" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   Indices  : input tensor, passed to the op under the key "Indices".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, both inputs go through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. If ComputeRequireGrad reports true,
// an unpool3dGradNodeCompat (which keeps Indices, Out and X) is created and set
// as the history of Out.
paddle::experimental::Tensor unpool3d_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Indices, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "unpool3d dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: unpool3d";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X},{Indices} };
    auto cast_dtype = egr::GetAmpDestDtype("unpool3d", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "unpool3d");
    auto cast_Indices = egr::AmpAutoCast("Indices", Indices, cast_dtype, "unpool3d");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return unpool3d_dygraph_function(cast_X, cast_Indices, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Indices", egr::EagerUtils::TrySyncToVars(Indices) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* indices_in_meta = egr::EagerUtils::nullable_autograd_meta(Indices);
  bool grad_enabled = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, indices_in_meta);

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "unpool3d", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  // ---- Backward node creation (own profiler scope) ----
  {
    paddle::platform::RecordEvent node_creation_record_event(
        "unpool3d node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched unconditionally, as in the generated layout.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for unpool3d "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<unpool3dGradNodeCompat> grad_node(new unpool3dGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them over.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      grad_node->SetTensorWrapperIndices(Indices);
      grad_node->SetTensorWrapperOut(Out);
      grad_node->SetTensorWrapperX(X);

      // Only X gets grad-out meta here (slot 0).
      grad_node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward wrapper for the "similarity_focus" operator.
//
//   X        : input tensor, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCast and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. This wrapper builds no grad node.
paddle::experimental::Tensor similarity_focus_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "similarity_focus dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: similarity_focus";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("similarity_focus", amp_inputs);
    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "similarity_focus");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return similarity_focus_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "similarity_focus", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  return Out;
}


// Eager (dygraph) forward wrapper for the "c_allreduce_max" operator
// (out-of-place form: a new output variable is allocated).
//
//   X        : input tensor, passed to the op under the key "X".
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : the tensor read back from the op's "Out" slot.
//
// If the controller's AMP level is not O0, X goes through egr::AmpAutoCast and
// the function calls itself again while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. This wrapper builds no grad node.
paddle::experimental::Tensor c_allreduce_max_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;
  using AmpInputSlots = paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "c_allreduce_max dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: c_allreduce_max";

  auto& controller = egr::Controller::Instance();

  // ---- AMP handling ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    AmpInputSlots amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("c_allreduce_max", amp_inputs);
    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "c_allreduce_max");

    // The guard stays alive for the whole recursive call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return c_allreduce_max_dygraph_function(cast_X, attr_map);
  }

  // ---- Op inputs / outputs ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  EagerVarMap op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "c_allreduce_max", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &Out);

  return Out;
}


// Eager (dygraph) forward wrapper for the in-place form of "c_allreduce_max".
//
//   X        : tensor used as both input and output. The "Out" slot reuses the
//              variables synced from X, and the result is written back into X.
//   attr_map : operator attributes; copied locally before being given to TraceOp.
//   returns  : X (by value) after the op has run.
//
// TraceOp is called with the mapping {"X" -> "Out"} as its last argument.
// Afterwards X's inplace version is bumped. This variant has no AMP branch and
// builds no grad node.
paddle::experimental::Tensor c_allreduce_max__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "c_allreduce_max dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: c_allreduce_max";

  auto& controller = egr::Controller::Instance();

  // ---- Op inputs / outputs (the output shares the input variables) ----
  EagerVarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  EagerVarMap op_outputs = { { "Out", op_inputs["X"] } };

  // ---- Run the op through the tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "c_allreduce_max", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {{"X", "Out"}});

  // ---- Write the result back into X and record the in-place change ----
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  return X;
}


// Eager (dygraph) forward entry point for the "argsort" op.
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   (Out, Indices), read back from the "Out" and "Indices" output slots once
//   the op has been traced.
//
// When the controller's AMP level is not O0, X is first passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> argsort_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("argsort dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: argsort";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("argsort", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "argsort");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return argsort_dygraph_function(cast_x, attr_map);
  }

  // Op inputs, followed by freshly named placeholders for both outputs.
  // "Out" is named before "Indices", matching the original ordering.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  VarMap output_vars;
  output_vars["Out"] = { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) };
  output_vars["Indices"] = { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("argsort", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);
  Tensor Indices;
  egr::EagerUtils::GetOutput(output_vars["Indices"].front(), &Indices);

  {
    paddle::platform::RecordEvent node_event("argsort node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output metas are fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* indices_meta = egr::EagerUtils::autograd_meta(&Indices);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for argsort ";
      egr::EagerUtils::PassStopGradient(false, out_meta, indices_meta);

      // Constructor arguments (2, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<argsortGradNodeCompat>(2, 1);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward node keeps.
      node->SetTensorWrapperIndices(Indices);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Link output "Out" to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Link output "Indices" to the node at slot 1.
      egr::EagerUtils::SetOutRankWithSlot(indices_meta, 1);
      egr::EagerUtils::SetHistory(indices_meta, node);
      node->SetGradInMeta(Indices, 1);
      egr::EagerUtils::CheckAndRetainGrad(Indices);
    }
  }

  return std::make_tuple(Out, Indices);
}


// Eager (dygraph) forward entry point for the "sequence_expand" op.
//
// Parameters:
//   X, Y     - tensors bound to the op's "X" and "Y" input slots.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Out" output slot after tracing.
//
// When the controller's AMP level is not O0, both inputs are passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor sequence_expand_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("sequence_expand dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sequence_expand";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto target_dtype = egr::GetAmpDestDtype("sequence_expand", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "sequence_expand");
    auto cast_y = egr::AmpAutoCast("Y", Y, target_dtype, "sequence_expand");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sequence_expand_dygraph_function(cast_x, cast_y, attr_map);
  }

  // Op inputs and a freshly named placeholder for the output.
  VarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Y", egr::EagerUtils::TrySyncToVars(Y) } };
  VarMap output_vars = {
      { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, y_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("sequence_expand", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event("sequence_expand node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for sequence_expand ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 2) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<sequence_expandGradNodeCompat>(1, 2);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward node keeps.
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(Y);

      // Only X gets grad-out meta, as in the original.
      node->SetGradOutMeta(X, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the "sgd" op.
//
// Parameters:
//   Param, LearningRate, Grad - tensors bound to the op's input slots of the
//                               same names.
//   MasterParam               - optional input; only forwarded to the op when
//                               MasterParam.initialized() is true.
//   ParamOutVar               - receives the "ParamOut" result. It is
//                               dereferenced unconditionally, so it must be
//                               non-null.
//   MasterParamOutVar         - receives the "MasterParamOut" result when that
//                               slot exists. May be null.
//   attr_map                  - op attributes; a copy is handed to the tracer.
//
// Returns:
//   (ParamOut, MasterParamOut) as copies of *ParamOutVar and
//   *MasterParamOutVar. If MasterParamOutVar is null, the second element is a
//   default-constructed tensor.
//
// The "ParamOut" slot reuses the variables synced from Param, and
// "MasterParamOut" reuses those synced from MasterParam when present. No
// gradient node is created by this function.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> sgd_dygraph_function(const paddle::experimental::Tensor& Param,const paddle::experimental::Tensor& LearningRate,const paddle::experimental::Tensor& Grad,const paddle::experimental::Tensor& MasterParam, paddle::experimental::Tensor* ParamOutVar, paddle::experimental::Tensor* MasterParamOutVar, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("sgd dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sgd";
  // Dygraph Forward Pass

  // AMP path: cast the inputs, then re-enter while an AutoCastGuard
  // constructed with AmpLevel::O0 is alive.
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
  
    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = { {Param},{LearningRate},{Grad} };
    // The optional input only takes part in dtype selection when it is set.
    if(MasterParam.initialized()) amp_tensors_vector.push_back({ MasterParam });

    auto amp_dst_dtype = egr::GetAmpDestDtype("sgd", amp_tensors_vector);

    auto NEW_Param = egr::AmpAutoCast("Param", Param, amp_dst_dtype, "sgd");
    auto NEW_LearningRate = egr::AmpAutoCast("LearningRate", LearningRate, amp_dst_dtype, "sgd");
    auto NEW_Grad = egr::AmpAutoCast("Grad", Grad, amp_dst_dtype, "sgd");
    // An uninitialized MasterParam is passed through unchanged.
    auto NEW_MasterParam = ((MasterParam.initialized()) ? egr::AmpAutoCast("MasterParam", MasterParam, amp_dst_dtype, "sgd") : MasterParam);

    {
      paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return sgd_dygraph_function( NEW_Param, NEW_LearningRate, NEW_Grad, NEW_MasterParam, ParamOutVar, MasterParamOutVar, attr_map );
    }

  }

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "Param", egr::EagerUtils::TrySyncToVars(Param) },{ "LearningRate", egr::EagerUtils::TrySyncToVars(LearningRate) },{ "Grad", egr::EagerUtils::TrySyncToVars(Grad) } };
  if(MasterParam.initialized()) ins["MasterParam"] = egr::EagerUtils::TrySyncToVars(MasterParam);

  // Outputs alias the corresponding input variables.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "ParamOut", ins["Param"] } };

  if (ins.count("MasterParam")) outs["MasterParamOut"] = ins["MasterParam"];

  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("sgd", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {});

  egr::EagerUtils::GetOutput(outs["ParamOut"][0], ParamOutVar);
  paddle::experimental::Tensor& ParamOut = *ParamOutVar;
  if (outs.count("MasterParamOut"))  egr::EagerUtils::GetOutput(outs["MasterParamOut"][0], MasterParamOutVar);

  // MasterParamOut is optional, so the caller may have passed a null pointer.
  // Only copy through the pointer when it is non-null; dereferencing a null
  // pointer here would be undefined behaviour.
  paddle::experimental::Tensor MasterParamOut =
      (MasterParamOutVar != nullptr) ? *MasterParamOutVar : paddle::experimental::Tensor();

  return std::make_tuple(ParamOut,MasterParamOut);

}


// Eager (dygraph) forward entry point for the "exponential" op
// (out-of-place variant).
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Out" output slot after tracing.
//
// When the controller's AMP level is not O0, X is passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor exponential_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("exponential dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: exponential";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("exponential", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "exponential");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return exponential_dygraph_function(cast_x, attr_map);
  }

  // Op input and a freshly named placeholder for the output.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = {
      { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("exponential", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event("exponential node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for exponential ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<exponentialGradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      // This op registers no tensor wrappers on the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetGradOutMeta(X, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the "exponential" op
// (inplace variant).
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot; it also receives the
//              result.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   A copy of X after the op result has been written back into it.
//
// The "Out" slot reuses the variables synced from X and the op is traced with
// the inplace map {"X" -> "Out"}. This variant has no AMP branch.
paddle::experimental::Tensor exponential__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("exponential dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: exponential";

  auto& controller = egr::Controller::Instance();

  // The output slot aliases the input variables.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = { { "Out", input_vars["X"] } };

  // Decide whether a backward node has to be recorded, then run the inplace
  // check on X with that decision.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);
  egr::EagerUtils::CheckInplace(X, x_meta, needs_grad);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("exponential", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {{"X", "Out"}});

  // Write the result back into X and bump its inplace version.
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_event("exponential node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch X's meta after the write-back; done whether or not a grad node
    // is built.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for exponential ";
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<exponentialGradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      // This op registers no tensor wrappers on the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetGradOutMeta(X, 0);

      // X is both the op input and its output, so it is linked at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, node);
      node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager (dygraph) forward entry point for the "bilinear_interp_v2" op.
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Out" output slot after tracing.
//
// When the controller's AMP level is not O0, X is passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor bilinear_interp_v2_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("bilinear_interp_v2 dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: bilinear_interp_v2";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("bilinear_interp_v2", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "bilinear_interp_v2");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return bilinear_interp_v2_dygraph_function(cast_x, attr_map);
  }

  // Op input and a freshly named placeholder for the output.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = {
      { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("bilinear_interp_v2", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event("bilinear_interp_v2 node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for bilinear_interp_v2 ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<bilinear_interp_v2GradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor the backward node keeps.
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the "atanh" op.
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Out" output slot after tracing.
//
// When the controller's AMP level is not O0, X is passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor atanh_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("atanh dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: atanh";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("atanh", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "atanh");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return atanh_dygraph_function(cast_x, attr_map);
  }

  // Op input and a freshly named placeholder for the output.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = {
      { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("atanh", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event("atanh node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for atanh ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<atanhGradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor the backward node keeps.
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the "clip" op
// (out-of-place variant).
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Out" output slot after tracing.
//
// When the controller's AMP level is not O0, X is passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor clip_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("clip dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: clip";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("clip", amp_inputs);

    auto cast_x = egr::AmpAutoCast("X", X, target_dtype, "clip");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return clip_dygraph_function(cast_x, attr_map);
  }

  // Op input and a freshly named placeholder for the output.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = {
      { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("clip", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &Out);

  {
    paddle::platform::RecordEvent node_event("clip node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for clip ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<clipGradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor the backward node keeps.
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager (dygraph) forward entry point for the "clip" op (inplace variant).
//
// Parameters:
//   X        - tensor bound to the op's "X" input slot; it also receives the
//              result.
//   attr_map - op attributes; a copy is handed to the tracer.
//
// Returns:
//   A copy of X after the op result has been written back into it.
//
// The "Out" slot reuses the variables synced from X and the op is traced with
// the inplace map {"X" -> "Out"}. This variant has no AMP branch.
paddle::experimental::Tensor clip__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("clip dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: clip";

  auto& controller = egr::Controller::Instance();

  // The output slot aliases the input variables.
  VarMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap output_vars = { { "Out", input_vars["X"] } };

  // Decide whether a backward node has to be recorded, then run the inplace
  // check on X with that decision.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);
  egr::EagerUtils::CheckInplace(X, x_meta, needs_grad);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("clip", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {{"X", "Out"}});

  // Write the result back into X and bump its inplace version.
  egr::EagerUtils::GetOutput(output_vars["Out"].front(), &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_event("clip node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch X's meta after the write-back; done whether or not a grad node
    // is built.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for clip ";
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Constructor arguments (1, 1) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<clipGradNodeCompat>(1, 1);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor the backward node keeps (X after the inplace write-back).
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // X is both the op input and its output, so it is linked at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, node);
      node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager (dygraph) forward entry point for the "deformable_conv_v1" op.
//
// Parameters:
//   Input, Offset, Filter - tensors bound to the op's input slots of the same
//                           names.
//   attr_map              - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Output" output slot after tracing.
//
// When the controller's AMP level is not O0, all three inputs are passed
// through egr::AmpAutoCast and the function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor deformable_conv_v1_dygraph_function(const paddle::experimental::Tensor& Input,const paddle::experimental::Tensor& Offset,const paddle::experimental::Tensor& Filter, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("deformable_conv_v1 dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: deformable_conv_v1";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input},{Offset},{Filter} };
    auto target_dtype = egr::GetAmpDestDtype("deformable_conv_v1", amp_inputs);

    auto cast_input = egr::AmpAutoCast("Input", Input, target_dtype, "deformable_conv_v1");
    auto cast_offset = egr::AmpAutoCast("Offset", Offset, target_dtype, "deformable_conv_v1");
    auto cast_filter = egr::AmpAutoCast("Filter", Filter, target_dtype, "deformable_conv_v1");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return deformable_conv_v1_dygraph_function(cast_input, cast_offset, cast_filter, attr_map);
  }

  // Op inputs and a freshly named placeholder for the output.
  VarMap input_vars = {
      { "Input", egr::EagerUtils::TrySyncToVars(Input) },
      { "Offset", egr::EagerUtils::TrySyncToVars(Offset) },
      { "Filter", egr::EagerUtils::TrySyncToVars(Filter) } };
  VarMap output_vars = {
      { "Output", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  egr::AutogradMeta* offset_meta = egr::EagerUtils::nullable_autograd_meta(Offset);
  egr::AutogradMeta* filter_meta = egr::EagerUtils::nullable_autograd_meta(Filter);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, input_meta, offset_meta, filter_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("deformable_conv_v1", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Output;
  egr::EagerUtils::GetOutput(output_vars["Output"].front(), &Output);

  {
    paddle::platform::RecordEvent node_event("deformable_conv_v1 node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* output_meta = egr::EagerUtils::autograd_meta(&Output);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for deformable_conv_v1 ";
      egr::EagerUtils::PassStopGradient(false, output_meta);

      // Constructor arguments (1, 3) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<deformable_conv_v1GradNodeCompat>(1, 3);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward node keeps.
      node->SetTensorWrapperFilter(Filter);
      node->SetTensorWrapperInput(Input);
      node->SetTensorWrapperOffset(Offset);

      // One grad-out meta per forward input, at slots 0..2.
      node->SetGradOutMeta(Input, 0);
      node->SetGradOutMeta(Offset, 1);
      node->SetGradOutMeta(Filter, 2);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(output_meta, 0);
      egr::EagerUtils::SetHistory(output_meta, node);
      node->SetGradInMeta(Output, 0);
      egr::EagerUtils::CheckAndRetainGrad(Output);
    }
  }

  return Output;
}


// Eager (dygraph) forward entry point for the "hinge_loss" op.
//
// Parameters:
//   Logits, Labels - tensors bound to the op's "Logits" and "Labels" input
//                    slots.
//   attr_map       - op attributes; a copy is handed to the tracer.
//
// Returns:
//   The tensor read back from the "Loss" output slot after tracing.
//
// When the controller's AMP level is not O0, both inputs are passed through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor hinge_loss_dygraph_function(const paddle::experimental::Tensor& Logits,const paddle::experimental::Tensor& Labels, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("hinge_loss dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: hinge_loss";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard in place.
  const bool amp_active = controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Logits},{Labels} };
    auto target_dtype = egr::GetAmpDestDtype("hinge_loss", amp_inputs);

    auto cast_logits = egr::AmpAutoCast("Logits", Logits, target_dtype, "hinge_loss");
    auto cast_labels = egr::AmpAutoCast("Labels", Labels, target_dtype, "hinge_loss");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return hinge_loss_dygraph_function(cast_logits, cast_labels, attr_map);
  }

  // Op inputs and a freshly named placeholder for the output.
  VarMap input_vars = {
      { "Logits", egr::EagerUtils::TrySyncToVars(Logits) },
      { "Labels", egr::EagerUtils::TrySyncToVars(Labels) } };
  VarMap output_vars = {
      { "Loss", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Decide whether a backward node has to be recorded.
  egr::AutogradMeta* logits_meta = egr::EagerUtils::nullable_autograd_meta(Logits);
  egr::AutogradMeta* labels_meta = egr::EagerUtils::nullable_autograd_meta(Labels);
  bool grad_enabled = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, logits_meta, labels_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("hinge_loss", input_vars, output_vars, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  Tensor Loss;
  egr::EagerUtils::GetOutput(output_vars["Loss"].front(), &Loss);

  {
    paddle::platform::RecordEvent node_event("hinge_loss node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* loss_meta = egr::EagerUtils::autograd_meta(&Loss);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for hinge_loss ";
      egr::EagerUtils::PassStopGradient(false, loss_meta);

      // Constructor arguments (1, 2) are kept from the original; presumably
      // the backward node's input/output slot counts.
      auto node = std::make_shared<hinge_lossGradNodeCompat>(1, 2);

      // The attribute maps are no longer needed here, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors the backward node keeps.
      node->SetTensorWrapperLabels(Labels);
      node->SetTensorWrapperLogits(Logits);

      // Only Logits gets grad-out meta, as in the original.
      node->SetGradOutMeta(Logits, 0);

      // Link the output to the node at slot 0.
      egr::EagerUtils::SetOutRankWithSlot(loss_meta, 0);
      egr::EagerUtils::SetHistory(loss_meta, node);
      node->SetGradInMeta(Loss, 0);
      egr::EagerUtils::CheckAndRetainGrad(Loss);
    }
  }

  return Loss;
}


// Eager (dygraph) forward wrapper for the "determinant" operator.
//
// Input:    tensor passed to the op under the "Input" slot.
// attr_map: operator attributes; copied locally before being handed to TraceOp.
// Returns the tensor read back from the op's "Out" slot.
//
// When the controller's AMP level is not O0, Input is first run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor determinant_dygraph_function(
    const paddle::experimental::Tensor& Input,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call (ends when the function returns).
  paddle::platform::RecordEvent entrance_event(
      "determinant dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: determinant";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input} };
    auto cast_dtype = egr::GetAmpDestDtype("determinant", amp_inputs);
    auto casted_Input = egr::AmpAutoCast("Input", Input, cast_dtype, "determinant");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return determinant_dygraph_function(casted_Input, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "Input", egr::EagerUtils::TrySyncToVars(Input) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, input_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "determinant", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "determinant node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for determinant ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<determinantGradNodeCompat> node(new determinantGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors stored on the node for the backward pass.
      node->SetTensorWrapperInput(Input);
      node->SetTensorWrapperOut(result);

      node->SetGradOutMeta(Input, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "conv2d_transpose" operator.
//
// Input, Filter: tensors passed to the op under the "Input" and "Filter" slots.
// attr_map:      operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Output" slot.
//
// When the controller's AMP level is not O0, both tensors are run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor conv2d_transpose_dygraph_function(
    const paddle::experimental::Tensor& Input,
    const paddle::experimental::Tensor& Filter,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "conv2d_transpose dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: conv2d_transpose";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input},{Filter} };
    auto cast_dtype = egr::GetAmpDestDtype("conv2d_transpose", amp_inputs);

    auto casted_Input = egr::AmpAutoCast("Input", Input, cast_dtype, "conv2d_transpose");
    auto casted_Filter = egr::AmpAutoCast("Filter", Filter, cast_dtype, "conv2d_transpose");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return conv2d_transpose_dygraph_function(casted_Input, casted_Filter, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "Input", egr::EagerUtils::TrySyncToVars(Input) },{ "Filter", egr::EagerUtils::TrySyncToVars(Filter) } };
  VarMap op_outputs = { { "Output", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  egr::AutogradMeta* filter_meta = egr::EagerUtils::nullable_autograd_meta(Filter);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, input_meta, filter_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "conv2d_transpose", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Output").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "conv2d_transpose node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for conv2d_transpose ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<conv2d_transposeGradNodeCompat> node(new conv2d_transposeGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors stored on the node for the backward pass.
      node->SetTensorWrapperFilter(Filter);
      node->SetTensorWrapperInput(Input);

      // Slot 0 is registered with Input, slot 1 with Filter.
      node->SetGradOutMeta(Input, 0);
      node->SetGradOutMeta(Filter, 1);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "memcpy_d2h" operator.
//
// X:        tensor passed to the op under the "X" slot.
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Out" slot.
//
// This wrapper performs no autograd bookkeeping: it neither inspects the
// input's autograd meta nor creates a backward node.
//
// When the controller's AMP level is not O0, X is run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor memcpy_d2h_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "memcpy_d2h dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: memcpy_d2h";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("memcpy_d2h", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "memcpy_d2h");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return memcpy_d2h_dygraph_function(casted_X, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "memcpy_d2h", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &result);

  return result;
}


// Eager (dygraph) forward wrapper for the "softsign" operator.
//
// X:        tensor passed to the op under the "X" slot.
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Out" slot.
//
// When the controller's AMP level is not O0, X is run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor softsign_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "softsign dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: softsign";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("softsign", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "softsign");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return softsign_dygraph_function(casted_X, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "softsign", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "softsign node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for softsign ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<softsignGradNodeCompat> node(new softsignGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Only the input tensor is stored on the node.
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "fake_quantize_dequantize_abs_max"
// operator.
//
// X:           tensor passed to the op under the "X" slot.
// OutVar:      caller-supplied tensor used for the "Out" slot; it is passed to
//              TrySyncToVars to build the output variable and is written by
//              GetOutput after the op runs.
// OutScaleVar: same, for the "OutScale" slot.
// attr_map:    operator attributes; copied locally before TraceOp.
// Returns a tuple holding copies of (*OutVar, *OutScaleVar).
//
// Both pointers are dereferenced without a local null check.
// NOTE(review): whether TrySyncToVars validates the pointers is not visible
// here - confirm before passing nullptr.
//
// When the controller's AMP level is not O0, X is run through
// egr::AmpAutoCast and the function calls itself again (forwarding the same
// output pointers) while an AutoCastGuard constructed with AmpLevel::O0 is
// alive.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> fake_quantize_dequantize_abs_max_dygraph_function(
    const paddle::experimental::Tensor& X,
    paddle::experimental::Tensor* OutVar,
    paddle::experimental::Tensor* OutScaleVar,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "fake_quantize_dequantize_abs_max dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fake_quantize_dequantize_abs_max";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("fake_quantize_dequantize_abs_max", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "fake_quantize_dequantize_abs_max");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fake_quantize_dequantize_abs_max_dygraph_function(casted_X, OutVar, OutScaleVar, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  // Output variables are derived from the caller's tensors, not freshly named.
  VarMap op_outputs = { { "Out", egr::EagerUtils::TrySyncToVars(OutVar) },{ "OutScale", egr::EagerUtils::TrySyncToVars(OutScaleVar) } };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fake_quantize_dequantize_abs_max", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  // Write the results back through the caller's pointers, then alias them.
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), OutVar);
  egr::EagerUtils::GetOutput(op_outputs.at("OutScale").front(), OutScaleVar);
  paddle::experimental::Tensor& out_ref = *OutVar;
  paddle::experimental::Tensor& out_scale_ref = *OutScaleVar;

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "fake_quantize_dequantize_abs_max node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_ref);
    egr::AutogradMeta* out_scale_meta = egr::EagerUtils::autograd_meta(&out_scale_ref);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for fake_quantize_dequantize_abs_max ";
      egr::EagerUtils::PassStopGradient(false, out_meta, out_scale_meta);

      std::shared_ptr<fake_quantize_dequantize_abs_maxGradNodeCompat> node(
          new fake_quantize_dequantize_abs_maxGradNodeCompat(2, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // No tensor wrappers are stored for this op.

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_ref, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_ref);

      // Slot 1: OutScale
      egr::EagerUtils::SetOutRankWithSlot(out_scale_meta, 1);
      egr::EagerUtils::SetHistory(out_scale_meta, node);
      node->SetGradInMeta(out_scale_ref, 1);
      egr::EagerUtils::CheckAndRetainGrad(out_scale_ref);
    }
  }

  return std::make_tuple(out_ref, out_scale_ref);
}


// Eager (dygraph) forward wrapper for the "broadcast_tensors" operator.
//
// X:        list of tensors passed to the op under the "X" slot.
// OutNum:   number of output variables created for the "Out" slot
//           (forwarded to EagerUtils::CreateVars).
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensors read back from the op's "Out" slot.
//
// When the controller's AMP level is not O0, X is run through
// egr::AmpAutoCasts and the function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive.
std::vector<paddle::experimental::Tensor> broadcast_tensors_dygraph_function(
    const std::vector<paddle::experimental::Tensor>& X,
    size_t OutNum,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "broadcast_tensors dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: broadcast_tensors";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { X };
    auto cast_dtype = egr::GetAmpDestDtype("broadcast_tensors", amp_inputs);
    auto casted_X = egr::AmpAutoCasts("X", X, cast_dtype, "broadcast_tensors");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return broadcast_tensors_dygraph_function(casted_X, OutNum, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap op_outputs = { { "Out", egr::EagerUtils::CreateVars(OutNum) } };

  // ---- Autograd bookkeeping for the inputs (one meta pointer per tensor) ----
  std::vector<egr::AutogradMeta*> x_metas = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, &x_metas);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "broadcast_tensors", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> results;
  egr::EagerUtils::GetOutputs(op_outputs.at("Out"), &results);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "broadcast_tensors node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    std::vector<egr::AutogradMeta*> result_metas = egr::EagerUtils::autograd_meta(&results);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for broadcast_tensors ";
      egr::EagerUtils::PassStopGradient(false, &result_metas);

      std::shared_ptr<broadcast_tensorsGradNodeCompat> node(new broadcast_tensorsGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Only the input tensors are stored on the node.
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(&result_metas, 0);
      egr::EagerUtils::SetHistory(&result_metas, node);
      node->SetGradInMeta(results, 0);
      egr::EagerUtils::CheckAndRetainGrad(results);
    }
  }

  return results;
}


// Eager (dygraph) forward wrapper for the "cholesky_solve" operator.
//
// X, Y:     tensors passed to the op under the "X" and "Y" slots.
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Out" slot.
//
// When the controller's AMP level is not O0, both tensors are run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor cholesky_solve_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "cholesky_solve dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: cholesky_solve";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("cholesky_solve", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "cholesky_solve");
    auto casted_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "cholesky_solve");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return cholesky_solve_dygraph_function(casted_X, casted_Y, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) },{ "Y", egr::EagerUtils::TrySyncToVars(Y) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, y_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "cholesky_solve", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "cholesky_solve node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for cholesky_solve ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<cholesky_solveGradNodeCompat> node(new cholesky_solveGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The output and both inputs are stored on the node.
      node->SetTensorWrapperOut(result);
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(Y);

      // Slot 0 is registered with X, slot 1 with Y.
      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Y, 1);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "grid_sampler" operator.
//
// X, Grid:  tensors passed to the op under the "X" and "Grid" slots.
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Output" slot.
//
// When the controller's AMP level is not O0, both tensors are run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor grid_sampler_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Grid,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "grid_sampler dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: grid_sampler";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Grid} };
    auto cast_dtype = egr::GetAmpDestDtype("grid_sampler", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "grid_sampler");
    auto casted_Grid = egr::AmpAutoCast("Grid", Grid, cast_dtype, "grid_sampler");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return grid_sampler_dygraph_function(casted_X, casted_Grid, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) },{ "Grid", egr::EagerUtils::TrySyncToVars(Grid) } };
  VarMap op_outputs = { { "Output", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* grid_meta = egr::EagerUtils::nullable_autograd_meta(Grid);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, grid_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "grid_sampler", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Output").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "grid_sampler node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for grid_sampler ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<grid_samplerGradNodeCompat> node(new grid_samplerGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both inputs are stored on the node.
      node->SetTensorWrapperGrid(Grid);
      node->SetTensorWrapperX(X);

      // Slot 0 is registered with X, slot 1 with Grid.
      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Grid, 1);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "fft_c2r" operator.
//
// X:        tensor passed to the op under the "X" slot.
// attr_map: operator attributes; copied locally before TraceOp.
// Returns the tensor read back from the op's "Out" slot.
//
// When the controller's AMP level is not O0, X is run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor fft_c2r_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "fft_c2r dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fft_c2r";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("fft_c2r", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "fft_c2r");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fft_c2r_dygraph_function(casted_X, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fft_c2r", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &result);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "fft_c2r node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for fft_c2r ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<fft_c2rGradNodeCompat> node(new fft_c2rGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // No tensor wrappers are stored for this op.

      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward wrapper for the "pyramid_hash" operator.
//
// X, W, WhiteList, BlackList: tensors passed to the op under the slots of the
//                             same names.
// attr_map:                   operator attributes; copied locally before
//                             TraceOp.
// Returns a tuple of the tensors read back from the "Out", "DropPos" and
// "X_Temp_Out" slots, in that order.
//
// When the controller's AMP level is not O0, all four tensors are run through
// egr::AmpAutoCast and the function calls itself again while an AutoCastGuard
// constructed with AmpLevel::O0 is alive.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> pyramid_hash_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& W,
    const paddle::experimental::Tensor& WhiteList,
    const paddle::experimental::Tensor& BlackList,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  // Profiler event covering the whole call.
  paddle::platform::RecordEvent entrance_event(
      "pyramid_hash dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pyramid_hash";

  auto& controller = egr::Controller::Instance();

  // ---- AMP pre-processing ----
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{W},{WhiteList},{BlackList} };
    auto cast_dtype = egr::GetAmpDestDtype("pyramid_hash", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "pyramid_hash");
    auto casted_W = egr::AmpAutoCast("W", W, cast_dtype, "pyramid_hash");
    auto casted_WhiteList = egr::AmpAutoCast("WhiteList", WhiteList, cast_dtype, "pyramid_hash");
    auto casted_BlackList = egr::AmpAutoCast("BlackList", BlackList, cast_dtype, "pyramid_hash");

    // Declared last so it is destroyed first, as in a dedicated inner scope.
    paddle::imperative::AutoCastGuard o0_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return pyramid_hash_dygraph_function(casted_X, casted_W, casted_WhiteList, casted_BlackList, attr_map);
  }

  // ---- Operator input / output variables ----
  VarMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) },{ "W", egr::EagerUtils::TrySyncToVars(W) },{ "WhiteList", egr::EagerUtils::TrySyncToVars(WhiteList) },{ "BlackList", egr::EagerUtils::TrySyncToVars(BlackList) } };
  VarMap op_outputs = { { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}},{ "DropPos", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}},{ "X_Temp_Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // ---- Autograd bookkeeping for the inputs ----
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* w_meta = egr::EagerUtils::nullable_autograd_meta(W);
  egr::AutogradMeta* white_list_meta = egr::EagerUtils::nullable_autograd_meta(WhiteList);
  egr::AutogradMeta* black_list_meta = egr::EagerUtils::nullable_autograd_meta(BlackList);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      grad_enabled, x_meta, w_meta, white_list_meta, black_list_meta);

  // ---- Run the operator through the current tracer ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "pyramid_hash", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out;
  egr::EagerUtils::GetOutput(op_outputs.at("Out").front(), &out);
  paddle::experimental::Tensor drop_pos;
  egr::EagerUtils::GetOutput(op_outputs.at("DropPos").front(), &drop_pos);
  paddle::experimental::Tensor x_temp_out;
  egr::EagerUtils::GetOutput(op_outputs.at("X_Temp_Out").front(), &x_temp_out);

  // ---- Backward node creation ----
  {
    paddle::platform::RecordEvent node_creation_event(
        "pyramid_hash node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, even when no grad node ends up being built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out);
    egr::AutogradMeta* drop_pos_meta = egr::EagerUtils::autograd_meta(&drop_pos);
    egr::AutogradMeta* x_temp_out_meta = egr::EagerUtils::autograd_meta(&x_temp_out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for pyramid_hash ";
      egr::EagerUtils::PassStopGradient(false, out_meta, drop_pos_meta, x_temp_out_meta);

      std::shared_ptr<pyramid_hashGradNodeCompat> node(new pyramid_hashGradNodeCompat(3, 4));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors stored on the node for the backward pass.
      node->SetTensorWrapperDropPos(drop_pos);
      node->SetTensorWrapperW(W);
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperX_Temp_Out(x_temp_out);

      // Only X is registered here, although the node is built with 4 as its
      // second constructor argument.
      // NOTE(review): presumably only X receives a gradient from this op -
      // confirm against the operator definition.
      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out, 0);
      egr::EagerUtils::CheckAndRetainGrad(out);

      // Slot 1: DropPos
      egr::EagerUtils::SetOutRankWithSlot(drop_pos_meta, 1);
      egr::EagerUtils::SetHistory(drop_pos_meta, node);
      node->SetGradInMeta(drop_pos, 1);
      egr::EagerUtils::CheckAndRetainGrad(drop_pos);

      // Slot 2: X_Temp_Out gets a rank and grad-in meta only; unlike the other
      // two outputs, no SetHistory / CheckAndRetainGrad call is made for it.
      // NOTE(review): looks intentional for an intermediate output - confirm.
      egr::EagerUtils::SetOutRankWithSlot(x_temp_out_meta, 2);
      node->SetGradInMeta(x_temp_out, 2);
    }
  }

  return std::make_tuple(out, drop_pos, x_temp_out);
}


// Eager (dygraph) forward wrapper for the
// "fake_quantize_dequantize_moving_average_abs_max" op.
//
// Inputs:
//   X, InScale        - always forwarded to the tracer.
//   InAccum, InState  - forwarded only when `initialized()` returns true.
//   OutVar, OutScaleVar, OutStateVar, OutAccumVar
//                     - caller-provided tensors used as the op's output
//                       variables; each pointer is dereferenced
//                       unconditionally below.
//   attr_map          - op attributes; copied locally because the copy is
//                       moved into the grad node when one is created.
//
// Returns a tuple of copies of (*OutVar, *OutScaleVar, *OutStateVar,
// *OutAccumVar) after the op has been traced.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> fake_quantize_dequantize_moving_average_abs_max_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& InScale,const paddle::experimental::Tensor& InAccum,const paddle::experimental::Tensor& InState, paddle::experimental::Tensor* OutVar, paddle::experimental::Tensor* OutScaleVar, paddle::experimental::Tensor* OutStateVar, paddle::experimental::Tensor* OutAccumVar, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "fake_quantize_dequantize_moving_average_abs_max dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: fake_quantize_dequantize_moving_average_abs_max";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{InScale} };
    if (InAccum.initialized()) {
      amp_inputs.push_back({ InAccum });
    }
    if (InState.initialized()) {
      amp_inputs.push_back({ InState });
    }

    auto target_dtype = egr::GetAmpDestDtype("fake_quantize_dequantize_moving_average_abs_max", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "fake_quantize_dequantize_moving_average_abs_max");
    auto casted_InScale = egr::AmpAutoCast("InScale", InScale, target_dtype, "fake_quantize_dequantize_moving_average_abs_max");
    // Optional inputs are passed through untouched when not initialized.
    auto casted_InAccum = InAccum.initialized()
        ? egr::AmpAutoCast("InAccum", InAccum, target_dtype, "fake_quantize_dequantize_moving_average_abs_max")
        : InAccum;
    auto casted_InState = InState.initialized()
        ? egr::AmpAutoCast("InState", InState, target_dtype, "fake_quantize_dequantize_moving_average_abs_max")
        : InState;

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fake_quantize_dequantize_moving_average_abs_max_dygraph_function(
        casted_X, casted_InScale, casted_InAccum, casted_InState,
        OutVar, OutScaleVar, OutStateVar, OutAccumVar, attr_map);
  }

  // Input slots; the optional ones are added only when initialized.
  VarSlots op_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "InScale", egr::EagerUtils::TrySyncToVars(InScale) } };
  if (InAccum.initialized()) {
    op_inputs["InAccum"] = egr::EagerUtils::TrySyncToVars(InAccum);
  }
  if (InState.initialized()) {
    op_inputs["InState"] = egr::EagerUtils::TrySyncToVars(InState);
  }

  // Output slots are built from the caller-provided tensors.
  VarSlots op_outputs = {
      { "Out", egr::EagerUtils::TrySyncToVars(OutVar) },
      { "OutScale", egr::EagerUtils::TrySyncToVars(OutScaleVar) },
      { "OutState", egr::EagerUtils::TrySyncToVars(OutStateVar) },
      { "OutAccum", egr::EagerUtils::TrySyncToVars(OutAccumVar) } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* in_scale_meta = egr::EagerUtils::nullable_autograd_meta(InScale);
  egr::AutogradMeta* in_accum_meta = egr::EagerUtils::nullable_autograd_meta(InAccum);
  egr::AutogradMeta* in_state_meta = egr::EagerUtils::nullable_autograd_meta(InState);

  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      grad_mode_on, x_meta, in_scale_meta, in_accum_meta, in_state_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fake_quantize_dequantize_moving_average_abs_max",
      op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  // Read results back into the caller-provided tensors.
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], OutVar);
  Tensor& out_ref = *OutVar;
  egr::EagerUtils::GetOutput(op_outputs["OutScale"][0], OutScaleVar);
  Tensor& out_scale_ref = *OutScaleVar;
  if (op_outputs.count("OutState")) {
    egr::EagerUtils::GetOutput(op_outputs["OutState"][0], OutStateVar);
  }
  Tensor& out_state_ref = *OutStateVar;
  if (op_outputs.count("OutAccum")) {
    egr::EagerUtils::GetOutput(op_outputs["OutAccum"][0], OutAccumVar);
  }
  Tensor& out_accum_ref = *OutAccumVar;

  {
    paddle::platform::RecordEvent node_event(
        "fake_quantize_dequantize_moving_average_abs_max node_creation",
        paddle::platform::TracerEventType::Operator,
        1);
    // autograd_meta is queried for every output regardless of needs_grad.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_ref);
    egr::AutogradMeta* out_scale_meta = egr::EagerUtils::autograd_meta(&out_scale_ref);
    egr::AutogradMeta* out_state_meta = egr::EagerUtils::autograd_meta(&out_state_ref);
    egr::AutogradMeta* out_accum_meta = egr::EagerUtils::autograd_meta(&out_accum_ref);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for fake_quantize_dequantize_moving_average_abs_max "; 
      egr::EagerUtils::PassStopGradient(false, out_meta, out_scale_meta, out_state_meta, out_accum_meta);

      std::shared_ptr<fake_quantize_dequantize_moving_average_abs_maxGradNodeCompat> node(
          new fake_quantize_dequantize_moving_average_abs_maxGradNodeCompat(4, 4));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // No tensor wrappers are set for this op.

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_ref, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_ref);

      // Slot 1: OutScale
      egr::EagerUtils::SetOutRankWithSlot(out_scale_meta, 1);
      egr::EagerUtils::SetHistory(out_scale_meta, node);
      node->SetGradInMeta(out_scale_ref, 1);
      egr::EagerUtils::CheckAndRetainGrad(out_scale_ref);

      // Slot 2: OutState
      egr::EagerUtils::SetOutRankWithSlot(out_state_meta, 2);
      egr::EagerUtils::SetHistory(out_state_meta, node);
      node->SetGradInMeta(out_state_ref, 2);
      egr::EagerUtils::CheckAndRetainGrad(out_state_ref);

      // Slot 3: OutAccum
      egr::EagerUtils::SetOutRankWithSlot(out_accum_meta, 3);
      egr::EagerUtils::SetHistory(out_accum_meta, node);
      node->SetGradInMeta(out_accum_ref, 3);
      egr::EagerUtils::CheckAndRetainGrad(out_accum_ref);
    }
  }

  return std::make_tuple(out_ref, out_scale_ref, out_state_ref, out_accum_ref);

}


// Eager (dygraph) forward wrapper for the "multi_dot" op.
//
//   X        - list of input tensors, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns the tensor read back from the op's "Out" slot.
paddle::experimental::Tensor multi_dot_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "multi_dot dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: multi_dot";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { X };
    auto target_dtype = egr::GetAmpDestDtype("multi_dot", amp_inputs);
    auto casted_X = egr::AmpAutoCasts("X", X, target_dtype, "multi_dot");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return multi_dot_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  std::vector<egr::AutogradMeta*> x_metas = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, &x_metas);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "multi_dot", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_event(
        "multi_dot node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for multi_dot "; 
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<multi_dotGradNodeCompat> node(new multi_dotGradNodeCompat(1, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;

}


// Eager (dygraph) forward wrapper for the "sequence_pool" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns (Out, MaxIndex) as read back from the traced op.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> sequence_pool_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "sequence_pool dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sequence_pool";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("sequence_pool", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "sequence_pool");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sequence_pool_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  // Unique names are generated for "Out" first, then "MaxIndex".
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} },
      { "MaxIndex", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "sequence_pool", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor pooled;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &pooled);
  Tensor max_index;
  egr::EagerUtils::GetOutput(op_outputs["MaxIndex"][0], &max_index);

  {
    paddle::platform::RecordEvent node_event(
        "sequence_pool node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried for both outputs regardless of needs_grad.
    egr::AutogradMeta* pooled_meta = egr::EagerUtils::autograd_meta(&pooled);
    egr::AutogradMeta* max_index_meta = egr::EagerUtils::autograd_meta(&max_index);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for sequence_pool "; 
      egr::EagerUtils::PassStopGradient(false, pooled_meta, max_index_meta);

      std::shared_ptr<sequence_poolGradNodeCompat> node(new sequence_poolGradNodeCompat(2, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(pooled_meta, 0);
      egr::EagerUtils::SetHistory(pooled_meta, node);
      node->SetGradInMeta(pooled, 0);
      egr::EagerUtils::CheckAndRetainGrad(pooled);

      // Slot 1: MaxIndex only gets its rank and grad-in meta; no history
      // or retain-grad call is made for it here.
      egr::EagerUtils::SetOutRankWithSlot(max_index_meta, 1);
      node->SetGradInMeta(max_index, 1);
    }
  }

  return std::make_tuple(pooled, max_index);

}


// Eager (dygraph) forward wrapper for the "broadcast" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; a local copy is handed to the tracer.
//
// Returns the tensor read back from the op's "Out" slot. This wrapper
// builds no grad node.
paddle::experimental::Tensor broadcast_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "broadcast dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: broadcast";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("broadcast", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "broadcast");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return broadcast_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "broadcast", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);

  return result;

}


// Eager (dygraph) forward wrapper for the "transpose" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns the tensor read back from the op's "Out" slot.
paddle::experimental::Tensor transpose_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "transpose dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: transpose";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("transpose", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "transpose");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return transpose_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "transpose", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_event(
        "transpose node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for transpose "; 
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<transposeGradNodeCompat> node(new transposeGradNodeCompat(1, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both the output and the input are stored on the node.
      node->SetTensorWrapperOut(result);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;

}


// Eager (dygraph) forward wrapper for the "top_k" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns (Out, Indices) as read back from the traced op.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> top_k_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "top_k dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: top_k";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("top_k", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "top_k");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return top_k_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  // Unique names are generated for "Out" first, then "Indices".
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} },
      { "Indices", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "top_k", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor values;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &values);
  Tensor indices;
  egr::EagerUtils::GetOutput(op_outputs["Indices"][0], &indices);

  {
    paddle::platform::RecordEvent node_event(
        "top_k node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried for both outputs regardless of needs_grad.
    egr::AutogradMeta* values_meta = egr::EagerUtils::autograd_meta(&values);
    egr::AutogradMeta* indices_meta = egr::EagerUtils::autograd_meta(&indices);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for top_k "; 
      egr::EagerUtils::PassStopGradient(false, values_meta, indices_meta);

      std::shared_ptr<top_kGradNodeCompat> node(new top_kGradNodeCompat(2, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperIndices(indices);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(values_meta, 0);
      egr::EagerUtils::SetHistory(values_meta, node);
      node->SetGradInMeta(values, 0);
      egr::EagerUtils::CheckAndRetainGrad(values);

      // Slot 1: Indices
      egr::EagerUtils::SetOutRankWithSlot(indices_meta, 1);
      egr::EagerUtils::SetHistory(indices_meta, node);
      node->SetGradInMeta(indices, 1);
      egr::EagerUtils::CheckAndRetainGrad(indices);
    }
  }

  return std::make_tuple(values, indices);

}


// Eager (dygraph) forward wrapper for the "renorm" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns the tensor read back from the op's "Out" slot.
paddle::experimental::Tensor renorm_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "renorm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: renorm";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("renorm", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "renorm");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return renorm_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "renorm", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_event(
        "renorm node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for renorm "; 
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<renormGradNodeCompat> node(new renormGradNodeCompat(1, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;

}


// Eager (dygraph) forward wrapper for the "pixel_unshuffle" op.
//
//   X        - input tensor, passed to the tracer under slot "X".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns the tensor read back from the op's "Out" slot.
paddle::experimental::Tensor pixel_unshuffle_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "pixel_unshuffle dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pixel_unshuffle";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("pixel_unshuffle", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "pixel_unshuffle");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return pixel_unshuffle_dygraph_function(casted_X, attr_map);
  }

  VarSlots op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "pixel_unshuffle", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_event(
        "pixel_unshuffle node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for pixel_unshuffle "; 
      egr::EagerUtils::PassStopGradient(false, result_meta);

      std::shared_ptr<pixel_unshuffleGradNodeCompat> node(new pixel_unshuffleGradNodeCompat(1, 1));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // No tensor wrappers are set for this op.

      node->SetGradOutMeta(X, 0);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, node);
      node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;

}


// Eager (dygraph) forward wrapper for the "take_along_axis" op.
//
//   Input, Index - input tensors, passed to the tracer under slots "Input"
//                  and "Index".
//   attr_map     - op attributes; copied locally because the copy is moved
//                  into the grad node when one is created.
//
// Returns the tensor read back from the op's "Result" slot.
paddle::experimental::Tensor take_along_axis_dygraph_function(const paddle::experimental::Tensor& Input,const paddle::experimental::Tensor& Index, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "take_along_axis dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: take_along_axis";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast both inputs, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input},{Index} };
    auto target_dtype = egr::GetAmpDestDtype("take_along_axis", amp_inputs);

    auto casted_Input = egr::AmpAutoCast("Input", Input, target_dtype, "take_along_axis");
    auto casted_Index = egr::AmpAutoCast("Index", Index, target_dtype, "take_along_axis");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return take_along_axis_dygraph_function(casted_Input, casted_Index, attr_map);
  }

  VarSlots op_inputs = {
      { "Input", egr::EagerUtils::TrySyncToVars(Input) },
      { "Index", egr::EagerUtils::TrySyncToVars(Index) } };
  VarSlots op_outputs = {
      { "Result", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  egr::AutogradMeta* index_meta = egr::EagerUtils::nullable_autograd_meta(Index);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, input_meta, index_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "take_along_axis", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor gathered;
  egr::EagerUtils::GetOutput(op_outputs["Result"][0], &gathered);

  {
    paddle::platform::RecordEvent node_event(
        "take_along_axis node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* gathered_meta = egr::EagerUtils::autograd_meta(&gathered);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for take_along_axis "; 
      egr::EagerUtils::PassStopGradient(false, gathered_meta);

      std::shared_ptr<take_along_axisGradNodeCompat> node(new take_along_axisGradNodeCompat(1, 2));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperIndex(Index);
      node->SetTensorWrapperInput(Input);

      // Only Input gets grad-out meta; none is set for Index.
      node->SetGradOutMeta(Input, 0);

      // Slot 0: Result
      egr::EagerUtils::SetOutRankWithSlot(gathered_meta, 0);
      egr::EagerUtils::SetHistory(gathered_meta, node);
      node->SetGradInMeta(gathered, 0);
      egr::EagerUtils::CheckAndRetainGrad(gathered);
    }
  }

  return gathered;

}


// Eager (dygraph) forward wrapper for the "dist" op.
//
//   X, Y     - input tensors, passed to the tracer under slots "X" and "Y".
//   attr_map - op attributes; copied locally because the copy is moved into
//              the grad node when one is created.
//
// Returns the tensor read back from the op's "Out" slot.
paddle::experimental::Tensor dist_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarSlots = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event(
      "dist dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: dist";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast both inputs, then re-enter this function while an
  // AutoCastGuard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto target_dtype = egr::GetAmpDestDtype("dist", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "dist");
    auto casted_Y = egr::AmpAutoCast("Y", Y, target_dtype, "dist");

    paddle::imperative::AutoCastGuard o0_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return dist_dygraph_function(casted_X, casted_Y, attr_map);
  }

  VarSlots op_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Y", egr::EagerUtils::TrySyncToVars(Y) } };
  VarSlots op_outputs = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())} } };

  // Decide whether a grad node is needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool grad_mode_on = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_mode_on, x_meta, y_meta);

  // Run the op through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "dist", op_inputs, op_outputs, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  Tensor distance;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &distance);

  {
    paddle::platform::RecordEvent node_event(
        "dist node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is queried regardless of needs_grad.
    egr::AutogradMeta* distance_meta = egr::EagerUtils::autograd_meta(&distance);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for dist "; 
      egr::EagerUtils::PassStopGradient(false, distance_meta);

      std::shared_ptr<distGradNodeCompat> node(new distGradNodeCompat(1, 2));

      // The local attribute maps are handed over to the node.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The output and both inputs are stored on the node.
      node->SetTensorWrapperOut(distance);
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(Y);

      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Y, 1);

      // Slot 0: Out
      egr::EagerUtils::SetOutRankWithSlot(distance_meta, 0);
      egr::EagerUtils::SetHistory(distance_meta, node);
      node->SetGradInMeta(distance, 0);
      egr::EagerUtils::CheckAndRetainGrad(distance);
    }
  }

  return distance;

}


// Eager (dygraph) forward wrapper for the "affine_grid" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, Theta is passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensor while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise Theta is synced to EagerVariables, "affine_grid" is run through the current
//      tracer's TraceOp, and the "Output" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, an affine_gridGradNodeCompat is created,
//      handed the attribute maps, and passed to SetHistory together with the output's
//      autograd meta. No tensor wrappers are recorded for this op.
//
// Parameters:
//   Theta     - forward input, bound to op slot "Theta".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Output".
paddle::experimental::Tensor affine_grid_dygraph_function(const paddle::experimental::Tensor& Theta, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("affine_grid dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: affine_grid";

  // AMP path: cast the input, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Theta} };
    auto cast_dtype = egr::GetAmpDestDtype("affine_grid", amp_inputs);

    auto cast_Theta = egr::AmpAutoCast("Theta", Theta, cast_dtype, "affine_grid");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return affine_grid_dygraph_function(cast_Theta, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("Theta", egr::EagerUtils::TrySyncToVars(Theta));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Output",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Autograd meta of the input decides whether a grad node has to be built later.
  egr::AutogradMeta* theta_meta = egr::EagerUtils::nullable_autograd_meta(Theta);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, theta_meta);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "affine_grid", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Output"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("affine_grid node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for affine_grid ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<affine_gridGradNodeCompat> grad_node(new affine_gridGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Slot metadata and graph hookup (this op records no tensor wrappers).
      grad_node->SetGradOutMeta(Theta, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "gaussian_random_batch_size_like" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, Input is passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensor while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise Input is synced to EagerVariables, the op is run through the current
//      tracer's TraceOp, and the "Out" variable is read back into a Tensor.
// No autograd node is constructed in this wrapper.
//
// Parameters:
//   Input     - forward input, bound to op slot "Input".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor gaussian_random_batch_size_like_dygraph_function(const paddle::experimental::Tensor& Input, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("gaussian_random_batch_size_like dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: gaussian_random_batch_size_like";

  // AMP path: cast the input, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input} };
    auto cast_dtype = egr::GetAmpDestDtype("gaussian_random_batch_size_like", amp_inputs);

    auto cast_Input = egr::AmpAutoCast("Input", Input, cast_dtype, "gaussian_random_batch_size_like");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return gaussian_random_batch_size_like_dygraph_function(cast_Input, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("Input", egr::EagerUtils::TrySyncToVars(Input));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "gaussian_random_batch_size_like", input_vars, output_vars, op_attrs, place,
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "fake_channel_wise_dequantize_max_abs" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X is passed through
//      egr::AmpAutoCast and Scales through egr::AmpAutoCasts, both using the dtype returned
//      by egr::GetAmpDestDtype, and this function is called again on the cast tensors while
//      an AutoCastGuard built with AmpLevel::O0 is alive.
//   2. Otherwise the inputs are synced to EagerVariables, the op is run through the current
//      tracer's TraceOp, and the "Out" variable is read back into a Tensor.
// No autograd node is constructed in this wrapper.
//
// Parameters:
//   X         - forward input, bound to op slot "X".
//   Scales    - list of forward inputs, bound to op slot "Scales".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor fake_channel_wise_dequantize_max_abs_dygraph_function(const paddle::experimental::Tensor& X,const std::vector<paddle::experimental::Tensor>& Scales, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("fake_channel_wise_dequantize_max_abs dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fake_channel_wise_dequantize_max_abs";

  // AMP path: cast the inputs, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},Scales };
    auto cast_dtype = egr::GetAmpDestDtype("fake_channel_wise_dequantize_max_abs", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "fake_channel_wise_dequantize_max_abs");
    auto cast_Scales = egr::AmpAutoCasts("Scales", Scales, cast_dtype, "fake_channel_wise_dequantize_max_abs");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fake_channel_wise_dequantize_max_abs_dygraph_function(cast_X, cast_Scales, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  input_vars.emplace("Scales", egr::EagerUtils::TrySyncToVars(Scales));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fake_channel_wise_dequantize_max_abs", input_vars, output_vars, op_attrs, place,
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "reciprocal" operator (out-of-place variant).
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X is passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensor while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise X is synced to EagerVariables, "reciprocal" is run through the current
//      tracer's TraceOp, and the "Out" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, a reciprocalGradNodeCompat is created, handed
//      the attribute maps and the Out tensor wrapper, and passed to SetHistory together
//      with the output's autograd meta.
//
// Parameters:
//   X         - forward input, bound to op slot "X".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor reciprocal_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("reciprocal dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: reciprocal";

  // AMP path: cast the input, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("reciprocal", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "reciprocal");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return reciprocal_dygraph_function(cast_X, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Autograd meta of the input decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "reciprocal", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("reciprocal node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for reciprocal ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<reciprocalGradNodeCompat> grad_node(new reciprocalGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Only the output tensor is recorded on the node.
      grad_node->SetTensorWrapperOut(out_tensor);

      // Slot metadata and graph hookup.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "reciprocal" operator, in-place variant.
//
// The output slot "Out" is bound to the same EagerVariables as input slot "X", TraceOp is
// given the {"X" -> "Out"} in-place map, and the result is written back into X. Unlike the
// out-of-place wrapper, no AMP handling is performed here.
//
// Steps performed, in order:
//   1. X is synced to EagerVariables and reused as the output variables.
//   2. CheckInplace is called with X, its autograd meta and the require-grad flag.
//   3. "reciprocal" is run through the current tracer's TraceOp; the output is read back
//      into X and X's in-place version is bumped.
//   4. When ComputeRequireGrad returned true, a reciprocalGradNodeCompat is created, handed
//      the attribute maps and X as the Out tensor wrapper, and passed to SetHistory together
//      with X's autograd meta.
//
// Parameters:
//   X         - tensor that serves as both input and output; modified by this call.
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns X (by value) after the operation.
paddle::experimental::Tensor reciprocal__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("reciprocal dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: reciprocal";

  // Slot name -> variables maps consumed by the tracer; "Out" shares X's variables.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  SlotVarMap output_vars;
  output_vars.emplace("Out", input_vars["X"]);

  // Autograd meta of the input decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta);

  // Validate the in-place request before running the op.
  egr::EagerUtils::CheckInplace(X, x_meta, needs_grad);

  // Execute the operator with the in-place mapping X -> Out.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "reciprocal", input_vars, output_vars, op_attrs, place, &op_default_attrs, true,
      {{"X", "Out"}});

  // Write the result back into the caller's tensor.
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_event("reciprocal node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the meta from X now that it holds the output.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for reciprocal ";
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Backward node for this op.
      std::shared_ptr<reciprocalGradNodeCompat> grad_node(new reciprocalGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // X doubles as the output, so it is what gets recorded as "Out".
      grad_node->SetTensorWrapperOut(X);

      // Slot metadata and graph hookup.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager (dygraph) forward wrapper for the "sequence_mask" operator.
//
// MaxLenTensor is optional: it only takes part in AMP dtype selection, casting and the op's
// input map when MaxLenTensor.initialized() is true.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X (and MaxLenTensor, when
//      initialized) are passed through egr::AmpAutoCast using the dtype returned by
//      egr::GetAmpDestDtype, and this function is called again on the resulting tensors
//      while an AutoCastGuard built with AmpLevel::O0 is alive.
//   2. Otherwise the inputs are synced to EagerVariables, the op is run through the current
//      tracer's TraceOp, and the "Y" variable is read back into a Tensor.
// No autograd node is constructed in this wrapper.
//
// Parameters:
//   X             - forward input, bound to op slot "X".
//   MaxLenTensor  - optional forward input, bound to op slot "MaxLenTensor" when initialized.
//   attr_map      - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Y".
paddle::experimental::Tensor sequence_mask_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& MaxLenTensor, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("sequence_mask dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sequence_mask";

  // AMP path: cast the inputs, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (MaxLenTensor.initialized()) {
      amp_inputs.push_back({ MaxLenTensor });
    }

    auto cast_dtype = egr::GetAmpDestDtype("sequence_mask", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "sequence_mask");
    // An uninitialized MaxLenTensor is forwarded untouched.
    auto cast_MaxLenTensor =
        MaxLenTensor.initialized()
            ? egr::AmpAutoCast("MaxLenTensor", MaxLenTensor, cast_dtype, "sequence_mask")
            : MaxLenTensor;

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sequence_mask_dygraph_function(cast_X, cast_MaxLenTensor, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  if (MaxLenTensor.initialized()) {
    input_vars["MaxLenTensor"] = egr::EagerUtils::TrySyncToVars(MaxLenTensor);
  }

  SlotVarMap output_vars;
  output_vars.emplace(
      "Y",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "sequence_mask", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Y"][0], &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "prune_gate_by_capacity" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, GateIdx and ExpertCount are
//      passed through egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and
//      this function is called again on the cast tensors while an AutoCastGuard built with
//      AmpLevel::O0 is alive.
//   2. Otherwise the inputs are synced to EagerVariables, the op is run through the current
//      tracer's TraceOp, and the "NewGateIdx" variable is read back into a Tensor.
// No autograd node is constructed in this wrapper.
//
// Parameters:
//   GateIdx      - forward input, bound to op slot "GateIdx".
//   ExpertCount  - forward input, bound to op slot "ExpertCount".
//   attr_map     - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "NewGateIdx".
paddle::experimental::Tensor prune_gate_by_capacity_dygraph_function(const paddle::experimental::Tensor& GateIdx,const paddle::experimental::Tensor& ExpertCount, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("prune_gate_by_capacity dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: prune_gate_by_capacity";

  // AMP path: cast the inputs, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {GateIdx},{ExpertCount} };
    auto cast_dtype = egr::GetAmpDestDtype("prune_gate_by_capacity", amp_inputs);

    auto cast_GateIdx = egr::AmpAutoCast("GateIdx", GateIdx, cast_dtype, "prune_gate_by_capacity");
    auto cast_ExpertCount = egr::AmpAutoCast("ExpertCount", ExpertCount, cast_dtype, "prune_gate_by_capacity");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return prune_gate_by_capacity_dygraph_function(cast_GateIdx, cast_ExpertCount, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("GateIdx", egr::EagerUtils::TrySyncToVars(GateIdx));
  input_vars.emplace("ExpertCount", egr::EagerUtils::TrySyncToVars(ExpertCount));

  SlotVarMap output_vars;
  output_vars.emplace(
      "NewGateIdx",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "prune_gate_by_capacity", input_vars, output_vars, op_attrs, place, &op_default_attrs,
      true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["NewGateIdx"][0], &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "fill_diagonal_tensor" operator (out-of-place
// variant).
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X and Y are passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensors while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise the inputs are synced to EagerVariables, the op is run through the current
//      tracer's TraceOp, and the "Out" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, a fill_diagonal_tensorGradNodeCompat is
//      created, handed the attribute maps, and passed to SetHistory together with the
//      output's autograd meta. No tensor wrappers are recorded, and grad-out meta is set
//      for X only.
//
// Parameters:
//   X, Y      - forward inputs, bound to op slots "X" and "Y".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor fill_diagonal_tensor_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("fill_diagonal_tensor dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fill_diagonal_tensor";

  // AMP path: cast the inputs, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("fill_diagonal_tensor", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "fill_diagonal_tensor");
    auto cast_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "fill_diagonal_tensor");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fill_diagonal_tensor_dygraph_function(cast_X, cast_Y, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  input_vars.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Autograd meta of the inputs decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta, y_meta);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fill_diagonal_tensor", input_vars, output_vars, op_attrs, place, &op_default_attrs,
      true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("fill_diagonal_tensor node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for fill_diagonal_tensor ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<fill_diagonal_tensorGradNodeCompat> grad_node(
          new fill_diagonal_tensorGradNodeCompat(1, 2));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Slot metadata and graph hookup (no tensor wrappers; only X gets grad-out meta).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "fill_diagonal_tensor" operator, in-place variant.
//
// The output slot "Out" is bound to the same EagerVariables as input slot "X", TraceOp is
// given the {"X" -> "Out"} in-place map, and the result is written back into X. Unlike the
// out-of-place wrapper, no AMP handling is performed here.
//
// Steps performed, in order:
//   1. X and Y are synced to EagerVariables; X's variables are reused as the output.
//   2. CheckInplace is called with X, its autograd meta and the require-grad flag.
//   3. The op is run through the current tracer's TraceOp; the output is read back into X
//      and X's in-place version is bumped.
//   4. When ComputeRequireGrad returned true, a fill_diagonal_tensorGradNodeCompat is
//      created, handed the attribute maps, and passed to SetHistory together with X's
//      autograd meta. No tensor wrappers are recorded, and grad-out meta is set for X only.
//
// Parameters:
//   X         - tensor that serves as both input and output; modified by this call.
//   Y         - forward input, bound to op slot "Y".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns X (by value) after the operation.
paddle::experimental::Tensor fill_diagonal_tensor__dygraph_function(paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("fill_diagonal_tensor dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fill_diagonal_tensor";

  // Slot name -> variables maps consumed by the tracer; "Out" shares X's variables.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  input_vars.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  SlotVarMap output_vars;
  output_vars.emplace("Out", input_vars["X"]);

  // Autograd meta of the inputs decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta, y_meta);

  // Validate the in-place request before running the op.
  egr::EagerUtils::CheckInplace(X, x_meta, needs_grad);

  // Execute the operator with the in-place mapping X -> Out.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fill_diagonal_tensor", input_vars, output_vars, op_attrs, place, &op_default_attrs,
      true, {{"X", "Out"}});

  // Write the result back into the caller's tensor.
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_event("fill_diagonal_tensor node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the meta from X now that it holds the output.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for fill_diagonal_tensor ";
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Backward node for this op.
      std::shared_ptr<fill_diagonal_tensorGradNodeCompat> grad_node(
          new fill_diagonal_tensorGradNodeCompat(1, 2));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Slot metadata and graph hookup (no tensor wrappers; only X gets grad-out meta).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager (dygraph) forward wrapper for the "abs" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X is passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensor while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise X is synced to EagerVariables, "abs" is run through the current tracer's
//      TraceOp, and the "Out" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, an absGradNodeCompat is created, handed the
//      attribute maps and the X tensor wrapper, and passed to SetHistory together with the
//      output's autograd meta.
//
// Parameters:
//   X         - forward input, bound to op slot "X".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor abs_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("abs dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: abs";

  // AMP path: cast the input, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("abs", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "abs");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return abs_dygraph_function(cast_X, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Autograd meta of the input decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "abs", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("abs node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for abs ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<absGradNodeCompat> grad_node(new absGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Only the input tensor is recorded on the node.
      grad_node->SetTensorWrapperX(X);

      // Slot metadata and graph hookup.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "partial_concat" operator.
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, the tensors in X are passed
//      through egr::AmpAutoCasts using the dtype returned by egr::GetAmpDestDtype, and this
//      function is called again on the cast tensors while an AutoCastGuard built with
//      AmpLevel::O0 is alive.
//   2. Otherwise X is synced to EagerVariables, "partial_concat" is run through the current
//      tracer's TraceOp, and the "Out" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, a partial_concatGradNodeCompat is created,
//      handed the attribute maps and the X tensor wrapper, and passed to SetHistory together
//      with the output's autograd meta.
//
// Parameters:
//   X         - list of forward inputs, all bound to op slot "X".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor partial_concat_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("partial_concat dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: partial_concat";

  // AMP path: cast the inputs, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { X };
    auto cast_dtype = egr::GetAmpDestDtype("partial_concat", amp_inputs);

    auto cast_X = egr::AmpAutoCasts("X", X, cast_dtype, "partial_concat");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return partial_concat_dygraph_function(cast_X, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // One autograd meta pointer per element of X; the vector is passed by address below.
  std::vector<egr::AutogradMeta*> x_metas = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, &x_metas);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "partial_concat", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("partial_concat node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for partial_concat ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<partial_concatGradNodeCompat> grad_node(
          new partial_concatGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The whole input list is recorded on the node.
      grad_node->SetTensorWrapperX(X);

      // Slot metadata and graph hookup.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "elu" operator (out-of-place variant).
//
// Steps performed, in order:
//   1. If the controller reports an AMP level other than O0, X is passed through
//      egr::AmpAutoCast using the dtype returned by egr::GetAmpDestDtype, and this function
//      is called again on the cast tensor while an AutoCastGuard built with AmpLevel::O0
//      is alive.
//   2. Otherwise X is synced to EagerVariables, "elu" is run through the current tracer's
//      TraceOp, and the "Out" variable is read back into a Tensor.
//   3. When ComputeRequireGrad returns true, an eluGradNodeCompat is created, handed the
//      attribute maps and the Out/X tensor wrappers, and passed to SetHistory together with
//      the output's autograd meta.
//
// Parameters:
//   X         - forward input, bound to op slot "X".
//   attr_map  - operator attributes; copied locally before being given to TraceOp.
// Returns the tensor obtained from output slot "Out".
paddle::experimental::Tensor elu_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarVec = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, VarVec>;

  paddle::platform::RecordEvent entrance_event("elu dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elu";

  // AMP path: cast the input, then re-dispatch with the AMP level forced to O0.
  const bool amp_enabled =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_enabled) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("elu", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "elu");

    // Keep the guard alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard amp_off_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return elu_dygraph_function(cast_X, attr_map);
  }

  // Slot name -> variables maps consumed by the tracer.
  SlotVarMap input_vars;
  input_vars.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  SlotVarMap output_vars;
  output_vars.emplace(
      "Out",
      VarVec{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Autograd meta of the input decides whether a grad node has to be built later.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, x_meta);

  // Execute the operator.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  const auto& place = egr::Controller::Instance().GetExpectedPlace();
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "elu", input_vars, output_vars, op_attrs, place, &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_event("elu node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for elu ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node for this op.
      std::shared_ptr<eluGradNodeCompat> grad_node(new eluGradNodeCompat(1, 1));

      // Attributes are moved: the local maps are not used after this point.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both the output and the input are recorded on the node.
      grad_node->SetTensorWrapperOut(out_tensor);
      grad_node->SetTensorWrapperX(X);

      // Slot metadata and graph hookup.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// In-place eager forward entry for the "elu" op.
//
// The op is traced with X's variable used as both the "X" input and the
// "Out" output; the traced result is written back into X and X's inplace
// version is bumped. When gradients are required, an eluGradNodeCompat is
// created and attached to X's autograd meta.
//
// X        - tensor that is read by the op and overwritten with its result.
// attr_map - operator attributes; copied locally and forwarded to the tracer.
// Returns a copy of X after the op has been traced.
paddle::experimental::Tensor elu__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("elu dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elu";

  egr::Controller& controller = egr::Controller::Instance();

  // The output slot reuses the input variable list, so "Out" aliases "X".
  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) } };
  NameVarMap output_vars = {
      { "Out", input_vars["X"] } };

  // Work out whether a backward node is needed, then validate the in-place use.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, x_meta);
  egr::EagerUtils::CheckInplace(X, x_meta, needs_grad);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("elu", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {{"X", "Out"}});

  // Write the traced output back into the caller's tensor.
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_record_event("elu node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the meta after the op: X now holds the output.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for elu "; 
      egr::EagerUtils::PassStopGradient(false, x_meta);

      std::shared_ptr<eluGradNodeCompat> backward_node(new eluGradNodeCompat(1, 1));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both wrappers capture X because input and output share one tensor.
      backward_node->SetTensorWrapperOut(X);
      backward_node->SetTensorWrapperX(X);

      backward_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, backward_node);
      backward_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager forward entry for the "index_select" op.
//
// If the controller's AMP level is not O0, X and Index are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer and, when gradients are required, an
// index_selectGradNodeCompat is attached to the output's autograd meta.
//
// X, Index - op inputs.
// attr_map - operator attributes; copied locally and forwarded to the tracer.
// Returns the tensor read from the op's "Out" slot.
paddle::experimental::Tensor index_select_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Index, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("index_select dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: index_select";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{Index} };
    auto target_dtype = egr::GetAmpDestDtype("index_select", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "index_select");
    auto casted_Index = egr::AmpAutoCast("Index", Index, target_dtype, "index_select");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return index_select_dygraph_function( casted_X, casted_Index, attr_map );
  }

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Index", egr::EagerUtils::TrySyncToVars(Index) } };
  NameVarMap output_vars = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // Gradient bookkeeping is needed only if some input requires it.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* index_meta = egr::EagerUtils::nullable_autograd_meta(Index);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, x_meta, index_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("index_select", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event("index_select node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for index_select "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<index_selectGradNodeCompat> backward_node(new index_selectGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      backward_node->SetTensorWrapperIndex(Index);
      backward_node->SetTensorWrapperX(X);

      // Only X gets grad-out meta; Index is not registered here.
      backward_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager forward entry for the "row_conv" op.
//
// If the controller's AMP level is not O0, X and Filter are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer and, when gradients are required, a
// row_convGradNodeCompat is attached to the output's autograd meta.
//
// X, Filter - op inputs.
// attr_map  - operator attributes; copied locally and forwarded to the tracer.
// Returns the tensor read from the op's "Out" slot.
paddle::experimental::Tensor row_conv_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Filter, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("row_conv dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: row_conv";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{Filter} };
    auto target_dtype = egr::GetAmpDestDtype("row_conv", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "row_conv");
    auto casted_Filter = egr::AmpAutoCast("Filter", Filter, target_dtype, "row_conv");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return row_conv_dygraph_function( casted_X, casted_Filter, attr_map );
  }

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Filter", egr::EagerUtils::TrySyncToVars(Filter) } };
  NameVarMap output_vars = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // Gradient bookkeeping is needed only if some input requires it.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* filter_meta = egr::EagerUtils::nullable_autograd_meta(Filter);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, x_meta, filter_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("row_conv", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event("row_conv node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for row_conv "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<row_convGradNodeCompat> backward_node(new row_convGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      backward_node->SetTensorWrapperFilter(Filter);
      backward_node->SetTensorWrapperX(X);

      // Grad-out slots: 0 for X, 1 for Filter.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Filter, 1);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager forward entry for the "cross" op.
//
// If the controller's AMP level is not O0, X and Y are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer and, when gradients are required, a
// crossGradNodeCompat is attached to the output's autograd meta.
//
// X, Y     - op inputs.
// attr_map - operator attributes; copied locally and forwarded to the tracer.
// Returns the tensor read from the op's "Out" slot.
paddle::experimental::Tensor cross_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("cross dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: cross";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{Y} };
    auto target_dtype = egr::GetAmpDestDtype("cross", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "cross");
    auto casted_Y = egr::AmpAutoCast("Y", Y, target_dtype, "cross");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return cross_dygraph_function( casted_X, casted_Y, attr_map );
  }

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Y", egr::EagerUtils::TrySyncToVars(Y) } };
  NameVarMap output_vars = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // Gradient bookkeeping is needed only if some input requires it.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, x_meta, y_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("cross", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event("cross node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for cross "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<crossGradNodeCompat> backward_node(new crossGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      backward_node->SetTensorWrapperX(X);
      backward_node->SetTensorWrapperY(Y);

      // Grad-out slots: 0 for X, 1 for Y.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Y, 1);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager forward entry for the "elementwise_mul" op.
//
// If the controller's AMP level is not O0, X and Y are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer and, when gradients are required, an
// elementwise_mulGradNodeCompat is attached to the output's autograd meta.
//
// X, Y     - op inputs.
// attr_map - operator attributes; copied locally and forwarded to the tracer.
// Returns the tensor read from the op's "Out" slot.
paddle::experimental::Tensor elementwise_mul_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("elementwise_mul dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elementwise_mul";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{Y} };
    auto target_dtype = egr::GetAmpDestDtype("elementwise_mul", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "elementwise_mul");
    auto casted_Y = egr::AmpAutoCast("Y", Y, target_dtype, "elementwise_mul");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return elementwise_mul_dygraph_function( casted_X, casted_Y, attr_map );
  }

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Y", egr::EagerUtils::TrySyncToVars(Y) } };
  NameVarMap output_vars = {
      { "Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}} };

  // Gradient bookkeeping is needed only if some input requires it.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, x_meta, y_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("elementwise_mul", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event("elementwise_mul node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for elementwise_mul "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<elementwise_mulGradNodeCompat> backward_node(new elementwise_mulGradNodeCompat(1, 2));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      backward_node->SetTensorWrapperX(X);
      backward_node->SetTensorWrapperY(Y);

      // Grad-out slots: 0 for X, 1 for Y.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Y, 1);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager forward entry for the "decayed_adagrad" op.
//
// If the controller's AMP level is not O0, the four inputs are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced with the "ParamOut"/"MomentOut" slots reusing the "Param"/"Moment"
// input variables. No autograd node is created by this function.
//
// Param, Grad, Moment, LearningRate - op inputs.
// ParamOutVar, MomentOutVar         - receive the traced outputs; both are
//                                     dereferenced, so they must be non-null.
// attr_map                          - operator attributes; copied locally and
//                                     forwarded to the tracer.
// Returns copies of (*ParamOutVar, *MomentOutVar).
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> decayed_adagrad_dygraph_function(const paddle::experimental::Tensor& Param,const paddle::experimental::Tensor& Grad,const paddle::experimental::Tensor& Moment,const paddle::experimental::Tensor& LearningRate, paddle::experimental::Tensor* ParamOutVar, paddle::experimental::Tensor* MomentOutVar, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("decayed_adagrad dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: decayed_adagrad";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {Param},{Grad},{Moment},{LearningRate} };
    auto target_dtype = egr::GetAmpDestDtype("decayed_adagrad", amp_candidates);

    auto casted_Param = egr::AmpAutoCast("Param", Param, target_dtype, "decayed_adagrad");
    auto casted_Grad = egr::AmpAutoCast("Grad", Grad, target_dtype, "decayed_adagrad");
    auto casted_Moment = egr::AmpAutoCast("Moment", Moment, target_dtype, "decayed_adagrad");
    auto casted_LearningRate = egr::AmpAutoCast("LearningRate", LearningRate, target_dtype, "decayed_adagrad");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return decayed_adagrad_dygraph_function( casted_Param, casted_Grad, casted_Moment, casted_LearningRate, ParamOutVar, MomentOutVar, attr_map );
  }

  NameVarMap input_vars = {
      { "Param", egr::EagerUtils::TrySyncToVars(Param) },
      { "Grad", egr::EagerUtils::TrySyncToVars(Grad) },
      { "Moment", egr::EagerUtils::TrySyncToVars(Moment) },
      { "LearningRate", egr::EagerUtils::TrySyncToVars(LearningRate) } };

  // Output slots share the variables of the matching input slots.
  NameVarMap output_vars = {
      { "ParamOut", input_vars["Param"] },
      { "MomentOut", input_vars["Moment"] } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("decayed_adagrad", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  // Fill the caller-provided tensors, then return copies of them.
  egr::EagerUtils::GetOutput(output_vars["ParamOut"][0], ParamOutVar);
  egr::EagerUtils::GetOutput(output_vars["MomentOut"][0], MomentOutVar);

  return std::make_tuple(*ParamOutVar, *MomentOutVar);
}


// Eager forward entry for the "bipartite_match" op.
//
// If the controller's AMP level is not O0, DistMat is passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer. No autograd node is created here.
//
// DistMat  - op input.
// attr_map - operator attributes; copied locally and forwarded to the tracer.
// Returns (ColToRowMatchIndices, ColToRowMatchDist) read from the op outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> bipartite_match_dygraph_function(const paddle::experimental::Tensor& DistMat, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("bipartite_match dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: bipartite_match";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {DistMat} };
    auto target_dtype = egr::GetAmpDestDtype("bipartite_match", amp_candidates);

    auto casted_DistMat = egr::AmpAutoCast("DistMat", DistMat, target_dtype, "bipartite_match");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return bipartite_match_dygraph_function( casted_DistMat, attr_map );
  }

  // Builds a one-element slot holding a freshly named output variable.
  const auto make_output_slot = [&controller]() {
    return VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  };

  NameVarMap input_vars = {
      { "DistMat", egr::EagerUtils::TrySyncToVars(DistMat) } };
  NameVarMap output_vars = {
      { "ColToRowMatchIndices", make_output_slot() },
      { "ColToRowMatchDist", make_output_slot() } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("bipartite_match", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor match_indices;
  egr::EagerUtils::GetOutput(output_vars["ColToRowMatchIndices"][0], &match_indices);
  paddle::experimental::Tensor match_dist;
  egr::EagerUtils::GetOutput(output_vars["ColToRowMatchDist"][0], &match_dist);

  return std::make_tuple(match_indices, match_dist);
}


// Eager forward entry for the "fake_quantize_moving_average_abs_max" op.
//
// If the controller's AMP level is not O0, X and InScale are passed through
// egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer. No autograd node is created here.
//
// X, InScale - op inputs.
// attr_map   - operator attributes; copied locally and forwarded to the tracer.
// Returns (Out, OutScale) read from the op outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> fake_quantize_moving_average_abs_max_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& InScale, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("fake_quantize_moving_average_abs_max dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fake_quantize_moving_average_abs_max";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{InScale} };
    auto target_dtype = egr::GetAmpDestDtype("fake_quantize_moving_average_abs_max", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "fake_quantize_moving_average_abs_max");
    auto casted_InScale = egr::AmpAutoCast("InScale", InScale, target_dtype, "fake_quantize_moving_average_abs_max");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fake_quantize_moving_average_abs_max_dygraph_function( casted_X, casted_InScale, attr_map );
  }

  // Builds a one-element slot holding a freshly named output variable.
  const auto make_output_slot = [&controller]() {
    return VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  };

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "InScale", egr::EagerUtils::TrySyncToVars(InScale) } };
  NameVarMap output_vars = {
      { "Out", make_output_slot() },
      { "OutScale", make_output_slot() } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("fake_quantize_moving_average_abs_max", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor quantized;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &quantized);
  paddle::experimental::Tensor scale_out;
  egr::EagerUtils::GetOutput(output_vars["OutScale"][0], &scale_out);

  return std::make_tuple(quantized, scale_out);
}


// Eager forward entry for the "mine_hard_examples" op.
//
// If the controller's AMP level is not O0, the three inputs are passed
// through egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer. No autograd node is created here.
//
// ClsLoss, MatchIndices, MatchDist - op inputs.
// attr_map                         - operator attributes; copied locally and
//                                    forwarded to the tracer.
// Returns (NegIndices, UpdatedMatchIndices) read from the op outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> mine_hard_examples_dygraph_function(const paddle::experimental::Tensor& ClsLoss,const paddle::experimental::Tensor& MatchIndices,const paddle::experimental::Tensor& MatchDist, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("mine_hard_examples dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: mine_hard_examples";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {ClsLoss},{MatchIndices},{MatchDist} };
    auto target_dtype = egr::GetAmpDestDtype("mine_hard_examples", amp_candidates);

    auto casted_ClsLoss = egr::AmpAutoCast("ClsLoss", ClsLoss, target_dtype, "mine_hard_examples");
    auto casted_MatchIndices = egr::AmpAutoCast("MatchIndices", MatchIndices, target_dtype, "mine_hard_examples");
    auto casted_MatchDist = egr::AmpAutoCast("MatchDist", MatchDist, target_dtype, "mine_hard_examples");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return mine_hard_examples_dygraph_function( casted_ClsLoss, casted_MatchIndices, casted_MatchDist, attr_map );
  }

  // Builds a one-element slot holding a freshly named output variable.
  const auto make_output_slot = [&controller]() {
    return VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  };

  NameVarMap input_vars = {
      { "ClsLoss", egr::EagerUtils::TrySyncToVars(ClsLoss) },
      { "MatchIndices", egr::EagerUtils::TrySyncToVars(MatchIndices) },
      { "MatchDist", egr::EagerUtils::TrySyncToVars(MatchDist) } };
  NameVarMap output_vars = {
      { "NegIndices", make_output_slot() },
      { "UpdatedMatchIndices", make_output_slot() } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("mine_hard_examples", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor neg_indices;
  egr::EagerUtils::GetOutput(output_vars["NegIndices"][0], &neg_indices);
  paddle::experimental::Tensor updated_match_indices;
  egr::EagerUtils::GetOutput(output_vars["UpdatedMatchIndices"][0], &updated_match_indices);

  return std::make_tuple(neg_indices, updated_match_indices);
}


// Eager forward entry for the "target_assign" op.
//
// If the controller's AMP level is not O0, X and MatchIndices are passed
// through egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer. No autograd node is created here.
//
// X, MatchIndices - op inputs.
// attr_map        - operator attributes; copied locally and forwarded to the
//                   tracer.
// Returns (Out, OutWeight) read from the op outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> target_assign_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& MatchIndices, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("target_assign dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: target_assign";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {X},{MatchIndices} };
    auto target_dtype = egr::GetAmpDestDtype("target_assign", amp_candidates);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "target_assign");
    auto casted_MatchIndices = egr::AmpAutoCast("MatchIndices", MatchIndices, target_dtype, "target_assign");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return target_assign_dygraph_function( casted_X, casted_MatchIndices, attr_map );
  }

  // Builds a one-element slot holding a freshly named output variable.
  const auto make_output_slot = [&controller]() {
    return VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  };

  NameVarMap input_vars = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "MatchIndices", egr::EagerUtils::TrySyncToVars(MatchIndices) } };
  NameVarMap output_vars = {
      { "Out", make_output_slot() },
      { "OutWeight", make_output_slot() } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("target_assign", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor assigned;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &assigned);
  paddle::experimental::Tensor assigned_weight;
  egr::EagerUtils::GetOutput(output_vars["OutWeight"][0], &assigned_weight);

  return std::make_tuple(assigned, assigned_weight);
}


// Eager forward entry for the "lstm" op.
//
// If the controller's AMP level is not O0, Input, Weight and Bias are passed
// through egr::AmpAutoCast and this function calls itself again while an
// AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op is
// traced through the current tracer and, when gradients are required, an
// lstmGradNodeCompat is created. SetHistory and CheckAndRetainGrad are applied
// to Hidden and Cell only; BatchGate and BatchCellPreAct receive just their
// slot rank and grad-in meta.
//
// Input, Weight, Bias - op inputs.
// attr_map            - operator attributes; copied locally and forwarded to
//                       the tracer.
// Returns (Hidden, Cell, BatchGate, BatchCellPreAct) read from the op outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> lstm_dygraph_function(const paddle::experimental::Tensor& Input,const paddle::experimental::Tensor& Weight,const paddle::experimental::Tensor& Bias, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using NameVarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("lstm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: lstm";

  egr::Controller& controller = egr::Controller::Instance();

  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_candidates = { {Input},{Weight},{Bias} };
    auto target_dtype = egr::GetAmpDestDtype("lstm", amp_candidates);

    auto casted_Input = egr::AmpAutoCast("Input", Input, target_dtype, "lstm");
    auto casted_Weight = egr::AmpAutoCast("Weight", Weight, target_dtype, "lstm");
    auto casted_Bias = egr::AmpAutoCast("Bias", Bias, target_dtype, "lstm");

    // The guard stays alive for the duration of the nested call.
    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return lstm_dygraph_function( casted_Input, casted_Weight, casted_Bias, attr_map );
  }

  // Builds a one-element slot holding a freshly named output variable.
  const auto make_output_slot = [&controller]() {
    return VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  };

  NameVarMap input_vars = {
      { "Input", egr::EagerUtils::TrySyncToVars(Input) },
      { "Weight", egr::EagerUtils::TrySyncToVars(Weight) },
      { "Bias", egr::EagerUtils::TrySyncToVars(Bias) } };
  NameVarMap output_vars = {
      { "Hidden", make_output_slot() },
      { "Cell", make_output_slot() },
      { "BatchGate", make_output_slot() },
      { "BatchCellPreAct", make_output_slot() } };

  // Gradient bookkeeping is needed only if some input requires it.
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  egr::AutogradMeta* weight_meta = egr::EagerUtils::nullable_autograd_meta(Weight);
  egr::AutogradMeta* bias_meta = egr::EagerUtils::nullable_autograd_meta(Bias);
  bool tracing_backward = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(tracing_backward, input_meta, weight_meta, bias_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("lstm", input_vars, output_vars, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor Hidden;
  egr::EagerUtils::GetOutput(output_vars["Hidden"][0], &Hidden);
  paddle::experimental::Tensor Cell;
  egr::EagerUtils::GetOutput(output_vars["Cell"][0], &Cell);
  paddle::experimental::Tensor BatchGate;
  egr::EagerUtils::GetOutput(output_vars["BatchGate"][0], &BatchGate);
  paddle::experimental::Tensor BatchCellPreAct;
  egr::EagerUtils::GetOutput(output_vars["BatchCellPreAct"][0], &BatchCellPreAct);

  {
    paddle::platform::RecordEvent node_creation_record_event("lstm node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* hidden_meta = egr::EagerUtils::autograd_meta(&Hidden);
    egr::AutogradMeta* cell_meta = egr::EagerUtils::autograd_meta(&Cell);
    egr::AutogradMeta* batch_gate_meta = egr::EagerUtils::autograd_meta(&BatchGate);
    egr::AutogradMeta* batch_cell_pre_act_meta = egr::EagerUtils::autograd_meta(&BatchCellPreAct);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for lstm "; 
      egr::EagerUtils::PassStopGradient(false, hidden_meta, cell_meta, batch_gate_meta, batch_cell_pre_act_meta);

      std::shared_ptr<lstmGradNodeCompat> backward_node(new lstmGradNodeCompat(4, 3));

      // The attribute maps are no longer needed here, so hand them to the node.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors captured by the backward node.
      backward_node->SetTensorWrapperBatchCellPreAct(BatchCellPreAct);
      backward_node->SetTensorWrapperBatchGate(BatchGate);
      backward_node->SetTensorWrapperBias(Bias);
      backward_node->SetTensorWrapperCell(Cell);
      backward_node->SetTensorWrapperHidden(Hidden);
      backward_node->SetTensorWrapperInput(Input);
      backward_node->SetTensorWrapperWeight(Weight);

      // Grad-out slots: 0 for Input, 1 for Weight, 2 for Bias.
      backward_node->SetGradOutMeta(Input, 0);
      backward_node->SetGradOutMeta(Weight, 1);
      backward_node->SetGradOutMeta(Bias, 2);

      // Slot 0: Hidden.
      egr::EagerUtils::SetOutRankWithSlot(hidden_meta, 0);
      egr::EagerUtils::SetHistory(hidden_meta, backward_node);
      backward_node->SetGradInMeta(Hidden, 0);
      egr::EagerUtils::CheckAndRetainGrad(Hidden);

      // Slot 1: Cell.
      egr::EagerUtils::SetOutRankWithSlot(cell_meta, 1);
      egr::EagerUtils::SetHistory(cell_meta, backward_node);
      backward_node->SetGradInMeta(Cell, 1);
      egr::EagerUtils::CheckAndRetainGrad(Cell);

      // Slots 2 and 3: rank and grad-in meta only, no history attached.
      egr::EagerUtils::SetOutRankWithSlot(batch_gate_meta, 2);
      backward_node->SetGradInMeta(BatchGate, 2);
      egr::EagerUtils::SetOutRankWithSlot(batch_cell_pre_act_meta, 3);
      backward_node->SetGradInMeta(BatchCellPreAct, 3);
    }
  }

  return std::make_tuple(Hidden,Cell,BatchGate,BatchCellPreAct);
}


// Eager-mode (dygraph) forward entry point for the "assign_pos" operator.
//
// X, cum_count and eff_num_len are handed to the current tracer under the
// input slots "X", "cum_count" and "eff_num_len"; the tensor produced in the
// output slot "Out" is returned. attr_map is copied and passed to the tracer
// as the operator attributes. This function does not build a grad node.
//
// When the controller reports an AMP level other than O0, the inputs are run
// through AmpAutoCast and the function calls itself once while an
// AutoCastGuard constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor assign_pos_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& cum_count,
    const paddle::experimental::Tensor& eff_num_len,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("assign_pos dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: assign_pos";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard active.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{cum_count},{eff_num_len} };
    auto cast_dtype = egr::GetAmpDestDtype("assign_pos", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "assign_pos");
    auto casted_cum_count = egr::AmpAutoCast("cum_count", cum_count, cast_dtype, "assign_pos");
    auto casted_eff_num_len = egr::AmpAutoCast("eff_num_len", eff_num_len, cast_dtype, "assign_pos");

    {
      // The guard must stay alive for the whole nested call.
      paddle::imperative::AutoCastGuard amp_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return assign_pos_dygraph_function(casted_X, casted_cum_count, casted_eff_num_len, attr_map);
    }
  }

  // Dygraph Forward Pass
  VarMap input_vars = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
      {"cum_count", egr::EagerUtils::TrySyncToVars(cum_count)},
      {"eff_num_len", egr::EagerUtils::TrySyncToVars(eff_num_len)}};

  // Each output slot gets one fresh variable named by the controller.
  auto new_output_var = [&controller]() {
    return std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName());
  };
  VarMap output_vars = {{"Out", {new_output_var()}}};

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "assign_pos", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  return Out;
}


// Eager-mode (dygraph) forward entry point for the
// "truncated_gaussian_random" operator.
//
// The operator takes no tensor inputs: an empty input map is traced together
// with a copy of attr_map, and the tensor produced in the output slot "Out"
// is returned. This function does not build a grad node.
//
// When the controller reports an AMP level other than O0, the function calls
// itself once while an AutoCastGuard constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor truncated_gaussian_random_dygraph_function(
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("truncated_gaussian_random dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: truncated_gaussian_random";

  auto& controller = egr::Controller::Instance();

  // AMP path: there is nothing to cast, only the guarded re-entry.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    {
      // The guard must stay alive for the whole nested call.
      paddle::imperative::AutoCastGuard amp_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return truncated_gaussian_random_dygraph_function(attr_map);
    }
  }

  // Dygraph Forward Pass
  VarMap input_vars = {};

  // The single output slot gets one fresh variable named by the controller.
  VarMap output_vars = {
      {"Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}}};

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "truncated_gaussian_random", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  return Out;
}


// Eager-mode (dygraph) forward entry point for the "match_matrix_tensor"
// operator.
//
// X, Y and W are handed to the current tracer under the input slots "X", "Y"
// and "W"; the tensors produced in the output slots "Out" and "Tmp" are
// returned as a tuple (Out, Tmp). attr_map is copied and passed to the tracer
// as the operator attributes.
//
// If ComputeRequireGrad reports that a gradient is needed, a
// match_matrix_tensorGradNodeCompat is created, given the attribute maps and
// the Tmp/W/X/Y tensors, and registered as the history of both outputs.
//
// When the controller reports an AMP level other than O0, the inputs are run
// through AmpAutoCast and the function calls itself once while an
// AutoCastGuard constructed with AmpLevel::O0 is alive.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> match_matrix_tensor_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::experimental::Tensor& W,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("match_matrix_tensor dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: match_matrix_tensor";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard active.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y},{W} };
    auto cast_dtype = egr::GetAmpDestDtype("match_matrix_tensor", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "match_matrix_tensor");
    auto casted_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "match_matrix_tensor");
    auto casted_W = egr::AmpAutoCast("W", W, cast_dtype, "match_matrix_tensor");

    {
      // The guard must stay alive for the whole nested call.
      paddle::imperative::AutoCastGuard amp_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return match_matrix_tensor_dygraph_function(casted_X, casted_Y, casted_W, attr_map);
    }
  }

  // Dygraph Forward Pass
  VarMap input_vars = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
      {"Y", egr::EagerUtils::TrySyncToVars(Y)},
      {"W", egr::EagerUtils::TrySyncToVars(W)}};

  // Each output slot gets one fresh variable named by the controller; the
  // braced list is evaluated left to right, so "Out" is named before "Tmp".
  auto new_output_var = [&controller]() {
    return std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName());
  };
  VarMap output_vars = {{"Out", {new_output_var()}}, {"Tmp", {new_output_var()}}};

  // Prepare Autograd Meta
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);
  egr::AutogradMeta* w_meta = egr::EagerUtils::nullable_autograd_meta(W);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta, y_meta, w_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "match_matrix_tensor", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  paddle::experimental::Tensor Tmp;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);
  egr::EagerUtils::GetOutput(output_vars["Tmp"][0], &Tmp);

  {
    paddle::platform::RecordEvent node_creation_record_event("match_matrix_tensor node_creation", paddle::platform::TracerEventType::Operator, 1);

    // Output metas are fetched unconditionally, exactly as before, even when
    // no grad node ends up being built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* tmp_meta = egr::EagerUtils::autograd_meta(&Tmp);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for match_matrix_tensor "; 
      egr::EagerUtils::PassStopGradient(false, out_meta, tmp_meta);

      // Create GradOpNode: 2 and 3 match the two forward outputs and the
      // three forward inputs wired up below.
      std::shared_ptr<match_matrix_tensorGradNodeCompat> backward_node(new match_matrix_tensorGradNodeCompat(2, 3));

      // Set Attributes (the local maps are not used again after this point).
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Set Tensor Wrappers
      backward_node->SetTensorWrapperTmp(Tmp);
      backward_node->SetTensorWrapperW(W);
      backward_node->SetTensorWrapperX(X);
      backward_node->SetTensorWrapperY(Y);

      // Forward inputs: grad-out slots 0..2.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Y, 1);
      backward_node->SetGradOutMeta(W, 2);

      // Forward output "Out": slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Forward output "Tmp": slot 1.
      egr::EagerUtils::SetOutRankWithSlot(tmp_meta, 1);
      egr::EagerUtils::SetHistory(tmp_meta, backward_node);
      backward_node->SetGradInMeta(Tmp, 1);
      egr::EagerUtils::CheckAndRetainGrad(Tmp);
    }
  }

  return std::make_tuple(Out,Tmp);
}


// Eager-mode (dygraph) forward entry point for the "elementwise_div"
// operator.
//
// X and Y are handed to the current tracer under the input slots "X" and "Y";
// the tensor produced in the output slot "Out" is returned. attr_map is
// copied and passed to the tracer as the operator attributes.
//
// If ComputeRequireGrad reports that a gradient is needed, an
// elementwise_divGradNodeCompat is created, given the attribute maps and the
// Out/X/Y tensors, and registered as the history of Out.
//
// When the controller reports an AMP level other than O0, the inputs are run
// through AmpAutoCast and the function calls itself once while an
// AutoCastGuard constructed with AmpLevel::O0 is alive.
paddle::experimental::Tensor elementwise_div_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::framework::AttributeMap& attr_map) {
  using VarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("elementwise_div dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elementwise_div";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard active.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("elementwise_div", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, cast_dtype, "elementwise_div");
    auto casted_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "elementwise_div");

    {
      // The guard must stay alive for the whole nested call.
      paddle::imperative::AutoCastGuard amp_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return elementwise_div_dygraph_function(casted_X, casted_Y, attr_map);
    }
  }

  // Dygraph Forward Pass
  VarMap input_vars = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
      {"Y", egr::EagerUtils::TrySyncToVars(Y)}};

  // The single output slot gets one fresh variable named by the controller.
  VarMap output_vars = {
      {"Out", {std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())}}};

  // Prepare Autograd Meta
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* y_meta = egr::EagerUtils::nullable_autograd_meta(Y);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta, y_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "elementwise_div", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event("elementwise_div node_creation", paddle::platform::TracerEventType::Operator, 1);

    // The output meta is fetched unconditionally, exactly as before, even
    // when no grad node ends up being built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&Out);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for elementwise_div "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Create GradOpNode: 1 and 2 match the single forward output and the
      // two forward inputs wired up below.
      std::shared_ptr<elementwise_divGradNodeCompat> backward_node(new elementwise_divGradNodeCompat(1, 2));

      // Set Attributes (the local maps are not used again after this point).
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Set Tensor Wrappers
      backward_node->SetTensorWrapperOut(Out);
      backward_node->SetTensorWrapperX(X);
      backward_node->SetTensorWrapperY(Y);

      // Forward inputs: grad-out slots 0..1.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Y, 1);

      // Forward output "Out": slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, backward_node);
      backward_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}



