#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/amp_auto_cast.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#pragma GCC diagnostic ignored "-Wunused-variable"

// Eager-mode forward entry point for the "pad3d" operator.
//
// X        - input tensor, handed to the tracer under the slot name "X".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. If
// ComputeRequireGrad reports that a gradient is required, a
// pad3dGradNodeCompat is built and installed as the history of the output.
paddle::experimental::Tensor pad3d_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("pad3d dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pad3d";

  // AMP path: choose a destination dtype, cast the input, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("pad3d", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "pad3d");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return pad3d_dygraph_function(casted_X, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from the input's autograd meta
  // before the operator is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "pad3d", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  {
    paddle::platform::RecordEvent node_creation_record_event("pad3d node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for the output whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for pad3d ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<pad3dGradNodeCompat> grad_node(new pad3dGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrapper for the input.
      grad_node->SetTensorWrapperX(X);

      // Connect the node to the input (slot 0) and to the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry point for the "norm" operator.
//
// X        - input tensor, handed to the tracer under the slot name "X".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns (Norm, Out): the tensors read back from the "Norm" and "Out"
// output slots, in that order. If ComputeRequireGrad reports that a gradient
// is required, a normGradNodeCompat is built; it is installed as history on
// the "Out" tensor only.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> norm_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("norm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: norm";

  // AMP path: choose a destination dtype, cast the input, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("norm", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "norm");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return norm_dygraph_function(casted_X, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Unique names are generated for "Norm" first, then for "Out".
  VarMap op_outputs;
  op_outputs.emplace("Norm", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from the input's autograd meta
  // before the operator is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "norm", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor norm_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Norm"].front(), &norm_tensor);
  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event("norm node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for both outputs whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* norm_meta = egr::EagerUtils::autograd_meta(&norm_tensor);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for norm ";
      egr::EagerUtils::PassStopGradient(false, norm_meta, out_meta);

      // Backward node constructed with arguments (2, 1).
      std::shared_ptr<normGradNodeCompat> grad_node(new normGradNodeCompat(2, 1));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrappers for the "Norm" output and the input.
      grad_node->SetTensorWrapperNorm(norm_tensor);
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);

      // "Norm" occupies slot 0. No SetHistory / CheckAndRetainGrad call is
      // made for it, unlike "Out" below.
      egr::EagerUtils::SetOutRankWithSlot(norm_meta, 0);
      grad_node->SetGradInMeta(norm_tensor, 0);

      // "Out" occupies slot 1 and receives the grad node as its history.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 1);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 1);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return std::make_tuple(norm_tensor, out_tensor);
}


// Eager-mode forward entry point for the "viterbi_decode" operator.
//
// Input, Transition, Length - input tensors, handed to the tracer under the
//                             slot names "Input", "Transition" and "Length".
// attr_map                  - operator attributes; a private copy is passed
//                             to TraceOp.
//
// Returns (Scores, Path): the tensors read back from the "Scores" and "Path"
// output slots, in that order. This function builds no backward node.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> viterbi_decode_dygraph_function(const paddle::experimental::Tensor& Input,const paddle::experimental::Tensor& Transition,const paddle::experimental::Tensor& Length, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("viterbi_decode dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: viterbi_decode";

  // AMP path: choose a destination dtype, pass every input through
  // AmpAutoCast, then call this function again while an AutoCastGuard built
  // with AmpLevel::O0 is alive (presumably so the nested call takes the
  // non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input},{Transition},{Length} };
    auto amp_dtype = egr::GetAmpDestDtype("viterbi_decode", amp_inputs);

    auto casted_Input = egr::AmpAutoCast("Input", Input, amp_dtype, "viterbi_decode");
    auto casted_Transition = egr::AmpAutoCast("Transition", Transition, amp_dtype, "viterbi_decode");
    auto casted_Length = egr::AmpAutoCast("Length", Length, amp_dtype, "viterbi_decode");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return viterbi_decode_dygraph_function(casted_Input, casted_Transition, casted_Length, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("Input", egr::EagerUtils::TrySyncToVars(Input));
  op_inputs.emplace("Transition", egr::EagerUtils::TrySyncToVars(Transition));
  op_inputs.emplace("Length", egr::EagerUtils::TrySyncToVars(Length));

  // Unique names are generated for "Scores" first, then for "Path".
  VarMap op_outputs;
  op_outputs.emplace("Scores", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});
  op_outputs.emplace("Path", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "viterbi_decode", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor scores_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Scores"].front(), &scores_tensor);
  paddle::experimental::Tensor path_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Path"].front(), &path_tensor);

  return std::make_tuple(scores_tensor, path_tensor);
}


// Eager-mode forward entry point for the "mish" operator.
//
// X        - input tensor, handed to the tracer under the slot name "X".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. If
// ComputeRequireGrad reports that a gradient is required, a
// mishGradNodeCompat is built and installed as the history of the output.
paddle::experimental::Tensor mish_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("mish dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: mish";

  // AMP path: choose a destination dtype, cast the input, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("mish", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "mish");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return mish_dygraph_function(casted_X, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from the input's autograd meta
  // before the operator is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "mish", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  {
    paddle::platform::RecordEvent node_creation_record_event("mish node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for the output whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for mish ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<mishGradNodeCompat> grad_node(new mishGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrapper for the input.
      grad_node->SetTensorWrapperX(X);

      // Connect the node to the input (slot 0) and to the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry point for the "box_coder" operator.
//
// PriorBox, TargetBox - input tensors, always handed to the tracer under the
//                       slot names "PriorBox" and "TargetBox".
// PriorBoxVar         - optional input; it takes part in AMP casting and is
//                       added to the tracer inputs only when
//                       PriorBoxVar.initialized() is true.
// attr_map            - operator attributes; a private copy is passed to
//                       TraceOp.
//
// Returns the tensor read back from the "OutputBox" output slot. This
// function builds no backward node.
paddle::experimental::Tensor box_coder_dygraph_function(const paddle::experimental::Tensor& PriorBox,const paddle::experimental::Tensor& PriorBoxVar,const paddle::experimental::Tensor& TargetBox, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("box_coder dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: box_coder";

  // AMP path: choose a destination dtype, cast the inputs, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {PriorBox},{TargetBox} };
    if (PriorBoxVar.initialized()) {
      amp_inputs.push_back({ PriorBoxVar });
    }

    auto amp_dtype = egr::GetAmpDestDtype("box_coder", amp_inputs);

    auto casted_PriorBox = egr::AmpAutoCast("PriorBox", PriorBox, amp_dtype, "box_coder");
    auto casted_TargetBox = egr::AmpAutoCast("TargetBox", TargetBox, amp_dtype, "box_coder");
    // An uninitialized PriorBoxVar is forwarded unchanged.
    auto casted_PriorBoxVar = ((PriorBoxVar.initialized()) ? egr::AmpAutoCast("PriorBoxVar", PriorBoxVar, amp_dtype, "box_coder") : PriorBoxVar);

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return box_coder_dygraph_function(casted_PriorBox, casted_PriorBoxVar, casted_TargetBox, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("PriorBox", egr::EagerUtils::TrySyncToVars(PriorBox));
  op_inputs.emplace("TargetBox", egr::EagerUtils::TrySyncToVars(TargetBox));
  if (PriorBoxVar.initialized()) {
    op_inputs["PriorBoxVar"] = egr::EagerUtils::TrySyncToVars(PriorBoxVar);
  }

  VarMap op_outputs;
  op_outputs.emplace("OutputBox", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "box_coder", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor output_box;
  egr::EagerUtils::GetOutput(op_outputs["OutputBox"].front(), &output_box);

  return output_box;
}


// Eager-mode forward entry point for the "flatten" operator (out-of-place
// form: a fresh output variable is created for "Out").
//
// X        - input tensor, handed to the tracer under the slot name "X".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. If
// ComputeRequireGrad reports that a gradient is required, a
// flattenGradNodeCompat is built and installed as the history of the output.
paddle::experimental::Tensor flatten_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("flatten dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: flatten";

  // AMP path: choose a destination dtype, cast the input, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("flatten", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "flatten");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return flatten_dygraph_function(casted_X, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from the input's autograd meta
  // before the operator is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "flatten", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  {
    paddle::platform::RecordEvent node_creation_record_event("flatten node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for the output whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for flatten ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<flattenGradNodeCompat> grad_node(new flattenGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrapper for the input.
      grad_node->SetTensorWrapperX(X);

      // Connect the node to the input (slot 0) and to the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry point for the in-place form of the "flatten"
// operator.
//
// X        - tensor taken by non-const reference; it is used as the operator
//            input and is overwritten with the operator output.
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns X by value after it has been updated.
//
// Differences from flatten_dygraph_function visible in this body:
//   * there is no AMP branch;
//   * the "Out" slot reuses the variables of the "X" slot, and TraceOp is
//     given the inplace map {{"X", "Out"}};
//   * CheckInplace is called before tracing and bump_inplace_version after.
// NOTE(review): the profiler event names and log messages say "flatten", the
// same strings the out-of-place function uses - confirm this is intended.
paddle::experimental::Tensor flatten__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("flatten dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: flatten";
  // Dygraph Forward Pass

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // The output slot holds copies of the input slot's shared_ptrs, so both
  // slots refer to the same EagerVariable objects.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "Out", ins["X"] } };


  // Prepare Autograd Meta 
  egr::AutogradMeta* p_autograd_X = egr::EagerUtils::nullable_autograd_meta(X);

  bool trace_backward = egr::Controller::Instance().HasGrad();

  // Computed from X's autograd meta as it is BEFORE the op overwrites X.
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_X);
  // Check Inplace
  egr::EagerUtils::CheckInplace(X, p_autograd_X, require_any_grad);

  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  // The last argument maps input slot "X" to output slot "Out".
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("flatten", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {{"X", "Out"}});

  // Write the traced output back into the caller's tensor.
  egr::EagerUtils::GetOutput(outs["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_record_event("flatten node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the meta with autograd_meta (the non-nullable accessor) now
    // that X holds the output; from here on X plays the role of "Out".
    p_autograd_X = egr::EagerUtils::autograd_meta(&X);
    if(require_any_grad) {
      VLOG(6) << " Construct Grad for flatten "; 
      egr::EagerUtils::PassStopGradient(false, p_autograd_X);
      // Create GradOpNode
      auto grad_node = std::shared_ptr<flattenGradNodeCompat>(new flattenGradNodeCompat(1, 1));

      // Set Attributes
      // attrs and default_attrs are moved from here and not used afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers
      // X is wrapped after it has been overwritten with the output.
      grad_node->SetTensorWrapperX(X);

      // The same tensor X is registered on both the grad-out and grad-in side.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_X, 0);
      egr::EagerUtils::SetHistory(p_autograd_X, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);

    }
  }

  return X;

}


// Eager-mode forward entry point for the "elementwise_mod" operator.
//
// X, Y     - input tensors, handed to the tracer under the slot names "X"
//            and "Y".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. This function
// builds no backward node.
paddle::experimental::Tensor elementwise_mod_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("elementwise_mod dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elementwise_mod";

  // AMP path: choose a destination dtype, cast both inputs, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto amp_dtype = egr::GetAmpDestDtype("elementwise_mod", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "elementwise_mod");
    auto casted_Y = egr::AmpAutoCast("Y", Y, amp_dtype, "elementwise_mod");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return elementwise_mod_dygraph_function(casted_X, casted_Y, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  op_inputs.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "elementwise_mod", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  return result;
}


// Eager-mode forward entry point for the "margin_cross_entropy" operator.
//
// Logits, Label - input tensors, handed to the tracer under the slot names
//                 "Logits" and "Label".
// attr_map      - operator attributes; a private copy is passed to TraceOp.
//
// Returns (Softmax, Loss): the tensors read back from the "Softmax" and
// "Loss" output slots, in that order. If ComputeRequireGrad reports that a
// gradient is required, a margin_cross_entropyGradNodeCompat is built and
// installed as the history of both outputs.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> margin_cross_entropy_dygraph_function(const paddle::experimental::Tensor& Logits,const paddle::experimental::Tensor& Label, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("margin_cross_entropy dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: margin_cross_entropy";

  // AMP path: choose a destination dtype, pass both inputs through
  // AmpAutoCast, then call this function again while an AutoCastGuard built
  // with AmpLevel::O0 is alive (presumably so the nested call takes the
  // non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Logits},{Label} };
    auto amp_dtype = egr::GetAmpDestDtype("margin_cross_entropy", amp_inputs);

    auto casted_Logits = egr::AmpAutoCast("Logits", Logits, amp_dtype, "margin_cross_entropy");
    auto casted_Label = egr::AmpAutoCast("Label", Label, amp_dtype, "margin_cross_entropy");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return margin_cross_entropy_dygraph_function(casted_Logits, casted_Label, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("Logits", egr::EagerUtils::TrySyncToVars(Logits));
  op_inputs.emplace("Label", egr::EagerUtils::TrySyncToVars(Label));

  // Unique names are generated for "Softmax" first, then for "Loss".
  VarMap op_outputs;
  op_outputs.emplace("Softmax", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});
  op_outputs.emplace("Loss", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from both inputs' autograd meta
  // before the operator is traced.
  egr::AutogradMeta* logits_meta = egr::EagerUtils::nullable_autograd_meta(Logits);
  egr::AutogradMeta* label_meta = egr::EagerUtils::nullable_autograd_meta(Label);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, logits_meta, label_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "margin_cross_entropy", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor softmax_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Softmax"].front(), &softmax_tensor);
  paddle::experimental::Tensor loss_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Loss"].front(), &loss_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event("margin_cross_entropy node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for both outputs whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* softmax_meta = egr::EagerUtils::autograd_meta(&softmax_tensor);
    egr::AutogradMeta* loss_meta = egr::EagerUtils::autograd_meta(&loss_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for margin_cross_entropy ";
      egr::EagerUtils::PassStopGradient(false, softmax_meta, loss_meta);

      // Backward node constructed with arguments (2, 2).
      std::shared_ptr<margin_cross_entropyGradNodeCompat> grad_node(new margin_cross_entropyGradNodeCompat(2, 2));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrappers for both inputs and the "Softmax" output.
      grad_node->SetTensorWrapperLabel(Label);
      grad_node->SetTensorWrapperLogits(Logits);
      grad_node->SetTensorWrapperSoftmax(softmax_tensor);

      // Only Logits is registered via SetGradOutMeta.
      grad_node->SetGradOutMeta(Logits, 0);

      // "Softmax" occupies slot 0.
      egr::EagerUtils::SetOutRankWithSlot(softmax_meta, 0);
      egr::EagerUtils::SetHistory(softmax_meta, grad_node);
      grad_node->SetGradInMeta(softmax_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(softmax_tensor);

      // "Loss" occupies slot 1.
      egr::EagerUtils::SetOutRankWithSlot(loss_meta, 1);
      egr::EagerUtils::SetHistory(loss_meta, grad_node);
      grad_node->SetGradInMeta(loss_tensor, 1);
      egr::EagerUtils::CheckAndRetainGrad(loss_tensor);
    }
  }

  return std::make_tuple(softmax_tensor, loss_tensor);
}


// Eager-mode forward entry point for the "pull_sparse" operator.
//
// Ids, W   - input tensor lists, handed to the tracer under the slot names
//            "Ids" and "W".
// OutNum   - number of output variables created for the "Out" slot
//            (forwarded to CreateVars).
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensors read back from the "Out" output slot. If
// ComputeRequireGrad reports that a gradient is required, a
// pull_sparseGradNodeCompat is built and installed as the history of the
// outputs.
std::vector<paddle::experimental::Tensor> pull_sparse_dygraph_function(const std::vector<paddle::experimental::Tensor>& Ids,const std::vector<paddle::experimental::Tensor>& W, size_t OutNum, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("pull_sparse dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pull_sparse";

  // AMP path: choose a destination dtype, pass both tensor lists through
  // AmpAutoCasts, then call this function again while an AutoCastGuard built
  // with AmpLevel::O0 is alive (presumably so the nested call takes the
  // non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { Ids,W };
    auto amp_dtype = egr::GetAmpDestDtype("pull_sparse", amp_inputs);

    auto casted_Ids = egr::AmpAutoCasts("Ids", Ids, amp_dtype, "pull_sparse");
    auto casted_W = egr::AmpAutoCasts("W", W, amp_dtype, "pull_sparse");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return pull_sparse_dygraph_function(casted_Ids, casted_W, OutNum, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("Ids", egr::EagerUtils::TrySyncToVars(Ids));
  op_inputs.emplace("W", egr::EagerUtils::TrySyncToVars(W));

  VarMap op_outputs;
  op_outputs.emplace("Out", egr::EagerUtils::CreateVars(OutNum));

  // The gradient requirement is computed from the inputs' autograd meta
  // before the operator is traced.
  std::vector<egr::AutogradMeta*> ids_metas = egr::EagerUtils::nullable_autograd_meta(Ids);
  std::vector<egr::AutogradMeta*> w_metas = egr::EagerUtils::nullable_autograd_meta(W);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, &ids_metas, &w_metas);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "pull_sparse", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> results;
  egr::EagerUtils::GetOutputs(op_outputs["Out"], &results);

  {
    paddle::platform::RecordEvent node_creation_record_event("pull_sparse node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for the outputs whether or not a grad node
    // ends up being built.
    std::vector<egr::AutogradMeta*> result_metas = egr::EagerUtils::autograd_meta(&results);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for pull_sparse ";
      egr::EagerUtils::PassStopGradient(false, &result_metas);

      // Backward node constructed with arguments (1, 2).
      std::shared_ptr<pull_sparseGradNodeCompat> grad_node(new pull_sparseGradNodeCompat(1, 2));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrappers for both input lists.
      grad_node->SetTensorWrapperIds(Ids);
      grad_node->SetTensorWrapperW(W);

      // No SetGradOutMeta call is made in this function; only the output
      // side (slot 0) is connected.
      egr::EagerUtils::SetOutRankWithSlot(&result_metas, 0);
      egr::EagerUtils::SetHistory(&result_metas, grad_node);
      grad_node->SetGradInMeta(results, 0);
      egr::EagerUtils::CheckAndRetainGrad(results);
    }
  }

  return results;
}


// Eager-mode forward entry point for the "logical_and" operator.
//
// X, Y     - input tensors, handed to the tracer under the slot names "X"
//            and "Y".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. This function
// builds no backward node.
paddle::experimental::Tensor logical_and_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("logical_and dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: logical_and";

  // AMP path: choose a destination dtype, pass both inputs through
  // AmpAutoCast, then call this function again while an AutoCastGuard built
  // with AmpLevel::O0 is alive (presumably so the nested call takes the
  // non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto amp_dtype = egr::GetAmpDestDtype("logical_and", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "logical_and");
    auto casted_Y = egr::AmpAutoCast("Y", Y, amp_dtype, "logical_and");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return logical_and_dygraph_function(casted_X, casted_Y, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  op_inputs.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "logical_and", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  return result;
}


// Eager-mode forward entry point for the "pow" operator.
//
// X        - input tensor, handed to the tracer under the slot name "X".
// attr_map - operator attributes; a private copy is passed to TraceOp.
//
// Returns the tensor read back from the "Out" output slot. If
// ComputeRequireGrad reports that a gradient is required, a
// powGradNodeCompat is built and installed as the history of the output.
paddle::experimental::Tensor pow_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event("pow dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pow";

  // AMP path: choose a destination dtype, cast the input, then call this
  // function again while an AutoCastGuard built with AmpLevel::O0 is alive
  // (presumably so the nested call takes the non-AMP path below).
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("pow", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "pow");

    paddle::imperative::AutoCastGuard o0_guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return pow_dygraph_function(casted_X, attr_map);
  }

  // Variable maps keyed by operator slot name.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())});

  // The gradient requirement is computed from the input's autograd meta
  // before the operator is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool backward_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(backward_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "pow", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &result);

  {
    paddle::platform::RecordEvent node_creation_record_event("pow node_creation", paddle::platform::TracerEventType::Operator, 1);
    // autograd_meta is requested for the output whether or not a grad node
    // ends up being built.
    egr::AutogradMeta* result_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for pow ";
      egr::EagerUtils::PassStopGradient(false, result_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<powGradNodeCompat> grad_node(new powGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used again.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor wrapper for the input.
      grad_node->SetTensorWrapperX(X);

      // Connect the node to the input (slot 0) and to the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(result_meta, 0);
      egr::EagerUtils::SetHistory(result_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager (dygraph) forward entry for the "dirichlet" operator.
//
// Alpha    : input tensor, traced into the "Alpha" slot.
// attr_map : operator attributes; copied locally and handed to TraceOp.
// Returns the tensor read back from the "Out" slot.
//
// No backward node is built here. When the AMP level is not O0 the input
// is cast first and the function re-enters itself under an O0 guard.
paddle::experimental::Tensor dirichlet_dygraph_function(const paddle::experimental::Tensor& Alpha, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "dirichlet dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: dirichlet";

  // AMP path: cast the input, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Alpha} };
    auto target_dtype = egr::GetAmpDestDtype("dirichlet", amp_inputs);
    auto casted_alpha = egr::AmpAutoCast("Alpha", Alpha, target_dtype, "dirichlet");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return dirichlet_dygraph_function(casted_alpha, attr_map);
  }

  // Slot maps handed to the tracer: one input, one freshly named output.
  EagerVarMap op_inputs = {
      {"Alpha", egr::EagerUtils::TrySyncToVars(Alpha)},
  };
  EagerVarMap op_outputs = {
      {"Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},
  };

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "dirichlet", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &out_tensor);
  return out_tensor;
}


// Eager (dygraph) forward entry for the "stanh" operator.
//
// X        : input tensor, traced into the "X" slot.
// attr_map : operator attributes; copied locally and handed to TraceOp.
// Returns the tensor read back from the "Out" slot after tracing.
//
// When the controller's AMP level is not O0, X is cast through
// egr::AmpAutoCast and the function calls itself again while an
// AutoCastGuard pins the level to O0.
paddle::experimental::Tensor stanh_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "stanh dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: stanh";

  // AMP path: cast the input, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("stanh", amp_inputs);
    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "stanh");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return stanh_dygraph_function(casted_x, attr_map);
  }

  // Slot maps handed to the tracer: one input, one freshly named output.
  EagerVarMap op_inputs = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
  };
  EagerVarMap op_outputs = {
      {"Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},
  };

  // Decide up front whether a backward node has to be attached.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "stanh", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "stanh node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as the generated code did.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for stanh ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<stanhGradNodeCompat> grad_node(new stanhGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used afterwards.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The backward node keeps a wrapper around the forward input.
      grad_node->SetTensorWrapperX(X);

      // Wire the node between X (slot 0) and the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward entry for the "label_smooth" operator.
//
// X         : input tensor, traced into the "X" slot.
// PriorDist : optional input; only added to the AMP inputs and to the
//             traced inputs when PriorDist.initialized() is true.
// attr_map  : operator attributes; copied locally and handed to TraceOp.
// Returns the tensor read back from the "Out" slot.
//
// When the AMP level is not O0 the inputs are cast first and the function
// re-enters itself under an AutoCastGuard set to O0.
paddle::experimental::Tensor label_smooth_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& PriorDist, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "label_smooth dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: label_smooth";

  // AMP path: cast the inputs, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (PriorDist.initialized()) {
      amp_inputs.push_back({ PriorDist });
    }

    auto target_dtype = egr::GetAmpDestDtype("label_smooth", amp_inputs);

    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "label_smooth");
    // An uninitialized PriorDist is forwarded untouched.
    auto casted_prior = ((PriorDist.initialized()) ? egr::AmpAutoCast("PriorDist", PriorDist, target_dtype, "label_smooth") : PriorDist);

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return label_smooth_dygraph_function(casted_x, casted_prior, attr_map);
  }

  // Slot maps handed to the tracer; "PriorDist" is present only when supplied.
  EagerVarMap op_inputs = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
  };
  if (PriorDist.initialized()) {
    op_inputs["PriorDist"] = egr::EagerUtils::TrySyncToVars(PriorDist);
  }
  EagerVarMap op_outputs = {
      {"Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},
  };

  // Decide up front whether a backward node has to be attached.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* prior_meta = egr::EagerUtils::nullable_autograd_meta(PriorDist);
  bool grad_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, prior_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "label_smooth", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "label_smooth node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as the generated code did.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for label_smooth ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node constructed with arguments (1, 2).
      std::shared_ptr<label_smoothGradNodeCompat> grad_node(new label_smoothGradNodeCompat(1, 2));

      // The attribute maps are moved into the node; they are not used afterwards.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // No tensor wrappers are stored for this op.
      // Only X (slot 0) is registered as a gradient output.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward entry for the "fold" operator.
//
// X        : input tensor, traced into the "X" slot.
// attr_map : operator attributes; copied locally and handed to TraceOp.
// Returns the tensor read back from the "Y" slot after tracing.
//
// When the controller's AMP level is not O0, X is cast through
// egr::AmpAutoCast and the function calls itself again while an
// AutoCastGuard pins the level to O0.
paddle::experimental::Tensor fold_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "fold dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fold";

  // AMP path: cast the input, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("fold", amp_inputs);
    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "fold");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fold_dygraph_function(casted_x, attr_map);
  }

  // Slot maps handed to the tracer: one input, one freshly named output.
  EagerVarMap op_inputs = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
  };
  EagerVarMap op_outputs = {
      {"Y", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},
  };

  // Decide up front whether a backward node has to be attached.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  bool grad_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fold", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor y_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Y"][0], &y_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "fold node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as the generated code did.
    egr::AutogradMeta* y_meta = egr::EagerUtils::autograd_meta(&y_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for fold ";
      egr::EagerUtils::PassStopGradient(false, y_meta);

      // Backward node constructed with arguments (1, 1).
      std::shared_ptr<foldGradNodeCompat> grad_node(new foldGradNodeCompat(1, 1));

      // The attribute maps are moved into the node; they are not used afterwards.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The backward node keeps a wrapper around the forward input.
      grad_node->SetTensorWrapperX(X);

      // Wire the node between X (slot 0) and the output (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(y_meta, 0);
      egr::EagerUtils::SetHistory(y_meta, grad_node);
      grad_node->SetGradInMeta(y_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(y_tensor);
    }
  }

  return y_tensor;
}


// Eager (dygraph) forward entry for the "merged_momentum" operator.
//
// Param, Grad, Velocity, LearningRate : tensor lists traced into the input
//     slots of the same names.
// MasterParam : optional tensor list; only traced when it is non-empty.
// ParamOutVar, VelocityOutVar, MasterParamOutVar : caller-supplied tensor
//     pointers that receive the op outputs via EagerUtils::GetOutputs.
// attr_map : operator attributes; copied locally and handed to TraceOp.
//
// Returns a tuple (ParamOut, VelocityOut, MasterParamOut) filled by
// EagerUtils::Output2Result from the three *OutVar lists.
//
// No autograd node is created in this function.
std::tuple<std::vector<paddle::experimental::Tensor>,std::vector<paddle::experimental::Tensor>,std::vector<paddle::experimental::Tensor>> merged_momentum_dygraph_function(const std::vector<paddle::experimental::Tensor>& Param,const std::vector<paddle::experimental::Tensor>& Grad,const std::vector<paddle::experimental::Tensor>& Velocity,const std::vector<paddle::experimental::Tensor>& LearningRate,const std::vector<paddle::experimental::Tensor>& MasterParam, std::vector<paddle::experimental::Tensor*>& ParamOutVar, std::vector<paddle::experimental::Tensor*>& VelocityOutVar, std::vector<paddle::experimental::Tensor*>& MasterParamOutVar, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("merged_momentum dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: merged_momentum";
  // Dygraph Forward Pass

  // AMP path: when the level is not O0, cast every input list and re-enter
  // this function under an AutoCastGuard set to O0.
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
  
    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = { Param,Grad,Velocity,LearningRate };
    // MasterParam takes part in dtype selection only when it was supplied.
    if(MasterParam.size() > 0) amp_tensors_vector.push_back(MasterParam);

    auto amp_dst_dtype = egr::GetAmpDestDtype("merged_momentum", amp_tensors_vector);

    auto NEW_Param = egr::AmpAutoCasts("Param", Param, amp_dst_dtype, "merged_momentum");
    auto NEW_Grad = egr::AmpAutoCasts("Grad", Grad, amp_dst_dtype, "merged_momentum");
    auto NEW_Velocity = egr::AmpAutoCasts("Velocity", Velocity, amp_dst_dtype, "merged_momentum");
    auto NEW_LearningRate = egr::AmpAutoCasts("LearningRate", LearningRate, amp_dst_dtype, "merged_momentum");
    // An empty MasterParam list is forwarded as-is without casting.
    auto NEW_MasterParam = ((MasterParam.size() > 0) ? egr::AmpAutoCasts("MasterParam", MasterParam, amp_dst_dtype, "merged_momentum") : MasterParam);

    {
      // The guard lives for the duration of the nested call only.
      paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return merged_momentum_dygraph_function( NEW_Param, NEW_Grad, NEW_Velocity, NEW_LearningRate, NEW_MasterParam, ParamOutVar, VelocityOutVar, MasterParamOutVar, attr_map );
    }

  }

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "Param", egr::EagerUtils::TrySyncToVars(Param) },{ "Grad", egr::EagerUtils::TrySyncToVars(Grad) },{ "Velocity", egr::EagerUtils::TrySyncToVars(Velocity) },{ "LearningRate", egr::EagerUtils::TrySyncToVars(LearningRate) } };
  // The "MasterParam" key exists in ins only when the list is non-empty;
  // the ins.count() check below relies on that.
  if(MasterParam.size() > 0) ins["MasterParam"] = egr::EagerUtils::TrySyncToVars(MasterParam);

  // The output slots reuse the very same EagerVariable objects as the
  // corresponding input slots (no new variables are created here).
  // NOTE(review): presumably this makes the op update Param/Velocity in
  // place — confirm against the merged_momentum kernel.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "ParamOut", ins["Param"] },{ "VelocityOut", ins["Velocity"] } };

  if (ins.count("MasterParam")) outs["MasterParamOut"] = ins["MasterParam"];

  // attrs is a mutable copy because TraceOp receives it as a non-const lvalue.
  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("merged_momentum", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {});

  // For each output: first push the traced variables into the caller's
  // out-pointers, then collect those tensors into the returned vector.
  std::vector<paddle::experimental::Tensor> ParamOut;
  egr::EagerUtils::GetOutputs(outs["ParamOut"], ParamOutVar);
  egr::EagerUtils::Output2Result(ParamOutVar, &ParamOut);
  std::vector<paddle::experimental::Tensor> VelocityOut;
  egr::EagerUtils::GetOutputs(outs["VelocityOut"], VelocityOutVar);
  egr::EagerUtils::Output2Result(VelocityOutVar, &VelocityOut);
  std::vector<paddle::experimental::Tensor> MasterParamOut;
  // GetOutputs is skipped when no MasterParam was traced, but Output2Result
  // still runs on whatever MasterParamOutVar the caller passed in.
  if (outs.count("MasterParamOut"))  egr::EagerUtils::GetOutputs(outs["MasterParamOut"], MasterParamOutVar);
  egr::EagerUtils::Output2Result(MasterParamOutVar, &MasterParamOut);


  return std::make_tuple(ParamOut,VelocityOut,MasterParamOut);

}


// Eager (dygraph) forward entry for the "c_reduce_min" operator.
//
// X        : input tensor, traced into the "X" slot.
// OutVar   : caller-supplied tensor; it is synced into the "Out" slot
//            before tracing and refreshed from that slot afterwards. It is
//            dereferenced for the return value.
// attr_map : operator attributes; copied locally and handed to TraceOp.
// Returns a copy of *OutVar taken after the op has run.
//
// No backward node is built here. When the AMP level is not O0 the input
// is cast first and the function re-enters itself under an O0 guard.
paddle::experimental::Tensor c_reduce_min_dygraph_function(const paddle::experimental::Tensor& X, paddle::experimental::Tensor* OutVar, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "c_reduce_min dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: c_reduce_min";

  // AMP path: cast the input, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("c_reduce_min", amp_inputs);
    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "c_reduce_min");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return c_reduce_min_dygraph_function(casted_x, OutVar, attr_map);
  }

  // The output slot is built from the caller's tensor, not a new variable.
  EagerVarMap op_inputs = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
  };
  EagerVarMap op_outputs = {
      {"Out", egr::EagerUtils::TrySyncToVars(OutVar)},
  };

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "c_reduce_min", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // Write the traced result back into the caller's tensor and return a copy.
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], OutVar);
  return *OutVar;
}


// Eager (dygraph) forward entry for the "ascend_trigger" operator.
//
// FeedList     : tensor list traced into the "FeedList" slot.
// FetchListNum : count passed to EagerUtils::CreateVars to build the
//                "FetchList" output slot.
// attr_map     : operator attributes; copied locally and handed to TraceOp.
// Returns the tensors read back from the "FetchList" slot.
//
// No backward node is built here. When the AMP level is not O0 the inputs
// are cast first and the function re-enters itself under an O0 guard.
std::vector<paddle::experimental::Tensor> ascend_trigger_dygraph_function(const std::vector<paddle::experimental::Tensor>& FeedList, size_t FetchListNum, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "ascend_trigger dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: ascend_trigger";

  // AMP path: cast the inputs, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { FeedList };
    auto target_dtype = egr::GetAmpDestDtype("ascend_trigger", amp_inputs);
    auto casted_feed = egr::AmpAutoCasts("FeedList", FeedList, target_dtype, "ascend_trigger");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return ascend_trigger_dygraph_function(casted_feed, FetchListNum, attr_map);
  }

  // Slot maps handed to the tracer.
  EagerVarMap op_inputs = {
      {"FeedList", egr::EagerUtils::TrySyncToVars(FeedList)},
  };
  EagerVarMap op_outputs = {
      {"FetchList", egr::EagerUtils::CreateVars(FetchListNum)},
  };

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "ascend_trigger", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> fetched;
  egr::EagerUtils::GetOutputs(op_outputs["FetchList"], &fetched);
  return fetched;
}


// Eager (dygraph) forward entry for the "rpn_target_assign" operator.
//
// Anchor, GtBoxes, IsCrowd, ImInfo : input tensors traced into the slots
//     of the same names.
// attr_map : operator attributes; copied locally and handed to TraceOp.
// Returns the tuple (LocationIndex, ScoreIndex, TargetBBox, TargetLabel,
// BBoxInsideWeight), each read back from the output slot of that name.
//
// No backward node is built here. When the AMP level is not O0 the inputs
// are cast first and the function re-enters itself under an O0 guard.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> rpn_target_assign_dygraph_function(const paddle::experimental::Tensor& Anchor,const paddle::experimental::Tensor& GtBoxes,const paddle::experimental::Tensor& IsCrowd,const paddle::experimental::Tensor& ImInfo, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "rpn_target_assign dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: rpn_target_assign";

  // AMP path: cast the inputs, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Anchor},{GtBoxes},{IsCrowd},{ImInfo} };
    auto target_dtype = egr::GetAmpDestDtype("rpn_target_assign", amp_inputs);

    auto casted_anchor = egr::AmpAutoCast("Anchor", Anchor, target_dtype, "rpn_target_assign");
    auto casted_gt_boxes = egr::AmpAutoCast("GtBoxes", GtBoxes, target_dtype, "rpn_target_assign");
    auto casted_is_crowd = egr::AmpAutoCast("IsCrowd", IsCrowd, target_dtype, "rpn_target_assign");
    auto casted_im_info = egr::AmpAutoCast("ImInfo", ImInfo, target_dtype, "rpn_target_assign");

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return rpn_target_assign_dygraph_function(casted_anchor, casted_gt_boxes, casted_is_crowd, casted_im_info, attr_map);
  }

  // Builds a one-element slot holding a freshly named output variable.
  auto fresh_output_slot = []() {
    return EagerVarList{std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())};
  };

  EagerVarMap op_inputs = {
      {"Anchor", egr::EagerUtils::TrySyncToVars(Anchor)},
      {"GtBoxes", egr::EagerUtils::TrySyncToVars(GtBoxes)},
      {"IsCrowd", egr::EagerUtils::TrySyncToVars(IsCrowd)},
      {"ImInfo", egr::EagerUtils::TrySyncToVars(ImInfo)},
  };
  // Entries are listed in the generated order so the unique names are
  // handed out in the same sequence.
  EagerVarMap op_outputs = {
      {"LocationIndex", fresh_output_slot()},
      {"ScoreIndex", fresh_output_slot()},
      {"TargetBBox", fresh_output_slot()},
      {"TargetLabel", fresh_output_slot()},
      {"BBoxInsideWeight", fresh_output_slot()},
  };

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "rpn_target_assign", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // Pull each output tensor out of its slot.
  paddle::experimental::Tensor location_index;
  egr::EagerUtils::GetOutput(op_outputs["LocationIndex"][0], &location_index);
  paddle::experimental::Tensor score_index;
  egr::EagerUtils::GetOutput(op_outputs["ScoreIndex"][0], &score_index);
  paddle::experimental::Tensor target_bbox;
  egr::EagerUtils::GetOutput(op_outputs["TargetBBox"][0], &target_bbox);
  paddle::experimental::Tensor target_label;
  egr::EagerUtils::GetOutput(op_outputs["TargetLabel"][0], &target_label);
  paddle::experimental::Tensor bbox_inside_weight;
  egr::EagerUtils::GetOutput(op_outputs["BBoxInsideWeight"][0], &bbox_inside_weight);

  return std::make_tuple(location_index, score_index, target_bbox, target_label, bbox_inside_weight);
}


// Eager (dygraph) forward entry for the "roi_perspective_transform" operator.
//
// X, ROIs  : input tensors traced into the slots of the same names.
// attr_map : operator attributes; copied locally and handed to TraceOp.
//
// Returns the tuple (Out, Mask, TransformMatrix, Out2InIdx, Out2InWeights),
// each read back from the output slot of that name.
//
// When gradients are required, a roi_perspective_transformGradNodeCompat is
// created and attached to Out, Mask and TransformMatrix.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> roi_perspective_transform_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& ROIs, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("roi_perspective_transform dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: roi_perspective_transform";
  // Dygraph Forward Pass

  // AMP path: when the level is not O0, cast both inputs and re-enter this
  // function under an AutoCastGuard set to O0.
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
  
    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = { {X},{ROIs} };

    auto amp_dst_dtype = egr::GetAmpDestDtype("roi_perspective_transform", amp_tensors_vector);

    auto NEW_X = egr::AmpAutoCast("X", X, amp_dst_dtype, "roi_perspective_transform");
    auto NEW_ROIs = egr::AmpAutoCast("ROIs", ROIs, amp_dst_dtype, "roi_perspective_transform");

    {
      // The guard lives for the duration of the nested call only.
      paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
      return roi_perspective_transform_dygraph_function( NEW_X, NEW_ROIs, attr_map );
    }

  }

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "X", egr::EagerUtils::TrySyncToVars(X) },{ "ROIs", egr::EagerUtils::TrySyncToVars(ROIs) } };

  // One freshly named EagerVariable per output slot.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},{ "Mask", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},{ "TransformMatrix", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},{ "Out2InIdx", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},{ "Out2InWeights", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}} };


  // Prepare Autograd Meta 
  egr::AutogradMeta* p_autograd_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* p_autograd_ROIs = egr::EagerUtils::nullable_autograd_meta(ROIs);

  bool trace_backward = egr::Controller::Instance().HasGrad();

  // Computed before TraceOp runs; drives grad-node creation further down.
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_X, p_autograd_ROIs);

  // attrs is a mutable copy because TraceOp receives it as a non-const lvalue.
  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("roi_perspective_transform", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {});

  // Pull each output tensor out of its slot.
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs["Out"][0], &Out);
  paddle::experimental::Tensor Mask;
  egr::EagerUtils::GetOutput(outs["Mask"][0], &Mask);
  paddle::experimental::Tensor TransformMatrix;
  egr::EagerUtils::GetOutput(outs["TransformMatrix"][0], &TransformMatrix);
  paddle::experimental::Tensor Out2InIdx;
  egr::EagerUtils::GetOutput(outs["Out2InIdx"][0], &Out2InIdx);
  paddle::experimental::Tensor Out2InWeights;
  egr::EagerUtils::GetOutput(outs["Out2InWeights"][0], &Out2InWeights);

  {
    paddle::platform::RecordEvent node_creation_record_event("roi_perspective_transform node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Autograd metas are fetched for all five outputs regardless of
    // require_any_grad.
    egr::AutogradMeta* p_autograd_Out = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* p_autograd_Mask = egr::EagerUtils::autograd_meta(&Mask);
    egr::AutogradMeta* p_autograd_TransformMatrix = egr::EagerUtils::autograd_meta(&TransformMatrix);
    egr::AutogradMeta* p_autograd_Out2InIdx = egr::EagerUtils::autograd_meta(&Out2InIdx);
    egr::AutogradMeta* p_autograd_Out2InWeights = egr::EagerUtils::autograd_meta(&Out2InWeights);
    if(require_any_grad) {
      VLOG(6) << " Construct Grad for roi_perspective_transform "; 
      egr::EagerUtils::PassStopGradient(false, p_autograd_Out, p_autograd_Mask, p_autograd_TransformMatrix, p_autograd_Out2InIdx, p_autograd_Out2InWeights);
      // Create GradOpNode
      // NOTE(review): (5, 2) presumably means 5 backward inputs (one per
      // forward output) and 2 backward outputs (X, ROIs) — confirm against
      // the GradNodeCompat constructor.
      auto grad_node = std::shared_ptr<roi_perspective_transformGradNodeCompat>(new roi_perspective_transformGradNodeCompat(5, 2));

      // Set Attributes
      // attrs and default_attrs are moved from here and not used afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers
      // Two forward outputs and both forward inputs are stored on the node.
      grad_node->SetTensorWrapperOut2InIdx(Out2InIdx);
      grad_node->SetTensorWrapperOut2InWeights(Out2InWeights);
      grad_node->SetTensorWrapperROIs(ROIs);
      grad_node->SetTensorWrapperX(X);

      // Only X is registered as a gradient output (slot 0); nothing is
      // registered for ROIs here.
      grad_node->SetGradOutMeta(X, 0);
      // Out, Mask and TransformMatrix (slots 0-2) get rank, history,
      // grad-in meta and the retain-grad check.
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 0);
      egr::EagerUtils::SetHistory(p_autograd_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Mask, 1);
      egr::EagerUtils::SetHistory(p_autograd_Mask, grad_node);
      grad_node->SetGradInMeta(Mask, 1);
      egr::EagerUtils::CheckAndRetainGrad(Mask);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_TransformMatrix, 2);
      egr::EagerUtils::SetHistory(p_autograd_TransformMatrix, grad_node);
      grad_node->SetGradInMeta(TransformMatrix, 2);
      egr::EagerUtils::CheckAndRetainGrad(TransformMatrix);
      // Out2InIdx and Out2InWeights (slots 3-4) only get rank and grad-in
      // meta: no SetHistory and no CheckAndRetainGrad.
      // NOTE(review): presumably because they are auxiliary outputs used
      // only by the backward pass — confirm with the generator.
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out2InIdx, 3);
      grad_node->SetGradInMeta(Out2InIdx, 3);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out2InWeights, 4);
      grad_node->SetGradInMeta(Out2InWeights, 4);

    }
  }

  return std::make_tuple(Out,Mask,TransformMatrix,Out2InIdx,Out2InWeights);

}


// Eager (dygraph) forward entry for the "expand" operator.
//
// X           : input tensor, traced into the "X" slot.
// ExpandTimes : optional input; only added to the AMP inputs and to the
//               traced inputs when ExpandTimes.initialized() is true.
// attr_map    : operator attributes; copied locally and handed to TraceOp.
// Returns the tensor read back from the "Out" slot.
//
// When the AMP level is not O0 the inputs are cast first and the function
// re-enters itself under an AutoCastGuard set to O0.
paddle::experimental::Tensor expand_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& ExpandTimes, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "expand dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: expand";

  // AMP path: cast the inputs, then re-dispatch with auto-cast switched off.
  const bool amp_active =
      egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0;
  if (amp_active) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (ExpandTimes.initialized()) {
      amp_inputs.push_back({ ExpandTimes });
    }

    auto target_dtype = egr::GetAmpDestDtype("expand", amp_inputs);

    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "expand");
    // An uninitialized ExpandTimes is forwarded untouched.
    auto casted_times = ((ExpandTimes.initialized()) ? egr::AmpAutoCast("ExpandTimes", ExpandTimes, target_dtype, "expand") : ExpandTimes);

    // The guard must stay alive for the whole nested call.
    paddle::imperative::AutoCastGuard o0_guard(
        egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return expand_dygraph_function(casted_x, casted_times, attr_map);
  }

  // Slot maps handed to the tracer; "ExpandTimes" is present only when supplied.
  EagerVarMap op_inputs = {
      {"X", egr::EagerUtils::TrySyncToVars(X)},
  };
  if (ExpandTimes.initialized()) {
    op_inputs["ExpandTimes"] = egr::EagerUtils::TrySyncToVars(ExpandTimes);
  }
  EagerVarMap op_outputs = {
      {"Out", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}},
  };

  // Decide up front whether a backward node has to be attached.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* times_meta = egr::EagerUtils::nullable_autograd_meta(ExpandTimes);
  bool grad_enabled = egr::Controller::Instance().HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, times_meta);

  // Run the operator through the current tracer.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "expand", op_inputs, op_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "expand node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as the generated code did.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for expand ";
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node constructed with arguments (1, 2).
      std::shared_ptr<expandGradNodeCompat> grad_node(new expandGradNodeCompat(1, 2));

      // The attribute maps are moved into the node; they are not used afterwards.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both forward inputs are stored on the node; ExpandTimes is passed
      // regardless of whether it is initialized.
      grad_node->SetTensorWrapperExpandTimes(ExpandTimes);
      grad_node->SetTensorWrapperX(X);

      // Only X (slot 0) is registered as a gradient output.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "prroi_pool" operator.
//
// Flow:
//   1. If the controller's AMP level is not O0, cast the inputs to the dtype
//      picked by egr::GetAmpDestDtype and re-enter this function while an
//      AutoCastGuard constructed with AmpLevel::O0 is alive.
//   2. Otherwise wrap the inputs, trace the op through the current tracer and
//      read the "Out" slot back into a tensor.
//   3. If egr::EagerUtils::ComputeRequireGrad reports true, create a
//      prroi_poolGradNodeCompat and attach it to the output's autograd meta.
//
// X, ROIs       - always forwarded to the op.
// BatchRoINums  - forwarded (and AMP-cast) only when initialized().
// attr_map      - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor prroi_pool_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& ROIs,
    const paddle::experimental::Tensor& BatchRoINums,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "prroi_pool dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: prroi_pool";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{ROIs} };
    if (BatchRoINums.initialized()) {
      amp_inputs.push_back({ BatchRoINums });
    }

    auto cast_dtype = egr::GetAmpDestDtype("prroi_pool", amp_inputs);

    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "prroi_pool");
    auto rois_cast = egr::AmpAutoCast("ROIs", ROIs, cast_dtype, "prroi_pool");
    auto batch_roi_nums_cast =
        BatchRoINums.initialized()
            ? egr::AmpAutoCast("BatchRoINums", BatchRoINums, cast_dtype, "prroi_pool")
            : BatchRoINums;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return prroi_pool_dygraph_function(x_cast, rois_cast, batch_roi_nums_cast, attr_map);
  }

  // Input slots, filled in the same order the op declares them here.
  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  op_inputs.emplace("ROIs", egr::EagerUtils::TrySyncToVars(ROIs));
  if (BatchRoINums.initialized()) {
    op_inputs["BatchRoINums"] = egr::EagerUtils::TrySyncToVars(BatchRoINums);
  }

  // Single freshly named output variable.
  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd metas of the inputs (may be null).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* rois_meta = egr::EagerUtils::nullable_autograd_meta(ROIs);
  egr::AutogradMeta* batch_roi_nums_meta = egr::EagerUtils::nullable_autograd_meta(BatchRoINums);

  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      controller.HasGrad(), x_meta, rois_meta, batch_roi_nums_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("prroi_pool", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "prroi_pool node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for prroi_pool "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<prroi_poolGradNodeCompat> node(new prroi_poolGradNodeCompat(1, 3));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperBatchRoINums(BatchRoINums);
      node->SetTensorWrapperOut(out_tensor);
      node->SetTensorWrapperROIs(ROIs);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(ROIs, 1);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "pool3d" operator.
//
// When the controller's AMP level is not O0, X is cast via egr::AmpAutoCast
// and the function re-enters itself while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. Otherwise the op is traced through the current tracer
// and, if egr::EagerUtils::ComputeRequireGrad reports true, a
// pool3dGradNodeCompat is attached to the output.
//
// X         - the op's only input.
// attr_map  - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor pool3d_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "pool3d dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pool3d";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };

    auto cast_dtype = egr::GetAmpDestDtype("pool3d", amp_inputs);
    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "pool3d");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return pool3d_dygraph_function(x_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the input (may be null).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(controller.HasGrad(), x_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("pool3d", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "pool3d node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for pool3d "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<pool3dGradNodeCompat> node(new pool3dGradNodeCompat(1, 1));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperOut(out_tensor);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "memcpy" operator.
//
// When the controller's AMP level is not O0, X is cast via egr::AmpAutoCast
// and the function re-enters itself while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. Otherwise the op is traced through the current
// tracer. This wrapper builds no grad node.
//
// X         - the op's only input.
// attr_map  - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor memcpy_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "memcpy dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: memcpy";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };

    auto cast_dtype = egr::GetAmpDestDtype("memcpy", amp_inputs);
    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "memcpy");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return memcpy_dygraph_function(x_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("memcpy", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "distribute_fpn_proposals" operator.
//
// When the controller's AMP level is not O0, the inputs are cast via
// egr::AmpAutoCast and the function re-enters itself while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. Otherwise the op is traced through
// the current tracer. This wrapper builds no grad node.
//
// FpnRois               - always forwarded to the op.
// RoisNum               - forwarded (and AMP-cast) only when initialized().
// MultiFpnRoisNum       - count passed to egr::EagerUtils::CreateVars for the
//                         "MultiFpnRois" output slot.
// MultiLevelRoIsNumNum  - count passed to egr::EagerUtils::CreateVars for the
//                         "MultiLevelRoIsNum" output slot.
// attr_map              - op attributes; copied locally before tracing.
// Returns (MultiFpnRois, RestoreIndex, MultiLevelRoIsNum) read back from the
// correspondingly named output slots.
std::tuple<std::vector<paddle::experimental::Tensor>,paddle::experimental::Tensor,std::vector<paddle::experimental::Tensor>> distribute_fpn_proposals_dygraph_function(
    const paddle::experimental::Tensor& FpnRois,
    const paddle::experimental::Tensor& RoisNum,
    size_t MultiFpnRoisNum,
    size_t MultiLevelRoIsNumNum,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "distribute_fpn_proposals dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: distribute_fpn_proposals";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {FpnRois} };
    if (RoisNum.initialized()) {
      amp_inputs.push_back({ RoisNum });
    }

    auto cast_dtype = egr::GetAmpDestDtype("distribute_fpn_proposals", amp_inputs);

    auto fpn_rois_cast = egr::AmpAutoCast("FpnRois", FpnRois, cast_dtype, "distribute_fpn_proposals");
    auto rois_num_cast =
        RoisNum.initialized()
            ? egr::AmpAutoCast("RoisNum", RoisNum, cast_dtype, "distribute_fpn_proposals")
            : RoisNum;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return distribute_fpn_proposals_dygraph_function(
        fpn_rois_cast, rois_num_cast, MultiFpnRoisNum, MultiLevelRoIsNumNum, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("FpnRois", egr::EagerUtils::TrySyncToVars(FpnRois));
  if (RoisNum.initialized()) {
    op_inputs["RoisNum"] = egr::EagerUtils::TrySyncToVars(RoisNum);
  }

  // Output variables are created in the same order as the original
  // initializer list: MultiFpnRois, RestoreIndex, MultiLevelRoIsNum.
  VarMap op_outputs;
  op_outputs.emplace("MultiFpnRois", egr::EagerUtils::CreateVars(MultiFpnRoisNum));
  op_outputs.emplace("RestoreIndex", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  op_outputs.emplace("MultiLevelRoIsNum", egr::EagerUtils::CreateVars(MultiLevelRoIsNumNum));

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("distribute_fpn_proposals", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> multi_fpn_rois;
  egr::EagerUtils::GetOutputs(op_outputs["MultiFpnRois"], &multi_fpn_rois);

  paddle::experimental::Tensor restore_index;
  egr::EagerUtils::GetOutput(op_outputs["RestoreIndex"].front(), &restore_index);

  std::vector<paddle::experimental::Tensor> multi_level_rois_num;
  egr::EagerUtils::GetOutputs(op_outputs["MultiLevelRoIsNum"], &multi_level_rois_num);

  return std::make_tuple(multi_fpn_rois, restore_index, multi_level_rois_num);
}


// Eager (dygraph) forward wrapper for the "frame" operator.
//
// When the controller's AMP level is not O0, X is cast via egr::AmpAutoCast
// and the function re-enters itself while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. Otherwise the op is traced through the current tracer
// and, if egr::EagerUtils::ComputeRequireGrad reports true, a
// frameGradNodeCompat is attached to the output.
//
// X         - the op's only input.
// attr_map  - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor frame_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "frame dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: frame";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };

    auto cast_dtype = egr::GetAmpDestDtype("frame", amp_inputs);
    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "frame");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return frame_dygraph_function(x_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the input (may be null).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(controller.HasGrad(), x_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("frame", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "frame node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for frame "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      std::shared_ptr<frameGradNodeCompat> node(new frameGradNodeCompat(1, 1));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);
    }
  }

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "bincount" operator.
//
// When the controller's AMP level is not O0, the inputs are cast via
// egr::AmpAutoCast and the function re-enters itself while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. Otherwise the op is traced through
// the current tracer. This wrapper builds no grad node.
//
// X         - always forwarded to the op.
// Weights   - forwarded (and AMP-cast) only when initialized().
// attr_map  - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor bincount_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Weights,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "bincount dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: bincount";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (Weights.initialized()) {
      amp_inputs.push_back({ Weights });
    }

    auto cast_dtype = egr::GetAmpDestDtype("bincount", amp_inputs);

    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "bincount");
    auto weights_cast =
        Weights.initialized()
            ? egr::AmpAutoCast("Weights", Weights, cast_dtype, "bincount")
            : Weights;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return bincount_dygraph_function(x_cast, weights_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  if (Weights.initialized()) {
    op_inputs["Weights"] = egr::EagerUtils::TrySyncToVars(Weights);
  }

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("bincount", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "shape" operator.
//
// When the controller's AMP level is not O0, Input is cast via
// egr::AmpAutoCast and the function re-enters itself while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. Otherwise the op is traced through
// the current tracer. This wrapper builds no grad node.
//
// Input     - the op's only input.
// attr_map  - op attributes; copied locally before tracing.
// Returns the tensor taken from the op's "Out" slot.
paddle::experimental::Tensor shape_dygraph_function(
    const paddle::experimental::Tensor& Input,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "shape dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: shape";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input} };

    auto cast_dtype = egr::GetAmpDestDtype("shape", amp_inputs);
    auto input_cast = egr::AmpAutoCast("Input", Input, cast_dtype, "shape");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return shape_dygraph_function(input_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("Input", egr::EagerUtils::TrySyncToVars(Input));

  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("shape", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);

  return out_tensor;
}


// Eager (dygraph) forward wrapper for the "mode" operator.
//
// When the controller's AMP level is not O0, X is cast via egr::AmpAutoCast
// and the function re-enters itself while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. Otherwise the op is traced through the current tracer
// and, if egr::EagerUtils::ComputeRequireGrad reports true, a
// modeGradNodeCompat is attached to both outputs (Out in slot 0, Indices in
// slot 1).
//
// X         - the op's only input.
// attr_map  - op attributes; copied locally before tracing.
// Returns (Out, Indices) read back from the correspondingly named slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> mode_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "mode dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: mode";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };

    auto cast_dtype = egr::GetAmpDestDtype("mode", amp_inputs);
    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "mode");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return mode_dygraph_function(x_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Unique names are generated for "Out" first, then "Indices".
  VarMap op_outputs;
  op_outputs.emplace("Out", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  op_outputs.emplace("Indices", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the input (may be null).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool needs_grad =
      egr::EagerUtils::ComputeRequireGrad(controller.HasGrad(), x_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("mode", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor out_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Out"].front(), &out_tensor);
  paddle::experimental::Tensor indices_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Indices"].front(), &indices_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "mode node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Both metas are fetched unconditionally, exactly as before.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out_tensor);
    egr::AutogradMeta* indices_meta = egr::EagerUtils::autograd_meta(&indices_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for mode "; 
      egr::EagerUtils::PassStopGradient(false, out_meta, indices_meta);

      std::shared_ptr<modeGradNodeCompat> node(new modeGradNodeCompat(2, 1));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperIndices(indices_tensor);
      node->SetTensorWrapperX(X);

      node->SetGradOutMeta(X, 0);

      // Output slot 0: Out.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, node);
      node->SetGradInMeta(out_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(out_tensor);

      // Output slot 1: Indices.
      egr::EagerUtils::SetOutRankWithSlot(indices_meta, 1);
      egr::EagerUtils::SetHistory(indices_meta, node);
      node->SetGradInMeta(indices_tensor, 1);
      egr::EagerUtils::CheckAndRetainGrad(indices_tensor);
    }
  }

  return std::make_tuple(out_tensor, indices_tensor);
}


// Eager (dygraph) forward wrapper for the "group_norm" operator.
//
// When the controller's AMP level is not O0, the inputs are cast via
// egr::AmpAutoCast and the function re-enters itself while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. Otherwise the op is traced through
// the current tracer and, if egr::EagerUtils::ComputeRequireGrad reports true,
// a group_normGradNodeCompat is wired up.
//
// X            - always forwarded to the op.
// Scale, Bias  - forwarded (and AMP-cast) only when initialized().
// MeanVar, VarianceVar
//              - caller-provided tensors used as the "Mean" / "Variance"
//                output slots; they are dereferenced, so must be non-null.
// attr_map     - op attributes; copied locally before tracing.
// Returns (Y, Mean, Variance); Mean and Variance are copies of *MeanVar and
// *VarianceVar after the op's outputs have been written into them.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> group_norm_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Scale,
    const paddle::experimental::Tensor& Bias,
    paddle::experimental::Tensor* MeanVar,
    paddle::experimental::Tensor* VarianceVar,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "group_norm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: group_norm";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (Scale.initialized()) {
      amp_inputs.push_back({ Scale });
    }
    if (Bias.initialized()) {
      amp_inputs.push_back({ Bias });
    }

    auto cast_dtype = egr::GetAmpDestDtype("group_norm", amp_inputs);

    auto x_cast = egr::AmpAutoCast("X", X, cast_dtype, "group_norm");
    auto scale_cast =
        Scale.initialized()
            ? egr::AmpAutoCast("Scale", Scale, cast_dtype, "group_norm")
            : Scale;
    auto bias_cast =
        Bias.initialized()
            ? egr::AmpAutoCast("Bias", Bias, cast_dtype, "group_norm")
            : Bias;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return group_norm_dygraph_function(x_cast, scale_cast, bias_cast, MeanVar, VarianceVar, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  if (Scale.initialized()) {
    op_inputs["Scale"] = egr::EagerUtils::TrySyncToVars(Scale);
  }
  if (Bias.initialized()) {
    op_inputs["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);
  }

  // "Y" gets a freshly named variable; "Mean" and "Variance" are built from
  // the caller's tensors. Creation order matches the original initializer list.
  VarMap op_outputs;
  op_outputs.emplace("Y", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  op_outputs.emplace("Mean", egr::EagerUtils::TrySyncToVars(MeanVar));
  op_outputs.emplace("Variance", egr::EagerUtils::TrySyncToVars(VarianceVar));

  // Autograd metas of the inputs (may be null).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* scale_meta = egr::EagerUtils::nullable_autograd_meta(Scale);
  egr::AutogradMeta* bias_meta = egr::EagerUtils::nullable_autograd_meta(Bias);

  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      controller.HasGrad(), x_meta, scale_meta, bias_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("group_norm", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor y_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Y"].front(), &y_tensor);
  egr::EagerUtils::GetOutput(op_outputs["Mean"].front(), MeanVar);
  paddle::experimental::Tensor& mean_ref = *MeanVar;
  egr::EagerUtils::GetOutput(op_outputs["Variance"].front(), VarianceVar);
  paddle::experimental::Tensor& variance_ref = *VarianceVar;

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "group_norm node_creation", paddle::platform::TracerEventType::Operator, 1);
    // All three metas are fetched unconditionally, exactly as before.
    egr::AutogradMeta* y_meta = egr::EagerUtils::autograd_meta(&y_tensor);
    egr::AutogradMeta* mean_meta = egr::EagerUtils::autograd_meta(&mean_ref);
    egr::AutogradMeta* variance_meta = egr::EagerUtils::autograd_meta(&variance_ref);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for group_norm "; 
      egr::EagerUtils::PassStopGradient(false, y_meta, mean_meta, variance_meta);

      std::shared_ptr<group_normGradNodeCompat> node(new group_normGradNodeCompat(3, 3));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperBias(Bias);
      node->SetTensorWrapperMean(mean_ref);
      node->SetTensorWrapperScale(Scale);
      node->SetTensorWrapperVariance(variance_ref);
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(y_tensor);

      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Scale, 1);
      node->SetGradOutMeta(Bias, 2);

      // Output slot 0: Y (the only output that receives history / retain-grad).
      egr::EagerUtils::SetOutRankWithSlot(y_meta, 0);
      egr::EagerUtils::SetHistory(y_meta, node);
      node->SetGradInMeta(y_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(y_tensor);

      // Output slots 1 and 2: Mean / Variance get rank and grad-in meta only.
      egr::EagerUtils::SetOutRankWithSlot(mean_meta, 1);
      node->SetGradInMeta(mean_ref, 1);
      egr::EagerUtils::SetOutRankWithSlot(variance_meta, 2);
      node->SetGradInMeta(variance_ref, 2);
    }
  }

  return std::make_tuple(y_tensor, mean_ref, variance_ref);
}


// Eager (dygraph) forward wrapper for the "c_softmax_with_cross_entropy"
// operator (non-inplace variant).
//
// When the controller's AMP level is not O0, both inputs are passed through
// egr::AmpAutoCast and the function re-enters itself while an AutoCastGuard
// constructed with AmpLevel::O0 is alive. Otherwise the op is traced through
// the current tracer and, if egr::EagerUtils::ComputeRequireGrad reports true,
// a c_softmax_with_cross_entropyGradNodeCompat is attached to both outputs
// (Softmax in slot 0, Loss in slot 1).
//
// Logits, Label  - always forwarded to the op.
// attr_map       - op attributes; copied locally before tracing.
// Returns (Softmax, Loss) read back from the correspondingly named slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> c_softmax_with_cross_entropy_dygraph_function(
    const paddle::experimental::Tensor& Logits,
    const paddle::experimental::Tensor& Label,
    const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "c_softmax_with_cross_entropy dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: c_softmax_with_cross_entropy";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast, then recurse once under the O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Logits},{Label} };

    auto cast_dtype = egr::GetAmpDestDtype("c_softmax_with_cross_entropy", amp_inputs);

    auto logits_cast = egr::AmpAutoCast("Logits", Logits, cast_dtype, "c_softmax_with_cross_entropy");
    auto label_cast = egr::AmpAutoCast("Label", Label, cast_dtype, "c_softmax_with_cross_entropy");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(),
                                            paddle::imperative::AmpLevel::O0);
    return c_softmax_with_cross_entropy_dygraph_function(logits_cast, label_cast, attr_map);
  }

  VarMap op_inputs;
  op_inputs.emplace("Logits", egr::EagerUtils::TrySyncToVars(Logits));
  op_inputs.emplace("Label", egr::EagerUtils::TrySyncToVars(Label));

  // Unique names are generated for "Softmax" first, then "Loss".
  VarMap op_outputs;
  op_outputs.emplace("Softmax", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  op_outputs.emplace("Loss", VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd metas of the inputs (may be null).
  egr::AutogradMeta* logits_meta = egr::EagerUtils::nullable_autograd_meta(Logits);
  egr::AutogradMeta* label_meta = egr::EagerUtils::nullable_autograd_meta(Label);

  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      controller.HasGrad(), logits_meta, label_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("c_softmax_with_cross_entropy", op_inputs, op_outputs, op_attrs,
                                         controller.GetExpectedPlace(),
                                         &op_default_attrs, true, {});

  paddle::experimental::Tensor softmax_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Softmax"].front(), &softmax_tensor);
  paddle::experimental::Tensor loss_tensor;
  egr::EagerUtils::GetOutput(op_outputs["Loss"].front(), &loss_tensor);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "c_softmax_with_cross_entropy node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Both metas are fetched unconditionally, exactly as before.
    egr::AutogradMeta* softmax_meta = egr::EagerUtils::autograd_meta(&softmax_tensor);
    egr::AutogradMeta* loss_meta = egr::EagerUtils::autograd_meta(&loss_tensor);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for c_softmax_with_cross_entropy "; 
      egr::EagerUtils::PassStopGradient(false, softmax_meta, loss_meta);

      std::shared_ptr<c_softmax_with_cross_entropyGradNodeCompat> node(
          new c_softmax_with_cross_entropyGradNodeCompat(2, 2));

      // The attribute maps are not needed after tracing, so hand them over.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      node->SetTensorWrapperLabel(Label);
      node->SetTensorWrapperSoftmax(softmax_tensor);

      node->SetGradOutMeta(Logits, 0);

      // Output slot 0: Softmax.
      egr::EagerUtils::SetOutRankWithSlot(softmax_meta, 0);
      egr::EagerUtils::SetHistory(softmax_meta, node);
      node->SetGradInMeta(softmax_tensor, 0);
      egr::EagerUtils::CheckAndRetainGrad(softmax_tensor);

      // Output slot 1: Loss.
      egr::EagerUtils::SetOutRankWithSlot(loss_meta, 1);
      egr::EagerUtils::SetHistory(loss_meta, node);
      node->SetGradInMeta(loss_tensor, 1);
      egr::EagerUtils::CheckAndRetainGrad(loss_tensor);
    }
  }

  return std::make_tuple(softmax_tensor, loss_tensor);
}


// Eager-mode forward entry for the in-place variant of `c_softmax_with_cross_entropy`.
//
// The "Softmax" output slot reuses the variables synced from `Logits`, and TraceOp is
// given the inplace map {"Logits" -> "Softmax"}; afterwards the "Softmax" result is
// written back into `Logits` and its inplace version is bumped.
//
// Params:
//   Logits   - input tensor (non-const); overwritten with the "Softmax" output.
//   Label    - input tensor bound to the "Label" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: std::make_tuple(Logits, Loss), built after tracing (and after grad-node wiring, if any).
// Note: this function has no AMP auto-cast branch.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> c_softmax_with_cross_entropy__dygraph_function(paddle::experimental::Tensor& Logits,const paddle::experimental::Tensor& Label, const paddle::framework::AttributeMap& attr_map) {

  // Scoped profiler event covering the whole call.
  paddle::platform::RecordEvent dygraph_entrance_record_event("c_softmax_with_cross_entropy dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: c_softmax_with_cross_entropy";
  // Dygraph Forward Pass

  // Input slots: "Logits" and "Label".
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "Logits", egr::EagerUtils::TrySyncToVars(Logits) },{ "Label", egr::EagerUtils::TrySyncToVars(Label) } };

  // Output slots: "Softmax" shares the variables of ins["Logits"] (in-place); "Loss" gets a
  // freshly named variable.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "Softmax", ins["Logits"] },{ "Loss", {std::make_shared<egr::EagerVariable>(egr::Controller::Instance().GenerateUniqueName())}} };


  // Prepare Autograd Meta (captured before the op runs; may be null per the "nullable" accessor).
  egr::AutogradMeta* p_autograd_Logits = egr::EagerUtils::nullable_autograd_meta(Logits);
  egr::AutogradMeta* p_autograd_Label = egr::EagerUtils::nullable_autograd_meta(Label);

  bool trace_backward = egr::Controller::Instance().HasGrad();

  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_Logits, p_autograd_Label);
  // Check Inplace: must run before TraceOp overwrites Logits. The exact conditions checked
  // live in EagerUtils::CheckInplace and are not visible here.
  egr::EagerUtils::CheckInplace(Logits, p_autograd_Logits, require_any_grad);

  // `attrs` stays a mutable local: it is passed to TraceOp and later moved into the grad node.
  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("c_softmax_with_cross_entropy", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {{"Logits", "Softmax"}});

  // Write the "Softmax" result back into the caller's Logits tensor and mark it as modified in place.
  egr::EagerUtils::GetOutput(outs["Softmax"][0], &Logits);
  Logits.bump_inplace_version();
  VLOG(3) << "Tensor(" << Logits.name() << ") uses Inplace Strategy.";
  paddle::experimental::Tensor Loss;
  egr::EagerUtils::GetOutput(outs["Loss"][0], &Loss);

  {
    // Scoped profiler event covering only the autograd wiring below.
    paddle::platform::RecordEvent node_creation_record_event("c_softmax_with_cross_entropy node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the autograd meta for Logits now that it holds the op output.
    // Both autograd_meta() calls run regardless of require_any_grad.
    p_autograd_Logits = egr::EagerUtils::autograd_meta(&Logits);
    egr::AutogradMeta* p_autograd_Loss = egr::EagerUtils::autograd_meta(&Loss);
    if(require_any_grad) {
      VLOG(6) << " Construct Grad for c_softmax_with_cross_entropy "; 
      egr::EagerUtils::PassStopGradient(false, p_autograd_Logits, p_autograd_Loss);
      // Create GradOpNode
      // NOTE(review): the (2, 2) constructor arguments are presumably the backward in/out slot counts; confirm.
      auto grad_node = std::shared_ptr<c_softmax_with_cross_entropyGradNodeCompat>(new c_softmax_with_cross_entropyGradNodeCompat(2, 2));

      // Set Attributes (both maps are moved from; they must not be read after this point)
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers: the "Softmax" wrapper is fed from Logits, which now holds the Softmax output.
      grad_node->SetTensorWrapperLabel(Label);
      grad_node->SetTensorWrapperSoftmax(Logits);

      // Wire the node: Logits is registered both as grad-out slot 0 and as forward output slot 0.
      grad_node->SetGradOutMeta(Logits, 0);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Logits, 0);
      egr::EagerUtils::SetHistory(p_autograd_Logits, grad_node);
      grad_node->SetGradInMeta(Logits, 0);
      egr::EagerUtils::CheckAndRetainGrad(Logits);
      // Loss is forward output slot 1.
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Loss, 1);
      egr::EagerUtils::SetHistory(p_autograd_Loss, grad_node);
      grad_node->SetGradInMeta(Loss, 1);
      egr::EagerUtils::CheckAndRetainGrad(Loss);

    }
  }

  return std::make_tuple(Logits,Loss);

}


// Eager-mode forward entry for the `sequence_expand_as` operator.
//
// If the controller's AMP level is not O0, X and Y are auto-cast to the dtype chosen by
// GetAmpDestDtype and the function re-enters itself while an AutoCastGuard constructed
// with AmpLevel::O0 is alive. Otherwise the op is traced through the current tracer and,
// when ComputeRequireGrad reports that a gradient is needed, a
// sequence_expand_asGradNodeCompat is attached to the output.
//
// Params:
//   X, Y     - operator inputs bound to the "X" and "Y" slots.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor sequence_expand_as_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("sequence_expand_as dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sequence_expand_as";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("sequence_expand_as", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "sequence_expand_as");
    auto cast_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "sequence_expand_as");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sequence_expand_as_dygraph_function(cast_X, cast_Y, attr_map);
  }

  // Input slots.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  ins.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the inputs (may be null) decides whether a grad node is needed.
  egr::AutogradMeta* meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* meta_Y = egr::EagerUtils::nullable_autograd_meta(Y);
  bool has_grad = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, meta_X, meta_Y);

  // `attrs` stays a mutable local: it is passed to TraceOp and later moved into the grad node.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "sequence_expand_as", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  {
    paddle::platform::RecordEvent node_creation_event("sequence_expand_as node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* meta_Out = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for sequence_expand_as ";
      egr::EagerUtils::PassStopGradient(false, meta_Out);

      std::shared_ptr<sequence_expand_asGradNodeCompat> grad_node(new sequence_expand_asGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they must not be read afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors saved for the backward pass.
      grad_node->SetTensorWrapperX(X);
      grad_node->SetTensorWrapperY(Y);

      // Only X is registered as a grad-out slot here.
      grad_node->SetGradOutMeta(X, 0);

      // Forward output slot 0: Out.
      egr::EagerUtils::SetOutRankWithSlot(meta_Out, 0);
      egr::EagerUtils::SetHistory(meta_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry for the `cos_sim` operator.
//
// If the controller's AMP level is not O0, X and Y are auto-cast and the function
// re-enters itself while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// Otherwise the op is traced and, when ComputeRequireGrad reports that a gradient is
// needed, a cos_simGradNodeCompat is wired to the three outputs.
//
// Params:
//   X, Y     - operator inputs bound to the "X" and "Y" slots.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: std::make_tuple(Out, XNorm, YNorm) read from the same-named output slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> cos_sim_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("cos_sim dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: cos_sim";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("cos_sim", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "cos_sim");
    auto cast_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "cos_sim");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return cos_sim_dygraph_function(cast_X, cast_Y, attr_map);
  }

  // Input slots.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  ins.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  // Output slots; unique names are generated in the order Out, XNorm, YNorm.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  outs.emplace("XNorm", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  outs.emplace("YNorm", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the inputs (may be null) decides whether a grad node is needed.
  egr::AutogradMeta* meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* meta_Y = egr::EagerUtils::nullable_autograd_meta(Y);
  bool has_grad = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, meta_X, meta_Y);

  // `attrs` stays a mutable local: it is passed to TraceOp and later moved into the grad node.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "cos_sim", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);
  paddle::experimental::Tensor XNorm;
  egr::EagerUtils::GetOutput(outs.at("XNorm").front(), &XNorm);
  paddle::experimental::Tensor YNorm;
  egr::EagerUtils::GetOutput(outs.at("YNorm").front(), &YNorm);

  {
    paddle::platform::RecordEvent node_creation_event("cos_sim node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* meta_Out = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* meta_XNorm = egr::EagerUtils::autograd_meta(&XNorm);
    egr::AutogradMeta* meta_YNorm = egr::EagerUtils::autograd_meta(&YNorm);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for cos_sim ";
      egr::EagerUtils::PassStopGradient(false, meta_Out, meta_XNorm, meta_YNorm);

      std::shared_ptr<cos_simGradNodeCompat> grad_node(new cos_simGradNodeCompat(3, 2));

      // Attribute maps are moved into the node; they must not be read afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors saved for the backward pass.
      grad_node->SetTensorWrapperOut(Out);
      grad_node->SetTensorWrapperX(X);
      grad_node->SetTensorWrapperXNorm(XNorm);
      grad_node->SetTensorWrapperY(Y);
      grad_node->SetTensorWrapperYNorm(YNorm);

      // Grad-out slots: X -> 0, Y -> 1.
      grad_node->SetGradOutMeta(X, 0);
      grad_node->SetGradOutMeta(Y, 1);

      // Forward output slot 0: Out (history set and grad retained).
      egr::EagerUtils::SetOutRankWithSlot(meta_Out, 0);
      egr::EagerUtils::SetHistory(meta_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Slots 1 and 2: XNorm / YNorm get a rank and grad-in meta only; no SetHistory here.
      egr::EagerUtils::SetOutRankWithSlot(meta_XNorm, 1);
      grad_node->SetGradInMeta(XNorm, 1);
      egr::EagerUtils::SetOutRankWithSlot(meta_YNorm, 2);
      grad_node->SetGradInMeta(YNorm, 2);
    }
  }

  return std::make_tuple(Out, XNorm, YNorm);
}


// Eager-mode forward entry for the `eigvals` operator.
//
// If the controller's AMP level is not O0, X is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced through the current tracer. No grad node is created in this function.
//
// Params:
//   X        - operator input bound to the "X" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor eigvals_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("eigvals dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: eigvals";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("eigvals", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "eigvals");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return eigvals_dygraph_function(cast_X, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "eigvals", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `save_combine` operator.
//
// If the controller's AMP level is not O0, the tensors in X are auto-cast via
// AmpAutoCasts and the function re-enters itself while an AutoCastGuard constructed with
// AmpLevel::O0 is alive. Otherwise the op is traced with an empty output map.
// No grad node is created in this function.
//
// Params:
//   X        - list of input tensors bound to the "X" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: always nullptr (the op has no output slots).
void* save_combine_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("save_combine dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: save_combine";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    // The whole input list forms a single entry of the AMP tensor vector.
    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { X };
    auto cast_dtype = egr::GetAmpDestDtype("save_combine", amp_inputs);

    auto cast_X = egr::AmpAutoCasts("X", X, cast_dtype, "save_combine");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return save_combine_dygraph_function(cast_X, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // This op declares no outputs.
  EagerVarMap outs;

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "save_combine", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  return nullptr;
}


// Eager-mode forward entry for the `class_center_sample` operator.
//
// If the controller's AMP level is not O0, Label is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced through the current tracer. No grad node is created in this function.
//
// Params:
//   Label    - operator input bound to the "Label" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: std::make_tuple(RemappedLabel, SampledLocalClassCenter) read from the
//          same-named output slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> class_center_sample_dygraph_function(const paddle::experimental::Tensor& Label, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("class_center_sample dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: class_center_sample";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Label} };
    auto cast_dtype = egr::GetAmpDestDtype("class_center_sample", amp_inputs);

    auto cast_Label = egr::AmpAutoCast("Label", Label, cast_dtype, "class_center_sample");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return class_center_sample_dygraph_function(cast_Label, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("Label", egr::EagerUtils::TrySyncToVars(Label));

  // Output slots; unique names are generated in the order RemappedLabel, SampledLocalClassCenter.
  EagerVarMap outs;
  outs.emplace("RemappedLabel", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  outs.emplace("SampledLocalClassCenter", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "class_center_sample", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor RemappedLabel;
  egr::EagerUtils::GetOutput(outs.at("RemappedLabel").front(), &RemappedLabel);
  paddle::experimental::Tensor SampledLocalClassCenter;
  egr::EagerUtils::GetOutput(outs.at("SampledLocalClassCenter").front(), &SampledLocalClassCenter);

  return std::make_tuple(RemappedLabel, SampledLocalClassCenter);
}


// Eager-mode forward entry for the `elementwise_fmin` operator.
//
// If the controller's AMP level is not O0, X and Y are auto-cast and the function
// re-enters itself while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// Otherwise the op is traced and, when ComputeRequireGrad reports that a gradient is
// needed, an elementwise_fminGradNodeCompat is attached to the output.
//
// Params:
//   X, Y     - operator inputs bound to the "X" and "Y" slots.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor elementwise_fmin_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("elementwise_fmin dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elementwise_fmin";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("elementwise_fmin", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "elementwise_fmin");
    auto cast_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "elementwise_fmin");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return elementwise_fmin_dygraph_function(cast_X, cast_Y, attr_map);
  }

  // Input slots.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  ins.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the inputs (may be null) decides whether a grad node is needed.
  egr::AutogradMeta* meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* meta_Y = egr::EagerUtils::nullable_autograd_meta(Y);
  bool has_grad = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, meta_X, meta_Y);

  // `attrs` stays a mutable local: it is passed to TraceOp and later moved into the grad node.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "elementwise_fmin", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  {
    paddle::platform::RecordEvent node_creation_event("elementwise_fmin node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* meta_Out = egr::EagerUtils::autograd_meta(&Out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for elementwise_fmin ";
      egr::EagerUtils::PassStopGradient(false, meta_Out);

      std::shared_ptr<elementwise_fminGradNodeCompat> grad_node(new elementwise_fminGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they must not be read afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors saved for the backward pass.
      grad_node->SetTensorWrapperX(X);
      grad_node->SetTensorWrapperY(Y);

      // Grad-out slots: X -> 0, Y -> 1.
      grad_node->SetGradOutMeta(X, 0);
      grad_node->SetGradOutMeta(Y, 1);

      // Forward output slot 0: Out.
      egr::EagerUtils::SetOutRankWithSlot(meta_Out, 0);
      egr::EagerUtils::SetHistory(meta_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry for the `read_file` operator.
//
// The op takes no tensor inputs. If the controller's AMP level is not O0, the function
// simply re-enters itself while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// Otherwise the op is traced with an empty input map. No grad node is created here.
//
// Params:
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor read_file_dygraph_function( const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("read_file dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: read_file";

  auto& controller = egr::Controller::Instance();

  // AMP path: there is nothing to cast, so only the O0 guard and the nested call remain.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return read_file_dygraph_function(attr_map);
  }

  // This op declares no inputs.
  EagerVarMap ins;

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "read_file", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `isfinite` operator.
//
// If the controller's AMP level is not O0, X is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced through the current tracer. No grad node is created in this function.
//
// Params:
//   X        - operator input bound to the "X" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor isfinite_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("isfinite dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: isfinite";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("isfinite", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "isfinite");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return isfinite_dygraph_function(cast_X, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "isfinite", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `arg_max` operator.
//
// If the controller's AMP level is not O0, X is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced through the current tracer. No grad node is created in this function.
//
// Params:
//   X        - operator input bound to the "X" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor arg_max_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("arg_max dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: arg_max";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("arg_max", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "arg_max");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return arg_max_dygraph_function(cast_X, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "arg_max", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `equal` operator.
//
// If the controller's AMP level is not O0, X and Y are auto-cast and the function
// re-enters itself while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// Otherwise the op is traced through the current tracer. No grad node is created here.
//
// Params:
//   X, Y     - operator inputs bound to the "X" and "Y" slots.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor equal_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("equal dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: equal";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto cast_dtype = egr::GetAmpDestDtype("equal", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "equal");
    auto cast_Y = egr::AmpAutoCast("Y", Y, cast_dtype, "equal");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return equal_dygraph_function(cast_X, cast_Y, attr_map);
  }

  // Input slots.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  ins.emplace("Y", egr::EagerUtils::TrySyncToVars(Y));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "equal", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `fake_dequantize_max_abs` operator.
//
// If the controller's AMP level is not O0, X and Scale are auto-cast and the function
// re-enters itself while an AutoCastGuard constructed with AmpLevel::O0 is alive.
// Otherwise the op is traced through the current tracer. No grad node is created here.
//
// Params:
//   X, Scale - operator inputs bound to the "X" and "Scale" slots.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: the tensor read from the "Out" slot.
paddle::experimental::Tensor fake_dequantize_max_abs_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Scale, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("fake_dequantize_max_abs dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fake_dequantize_max_abs";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Scale} };
    auto cast_dtype = egr::GetAmpDestDtype("fake_dequantize_max_abs", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "fake_dequantize_max_abs");
    auto cast_Scale = egr::AmpAutoCast("Scale", Scale, cast_dtype, "fake_dequantize_max_abs");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fake_dequantize_max_abs_dygraph_function(cast_X, cast_Scale, attr_map);
  }

  // Input slots.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));
  ins.emplace("Scale", egr::EagerUtils::TrySyncToVars(Scale));

  // Output slot with a freshly named variable.
  EagerVarMap outs;
  outs.emplace("Out", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fake_dequantize_max_abs", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs.at("Out").front(), &Out);

  return Out;
}


// Eager-mode forward entry for the `qr` operator.
//
// If the controller's AMP level is not O0, X is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced and, when ComputeRequireGrad reports that a gradient is needed, a
// qrGradNodeCompat is wired to both outputs.
//
// Params:
//   X        - operator input bound to the "X" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: std::make_tuple(Q, R) read from the "Q" and "R" output slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> qr_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("qr dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: qr";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto cast_dtype = egr::GetAmpDestDtype("qr", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, cast_dtype, "qr");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return qr_dygraph_function(cast_X, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("X", egr::EagerUtils::TrySyncToVars(X));

  // Output slots; unique names are generated in the order Q, R.
  EagerVarMap outs;
  outs.emplace("Q", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  outs.emplace("R", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // Autograd meta of the input (may be null) decides whether a grad node is needed.
  egr::AutogradMeta* meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  bool has_grad = controller.HasGrad();
  bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, meta_X);

  // `attrs` stays a mutable local: it is passed to TraceOp and later moved into the grad node.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "qr", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Q;
  egr::EagerUtils::GetOutput(outs.at("Q").front(), &Q);
  paddle::experimental::Tensor R;
  egr::EagerUtils::GetOutput(outs.at("R").front(), &R);

  {
    paddle::platform::RecordEvent node_creation_event("qr node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is built.
    egr::AutogradMeta* meta_Q = egr::EagerUtils::autograd_meta(&Q);
    egr::AutogradMeta* meta_R = egr::EagerUtils::autograd_meta(&R);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for qr ";
      egr::EagerUtils::PassStopGradient(false, meta_Q, meta_R);

      std::shared_ptr<qrGradNodeCompat> grad_node(new qrGradNodeCompat(2, 1));

      // Attribute maps are moved into the node; they must not be read afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors saved for the backward pass.
      grad_node->SetTensorWrapperQ(Q);
      grad_node->SetTensorWrapperR(R);
      grad_node->SetTensorWrapperX(X);

      // Grad-out slot 0: X.
      grad_node->SetGradOutMeta(X, 0);

      // Forward output slot 0: Q.
      egr::EagerUtils::SetOutRankWithSlot(meta_Q, 0);
      egr::EagerUtils::SetHistory(meta_Q, grad_node);
      grad_node->SetGradInMeta(Q, 0);
      egr::EagerUtils::CheckAndRetainGrad(Q);

      // Forward output slot 1: R.
      egr::EagerUtils::SetOutRankWithSlot(meta_R, 1);
      egr::EagerUtils::SetHistory(meta_R, grad_node);
      grad_node->SetGradInMeta(R, 1);
      egr::EagerUtils::CheckAndRetainGrad(R);
    }
  }

  return std::make_tuple(Q, R);
}


// Eager-mode forward entry for the `anchor_generator` operator.
//
// If the controller's AMP level is not O0, Input is auto-cast and the function re-enters
// itself while an AutoCastGuard constructed with AmpLevel::O0 is alive. Otherwise the op
// is traced through the current tracer. No grad node is created in this function.
//
// Params:
//   Input    - operator input bound to the "Input" slot.
//   attr_map - operator attributes; copied locally before being passed to TraceOp.
// Returns: std::make_tuple(Anchors, Variances) read from the same-named output slots.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> anchor_generator_dygraph_function(const paddle::experimental::Tensor& Input, const paddle::framework::AttributeMap& attr_map) {
  using EagerVarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using EagerVarMap = std::map<std::string, EagerVarList>;

  paddle::platform::RecordEvent entrance_event("anchor_generator dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: anchor_generator";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-run this function under an O0 guard.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input} };
    auto cast_dtype = egr::GetAmpDestDtype("anchor_generator", amp_inputs);

    auto cast_Input = egr::AmpAutoCast("Input", Input, cast_dtype, "anchor_generator");

    // Declared last so it is the first local destroyed once the nested call has returned.
    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return anchor_generator_dygraph_function(cast_Input, attr_map);
  }

  // Input slot.
  EagerVarMap ins;
  ins.emplace("Input", egr::EagerUtils::TrySyncToVars(Input));

  // Output slots; unique names are generated in the order Anchors, Variances.
  EagerVarMap outs;
  outs.emplace("Anchors", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});
  outs.emplace("Variances", EagerVarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())});

  // `attrs` must remain a mutable local because it is handed to TraceOp as-is.
  paddle::framework::AttributeMap attrs(attr_map);
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "anchor_generator", ins, outs, attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  paddle::experimental::Tensor Anchors;
  egr::EagerUtils::GetOutput(outs.at("Anchors").front(), &Anchors);
  paddle::experimental::Tensor Variances;
  egr::EagerUtils::GetOutput(outs.at("Variances").front(), &Variances);

  return std::make_tuple(Anchors, Variances);
}


// Eager forward entry point for the "layer_norm" op.
// Scale and Bias are only forwarded to the op when they are initialized.
// Returns (Y, Mean, Variance). When any input requires grad, a
// layer_normGradNodeCompat is created and attached as the history of Y;
// Mean and Variance only get their rank/slot and grad-in meta set.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> layer_norm_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Scale,const paddle::experimental::Tensor& Bias, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "layer_norm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: layer_norm";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the (initialized) inputs to the AMP destination dtype, then
  // re-enter this function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    if (Scale.initialized()) amp_inputs.push_back({ Scale });
    if (Bias.initialized()) amp_inputs.push_back({ Bias });

    auto target_dtype = egr::GetAmpDestDtype("layer_norm", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "layer_norm");
    auto casted_Scale = Scale.initialized()
        ? egr::AmpAutoCast("Scale", Scale, target_dtype, "layer_norm")
        : Scale;
    auto casted_Bias = Bias.initialized()
        ? egr::AmpAutoCast("Bias", Bias, target_dtype, "layer_norm")
        : Bias;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return layer_norm_dygraph_function(casted_X, casted_Scale, casted_Bias, attr_map);
  }

  // One freshly named variable per output slot; names are generated in slot order.
  auto fresh_var = [&controller]() {
    return std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName());
  };

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);
  if (Scale.initialized()) ins["Scale"] = egr::EagerUtils::TrySyncToVars(Scale);
  if (Bias.initialized()) ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);

  VarMap outs;
  outs["Y"] = VarList{fresh_var()};
  outs["Mean"] = VarList{fresh_var()};
  outs["Variance"] = VarList{fresh_var()};

  // Autograd meta of the inputs (nullable lookups).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* scale_meta = egr::EagerUtils::nullable_autograd_meta(Scale);
  egr::AutogradMeta* bias_meta = egr::EagerUtils::nullable_autograd_meta(Bias);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta, scale_meta, bias_meta);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "layer_norm", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor y_out;
  egr::EagerUtils::GetOutput(outs["Y"][0], &y_out);
  Tensor mean_out;
  egr::EagerUtils::GetOutput(outs["Mean"][0], &mean_out);
  Tensor variance_out;
  egr::EagerUtils::GetOutput(outs["Variance"][0], &variance_out);

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "layer_norm node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* y_meta = egr::EagerUtils::autograd_meta(&y_out);
    egr::AutogradMeta* mean_meta = egr::EagerUtils::autograd_meta(&mean_out);
    egr::AutogradMeta* variance_meta = egr::EagerUtils::autograd_meta(&variance_out);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for layer_norm "; 
      egr::EagerUtils::PassStopGradient(false, y_meta, mean_meta, variance_meta);

      // Grad node constructed with (3, 3).
      std::shared_ptr<layer_normGradNodeCompat> grad_node(new layer_normGradNodeCompat(3, 3));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors handed to the node through its tensor wrappers.
      grad_node->SetTensorWrapperBias(Bias);
      grad_node->SetTensorWrapperMean(mean_out);
      grad_node->SetTensorWrapperScale(Scale);
      grad_node->SetTensorWrapperVariance(variance_out);
      grad_node->SetTensorWrapperX(X);

      // Grad-out meta: one slot per forward input.
      grad_node->SetGradOutMeta(X, 0);
      grad_node->SetGradOutMeta(Scale, 1);
      grad_node->SetGradOutMeta(Bias, 2);

      // Slot 0: Y gets rank, history, grad-in meta and the retain-grad check.
      egr::EagerUtils::SetOutRankWithSlot(y_meta, 0);
      egr::EagerUtils::SetHistory(y_meta, grad_node);
      grad_node->SetGradInMeta(y_out, 0);
      egr::EagerUtils::CheckAndRetainGrad(y_out);

      // Slots 1 and 2: Mean / Variance get rank and grad-in meta only.
      egr::EagerUtils::SetOutRankWithSlot(mean_meta, 1);
      grad_node->SetGradInMeta(mean_out, 1);
      egr::EagerUtils::SetOutRankWithSlot(variance_meta, 2);
      grad_node->SetGradInMeta(variance_out, 2);
    }
  }

  return std::make_tuple(y_out, mean_out, variance_out);
}


// Eager forward entry point for the "merge_selected_rows" op.
// Traces the op through the current tracer and returns its "Out" tensor.
// This wrapper does not build a grad node.
paddle::experimental::Tensor merge_selected_rows_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "merge_selected_rows dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: merge_selected_rows";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("merge_selected_rows", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "merge_selected_rows");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return merge_selected_rows_dygraph_function(casted_X, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);

  // Single output slot backed by one freshly named variable.
  VarMap outs;
  outs["Out"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "merge_selected_rows", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(outs["Out"][0], &result);

  return result;
}


// Eager forward entry point for the "acosh" op.
// Traces the op and returns "Out". When X requires grad (and grad tracing is
// on), an acoshGradNodeCompat is created and attached as the history of Out.
paddle::experimental::Tensor acosh_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "acosh dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: acosh";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("acosh", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "acosh");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return acosh_dygraph_function(casted_X, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);

  // Single output slot backed by one freshly named variable.
  VarMap outs;
  outs["Out"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // Autograd meta of the input (nullable lookup).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "acosh", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(outs["Out"][0], &result);

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "acosh node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&result);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for acosh "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Grad node constructed with (1, 1).
      std::shared_ptr<acoshGradNodeCompat> grad_node(new acoshGradNodeCompat(1, 1));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // The node is given the forward input X through its tensor wrapper.
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager forward entry point for the "stft" op (inputs: X, Window).
// Traces the op and returns "Out". When any input requires grad, a
// stftGradNodeCompat is created and attached as the history of Out; only X
// gets a grad-out meta entry here.
paddle::experimental::Tensor stft_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Window, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "stft dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: stft";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast both inputs to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Window} };
    auto target_dtype = egr::GetAmpDestDtype("stft", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "stft");
    auto casted_Window = egr::AmpAutoCast("Window", Window, target_dtype, "stft");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return stft_dygraph_function(casted_X, casted_Window, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);
  ins["Window"] = egr::EagerUtils::TrySyncToVars(Window);

  // Single output slot backed by one freshly named variable.
  VarMap outs;
  outs["Out"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // Autograd meta of the inputs (nullable lookups).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* window_meta = egr::EagerUtils::nullable_autograd_meta(Window);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta, window_meta);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "stft", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(outs["Out"][0], &result);

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "stft node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&result);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for stft "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Grad node constructed with (1, 2).
      std::shared_ptr<stftGradNodeCompat> grad_node(new stftGradNodeCompat(1, 2));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Both forward inputs are handed to the node through tensor wrappers.
      grad_node->SetTensorWrapperWindow(Window);
      grad_node->SetTensorWrapperX(X);

      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager forward entry point for the "less_equal" op (inputs: X, Y).
// Traces the op through the current tracer and returns its "Out" tensor.
// This wrapper does not build a grad node.
paddle::experimental::Tensor less_equal_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "less_equal dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: less_equal";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast both inputs to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto target_dtype = egr::GetAmpDestDtype("less_equal", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "less_equal");
    auto casted_Y = egr::AmpAutoCast("Y", Y, target_dtype, "less_equal");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return less_equal_dygraph_function(casted_X, casted_Y, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);
  ins["Y"] = egr::EagerUtils::TrySyncToVars(Y);

  // Single output slot backed by one freshly named variable.
  VarMap outs;
  outs["Out"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "less_equal", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(outs["Out"][0], &result);

  return result;
}


// Eager forward entry point for the "rnn" op.
// Inputs: Input, PreState (list), WeightList (list) and SequenceLength, which
// is forwarded only when initialized. DropoutStateVar is a caller-owned tensor
// used as the "DropoutState" output slot and written back after tracing;
// StateNum is the number of variables created for the "State" output.
// Returns (DropoutState, Reserve, Out, State). When any input requires grad, an
// rnnGradNodeCompat is created; Reserve gets rank and grad-in meta but no history.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,std::vector<paddle::experimental::Tensor>> rnn_dygraph_function(const paddle::experimental::Tensor& Input,const std::vector<paddle::experimental::Tensor>& PreState,const std::vector<paddle::experimental::Tensor>& WeightList,const paddle::experimental::Tensor& SequenceLength, paddle::experimental::Tensor* DropoutStateVar, size_t StateNum, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "rnn dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: rnn";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input},PreState,WeightList };
    if (SequenceLength.initialized()) amp_inputs.push_back({ SequenceLength });

    auto target_dtype = egr::GetAmpDestDtype("rnn", amp_inputs);

    auto casted_Input = egr::AmpAutoCast("Input", Input, target_dtype, "rnn");
    auto casted_PreState = egr::AmpAutoCasts("PreState", PreState, target_dtype, "rnn");
    auto casted_WeightList = egr::AmpAutoCasts("WeightList", WeightList, target_dtype, "rnn");
    auto casted_SequenceLength = SequenceLength.initialized()
        ? egr::AmpAutoCast("SequenceLength", SequenceLength, target_dtype, "rnn")
        : SequenceLength;

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return rnn_dygraph_function(casted_Input, casted_PreState, casted_WeightList, casted_SequenceLength, DropoutStateVar, StateNum, attr_map);
  }

  // Freshly named variable for an output slot; names are generated in slot order.
  auto fresh_var = [&controller]() {
    return std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName());
  };

  VarMap ins;
  ins["Input"] = egr::EagerUtils::TrySyncToVars(Input);
  ins["PreState"] = egr::EagerUtils::TrySyncToVars(PreState);
  ins["WeightList"] = egr::EagerUtils::TrySyncToVars(WeightList);
  if (SequenceLength.initialized()) ins["SequenceLength"] = egr::EagerUtils::TrySyncToVars(SequenceLength);

  VarMap outs;
  outs["DropoutState"] = egr::EagerUtils::TrySyncToVars(DropoutStateVar);
  outs["Reserve"] = VarList{fresh_var()};
  outs["Out"] = VarList{fresh_var()};
  outs["State"] = egr::EagerUtils::CreateVars(StateNum);

  // Autograd meta of the inputs (nullable lookups).
  egr::AutogradMeta* input_meta = egr::EagerUtils::nullable_autograd_meta(Input);
  std::vector<egr::AutogradMeta*> pre_state_metas = egr::EagerUtils::nullable_autograd_meta(PreState);
  std::vector<egr::AutogradMeta*> weight_list_metas = egr::EagerUtils::nullable_autograd_meta(WeightList);
  egr::AutogradMeta* seq_len_meta = egr::EagerUtils::nullable_autograd_meta(SequenceLength);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, input_meta, &pre_state_metas, &weight_list_metas, seq_len_meta);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "rnn", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  // Write the traced DropoutState back into the caller-provided tensor.
  if (outs.count("DropoutState")) {
    egr::EagerUtils::GetOutput(outs["DropoutState"][0], DropoutStateVar);
  }
  Tensor& dropout_state = *DropoutStateVar;
  Tensor reserve;
  egr::EagerUtils::GetOutput(outs["Reserve"][0], &reserve);
  Tensor out;
  egr::EagerUtils::GetOutput(outs["Out"][0], &out);
  std::vector<Tensor> state;
  egr::EagerUtils::GetOutputs(outs["State"], &state);

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "rnn node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* dropout_state_meta = egr::EagerUtils::autograd_meta(&dropout_state);
    egr::AutogradMeta* reserve_meta = egr::EagerUtils::autograd_meta(&reserve);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&out);
    std::vector<egr::AutogradMeta*> state_metas = egr::EagerUtils::autograd_meta(&state);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for rnn "; 
      egr::EagerUtils::PassStopGradient(false, dropout_state_meta, reserve_meta, out_meta, &state_metas);

      // Grad node constructed with (4, 4).
      std::shared_ptr<rnnGradNodeCompat> grad_node(new rnnGradNodeCompat(4, 4));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Tensors handed to the node through its tensor wrappers.
      grad_node->SetTensorWrapperDropoutState(dropout_state);
      grad_node->SetTensorWrapperInput(Input);
      grad_node->SetTensorWrapperOut(out);
      grad_node->SetTensorWrapperPreState(PreState);
      grad_node->SetTensorWrapperReserve(reserve);
      grad_node->SetTensorWrapperSequenceLength(SequenceLength);
      grad_node->SetTensorWrapperWeightList(WeightList);

      // Grad-out meta for Input, PreState and WeightList.
      grad_node->SetGradOutMeta(Input, 0);
      grad_node->SetGradOutMeta(PreState, 1);
      grad_node->SetGradOutMeta(WeightList, 2);

      // Slot 0: DropoutState.
      egr::EagerUtils::SetOutRankWithSlot(dropout_state_meta, 0);
      egr::EagerUtils::SetHistory(dropout_state_meta, grad_node);
      grad_node->SetGradInMeta(dropout_state, 0);
      egr::EagerUtils::CheckAndRetainGrad(dropout_state);

      // Slot 1: Reserve (rank and grad-in meta only).
      egr::EagerUtils::SetOutRankWithSlot(reserve_meta, 1);
      grad_node->SetGradInMeta(reserve, 1);

      // Slot 2: Out.
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 2);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(out, 2);
      egr::EagerUtils::CheckAndRetainGrad(out);

      // Slot 3: State (list).
      egr::EagerUtils::SetOutRankWithSlot(&state_metas, 3);
      egr::EagerUtils::SetHistory(&state_metas, grad_node);
      grad_node->SetGradInMeta(state, 3);
      egr::EagerUtils::CheckAndRetainGrad(state);
    }
  }

  return std::make_tuple(dropout_state, reserve, out, state);
}


// Eager forward entry point for the "fusion_lstm" op
// (inputs: X, WeightX, WeightH, Bias).
// Traces the op and returns its nine outputs in the order
// (Hidden, Cell, XX, BatchedInput, BatchedHidden, BatchedCell, ReorderedH0,
// ReorderedC0, CheckedCell). This wrapper does not build a grad node.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> fusion_lstm_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& WeightX,const paddle::experimental::Tensor& WeightH,const paddle::experimental::Tensor& Bias, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "fusion_lstm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fusion_lstm";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast all inputs to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{WeightX},{WeightH},{Bias} };
    auto target_dtype = egr::GetAmpDestDtype("fusion_lstm", amp_inputs);

    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "fusion_lstm");
    auto casted_WeightX = egr::AmpAutoCast("WeightX", WeightX, target_dtype, "fusion_lstm");
    auto casted_WeightH = egr::AmpAutoCast("WeightH", WeightH, target_dtype, "fusion_lstm");
    auto casted_Bias = egr::AmpAutoCast("Bias", Bias, target_dtype, "fusion_lstm");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fusion_lstm_dygraph_function(casted_X, casted_WeightX, casted_WeightH, casted_Bias, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);
  ins["WeightX"] = egr::EagerUtils::TrySyncToVars(WeightX);
  ins["WeightH"] = egr::EagerUtils::TrySyncToVars(WeightH);
  ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);

  // One freshly named variable per output slot; names are generated in the
  // order the slots are listed below.
  VarMap outs;
  outs["Hidden"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["Cell"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["XX"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["BatchedInput"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["BatchedHidden"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["BatchedCell"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["ReorderedH0"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["ReorderedC0"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};
  outs["CheckedCell"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "fusion_lstm", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  // Read each output slot's first variable back into a tensor.
  Tensor hidden;
  egr::EagerUtils::GetOutput(outs["Hidden"][0], &hidden);
  Tensor cell;
  egr::EagerUtils::GetOutput(outs["Cell"][0], &cell);
  Tensor xx;
  egr::EagerUtils::GetOutput(outs["XX"][0], &xx);
  Tensor batched_input;
  egr::EagerUtils::GetOutput(outs["BatchedInput"][0], &batched_input);
  Tensor batched_hidden;
  egr::EagerUtils::GetOutput(outs["BatchedHidden"][0], &batched_hidden);
  Tensor batched_cell;
  egr::EagerUtils::GetOutput(outs["BatchedCell"][0], &batched_cell);
  Tensor reordered_h0;
  egr::EagerUtils::GetOutput(outs["ReorderedH0"][0], &reordered_h0);
  Tensor reordered_c0;
  egr::EagerUtils::GetOutput(outs["ReorderedC0"][0], &reordered_c0);
  Tensor checked_cell;
  egr::EagerUtils::GetOutput(outs["CheckedCell"][0], &checked_cell);

  return std::make_tuple(hidden, cell, xx, batched_input, batched_hidden,
                         batched_cell, reordered_h0, reordered_c0, checked_cell);
}


// Eager forward entry point for the "lars_momentum" op.
// All four inputs (Param, Grad, Velocity, LearningRate) are tensor lists.
// The "ParamOut" / "VelocityOut" output slots reuse the variables synced for
// "Param" / "Velocity"; after tracing, results are written through the
// caller-provided ParamOutVar / VelocityOutVar pointers and also returned as
// (ParamOut, VelocityOut). This wrapper does not build a grad node.
std::tuple<std::vector<paddle::experimental::Tensor>,std::vector<paddle::experimental::Tensor>> lars_momentum_dygraph_function(const std::vector<paddle::experimental::Tensor>& Param,const std::vector<paddle::experimental::Tensor>& Grad,const std::vector<paddle::experimental::Tensor>& Velocity,const std::vector<paddle::experimental::Tensor>& LearningRate, std::vector<paddle::experimental::Tensor*>& ParamOutVar, std::vector<paddle::experimental::Tensor*>& VelocityOutVar, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "lars_momentum dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: lars_momentum";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast all input lists to the AMP destination dtype, then re-enter
  // this function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { Param,Grad,Velocity,LearningRate };
    auto target_dtype = egr::GetAmpDestDtype("lars_momentum", amp_inputs);

    auto casted_Param = egr::AmpAutoCasts("Param", Param, target_dtype, "lars_momentum");
    auto casted_Grad = egr::AmpAutoCasts("Grad", Grad, target_dtype, "lars_momentum");
    auto casted_Velocity = egr::AmpAutoCasts("Velocity", Velocity, target_dtype, "lars_momentum");
    auto casted_LearningRate = egr::AmpAutoCasts("LearningRate", LearningRate, target_dtype, "lars_momentum");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return lars_momentum_dygraph_function(casted_Param, casted_Grad, casted_Velocity, casted_LearningRate, ParamOutVar, VelocityOutVar, attr_map);
  }

  VarMap ins;
  ins["Param"] = egr::EagerUtils::TrySyncToVars(Param);
  ins["Grad"] = egr::EagerUtils::TrySyncToVars(Grad);
  ins["Velocity"] = egr::EagerUtils::TrySyncToVars(Velocity);
  ins["LearningRate"] = egr::EagerUtils::TrySyncToVars(LearningRate);

  // Outputs share the input variables of the matching slots.
  VarMap outs;
  outs["ParamOut"] = ins["Param"];
  outs["VelocityOut"] = ins["Velocity"];

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "lars_momentum", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  // Fill the caller's tensors first, then collect them into the returned lists.
  std::vector<Tensor> param_out;
  egr::EagerUtils::GetOutputs(outs["ParamOut"], ParamOutVar);
  egr::EagerUtils::Output2Result(ParamOutVar, &param_out);

  std::vector<Tensor> velocity_out;
  egr::EagerUtils::GetOutputs(outs["VelocityOut"], VelocityOutVar);
  egr::EagerUtils::Output2Result(VelocityOutVar, &velocity_out);

  return std::make_tuple(param_out, velocity_out);
}


// Eager forward entry point for the "hard_sigmoid" op (out-of-place variant).
// Traces the op and returns "Out". When X requires grad (and grad tracing is
// on), a hard_sigmoidGradNodeCompat is created and attached as the history of
// Out; the node is given the output tensor, not X, via its tensor wrapper.
paddle::experimental::Tensor hard_sigmoid_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using Tensor = paddle::experimental::Tensor;
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "hard_sigmoid dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: hard_sigmoid";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input to the AMP destination dtype, then re-enter this
  // function while a guard constructed with AmpLevel::O0 is alive.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("hard_sigmoid", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, target_dtype, "hard_sigmoid");

    paddle::imperative::AutoCastGuard guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return hard_sigmoid_dygraph_function(casted_X, attr_map);
  }

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);

  // Single output slot backed by one freshly named variable.
  VarMap outs;
  outs["Out"] = VarList{std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName())};

  // Autograd meta of the input (nullable lookup).
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "hard_sigmoid", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {});

  Tensor result;
  egr::EagerUtils::GetOutput(outs["Out"][0], &result);

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "hard_sigmoid node_creation", paddle::platform::TracerEventType::Operator, 1);
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&result);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for hard_sigmoid "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Grad node constructed with (1, 1).
      std::shared_ptr<hard_sigmoidGradNodeCompat> grad_node(new hard_sigmoidGradNodeCompat(1, 1));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // The node is given the forward output through its tensor wrapper.
      grad_node->SetTensorWrapperOut(result);

      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager forward entry point for the in-place variant of "hard_sigmoid".
// The "Out" slot reuses the variables synced for X, the op is traced with the
// inplace map {"X" -> "Out"}, and the result is written back into X, whose
// inplace version is then bumped. There is no AMP branch in this variant.
// Returns X (by value) after the update.
paddle::experimental::Tensor hard_sigmoid__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarList = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using VarMap = std::map<std::string, VarList>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "hard_sigmoid dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: hard_sigmoid";

  auto& controller = egr::Controller::Instance();

  VarMap ins;
  ins["X"] = egr::EagerUtils::TrySyncToVars(X);

  // Output shares the input's variables.
  VarMap outs;
  outs["Out"] = ins["X"];

  // Autograd meta of the input (nullable lookup); refreshed after tracing.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);

  bool trace_backward = controller.HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta);

  // In-place check on X happens before the op is traced.
  egr::EagerUtils::CheckInplace(X, x_meta, require_any_grad);

  // TraceOp receives a mutable copy of the caller's attributes.
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "hard_sigmoid", ins, outs, op_attrs,
      controller.GetExpectedPlace(),
      &default_attrs, true, {{"X", "Out"}});

  egr::EagerUtils::GetOutput(outs["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    // Scope bounds the lifetime of the node-creation profiling event.
    paddle::platform::RecordEvent node_creation_record_event(
        "hard_sigmoid node_creation", paddle::platform::TracerEventType::Operator, 1);
    x_meta = egr::EagerUtils::autograd_meta(&X);

    if (require_any_grad) {
      VLOG(6) << " Construct Grad for hard_sigmoid "; 
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Grad node constructed with (1, 1).
      std::shared_ptr<hard_sigmoidGradNodeCompat> grad_node(new hard_sigmoidGradNodeCompat(1, 1));

      // Attributes are moved into the node; they are not used again below.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // X now holds the op's output, so it is what the "Out" wrapper receives.
      grad_node->SetTensorWrapperOut(X);

      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}


// Eager-mode forward entry for the "isnan" operator.
//
//   X        - input tensor bound to the op's "X" slot.
//   attr_map - operator attributes; a local copy is passed to the tracer.
//
// Returns the tensor read back from the op's "Out" slot. This function does
// not consult autograd metadata and does not build a grad node.
paddle::experimental::Tensor isnan_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("isnan dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: isnan";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run the input through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("isnan", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "isnan");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return isnan_dygraph_function(casted_X, attr_map);
  }

  VarSlotMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap op_outputs = { { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("isnan", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);
  return result;
}


// Eager-mode forward entry for the "elementwise_floordiv" operator.
//
//   X, Y     - input tensors bound to the op's "X" and "Y" slots.
//   attr_map - operator attributes; a local copy is passed to the tracer.
//
// Returns the tensor read back from the op's "Out" slot. This function does
// not consult autograd metadata and does not build a grad node.
paddle::experimental::Tensor elementwise_floordiv_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("elementwise_floordiv dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: elementwise_floordiv";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run both inputs through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto amp_dtype = egr::GetAmpDestDtype("elementwise_floordiv", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "elementwise_floordiv");
    auto casted_Y = egr::AmpAutoCast("Y", Y, amp_dtype, "elementwise_floordiv");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return elementwise_floordiv_dygraph_function(casted_X, casted_Y, attr_map);
  }

  VarSlotMap op_inputs = {
    { "X", egr::EagerUtils::TrySyncToVars(X) },
    { "Y", egr::EagerUtils::TrySyncToVars(Y) }
  };
  VarSlotMap op_outputs = { { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("elementwise_floordiv", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);
  return result;
}


// Eager-mode forward entry for the "correlation" operator.
//
//   Input1, Input2 - input tensors bound to the op's "Input1"/"Input2" slots.
//   attr_map       - operator attributes; copied locally, handed to the tracer
//                    and, when a grad node is built, moved into that node.
//
// Returns the tensor read back from the op's "Output" slot. When
// ComputeRequireGrad reports that a gradient is required, a
// correlationGradNodeCompat is created and set as the history of the output.
paddle::experimental::Tensor correlation_dygraph_function(const paddle::experimental::Tensor& Input1,const paddle::experimental::Tensor& Input2, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("correlation dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: correlation";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run both inputs through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Input1},{Input2} };
    auto amp_dtype = egr::GetAmpDestDtype("correlation", amp_inputs);
    auto casted_Input1 = egr::AmpAutoCast("Input1", Input1, amp_dtype, "correlation");
    auto casted_Input2 = egr::AmpAutoCast("Input2", Input2, amp_dtype, "correlation");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return correlation_dygraph_function(casted_Input1, casted_Input2, attr_map);
  }

  VarSlotMap op_inputs = {
    { "Input1", egr::EagerUtils::TrySyncToVars(Input1) },
    { "Input2", egr::EagerUtils::TrySyncToVars(Input2) }
  };
  VarSlotMap op_outputs = { { "Output", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  // Gradient requirement is decided from the inputs before the op is traced.
  egr::AutogradMeta* input1_meta = egr::EagerUtils::nullable_autograd_meta(Input1);
  egr::AutogradMeta* input2_meta = egr::EagerUtils::nullable_autograd_meta(Input2);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, input1_meta, input2_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("correlation", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Output"][0], &result);

  {
    paddle::platform::RecordEvent node_creation_event("correlation node_creation", paddle::platform::TracerEventType::Operator, 1);
    // The output's autograd meta is fetched whether or not grad is needed.
    egr::AutogradMeta* output_meta = egr::EagerUtils::autograd_meta(&result);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for correlation ";
      egr::EagerUtils::PassStopGradient(false, output_meta);

      // NOTE(review): the (1, 2) arguments are presumably the backward
      // input/output slot counts — confirm against the node's constructor.
      auto backward_node = std::make_shared<correlationGradNodeCompat>(1, 2);

      // The local attribute maps are not used after this point, so they are moved.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Both forward inputs are saved on the node.
      backward_node->SetTensorWrapperInput1(Input1);
      backward_node->SetTensorWrapperInput2(Input2);

      backward_node->SetGradOutMeta(Input1, 0);
      backward_node->SetGradOutMeta(Input2, 1);
      egr::EagerUtils::SetOutRankWithSlot(output_meta, 0);
      egr::EagerUtils::SetHistory(output_meta, backward_node);
      backward_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry for the "histogram" operator.
//
//   X        - input tensor bound to the op's "X" slot.
//   attr_map - operator attributes; a local copy is passed to the tracer.
//
// Returns the tensor read back from the op's "Out" slot. This function does
// not consult autograd metadata and does not build a grad node.
paddle::experimental::Tensor histogram_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("histogram dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: histogram";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run the input through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("histogram", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "histogram");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return histogram_dygraph_function(casted_X, attr_map);
  }

  VarSlotMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap op_outputs = { { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("histogram", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);
  return result;
}


// Eager-mode forward entry for the "gather_tree" operator.
//
//   Ids, Parents - input tensors bound to the op's "Ids" and "Parents" slots.
//   attr_map     - operator attributes; a local copy is passed to the tracer.
//
// Returns the tensor read back from the op's "Out" slot. This function does
// not consult autograd metadata and does not build a grad node.
paddle::experimental::Tensor gather_tree_dygraph_function(const paddle::experimental::Tensor& Ids,const paddle::experimental::Tensor& Parents, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("gather_tree dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: gather_tree";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run both inputs through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Ids},{Parents} };
    auto amp_dtype = egr::GetAmpDestDtype("gather_tree", amp_inputs);
    auto casted_Ids = egr::AmpAutoCast("Ids", Ids, amp_dtype, "gather_tree");
    auto casted_Parents = egr::AmpAutoCast("Parents", Parents, amp_dtype, "gather_tree");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return gather_tree_dygraph_function(casted_Ids, casted_Parents, attr_map);
  }

  VarSlotMap op_inputs = {
    { "Ids", egr::EagerUtils::TrySyncToVars(Ids) },
    { "Parents", egr::EagerUtils::TrySyncToVars(Parents) }
  };
  VarSlotMap op_outputs = { { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("gather_tree", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &result);
  return result;
}


// Eager-mode forward entry for the "nanmedian" operator.
//
//   X        - input tensor bound to the op's "X" slot.
//   attr_map - operator attributes; copied locally, handed to the tracer and,
//              when a grad node is built, moved into that node.
//
// Returns (MedianIndex, Out), read back from the op's output slots of the same
// names. When ComputeRequireGrad reports that a gradient is required, a
// nanmedianGradNodeCompat is created; SetHistory is applied to Out only.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> nanmedian_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("nanmedian dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: nanmedian";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run the input through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("nanmedian", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "nanmedian");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return nanmedian_dygraph_function(casted_X, attr_map);
  }

  VarSlotMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // Output variables are created in this order: "MedianIndex", then "Out".
  VarSlotMap op_outputs = {
    { "MedianIndex", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } },
    { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } }
  };

  // Gradient requirement is decided from the input before the op is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("nanmedian", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor median_index;
  egr::EagerUtils::GetOutput(op_outputs["MedianIndex"][0], &median_index);
  paddle::experimental::Tensor median_out;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &median_out);

  {
    paddle::platform::RecordEvent node_creation_event("nanmedian node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Both outputs' autograd metas are fetched whether or not grad is needed.
    egr::AutogradMeta* median_index_meta = egr::EagerUtils::autograd_meta(&median_index);
    egr::AutogradMeta* median_out_meta = egr::EagerUtils::autograd_meta(&median_out);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for nanmedian ";
      egr::EagerUtils::PassStopGradient(false, median_index_meta, median_out_meta);

      // NOTE(review): the (2, 1) arguments are presumably the backward
      // input/output slot counts — confirm against the node's constructor.
      auto backward_node = std::make_shared<nanmedianGradNodeCompat>(2, 1);

      // The local attribute maps are not used after this point, so they are moved.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The node keeps the MedianIndex output and the forward input X.
      backward_node->SetTensorWrapperMedianIndex(median_index);
      backward_node->SetTensorWrapperX(X);

      backward_node->SetGradOutMeta(X, 0);

      // Slot 0: MedianIndex gets a rank and grad-in meta but no history.
      egr::EagerUtils::SetOutRankWithSlot(median_index_meta, 0);
      backward_node->SetGradInMeta(median_index, 0);

      // Slot 1: Out is linked to the node through SetHistory.
      egr::EagerUtils::SetOutRankWithSlot(median_out_meta, 1);
      egr::EagerUtils::SetHistory(median_out_meta, backward_node);
      backward_node->SetGradInMeta(median_out, 1);
      egr::EagerUtils::CheckAndRetainGrad(median_out);
    }
  }

  return std::make_tuple(median_index, median_out);
}


// Eager-mode forward entry for the "segment_pool" operator.
//
//   X, SegmentIds - input tensors bound to the op's "X"/"SegmentIds" slots.
//   attr_map      - operator attributes; copied locally, handed to the tracer
//                   and, when a grad node is built, moved into that node.
//
// Returns (Out, SummedIds), read back from the op's output slots of the same
// names. When ComputeRequireGrad reports that a gradient is required, a
// segment_poolGradNodeCompat is created; SetHistory is applied to Out only and
// grad-out meta is registered for X only.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> segment_pool_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& SegmentIds, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("segment_pool dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: segment_pool";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run both inputs through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{SegmentIds} };
    auto amp_dtype = egr::GetAmpDestDtype("segment_pool", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "segment_pool");
    auto casted_SegmentIds = egr::AmpAutoCast("SegmentIds", SegmentIds, amp_dtype, "segment_pool");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return segment_pool_dygraph_function(casted_X, casted_SegmentIds, attr_map);
  }

  VarSlotMap op_inputs = {
    { "X", egr::EagerUtils::TrySyncToVars(X) },
    { "SegmentIds", egr::EagerUtils::TrySyncToVars(SegmentIds) }
  };

  // Output variables are created in this order: "Out", then "SummedIds".
  VarSlotMap op_outputs = {
    { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } },
    { "SummedIds", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } }
  };

  // Gradient requirement is decided from the inputs before the op is traced.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* segment_ids_meta = egr::EagerUtils::nullable_autograd_meta(SegmentIds);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, segment_ids_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("segment_pool", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  paddle::experimental::Tensor pooled_out;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &pooled_out);
  paddle::experimental::Tensor summed_ids;
  egr::EagerUtils::GetOutput(op_outputs["SummedIds"][0], &summed_ids);

  {
    paddle::platform::RecordEvent node_creation_event("segment_pool node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Both outputs' autograd metas are fetched whether or not grad is needed.
    egr::AutogradMeta* pooled_out_meta = egr::EagerUtils::autograd_meta(&pooled_out);
    egr::AutogradMeta* summed_ids_meta = egr::EagerUtils::autograd_meta(&summed_ids);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for segment_pool ";
      egr::EagerUtils::PassStopGradient(false, pooled_out_meta, summed_ids_meta);

      // NOTE(review): the (2, 2) arguments are presumably the backward
      // input/output slot counts — confirm against the node's constructor.
      auto backward_node = std::make_shared<segment_poolGradNodeCompat>(2, 2);

      // The local attribute maps are not used after this point, so they are moved.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The node keeps the Out result plus both forward inputs.
      backward_node->SetTensorWrapperOut(pooled_out);
      backward_node->SetTensorWrapperSegmentIds(SegmentIds);
      backward_node->SetTensorWrapperX(X);

      backward_node->SetGradOutMeta(X, 0);

      // Slot 0: Out is linked to the node through SetHistory.
      egr::EagerUtils::SetOutRankWithSlot(pooled_out_meta, 0);
      egr::EagerUtils::SetHistory(pooled_out_meta, backward_node);
      backward_node->SetGradInMeta(pooled_out, 0);
      egr::EagerUtils::CheckAndRetainGrad(pooled_out);

      // Slot 1: SummedIds gets a rank and grad-in meta but no history.
      egr::EagerUtils::SetOutRankWithSlot(summed_ids_meta, 1);
      backward_node->SetGradInMeta(summed_ids, 1);
    }
  }

  return std::make_tuple(pooled_out, summed_ids);
}


// Eager-mode forward entry for the "fusion_repeated_fc_relu" operator.
//
//   X          - input tensor bound to the op's "X" slot.
//   W, Bias    - tensor lists bound to the op's "W" and "Bias" slots.
//   ReluOutNum - number of variables created for the "ReluOut" output slot.
//   attr_map   - operator attributes; a local copy is passed to the tracer.
//
// Returns (ReluOut, Out): the tensor list read back from the "ReluOut" slot
// and the single tensor read back from the "Out" slot. This function does not
// consult autograd metadata and does not build a grad node.
std::tuple<std::vector<paddle::experimental::Tensor>,paddle::experimental::Tensor> fusion_repeated_fc_relu_dygraph_function(const paddle::experimental::Tensor& X,const std::vector<paddle::experimental::Tensor>& W,const std::vector<paddle::experimental::Tensor>& Bias, size_t ReluOutNum, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("fusion_repeated_fc_relu dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: fusion_repeated_fc_relu";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run X through AmpAutoCast and the
  // W/Bias lists through AmpAutoCasts, then re-enter this function while an
  // AutoCastGuard built with AmpLevel::O0 is alive (presumably so the nested
  // call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},W,Bias };
    auto amp_dtype = egr::GetAmpDestDtype("fusion_repeated_fc_relu", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "fusion_repeated_fc_relu");
    auto casted_W = egr::AmpAutoCasts("W", W, amp_dtype, "fusion_repeated_fc_relu");
    auto casted_Bias = egr::AmpAutoCasts("Bias", Bias, amp_dtype, "fusion_repeated_fc_relu");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return fusion_repeated_fc_relu_dygraph_function(casted_X, casted_W, casted_Bias, ReluOutNum, attr_map);
  }

  VarSlotMap op_inputs = {
    { "X", egr::EagerUtils::TrySyncToVars(X) },
    { "W", egr::EagerUtils::TrySyncToVars(W) },
    { "Bias", egr::EagerUtils::TrySyncToVars(Bias) }
  };

  // "ReluOut" variables are created first, then the single "Out" variable.
  VarSlotMap op_outputs = {
    { "ReluOut", egr::EagerUtils::CreateVars(ReluOutNum) },
    { "Out", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } }
  };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("fusion_repeated_fc_relu", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> relu_outs;
  egr::EagerUtils::GetOutputs(op_outputs["ReluOut"], &relu_outs);
  paddle::experimental::Tensor final_out;
  egr::EagerUtils::GetOutput(op_outputs["Out"][0], &final_out);

  return std::make_tuple(relu_outs, final_out);
}


// Eager-mode forward entry for the "sync_batch_norm" operator.
//
//   X, Scale, Bias, Mean, Variance - input tensors bound to the op's input
//                                    slots of the same names.
//   MeanOutVar, VarianceOutVar     - caller-provided tensors that receive the
//                                    "MeanOut" / "VarianceOut" results; both
//                                    are dereferenced unconditionally here.
//   attr_map                       - operator attributes; copied locally,
//                                    handed to the tracer and, when a grad
//                                    node is built, moved into that node.
//
// The "MeanOut" and "VarianceOut" output slots reuse the variables synced from
// Mean and Variance; the inplace map passed to TraceOp is nevertheless empty.
//
// Returns (Y, MeanOut, VarianceOut, SavedMean, SavedVariance, ReserveSpace).
// When ComputeRequireGrad reports that a gradient is required, a
// sync_batch_normGradNodeCompat is created; SetHistory is applied to every
// output except SavedMean and SavedVariance.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> sync_batch_norm_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Scale,const paddle::experimental::Tensor& Bias,const paddle::experimental::Tensor& Mean,const paddle::experimental::Tensor& Variance, paddle::experimental::Tensor* MeanOutVar, paddle::experimental::Tensor* VarianceOutVar, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("sync_batch_norm dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: sync_batch_norm";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run the five inputs through
  // AmpAutoCast and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  // The two output pointers are forwarded untouched.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Scale},{Bias},{Mean},{Variance} };
    auto amp_dtype = egr::GetAmpDestDtype("sync_batch_norm", amp_inputs);
    auto casted_X = egr::AmpAutoCast("X", X, amp_dtype, "sync_batch_norm");
    auto casted_Scale = egr::AmpAutoCast("Scale", Scale, amp_dtype, "sync_batch_norm");
    auto casted_Bias = egr::AmpAutoCast("Bias", Bias, amp_dtype, "sync_batch_norm");
    auto casted_Mean = egr::AmpAutoCast("Mean", Mean, amp_dtype, "sync_batch_norm");
    auto casted_Variance = egr::AmpAutoCast("Variance", Variance, amp_dtype, "sync_batch_norm");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return sync_batch_norm_dygraph_function(casted_X, casted_Scale, casted_Bias, casted_Mean, casted_Variance, MeanOutVar, VarianceOutVar, attr_map);
  }

  VarSlotMap op_inputs = {
    { "X", egr::EagerUtils::TrySyncToVars(X) },
    { "Scale", egr::EagerUtils::TrySyncToVars(Scale) },
    { "Bias", egr::EagerUtils::TrySyncToVars(Bias) },
    { "Mean", egr::EagerUtils::TrySyncToVars(Mean) },
    { "Variance", egr::EagerUtils::TrySyncToVars(Variance) }
  };

  // "MeanOut"/"VarianceOut" share the input variables; the other four slots
  // get freshly named variables, created in the order listed.
  VarSlotMap op_outputs = {
    { "Y", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } },
    { "MeanOut", op_inputs["Mean"] },
    { "VarianceOut", op_inputs["Variance"] },
    { "SavedMean", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } },
    { "SavedVariance", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } },
    { "ReserveSpace", { std::make_shared<egr::EagerVariable>(controller.GenerateUniqueName()) } }
  };

  // Gradient requirement is decided from all five inputs before tracing.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* scale_meta = egr::EagerUtils::nullable_autograd_meta(Scale);
  egr::AutogradMeta* bias_meta = egr::EagerUtils::nullable_autograd_meta(Bias);
  egr::AutogradMeta* mean_meta = egr::EagerUtils::nullable_autograd_meta(Mean);
  egr::AutogradMeta* variance_meta = egr::EagerUtils::nullable_autograd_meta(Variance);
  const bool grad_enabled = controller.HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(grad_enabled, x_meta, scale_meta, bias_meta, mean_meta, variance_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("sync_batch_norm", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  // Read every output slot back; MeanOut/VarianceOut land in the caller's tensors.
  paddle::experimental::Tensor y_out;
  egr::EagerUtils::GetOutput(op_outputs["Y"][0], &y_out);
  egr::EagerUtils::GetOutput(op_outputs["MeanOut"][0], MeanOutVar);
  paddle::experimental::Tensor& mean_out = *MeanOutVar;
  egr::EagerUtils::GetOutput(op_outputs["VarianceOut"][0], VarianceOutVar);
  paddle::experimental::Tensor& variance_out = *VarianceOutVar;
  paddle::experimental::Tensor saved_mean;
  egr::EagerUtils::GetOutput(op_outputs["SavedMean"][0], &saved_mean);
  paddle::experimental::Tensor saved_variance;
  egr::EagerUtils::GetOutput(op_outputs["SavedVariance"][0], &saved_variance);
  paddle::experimental::Tensor reserve_space;
  egr::EagerUtils::GetOutput(op_outputs["ReserveSpace"][0], &reserve_space);

  {
    paddle::platform::RecordEvent node_creation_event("sync_batch_norm node_creation", paddle::platform::TracerEventType::Operator, 1);
    // All six outputs' autograd metas are fetched whether or not grad is needed.
    egr::AutogradMeta* y_out_meta = egr::EagerUtils::autograd_meta(&y_out);
    egr::AutogradMeta* mean_out_meta = egr::EagerUtils::autograd_meta(&mean_out);
    egr::AutogradMeta* variance_out_meta = egr::EagerUtils::autograd_meta(&variance_out);
    egr::AutogradMeta* saved_mean_meta = egr::EagerUtils::autograd_meta(&saved_mean);
    egr::AutogradMeta* saved_variance_meta = egr::EagerUtils::autograd_meta(&saved_variance);
    egr::AutogradMeta* reserve_space_meta = egr::EagerUtils::autograd_meta(&reserve_space);
    if (needs_grad) {
      VLOG(6) << " Construct Grad for sync_batch_norm ";
      egr::EagerUtils::PassStopGradient(false, y_out_meta, mean_out_meta, variance_out_meta, saved_mean_meta, saved_variance_meta, reserve_space_meta);

      // NOTE(review): the (6, 5) arguments are presumably the backward
      // input/output slot counts — confirm against the node's constructor.
      auto backward_node = std::make_shared<sync_batch_normGradNodeCompat>(6, 5);

      // The local attribute maps are not used after this point, so they are moved.
      backward_node->SetAttrMap(std::move(op_attrs));
      backward_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors saved on the node.
      backward_node->SetTensorWrapperBias(Bias);
      backward_node->SetTensorWrapperSavedMean(saved_mean);
      backward_node->SetTensorWrapperSavedVariance(saved_variance);
      backward_node->SetTensorWrapperScale(Scale);
      backward_node->SetTensorWrapperX(X);

      // Grad-out meta is registered for X, Scale and Bias only.
      backward_node->SetGradOutMeta(X, 0);
      backward_node->SetGradOutMeta(Scale, 1);
      backward_node->SetGradOutMeta(Bias, 2);

      // Slot 0: Y.
      egr::EagerUtils::SetOutRankWithSlot(y_out_meta, 0);
      egr::EagerUtils::SetHistory(y_out_meta, backward_node);
      backward_node->SetGradInMeta(y_out, 0);
      egr::EagerUtils::CheckAndRetainGrad(y_out);

      // Slot 1: MeanOut.
      egr::EagerUtils::SetOutRankWithSlot(mean_out_meta, 1);
      egr::EagerUtils::SetHistory(mean_out_meta, backward_node);
      backward_node->SetGradInMeta(mean_out, 1);
      egr::EagerUtils::CheckAndRetainGrad(mean_out);

      // Slot 2: VarianceOut.
      egr::EagerUtils::SetOutRankWithSlot(variance_out_meta, 2);
      egr::EagerUtils::SetHistory(variance_out_meta, backward_node);
      backward_node->SetGradInMeta(variance_out, 2);
      egr::EagerUtils::CheckAndRetainGrad(variance_out);

      // Slots 3 and 4: SavedMean / SavedVariance get rank and grad-in meta only.
      egr::EagerUtils::SetOutRankWithSlot(saved_mean_meta, 3);
      backward_node->SetGradInMeta(saved_mean, 3);
      egr::EagerUtils::SetOutRankWithSlot(saved_variance_meta, 4);
      backward_node->SetGradInMeta(saved_variance, 4);

      // Slot 5: ReserveSpace.
      egr::EagerUtils::SetOutRankWithSlot(reserve_space_meta, 5);
      egr::EagerUtils::SetHistory(reserve_space_meta, backward_node);
      backward_node->SetGradInMeta(reserve_space, 5);
      egr::EagerUtils::CheckAndRetainGrad(reserve_space);
    }
  }

  return std::make_tuple(y_out, mean_out, variance_out, saved_mean, saved_variance, reserve_space);
}


// Eager-mode forward entry for the "nop" operator.
//
//   X        - tensor list bound to the op's "X" slot.
//   OutNum   - number of variables created for the "Out" output slot.
//   attr_map - operator attributes; a local copy is passed to the tracer.
//
// Returns the tensor list read back from the op's "Out" slot. This function
// does not consult autograd metadata and does not build a grad node.
std::vector<paddle::experimental::Tensor> nop_dygraph_function(const std::vector<paddle::experimental::Tensor>& X, size_t OutNum, const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent entrance_event("nop dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: nop";

  auto& controller = egr::Controller::Instance();

  // If the controller's AMP level is not O0, run the input list through
  // AmpAutoCasts and re-enter this function while an AutoCastGuard built with
  // AmpLevel::O0 is alive (presumably so the nested call takes the plain path).
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { X };
    auto amp_dtype = egr::GetAmpDestDtype("nop", amp_inputs);
    auto casted_X = egr::AmpAutoCasts("X", X, amp_dtype, "nop");

    paddle::imperative::AutoCastGuard amp_off_guard(controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return nop_dygraph_function(casted_X, OutNum, attr_map);
  }

  VarSlotMap op_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap op_outputs = { { "Out", egr::EagerUtils::CreateVars(OutNum) } };

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp("nop", op_inputs, op_outputs, op_attrs,
     controller.GetExpectedPlace(),
     &op_default_attrs, true, {});

  std::vector<paddle::experimental::Tensor> results;
  egr::EagerUtils::GetOutputs(op_outputs["Out"], &results);
  return results;
}


// Eager-mode forward entry point for the "filter_by_instag" operator.
//
// Inputs : Ins, Ins_tag, Filter_tag (tensors); attr_map is copied and handed
//          to the tracer.
// Returns: the tuple (Out, LossWeight, IndexMap).
//
// Steps:
//   1. When the controller's AMP level is not O0, the inputs are passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a filter_by_instagGradNodeCompat
//      is created and registered on the autograd meta of each output.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> filter_by_instag_dygraph_function(const paddle::experimental::Tensor& Ins,const paddle::experimental::Tensor& Ins_tag,const paddle::experimental::Tensor& Filter_tag, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "filter_by_instag dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: filter_by_instag";

  // ---- AMP: cast the inputs, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Ins},{Ins_tag},{Filter_tag} };
    auto amp_dtype = egr::GetAmpDestDtype("filter_by_instag", amp_inputs);

    auto cast_Ins = egr::AmpAutoCast("Ins", Ins, amp_dtype, "filter_by_instag");
    auto cast_Ins_tag = egr::AmpAutoCast("Ins_tag", Ins_tag, amp_dtype, "filter_by_instag");
    auto cast_Filter_tag = egr::AmpAutoCast("Filter_tag", Filter_tag, amp_dtype, "filter_by_instag");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return filter_by_instag_dygraph_function(cast_Ins, cast_Ins_tag, cast_Filter_tag, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = {
      { "Ins", egr::EagerUtils::TrySyncToVars(Ins) },
      { "Ins_tag", egr::EagerUtils::TrySyncToVars(Ins_tag) },
      { "Filter_tag", egr::EagerUtils::TrySyncToVars(Filter_tag) } };

  // Each output slot gets one fresh variable with a controller-generated name.
  // Names are requested in the order Out, LossWeight, IndexMap.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = {
      { "Out", {new_output_var()} },
      { "LossWeight", {new_output_var()} },
      { "IndexMap", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_Ins = egr::EagerUtils::nullable_autograd_meta(Ins);
  egr::AutogradMeta* in_meta_Ins_tag = egr::EagerUtils::nullable_autograd_meta(Ins_tag);
  egr::AutogradMeta* in_meta_Filter_tag = egr::EagerUtils::nullable_autograd_meta(Filter_tag);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      has_grad, in_meta_Ins, in_meta_Ins_tag, in_meta_Filter_tag);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "filter_by_instag", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert traced variables back into tensors ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);
  paddle::experimental::Tensor LossWeight;
  egr::EagerUtils::GetOutput(traced_outputs["LossWeight"][0], &LossWeight);
  paddle::experimental::Tensor IndexMap;
  egr::EagerUtils::GetOutput(traced_outputs["IndexMap"][0], &IndexMap);

  {
    paddle::platform::RecordEvent node_event(
        "filter_by_instag node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // Output metas are fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* out_meta_LossWeight = egr::EagerUtils::autograd_meta(&LossWeight);
    egr::AutogradMeta* out_meta_IndexMap = egr::EagerUtils::autograd_meta(&IndexMap);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for filter_by_instag "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out, out_meta_LossWeight, out_meta_IndexMap);

      // Backward node for this op.
      auto node = std::shared_ptr<filter_by_instagGradNodeCompat>(
          new filter_by_instagGradNodeCompat(3, 3));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors handed to the node's tensor wrappers.
      node->SetTensorWrapperIndexMap(IndexMap);
      node->SetTensorWrapperIns(Ins);
      node->SetTensorWrapperLossWeight(LossWeight);

      // Only Ins is registered as a grad output (slot 0).
      node->SetGradOutMeta(Ins, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Output "LossWeight" -> slot 1.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_LossWeight, 1);
      egr::EagerUtils::SetHistory(out_meta_LossWeight, node);
      node->SetGradInMeta(LossWeight, 1);
      egr::EagerUtils::CheckAndRetainGrad(LossWeight);

      // Output "IndexMap" -> slot 2.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_IndexMap, 2);
      egr::EagerUtils::SetHistory(out_meta_IndexMap, node);
      node->SetGradInMeta(IndexMap, 2);
      egr::EagerUtils::CheckAndRetainGrad(IndexMap);
    }
  }

  return std::make_tuple(Out,LossWeight,IndexMap);
}


// Eager-mode forward entry point for the "expand_as_v2" operator.
//
// Input  : X (tensor); attr_map is copied and handed to the tracer.
// Returns: the single output tensor Out.
//
// Steps:
//   1. When the controller's AMP level is not O0, X is passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, an expand_as_v2GradNodeCompat is
//      created and registered on Out's autograd meta.
paddle::experimental::Tensor expand_as_v2_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "expand_as_v2 dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: expand_as_v2";

  // ---- AMP: cast the input, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("expand_as_v2", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "expand_as_v2");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return expand_as_v2_dygraph_function(cast_X, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // The output slot gets one fresh variable with a controller-generated name.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = { { "Out", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, in_meta_X);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "expand_as_v2", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variable back into a tensor ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "expand_as_v2 node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for expand_as_v2 "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out);

      // Backward node for this op.
      auto node = std::shared_ptr<expand_as_v2GradNodeCompat>(
          new expand_as_v2GradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor handed to the node's tensor wrapper.
      node->SetTensorWrapperX(X);

      // X is registered as grad output slot 0; Out as grad input slot 0.
      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry point for the "diag_v2" operator.
//
// Input  : X (tensor); attr_map is copied and handed to the tracer.
// Returns: the single output tensor Out.
//
// Steps:
//   1. When the controller's AMP level is not O0, X is passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a diag_v2GradNodeCompat is
//      created and registered on Out's autograd meta.
paddle::experimental::Tensor diag_v2_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "diag_v2 dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: diag_v2";

  // ---- AMP: cast the input, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("diag_v2", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "diag_v2");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return diag_v2_dygraph_function(cast_X, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // The output slot gets one fresh variable with a controller-generated name.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = { { "Out", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, in_meta_X);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "diag_v2", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variable back into a tensor ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "diag_v2 node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for diag_v2 "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out);

      // Backward node for this op.
      auto node = std::shared_ptr<diag_v2GradNodeCompat>(
          new diag_v2GradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor handed to the node's tensor wrapper.
      node->SetTensorWrapperX(X);

      // X is registered as grad output slot 0; Out as grad input slot 0.
      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry point for the "pull_box_sparse" operator.
//
// Inputs : Ids (list of tensors); OutNum is the number of output variables
//          created for slot "Out"; attr_map is copied and handed to the tracer.
// Returns: the list of output tensors read back from slot "Out".
//
// Steps:
//   1. When the controller's AMP level is not O0, Ids is passed through
//      egr::AmpAutoCasts and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a pull_box_sparseGradNodeCompat
//      is created and registered on the autograd metas of the outputs.
std::vector<paddle::experimental::Tensor> pull_box_sparse_dygraph_function(const std::vector<paddle::experimental::Tensor>& Ids, size_t OutNum, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "pull_box_sparse dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: pull_box_sparse";

  // ---- AMP: cast the inputs, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { Ids };
    auto amp_dtype = egr::GetAmpDestDtype("pull_box_sparse", amp_inputs);

    auto cast_Ids = egr::AmpAutoCasts("Ids", Ids, amp_dtype, "pull_box_sparse");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return pull_box_sparse_dygraph_function(cast_Ids, OutNum, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = { { "Ids", egr::EagerUtils::TrySyncToVars(Ids) } };
  SlotVarMap traced_outputs = { { "Out", egr::EagerUtils::CreateVars(OutNum) } };

  // ---- Decide whether a backward node is needed ----
  std::vector<egr::AutogradMeta*> in_metas_Ids = egr::EagerUtils::nullable_autograd_meta(Ids);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, &in_metas_Ids);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "pull_box_sparse", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variables back into tensors ----
  std::vector<paddle::experimental::Tensor> Out;
  egr::EagerUtils::GetOutputs(traced_outputs["Out"], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "pull_box_sparse node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // Output metas are fetched whether or not a grad node is built.
    std::vector<egr::AutogradMeta*> out_metas_Out = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for pull_box_sparse "; 
      egr::EagerUtils::PassStopGradient(false, &out_metas_Out);

      // Backward node for this op.
      auto node = std::shared_ptr<pull_box_sparseGradNodeCompat>(
          new pull_box_sparseGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors handed to the node's tensor wrapper.
      node->SetTensorWrapperIds(Ids);

      // No SetGradOutMeta call is made here; all of Out maps to slot 0.
      egr::EagerUtils::SetOutRankWithSlot(&out_metas_Out, 0);
      egr::EagerUtils::SetHistory(&out_metas_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry point for the "nll_loss" operator.
//
// Inputs : X, Label, and Weight (tensors). Weight is treated as optional: it is
//          only added to the AMP list / tracer inputs when
//          Weight.initialized() is true. attr_map is copied and handed to the
//          tracer.
// Returns: the tuple (Out, Total_weight).
//
// Steps:
//   1. When the controller's AMP level is not O0, the inputs are passed through
//      egr::AmpAutoCast (Weight only if initialized) and this function
//      re-enters itself under an AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, an nll_lossGradNodeCompat is
//      created and registered on the autograd meta of each output.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor> nll_loss_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Label,const paddle::experimental::Tensor& Weight, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "nll_loss dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: nll_loss";

  // ---- AMP: cast the inputs, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Label} };
    if (Weight.initialized()) {
      amp_inputs.push_back({ Weight });
    }

    auto amp_dtype = egr::GetAmpDestDtype("nll_loss", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "nll_loss");
    auto cast_Label = egr::AmpAutoCast("Label", Label, amp_dtype, "nll_loss");
    // An uninitialized Weight is forwarded unchanged.
    auto cast_Weight = ((Weight.initialized())
                            ? egr::AmpAutoCast("Weight", Weight, amp_dtype, "nll_loss")
                            : Weight);

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return nll_loss_dygraph_function(cast_X, cast_Label, cast_Weight, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Label", egr::EagerUtils::TrySyncToVars(Label) } };
  if (Weight.initialized()) {
    traced_inputs["Weight"] = egr::EagerUtils::TrySyncToVars(Weight);
  }

  // Each output slot gets one fresh variable with a controller-generated name.
  // Names are requested in the order Out, Total_weight.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = {
      { "Out", {new_output_var()} },
      { "Total_weight", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* in_meta_Label = egr::EagerUtils::nullable_autograd_meta(Label);
  egr::AutogradMeta* in_meta_Weight = egr::EagerUtils::nullable_autograd_meta(Weight);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(
      has_grad, in_meta_X, in_meta_Label, in_meta_Weight);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "nll_loss", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert traced variables back into tensors ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);
  paddle::experimental::Tensor Total_weight;
  egr::EagerUtils::GetOutput(traced_outputs["Total_weight"][0], &Total_weight);

  {
    paddle::platform::RecordEvent node_event(
        "nll_loss node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // Output metas are fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* out_meta_Total_weight = egr::EagerUtils::autograd_meta(&Total_weight);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for nll_loss "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out, out_meta_Total_weight);

      // Backward node for this op.
      auto node = std::shared_ptr<nll_lossGradNodeCompat>(
          new nll_lossGradNodeCompat(2, 3));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors handed to the node's tensor wrappers. Weight is passed
      // regardless of whether it is initialized.
      node->SetTensorWrapperLabel(Label);
      node->SetTensorWrapperTotal_weight(Total_weight);
      node->SetTensorWrapperWeight(Weight);
      node->SetTensorWrapperX(X);

      // Only X is registered as a grad output (slot 0).
      node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Output "Total_weight" -> slot 1.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Total_weight, 1);
      egr::EagerUtils::SetHistory(out_meta_Total_weight, node);
      node->SetGradInMeta(Total_weight, 1);
      egr::EagerUtils::CheckAndRetainGrad(Total_weight);
    }
  }

  return std::make_tuple(Out,Total_weight);
}


// Eager-mode forward entry point for the "dot" operator.
//
// Inputs : X, Y (tensors); attr_map is copied and handed to the tracer.
// Returns: the single output tensor Out.
//
// Steps:
//   1. When the controller's AMP level is not O0, X and Y are passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a dotGradNodeCompat is created
//      and registered on Out's autograd meta.
paddle::experimental::Tensor dot_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Y, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "dot dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: dot";

  // ---- AMP: cast the inputs, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Y} };
    auto amp_dtype = egr::GetAmpDestDtype("dot", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "dot");
    auto cast_Y = egr::AmpAutoCast("Y", Y, amp_dtype, "dot");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return dot_dygraph_function(cast_X, cast_Y, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Y", egr::EagerUtils::TrySyncToVars(Y) } };

  // The output slot gets one fresh variable with a controller-generated name.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = { { "Out", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* in_meta_Y = egr::EagerUtils::nullable_autograd_meta(Y);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, in_meta_X, in_meta_Y);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "dot", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variable back into a tensor ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "dot node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for dot "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out);

      // Backward node for this op.
      auto node = std::shared_ptr<dotGradNodeCompat>(new dotGradNodeCompat(1, 2));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensors handed to the node's tensor wrappers.
      node->SetTensorWrapperX(X);
      node->SetTensorWrapperY(Y);

      // X and Y are registered as grad output slots 0 and 1.
      node->SetGradOutMeta(X, 0);
      node->SetGradOutMeta(Y, 1);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// Eager-mode forward entry point for the "scale" operator (out-of-place).
//
// Input  : X (tensor); attr_map is copied and handed to the tracer.
// Returns: the single output tensor Out.
//
// Steps:
//   1. When the controller's AMP level is not O0, X is passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a scaleGradNodeCompat is created
//      and registered on Out's autograd meta. No tensor wrappers are set.
paddle::experimental::Tensor scale_dygraph_function(const paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "scale dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: scale";

  // ---- AMP: cast the input, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X} };
    auto amp_dtype = egr::GetAmpDestDtype("scale", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "scale");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return scale_dygraph_function(cast_X, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // The output slot gets one fresh variable with a controller-generated name.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = { { "Out", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, in_meta_X);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "scale", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variable back into a tensor ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_event(
        "scale node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // The output meta is fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for scale "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out);

      // Backward node for this op.
      auto node = std::shared_ptr<scaleGradNodeCompat>(new scaleGradNodeCompat(1, 1));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // X is registered as grad output slot 0; Out as grad input slot 0.
      node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}


// In-place variant of the eager-mode "scale" forward function.
//
// Input  : X (non-const tensor reference). The traced output is written back
//          into X, and X's inplace version is bumped.
//          attr_map is copied and handed to the tracer.
// Returns: X (returned by value) after the in-place update.
//
// Notes:
//   * No AMP auto-cast step is performed in this function.
//   * The output slot "Out" reuses the variables of the input slot "X", and
//     the tracer receives the inplace map {{"X", "Out"}}.
//   * The profiler event names and log messages use "scale", not "scale_".
paddle::experimental::Tensor scale__dygraph_function(paddle::experimental::Tensor& X, const paddle::framework::AttributeMap& attr_map) {

  paddle::platform::RecordEvent dygraph_entrance_record_event("scale dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: scale";
  // Dygraph Forward Pass

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins = { { "X", egr::EagerUtils::TrySyncToVars(X) } };

  // "Out" shares the very same variable pointers as "X" (in-place execution).
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs = { { "Out", ins["X"] } };


  // Prepare Autograd Meta 
  egr::AutogradMeta* p_autograd_X = egr::EagerUtils::nullable_autograd_meta(X);

  bool trace_backward = egr::Controller::Instance().HasGrad();

  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_X);
  // Check Inplace
  // Performed before the op runs, using the pre-op autograd meta of X.
  egr::EagerUtils::CheckInplace(X, p_autograd_X, require_any_grad);

  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  // Last argument is the inplace map: input slot "X" -> output slot "Out".
  egr::Controller::Instance().GetCurrentTracer()->TraceOp("scale", ins, outs, attrs, 
     egr::Controller::Instance().GetExpectedPlace(),
     &default_attrs, true, {{"X", "Out"}});

  // Write the traced result back into the caller's tensor and record the
  // in-place modification on it.
  egr::EagerUtils::GetOutput(outs["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_record_event("scale node_creation", paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch X's autograd meta now that X holds the output; this uses the
    // non-nullable accessor, unlike the lookup made before the op ran.
    p_autograd_X = egr::EagerUtils::autograd_meta(&X);
    if(require_any_grad) {
      VLOG(6) << " Construct Grad for scale "; 
      egr::EagerUtils::PassStopGradient(false, p_autograd_X);
      // Create GradOpNode
      auto grad_node = std::shared_ptr<scaleGradNodeCompat>(new scaleGradNodeCompat(1, 1));

      // Set Attributes
      // attrs / default_attrs are moved from here and not used afterwards.
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers
      // (none are set for this op)

      // X serves as both the grad-output and grad-input tensor of slot 0,
      // since input and output are the same tensor here.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_X, 0);
      egr::EagerUtils::SetHistory(p_autograd_X, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);

    }
  }

  return X;

}


// Eager-mode forward entry point for the "shuffle_batch" operator.
//
// Inputs : X, Seed (tensors); attr_map is copied and handed to the tracer.
// Returns: the tuple (Out, ShuffleIdx, SeedOut).
//
// Steps:
//   1. When the controller's AMP level is not O0, X and Seed are passed
//      through egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//   3. When ComputeRequireGrad returns true, a shuffle_batchGradNodeCompat is
//      created and registered on the autograd meta of each output.
std::tuple<paddle::experimental::Tensor,paddle::experimental::Tensor,paddle::experimental::Tensor> shuffle_batch_dygraph_function(const paddle::experimental::Tensor& X,const paddle::experimental::Tensor& Seed, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "shuffle_batch dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: shuffle_batch";

  // ---- AMP: cast the inputs, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {X},{Seed} };
    auto amp_dtype = egr::GetAmpDestDtype("shuffle_batch", amp_inputs);

    auto cast_X = egr::AmpAutoCast("X", X, amp_dtype, "shuffle_batch");
    auto cast_Seed = egr::AmpAutoCast("Seed", Seed, amp_dtype, "shuffle_batch");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return shuffle_batch_dygraph_function(cast_X, cast_Seed, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = {
      { "X", egr::EagerUtils::TrySyncToVars(X) },
      { "Seed", egr::EagerUtils::TrySyncToVars(Seed) } };

  // Each output slot gets one fresh variable with a controller-generated name.
  // Names are requested in the order Out, ShuffleIdx, SeedOut.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = {
      { "Out", {new_output_var()} },
      { "ShuffleIdx", {new_output_var()} },
      { "SeedOut", {new_output_var()} } };

  // ---- Decide whether a backward node is needed ----
  egr::AutogradMeta* in_meta_X = egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* in_meta_Seed = egr::EagerUtils::nullable_autograd_meta(Seed);

  const bool has_grad = egr::Controller::Instance().HasGrad();
  const bool needs_grad = egr::EagerUtils::ComputeRequireGrad(has_grad, in_meta_X, in_meta_Seed);

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "shuffle_batch", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert traced variables back into tensors ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);
  paddle::experimental::Tensor ShuffleIdx;
  egr::EagerUtils::GetOutput(traced_outputs["ShuffleIdx"][0], &ShuffleIdx);
  paddle::experimental::Tensor SeedOut;
  egr::EagerUtils::GetOutput(traced_outputs["SeedOut"][0], &SeedOut);

  {
    paddle::platform::RecordEvent node_event(
        "shuffle_batch node_creation",
        paddle::platform::TracerEventType::Operator, 1);

    // Output metas are fetched whether or not a grad node is built.
    egr::AutogradMeta* out_meta_Out = egr::EagerUtils::autograd_meta(&Out);
    egr::AutogradMeta* out_meta_ShuffleIdx = egr::EagerUtils::autograd_meta(&ShuffleIdx);
    egr::AutogradMeta* out_meta_SeedOut = egr::EagerUtils::autograd_meta(&SeedOut);

    if (needs_grad) {
      VLOG(6) << " Construct Grad for shuffle_batch "; 
      egr::EagerUtils::PassStopGradient(false, out_meta_Out, out_meta_ShuffleIdx, out_meta_SeedOut);

      // Backward node for this op.
      auto node = std::shared_ptr<shuffle_batchGradNodeCompat>(
          new shuffle_batchGradNodeCompat(3, 2));

      // Attribute maps are moved into the node; they are not used afterwards.
      node->SetAttrMap(std::move(op_attrs));
      node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Tensor handed to the node's tensor wrapper.
      node->SetTensorWrapperShuffleIdx(ShuffleIdx);

      // Only X is registered as a grad output (slot 0).
      node->SetGradOutMeta(X, 0);

      // Output "Out" -> slot 0.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_Out, 0);
      egr::EagerUtils::SetHistory(out_meta_Out, node);
      node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);

      // Output "ShuffleIdx" -> slot 1.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_ShuffleIdx, 1);
      egr::EagerUtils::SetHistory(out_meta_ShuffleIdx, node);
      node->SetGradInMeta(ShuffleIdx, 1);
      egr::EagerUtils::CheckAndRetainGrad(ShuffleIdx);

      // Output "SeedOut" -> slot 2.
      egr::EagerUtils::SetOutRankWithSlot(out_meta_SeedOut, 2);
      egr::EagerUtils::SetHistory(out_meta_SeedOut, node);
      node->SetGradInMeta(SeedOut, 2);
      egr::EagerUtils::CheckAndRetainGrad(SeedOut);
    }
  }

  return std::make_tuple(Out,ShuffleIdx,SeedOut);
}


// Eager-mode forward entry point for the "diag" operator.
//
// Input  : Diagonal (tensor); attr_map is copied and handed to the tracer.
// Returns: the single output tensor Out.
//
// Steps:
//   1. When the controller's AMP level is not O0, Diagonal is passed through
//      egr::AmpAutoCast and this function re-enters itself under an
//      AutoCastGuard constructed with AmpLevel::O0.
//   2. Otherwise the op is traced through the current tracer.
//
// This function builds no grad node and touches no autograd meta.
paddle::experimental::Tensor diag_dygraph_function(const paddle::experimental::Tensor& Diagonal, const paddle::framework::AttributeMap& attr_map) {
  using SlotVars = std::vector<std::shared_ptr<egr::EagerVariable>>;
  using SlotVarMap = std::map<std::string, SlotVars>;

  paddle::platform::RecordEvent forward_event(
      "diag dygraph",
      paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: diag";

  // ---- AMP: cast the input, then re-dispatch with AMP set to O0 ----
  if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_inputs = { {Diagonal} };
    auto amp_dtype = egr::GetAmpDestDtype("diag", amp_inputs);

    auto cast_Diagonal = egr::AmpAutoCast("Diagonal", Diagonal, amp_dtype, "diag");

    {
      // The guard lives only for the duration of the nested call.
      paddle::imperative::AutoCastGuard amp_off_guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return diag_dygraph_function(cast_Diagonal, attr_map);
    }
  }

  // ---- Build the tracer's input / output variable maps ----
  SlotVarMap traced_inputs = { { "Diagonal", egr::EagerUtils::TrySyncToVars(Diagonal) } };

  // The output slot gets one fresh variable with a controller-generated name.
  const auto new_output_var = []() {
    return std::make_shared<egr::EagerVariable>(
        egr::Controller::Instance().GenerateUniqueName());
  };
  SlotVarMap traced_outputs = { { "Out", {new_output_var()} } };

  // ---- Run the operator ----
  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "diag", traced_inputs, traced_outputs, op_attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &op_default_attrs, true, {});

  // ---- Convert the traced variable back into a tensor ----
  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(traced_outputs["Out"][0], &Out);

  return Out;
}


// Eager-mode forward entry point for the "multiplex" operator.
//
// Ids:      tensor bound to the operator's "Ids" input slot.
// X:        list of tensors bound to the operator's "X" input slot.
// attr_map: operator attributes; copied locally before being handed to the
//           tracer (and later moved into the grad node when one is built).
// Returns the tensor read back from the operator's "Out" slot.
//
// When the controller's AMP level is anything other than O0, both inputs are
// auto-cast to the dtype picked by GetAmpDestDtype and this function calls
// itself again under an AutoCastGuard constructed with level O0.
// When ComputeRequireGrad reports that a gradient is needed, a
// multiplexGradNodeCompat is created and installed as the history of the
// output's autograd meta.
paddle::experimental::Tensor multiplex_dygraph_function(
    const paddle::experimental::Tensor& Ids,
    const std::vector<paddle::experimental::Tensor>& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "multiplex dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: multiplex";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the inputs, then re-enter with the guard in place.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {Ids},X };
    auto target_dtype = egr::GetAmpDestDtype("multiplex", amp_inputs);

    auto casted_ids = egr::AmpAutoCast("Ids", Ids, target_dtype, "multiplex");
    auto casted_x = egr::AmpAutoCasts("X", X, target_dtype, "multiplex");

    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return multiplex_dygraph_function(casted_ids, casted_x, attr_map);
  }

  // Regular path: wrap the inputs and allocate a fresh uniquely named output
  // variable.
  VarSlotMap input_vars = {
      { "Ids", egr::EagerUtils::TrySyncToVars(Ids) },
      { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap output_vars = {
      { "Out",
        { std::make_shared<egr::EagerVariable>(
            controller.GenerateUniqueName()) } } };

  // Collect the inputs' autograd meta and decide whether a grad node is
  // needed.
  egr::AutogradMeta* ids_meta = egr::EagerUtils::nullable_autograd_meta(Ids);
  std::vector<egr::AutogradMeta*> x_metas =
      egr::EagerUtils::nullable_autograd_meta(X);

  const bool trace_backward = controller.HasGrad();
  const bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, ids_meta, &x_metas);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "multiplex", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "multiplex node_creation",
        paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is
    // built below.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&result);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for multiplex "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node; the (1, 2) arguments are presumably the backward
      // input/output slot counts -- confirm against the node definition.
      std::shared_ptr<multiplexGradNodeCompat> grad_node(
          new multiplexGradNodeCompat(1, 2));

      // The attribute maps are not used again here, so they are moved.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // Only Ids gets a tensor wrapper on the node.
      grad_node->SetTensorWrapperIds(Ids);

      // Wire X (slot 1) as grad output and the result (slot 0) as grad input.
      grad_node->SetGradOutMeta(X, 1);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry point for the "leaky_relu" operator
// (out-of-place variant).
//
// X:        tensor bound to the operator's "X" input slot.
// attr_map: operator attributes; copied locally before being handed to the
//           tracer (and later moved into the grad node when one is built).
// Returns the tensor read back from the operator's "Out" slot.
//
// When the controller's AMP level is anything other than O0, the input is
// auto-cast to the dtype picked by GetAmpDestDtype and this function calls
// itself again under an AutoCastGuard constructed with level O0.
// When ComputeRequireGrad reports that a gradient is needed, a
// leaky_reluGradNodeCompat is created and installed as the history of the
// output's autograd meta.
paddle::experimental::Tensor leaky_relu_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "leaky_relu dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: leaky_relu";

  auto& controller = egr::Controller::Instance();

  // AMP path: cast the input, then re-enter with the guard in place.
  if (controller.GetAMPLevel() != paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_inputs = { {X} };
    auto target_dtype = egr::GetAmpDestDtype("leaky_relu", amp_inputs);
    auto casted_x = egr::AmpAutoCast("X", X, target_dtype, "leaky_relu");

    paddle::imperative::AutoCastGuard amp_off_guard(
        controller.GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
    return leaky_relu_dygraph_function(casted_x, attr_map);
  }

  // Regular path: wrap the input and allocate a fresh uniquely named output
  // variable.
  VarSlotMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap output_vars = {
      { "Out",
        { std::make_shared<egr::EagerVariable>(
            controller.GenerateUniqueName()) } } };

  // Collect the input's autograd meta and decide whether a grad node is
  // needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);

  const bool trace_backward = controller.HasGrad();
  const bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  controller.GetCurrentTracer()->TraceOp(
      "leaky_relu", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {});

  paddle::experimental::Tensor result;
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &result);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "leaky_relu node_creation",
        paddle::platform::TracerEventType::Operator, 1);
    // Fetched unconditionally, exactly as before, even when no grad node is
    // built below.
    egr::AutogradMeta* out_meta = egr::EagerUtils::autograd_meta(&result);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for leaky_relu "; 
      egr::EagerUtils::PassStopGradient(false, out_meta);

      // Backward node; the (1, 1) arguments are presumably the backward
      // input/output slot counts -- confirm against the node definition.
      std::shared_ptr<leaky_reluGradNodeCompat> grad_node(
          new leaky_reluGradNodeCompat(1, 1));

      // The attribute maps are not used again here, so they are moved.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The node keeps a tensor wrapper for the forward input X.
      grad_node->SetTensorWrapperX(X);

      // Wire X (slot 0) as grad output and the result (slot 0) as grad input.
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(out_meta, 0);
      egr::EagerUtils::SetHistory(out_meta, grad_node);
      grad_node->SetGradInMeta(result, 0);
      egr::EagerUtils::CheckAndRetainGrad(result);
    }
  }

  return result;
}


// Eager-mode forward entry point for the in-place variant of the
// "leaky_relu" operator.
//
// X:        tensor bound to the operator's "X" input slot; the operator's
//           "Out" slot reuses the same variables, and the traced output is
//           written back into X.
// attr_map: operator attributes; copied locally before being handed to the
//           tracer (and later moved into the grad node when one is built).
// Returns a copy of X after the operator has been traced.
//
// Unlike the out-of-place variant, this function contains no AMP branch.
// When ComputeRequireGrad reports that a gradient is needed, a
// leaky_reluGradNodeCompat is created and installed as the history of X's
// autograd meta.
paddle::experimental::Tensor leaky_relu__dygraph_function(
    paddle::experimental::Tensor& X,
    const paddle::framework::AttributeMap& attr_map) {
  using VarSlotMap =
      std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>;

  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "leaky_relu dygraph", paddle::platform::TracerEventType::Operator, 1);
  VLOG(3) << "Running Eager Forward Op: leaky_relu";

  // The "Out" slot is populated with the very same variables as "X".
  VarSlotMap input_vars = { { "X", egr::EagerUtils::TrySyncToVars(X) } };
  VarSlotMap output_vars = { { "Out", input_vars["X"] } };

  auto& controller = egr::Controller::Instance();

  // Collect the input's autograd meta and decide whether a grad node is
  // needed.
  egr::AutogradMeta* x_meta = egr::EagerUtils::nullable_autograd_meta(X);

  const bool trace_backward = controller.HasGrad();
  const bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, x_meta);

  // In-place check runs before the op is traced.
  egr::EagerUtils::CheckInplace(X, x_meta, require_any_grad);

  paddle::framework::AttributeMap op_attrs = attr_map;
  paddle::framework::AttributeMap op_default_attrs;
  // The trailing map names the X -> Out in-place pairing for the tracer.
  controller.GetCurrentTracer()->TraceOp(
      "leaky_relu", input_vars, output_vars, op_attrs,
      controller.GetExpectedPlace(),
      &op_default_attrs, true, {{"X", "Out"}});

  // Write the traced output back into X and bump its in-place version.
  egr::EagerUtils::GetOutput(output_vars["Out"][0], &X);
  X.bump_inplace_version();
  VLOG(3) << "Tensor(" << X.name() << ") uses Inplace Strategy.";

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "leaky_relu node_creation",
        paddle::platform::TracerEventType::Operator, 1);
    // Re-fetch the meta after the write-back, replacing the nullable lookup
    // made before tracing; done unconditionally, exactly as before.
    x_meta = egr::EagerUtils::autograd_meta(&X);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for leaky_relu "; 
      egr::EagerUtils::PassStopGradient(false, x_meta);

      // Backward node; the (1, 1) arguments are presumably the backward
      // input/output slot counts -- confirm against the node definition.
      std::shared_ptr<leaky_reluGradNodeCompat> grad_node(
          new leaky_reluGradNodeCompat(1, 1));

      // The attribute maps are not used again here, so they are moved.
      grad_node->SetAttrMap(std::move(op_attrs));
      grad_node->SetDefaultAttrMap(std::move(op_default_attrs));

      // The node keeps a tensor wrapper for X.
      grad_node->SetTensorWrapperX(X);

      // X serves as both grad output (slot 0) and grad input (slot 0).
      grad_node->SetGradOutMeta(X, 0);
      egr::EagerUtils::SetOutRankWithSlot(x_meta, 0);
      egr::EagerUtils::SetHistory(x_meta, grad_node);
      grad_node->SetGradInMeta(X, 0);
      egr::EagerUtils::CheckAndRetainGrad(X);
    }
  }

  return X;
}



