"vscode:/vscode.git/clone" did not exist on "d517ba12f2e7862ac533908304dddbd770655d2b"
Commit f8597c93 authored by olofer, committed by Guolin Ke

cross entropy metrics and objective (#685)

* Created objectives and metrics xentropy and xentropy1

* Some comment and code cleanup.

* Added Kullback-Leibler version of metric. Changed some warning messages.

* Fixed sign error in KL-divergence calc.

* Removed __PRETTY_FUNCTION__.

* Chose a better name for the alternative xentropy parameterization.
Documented details on the objectives / metrics in code comments.

* Common code for label interval checks. Cleanups.

* Use common utility for various weight property checks.
parent c79d897c
@@ -579,6 +579,31 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred) {
return ParallelSort(_First, _Last, _Pred, IteratorValType(_First));
}
// Check that all y[] are in interval [ymin, ymax] (end points included); throws error if not
inline void check_elements_interval_closed(const float *y, float ymin, float ymax, int ny, const char *callername) {
for (int i = 0; i < ny; ++i) {
if (y[i] < ymin || y[i] > ymax) {
Log::Fatal("[%s]: does not tolerate element [#%i = %f] outside [%f, %f]", callername, i, y[i], ymin, ymax);
}
}
}
// One-pass scan over array w with nw elements: find min, max and sum of elements;
// this is useful for checking weight requirements.
inline void obtain_min_max_sum(const float *w, int nw, float *mi, float *ma, double *su) {
float minw = w[0];
float maxw = w[0];
double sumw = static_cast<double>(w[0]);
for (int i = 1; i < nw; ++i) {
sumw += w[i];
if (w[i] < minw) minw = w[i];
if (w[i] > maxw) maxw = w[i];
}
if (mi != nullptr) *mi = minw;
if (ma != nullptr) *ma = maxw;
if (su != nullptr) *su = sumw;
}
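// Usage sketch (illustrative, not part of the original code): callers may pass
// nullptr for any output they do not need, e.g. to check only the minimum weight:
//   float minw;
//   Common::obtain_min_max_sum(weights, n, &minw, nullptr, nullptr);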
} // namespace Common
} // namespace LightGBM
@@ -4,6 +4,7 @@
#include "rank_metric.hpp"
#include "map_metric.hpp"
#include "multiclass_metric.hpp"
#include "xentropy_metric.hpp"
namespace LightGBM {
@@ -34,6 +35,12 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return new MultiSoftmaxLoglossMetric(config);
} else if (type == std::string("multi_error")) {
return new MultiErrorMetric(config);
} else if (type == std::string("xentropy") || type == std::string("cross_entropy")) {
return new CrossEntropyMetric(config);
} else if (type == std::string("xentlambda")) {
return new CrossEntropyLambdaMetric(config);
} else if (type == std::string("kldiv") || type == std::string("kullback_leibler")) {
return new KullbackLeiblerDivergence(config);
}
return nullptr;
}
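// With these registrations in place, the new metrics can be requested through
// the usual configuration mechanism, e.g. (illustrative config snippet):
//   objective = xentropy
//   metric = xentropy,kldiv
// The aliases cross_entropy and kullback_leibler map to the same classes.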
#ifndef LIGHTGBM_METRIC_XENTROPY_METRIC_HPP_
#define LIGHTGBM_METRIC_XENTROPY_METRIC_HPP_
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/metric.h>
#include <algorithm>
#include <vector>
#include <sstream>
/*
* Implements three related metrics:
*
* (1) standard cross-entropy that can be used for continuous labels in [0, 1]
* (2) "intensity-weighted" cross-entropy, also for continuous labels in [0, 1]
* (3) Kullback-Leibler divergence, also for continuous labels in [0, 1]
*
* (3) is (1) plus an offset term: the negative entropy of the label
*
* See xentropy_objective.hpp for further details.
*
*/
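// For reference, the identity relating (3) to (1):
//   KL(y || p) = XentLoss(y, p) + [ y*log(y) + (1-y)*log(1-y) ]
// i.e. KL divergence = cross-entropy + negative label entropy; the second term
// is what YentLoss() computes and what is presummed during Init().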
namespace LightGBM {
// label should be in interval [0, 1];
// prob should be in interval (0, 1); prob is clipped if needed
inline static double XentLoss(float label, double prob) {
const double log_arg_epsilon = 1.0e-12;
double a = label;
if (prob > log_arg_epsilon) {
a *= std::log(prob);
} else {
a *= std::log(log_arg_epsilon);
}
double b = 1.0f - label;
if (1.0f - prob > log_arg_epsilon) {
b *= std::log(1.0f - prob);
} else {
b *= std::log(log_arg_epsilon);
}
return - (a + b);
}
// hhat >= 0 is assumed and weight > 0 is required; neither is checked here
inline static double XentLambdaLoss(float label, float weight, double hhat) {
return XentLoss(label, 1.0f - std::exp(-weight * hhat));
}
// Computes the (negative) entropy for label p; p should be in interval [0, 1];
// This is used to presum the KL-divergence offset term (to be _added_ to the cross-entropy loss).
// NOTE: x*log(x) = 0 for x=0,1; so only add when in (0, 1); avoid log(0)*0
inline static double YentLoss(double p) {
double hp = 0.0;
if (p > 0) hp += p * std::log(p);
double q = 1.0f - p;
if (q > 0) hp += q * std::log(q);
return hp;
}
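// Example: YentLoss(0.5) = 0.5*log(0.5) + 0.5*log(0.5) = -log(2) ~= -0.6931,
// the negative entropy of a maximally uncertain label.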
//
// CrossEntropyMetric : "xentropy" : (optional) weights are used linearly
//
class CrossEntropyMetric : public Metric {
public:
explicit CrossEntropyMetric(const MetricConfig&) {}
virtual ~CrossEntropyMetric() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back("xentropy");
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
CHECK_NOTNULL(label_);
// ensure that labels are in interval [0, 1], interval ends included
Common::check_elements_interval_closed(label_, 0.0f, 1.0f, num_data_, GetName()[0].c_str());
Log::Info("[%s:%s]: (metric) labels passed interval [0, 1] check", GetName()[0].c_str(), __func__);
// check that weights are non-negative and sum is positive
if (weights_ == nullptr) {
sum_weights_ = static_cast<double>(num_data_);
} else {
float minw;
Common::obtain_min_max_sum(weights_, num_data_, &minw, nullptr, &sum_weights_);
if (minw < 0.0f) {
Log::Fatal("[%s:%s]: (metric) weights not allowed to be negative", GetName()[0].c_str(), __func__);
}
}
// check that the weight sum is positive
if (sum_weights_ <= 0.0f) {
Log::Fatal("[%s:%s]: sum-of-weights = %f is non-positive", GetName()[0].c_str(), __func__, sum_weights_);
}
Log::Info("[%s:%s]: sum-of-weights = %f", GetName()[0].c_str(), __func__, sum_weights_);
}
std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
double sum_loss = 0.0f;
if (objective == nullptr) {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
sum_loss += XentLoss(label_[i], score[i]); // NOTE: does not work unless score is a probability
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
sum_loss += XentLoss(label_[i], score[i]) * weights_[i]; // NOTE: does not work unless score is a probability
}
}
} else {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double p = 0;
objective->ConvertOutput(&score[i], &p);
sum_loss += XentLoss(label_[i], p);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double p = 0;
objective->ConvertOutput(&score[i], &p);
sum_loss += XentLoss(label_[i], p) * weights_[i];
}
}
}
double loss = sum_loss / sum_weights_;
return std::vector<double>(1, loss);
}
const std::vector<std::string>& GetName() const override {
return name_;
}
double factor_to_bigger_better() const override {
return -1.0f; // negative means smaller loss is better, positive means larger loss is better
}
private:
/*! \brief Number of data points */
data_size_t num_data_;
/*! \brief Pointer to label */
const float* label_;
/*! \brief Pointer to weights */
const float* weights_;
/*! \brief Sum of weights */
double sum_weights_;
/*! \brief Name of this metric */
std::vector<std::string> name_;
};
//
// CrossEntropyLambdaMetric : "xentlambda" : (optional) weights have a different meaning than for "xentropy"
// ATTENTION: Supposed to be used when the objective also is "xentlambda"
//
class CrossEntropyLambdaMetric : public Metric {
public:
explicit CrossEntropyLambdaMetric(const MetricConfig&) {}
virtual ~CrossEntropyLambdaMetric() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back("xentlambda");
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
CHECK_NOTNULL(label_);
Common::check_elements_interval_closed(label_, 0.0f, 1.0f, num_data_, GetName()[0].c_str());
Log::Info("[%s:%s]: (metric) labels passed interval [0, 1] check", GetName()[0].c_str(), __func__);
// check all weights are strictly positive; throw error if not
if (weights_ != nullptr) {
float minw;
Common::obtain_min_max_sum(weights_, num_data_, &minw, nullptr, nullptr);
if (minw <= 0.0f) {
Log::Fatal("[%s:%s]: (metric) all weights must be positive", GetName()[0].c_str(), __func__);
}
}
}
std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
double sum_loss = 0.0f;
if (objective == nullptr) {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double hhat = std::log(1.0f + std::exp(score[i])); // auto-convert
sum_loss += XentLambdaLoss(label_[i], 1.0f, hhat);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double hhat = std::log(1.0f + std::exp(score[i])); // auto-convert
sum_loss += XentLambdaLoss(label_[i], weights_[i], hhat);
}
}
} else {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double hhat = 0;
objective->ConvertOutput(&score[i], &hhat); // NOTE: this only works if objective = "xentlambda"
sum_loss += XentLambdaLoss(label_[i], 1.0f, hhat);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double hhat = 0;
objective->ConvertOutput(&score[i], &hhat); // NOTE: this only works if objective = "xentlambda"
sum_loss += XentLambdaLoss(label_[i], weights_[i], hhat);
}
}
}
return std::vector<double>(1, sum_loss / static_cast<double>(num_data_));
}
const std::vector<std::string>& GetName() const override {
return name_;
}
double factor_to_bigger_better() const override {
return -1.0f;
}
private:
/*! \brief Number of data points */
data_size_t num_data_;
/*! \brief Pointer to label */
const float* label_;
/*! \brief Pointer to weights */
const float* weights_;
/*! \brief Name of this metric */
std::vector<std::string> name_;
};
//
// KullbackLeiblerDivergence : "kldiv" : (optional) weights are used linearly
//
class KullbackLeiblerDivergence : public Metric {
public:
explicit KullbackLeiblerDivergence(const MetricConfig&) {}
virtual ~KullbackLeiblerDivergence() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back("kldiv");
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
CHECK_NOTNULL(label_);
Common::check_elements_interval_closed(label_, 0.0f, 1.0f, num_data_, GetName()[0].c_str());
Log::Info("[%s:%s]: (metric) labels passed interval [0, 1] check", GetName()[0].c_str(), __func__);
if (weights_ == nullptr) {
sum_weights_ = static_cast<double>(num_data_);
} else {
float minw;
Common::obtain_min_max_sum(weights_, num_data_, &minw, nullptr, &sum_weights_);
if (minw < 0.0f) {
Log::Fatal("[%s:%s]: (metric) at least one weight is negative", GetName()[0].c_str(), __func__);
}
}
// check weight sum
if (sum_weights_ <= 0.0f) {
Log::Fatal("[%s:%s]: sum-of-weights = %f is non-positive", GetName()[0].c_str(), __func__, sum_weights_);
}
Log::Info("[%s:%s]: sum-of-weights = %f", GetName()[0].c_str(), __func__, sum_weights_);
// evaluate offset term
presum_label_entropy_ = 0.0f;
if (weights_ == nullptr) {
// #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
presum_label_entropy_ += YentLoss(label_[i]);
}
} else {
// #pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
presum_label_entropy_ += YentLoss(label_[i]) * weights_[i];
}
}
presum_label_entropy_ /= sum_weights_;
// communicate the value of the offset term to be added
Log::Info("%s offset term = %f", GetName()[0].c_str(), presum_label_entropy_);
}
std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
double sum_loss = 0.0f;
if (objective == nullptr) {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
sum_loss += XentLoss(label_[i], score[i]); // NOTE: does not work unless score is a probability
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
sum_loss += XentLoss(label_[i], score[i]) * weights_[i]; // NOTE: does not work unless score is a probability
}
}
} else {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double p = 0;
objective->ConvertOutput(&score[i], &p);
sum_loss += XentLoss(label_[i], p);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
double p = 0;
objective->ConvertOutput(&score[i], &p);
sum_loss += XentLoss(label_[i], p) * weights_[i];
}
}
}
double loss = presum_label_entropy_ + sum_loss / sum_weights_;
return std::vector<double>(1, loss);
}
const std::vector<std::string>& GetName() const override {
return name_;
}
double factor_to_bigger_better() const override {
return -1.0f;
}
private:
/*! \brief Number of data points */
data_size_t num_data_;
/*! \brief Pointer to label */
const float* label_;
/*! \brief Pointer to weights */
const float* weights_;
/*! \brief Sum of weights */
double sum_weights_;
/*! \brief Offset term to cross-entropy; precomputed during init */
double presum_label_entropy_;
/*! \brief Name of this metric */
std::vector<std::string> name_;
};
} // end namespace LightGBM
#endif // end #ifndef LIGHTGBM_METRIC_XENTROPY_METRIC_HPP_
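As a quick sanity check of the metric code above, the following minimal, self-contained sketch (not part of the commit) reproduces the weighted "xentropy" reduction on toy data; XentLossRef and the toy arrays are local to the snippet.
#include <cmath>
#include <cstdio>
// Local reimplementation of XentLoss() so the sketch compiles on its own.
static double XentLossRef(float label, double prob) {
  const double eps = 1.0e-12;
  const double a = label * std::log(prob > eps ? prob : eps);
  const double b = (1.0 - label) * std::log(1.0 - prob > eps ? 1.0 - prob : eps);
  return -(a + b);
}
int main() {
  const float label[] = {0.0f, 1.0f, 0.25f};
  const double prob[] = {0.1, 0.8, 0.3};  // scores already converted to probabilities
  const float weight[] = {1.0f, 2.0f, 0.5f};
  double sum_loss = 0.0;
  double sum_weights = 0.0;
  for (int i = 0; i < 3; ++i) {
    sum_loss += XentLossRef(label[i], prob[i]) * weight[i];  // weights enter linearly
    sum_weights += weight[i];
  }
  std::printf("xentropy = %f\n", sum_loss / sum_weights);  // same reduction as CrossEntropyMetric::Eval
  return 0;
}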
@@ -3,6 +3,7 @@
#include "binary_objective.hpp"
#include "rank_objective.hpp"
#include "multiclass_objective.hpp"
#include "xentropy_objective.hpp"
namespace LightGBM {
@@ -26,6 +27,10 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new MulticlassSoftmax(config);
} else if (type == std::string("multiclassova")) {
return new MulticlassOVA(config);
} else if (type == std::string("xentropy") || type == std::string("cross_entropy")) {
return new CrossEntropy(config);
} else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) {
return new CrossEntropyLambda(config);
}
return nullptr;
}
@@ -51,6 +56,10 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new MulticlassSoftmax(strs);
} else if (type == std::string("multiclassova")) {
return new MulticlassOVA(strs);
} else if (type == std::string("xentropy") || type == std::string("cross_entropy")) {
return new CrossEntropy(strs);
} else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) {
return new CrossEntropyLambda(strs);
}
return nullptr;
}
#ifndef LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_
#include <LightGBM/utils/common.h>
#include <LightGBM/objective_function.h>
#include <cstring>
#include <cmath>
#include <sstream>  // for std::stringstream used in ToString()
/*
* Implements gradients and hessians for the following point losses.
* Target y is anything in interval [0, 1].
*
* (1) CrossEntropy; "xentropy";
*
* loss(y, p, w) = { -(1-y)*log(1-p)-y*log(p) }*w,
* with probability p = 1/(1+exp(-f)), where f is being boosted
*
* ConvertToOutput: f -> p
*
* (2) CrossEntropyLambda; "xentlambda"
*
* loss(y, p, w) = -(1-y)*log(1-p)-y*log(p),
* with p = 1-exp(-lambda*w), lambda = log(1+exp(f)), f being boosted, and w > 0
*
* ConvertToOutput: f -> lambda
*
* (1) and (2) give the same loss when w=1, but their ConvertOutput mappings still differ (probability vs. lambda).
*
*/
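// Derivative summary for (1), stated here for reference (standard logistic
// regression algebra, matching CrossEntropy::GetGradients below):
//   with p = 1/(1+exp(-f)):  dloss/df = (p - y)*w,  d2loss/df2 = p*(1-p)*w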
namespace LightGBM {
/*!
* \brief Objective function for cross-entropy (with optional linear weights)
*/
class CrossEntropy: public ObjectiveFunction {
public:
explicit CrossEntropy(const ObjectiveConfig&) {
}
explicit CrossEntropy(const std::vector<std::string>&) {
}
~CrossEntropy() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
CHECK_NOTNULL(label_);
Common::check_elements_interval_closed(label_, 0.0f, 1.0f, num_data_, GetName());
Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__);
if (weights_ != nullptr) {
float minw;
double sumw;
Common::obtain_min_max_sum(weights_, num_data_, &minw, nullptr, &sumw);
if (minw < 0.0f) {
Log::Fatal("[%s]: at least one weight is negative.", GetName());
}
if (sumw == 0.0f) {
Log::Fatal("[%s]: sum of weights is zero.", GetName());
}
}
}
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
// compute pointwise gradients and hessians with implied unit weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
gradients[i] = static_cast<score_t>(z - label_[i]);
hessians[i] = static_cast<score_t>(z * (1.0f - z));
}
} else {
// compute pointwise gradients and hessians with given weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
gradients[i] = static_cast<score_t>((z - label_[i]) * weights_[i]);
hessians[i] = static_cast<score_t>(z * (1.0f - z) * weights_[i]);
}
}
}
const char* GetName() const override {
return "xentropy";
}
// convert score to a probability
void ConvertOutput(const double* input, double* output) const override {
output[0] = 1.0f / (1.0f + std::exp(-input[0]));
}
std::string ToString() const override {
std::stringstream str_buf;
str_buf << GetName();
return str_buf.str();
}
bool BoostFromAverage() const override { return true; }
private:
/*! \brief Number of data points */
data_size_t num_data_;
/*! \brief Pointer for label */
const float* label_;
/*! \brief Weights for data */
const float* weights_;
};
/*!
* \brief Objective function for alternative parameterization of cross-entropy (see top of file for explanation)
*/
class CrossEntropyLambda: public ObjectiveFunction {
public:
explicit CrossEntropyLambda(const ObjectiveConfig&) {
min_weight_ = max_weight_ = 0.0f;
}
explicit CrossEntropyLambda(const std::vector<std::string>&) {
min_weight_ = max_weight_ = 0.0f;
}
~CrossEntropyLambda() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
CHECK_NOTNULL(label_);
Common::check_elements_interval_closed(label_, 0.0f, 1.0f, num_data_, GetName());
Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__);
if (weights_ != nullptr) {
Common::obtain_min_max_sum(weights_, num_data_, &min_weight_, &max_weight_, nullptr);
if (min_weight_ <= 0.0f) {
Log::Fatal("[%s]: at least one weight is non-positive.", GetName());
}
// Issue an info statement about this ratio
double weight_ratio = max_weight_ / min_weight_;
Log::Info("[%s:%s]: min, max weights = %f, %f; ratio = %f",
GetName(), __func__,
min_weight_, max_weight_,
weight_ratio);
} else {
// all weights are implied to be unity; no need to do anything
}
}
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
// compute pointwise gradients and hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
gradients[i] = static_cast<score_t>(z - label_[i]);
hessians[i] = static_cast<score_t>(z * (1.0f - z));
}
} else {
// compute pointwise gradients and hessians with given weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double w = weights_[i];
const double y = label_[i];
const double epf = std::exp(score[i]);
const double hhat = std::log(1.0f + epf);
const double z = 1.0f - std::exp(-w*hhat);
const double enf = 1.0f / epf; // = std::exp(-score[i]);
gradients[i] = static_cast<score_t>((1.0f - y / z) * w / (1.0f + enf));
const double c = 1.0f / (1.0f - z);
double d = 1.0f + epf;
const double a = w * epf / (d * d);
d = c - 1.0f;
const double b = (c / (d * d) ) * (1.0f + w * epf - c);
hessians[i] = static_cast<score_t>(a * (1.0f + y * b));
}
}
}
const char* GetName() const override {
return "xentlambda";
}
//
// ATTENTION: the function output is the "normalized exponential parameter" lambda > 0, not the probability
//
// Even if this code read: output[0] = 1.0f / (1.0f + std::exp(-input[0]));
// the output would still not be the probability unless the weights are unity.
//
// Let z = 1 / (1 + exp(-f)), then prob(z) = 1-(1-z)^w, where w is the weight for the specific point.
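// (Sketch: 1 - z = 1/(1+exp(f)), hence exp(-w*lambda) = (1+exp(f))^(-w) = (1-z)^w.)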
//
void ConvertOutput(const double* input, double* output) const override {
output[0] = std::log(1.0f + std::exp(input[0]));
}
std::string ToString() const override {
std::stringstream str_buf;
str_buf << GetName();
return str_buf.str();
}
// might want to boost from a weighted average in general, if possible
bool BoostFromAverage() const override { return true; }
private:
/*! \brief Number of data points */
data_size_t num_data_;
/*! \brief Pointer for label */
const float* label_;
/*! \brief Weights for data */
const float* weights_;
/*! \brief Minimum weight found during init */
float min_weight_;
/*! \brief Maximum weight found during init */
float max_weight_;
};
} // end namespace LightGBM
#endif // end #ifndef LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_
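Finally, a hedged, self-contained sketch (not part of the commit) that checks the closed-form "xentlambda" gradient above against a central finite difference at a single point; all names are local to the snippet.
#include <cmath>
#include <cstdio>
// Point loss with p = 1 - exp(-w * log(1 + exp(f))), as defined in the header comment.
static double XentLambdaPointLoss(double y, double f, double w) {
  const double hhat = std::log(1.0 + std::exp(f));
  const double p = 1.0 - std::exp(-w * hhat);
  return -(y * std::log(p) + (1.0 - y) * std::log(1.0 - p));
}
int main() {
  const double y = 0.3, f = 0.7, w = 2.0, eps = 1.0e-6;
  // Closed form, mirroring CrossEntropyLambda::GetGradients.
  const double hhat = std::log(1.0 + std::exp(f));
  const double z = 1.0 - std::exp(-w * hhat);
  const double analytic = (1.0 - y / z) * w / (1.0 + std::exp(-f));
  // Central finite difference of the same point loss.
  const double numeric = (XentLambdaPointLoss(y, f + eps, w) -
                          XentLambdaPointLoss(y, f - eps, w)) / (2.0 * eps);
  std::printf("analytic = %.8f numeric = %.8f\n", analytic, numeric);
  return 0;
}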