some refine for c_api (#152)

1. add csc support 2. some data type from float to double

some refine for c_api (#152)
1. add csc support 2. some data type from float to double
72c2d790 · Guolin Ke · GitHub · bd7274ba · 72c2d790 · 72c2d790
Commit 72c2d790 authored Dec 31, 2016 by Guolin Ke Committed by GitHub Dec 31, 2016
8 changed files
--- a/include/LightGBM/boosting.h
+++ b/include/LightGBM/boosting.h
@@ -96,13 +96,19 @@ public:
  */
  virtual const score_t* GetTrainingScore(int64_t* out_len) = 0;

+  /*!
+  * \brief Get number of prediction results at data_idx data
+  * \param data_idx 0: training data, 1: 1st validation data
+  * \return length of the prediction result for that data
+  */
+  virtual int64_t GetNumPredictAt(int data_idx) const = 0;
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param result used to store prediction result, should allocate memory before call this function
  * \param out_len lenght of returned score
  */
-  virtual void GetPredictAt(int data_idx, score_t* result, int64_t* out_len) = 0;
+  virtual void GetPredictAt(int data_idx, double* result, int64_t* out_len) = 0;

  /*!
  * \brief Prediction for one record, not sigmoid transform

--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -370,7 +370,20 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
 DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
  int data_idx,
  int64_t* out_len,
-  float* out_results);
+  double* out_results);
+
+/*!
+* \brief Get number of predictions for inner dataset
+this can be used to support customized eval function
+Note:  the out_result buffer of LGBM_BoosterGetPredict should be pre-allocated with this length, which is equal to num_class * num_data
+* \param handle handle
+* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
+* \param out_len number of predictions
+* \return 0 when succeed, -1 when failure happens
+*/
+DllExport int LGBM_BoosterGetNumPredict(BoosterHandle handle,
+  int data_idx,
+  int64_t* out_len);

 /*!
 * \brief Get prediction for training data and validation data
@@ -385,7 +398,7 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
 DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
  int data_idx,
  int64_t* out_len,
-  float* out_result);
+  double* out_result);

 /*!
 * \brief make prediction for file
@@ -407,6 +420,24 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
  int64_t num_iteration,
  const char* result_filename);

+/*!
+* \brief Calculate the length of the prediction result for num_row rows
+* \param handle handle
+* \param num_row number of rows to predict
+* \param predict_type
+*          C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
+*          C_API_PREDICT_RAW_SCORE: raw score
+*          C_API_PREDICT_LEAF_INDEX: leaf index
+* \param num_iteration number of iteration for prediction, <= 0 means no limit
+* \param out_len length of prediction
+* \return 0 when succeed, -1 when failure happens
+*/
+DllExport int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
+  int64_t num_row,
+  int predict_type,
+  int64_t num_iteration,
+  int64_t* out_len);
+
 /*!
 * \brief make prediction for an new data set
 *        Note:  should pre-allocate memory for out_result, 
@@ -442,7 +473,44 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len,
-  float* out_result);
+  double* out_result);
+
+/*!
+* \brief make prediction for a new data set in CSC format
+*        Note:  should pre-allocate memory for out_result,
+*               for normal and raw score: its length is equal to num_class * num_data
+*               for leaf index, its length is equal to num_class * num_data * num_iteration
+* \param handle handle
+* \param col_ptr pointer to col headers
+* \param col_ptr_type type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
+* \param indices row indices of the nonzero elements
+* \param data values of the nonzero elements
+* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
+* \param ncol_ptr number of cols in the matrix + 1
+* \param nelem number of nonzero elements in the matrix
+* \param num_row number of rows
+* \param predict_type
+*          C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
+*          C_API_PREDICT_RAW_SCORE: raw score
+*          C_API_PREDICT_LEAF_INDEX: leaf index
+* \param num_iteration number of iteration for prediction, <= 0 means no limit
+* \param out_len length of output result
+* \param out_result pointer to the output array, should allocate memory before calling this function
+* \return 0 when succeed, -1 when failure happens
+*/
+DllExport int LGBM_BoosterPredictForCSC(BoosterHandle handle,
+  const void* col_ptr,
+  int col_ptr_type,
+  const int32_t* indices,
+  const void* data,
+  int data_type,
+  int64_t ncol_ptr,
+  int64_t nelem,
+  int64_t num_row,
+  int predict_type,
+  int64_t num_iteration,
+  int64_t* out_len,
+  double* out_result);

 /*!
 * \brief make prediction for an new data set
@@ -473,7 +541,7 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len,
-  float* out_result);
+  double* out_result);

 /*!
 * \brief save model into file
@@ -497,7 +565,7 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
 DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
  int buffer_len,
  int64_t* out_len,
-  char** out_str);
+  char* out_str);

 /*!
 * \brief Get leaf value 
@@ -510,7 +578,7 @@ DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
 DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
  int tree_idx,
  int leaf_idx,
-  float* out_val);
+  double* out_val);

 /*!
 * \brief Set leaf value
@@ -523,26 +591,7 @@ DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
 DllExport int LGBM_BoosterSetLeafValue(BoosterHandle handle,
  int tree_idx,
  int leaf_idx,
-  float val);
-
-// some help functions used to convert data
-
-std::function<std::vector<double>(int row_idx)>
-RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
-
-std::function<std::vector<std::pair<int, double>>(int row_idx)>
-RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
-
-std::function<std::vector<std::pair<int, double>>(int idx)>
-RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
-  const void* data, int data_type, int64_t nindptr, int64_t nelem);
-
-std::function<std::vector<std::pair<int, double>>(int idx)>
-ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indices,
-  const void* data, int data_type, int64_t ncol_ptr, int64_t nelem);
-
-std::vector<double>
-SampleFromOneColumn(const std::vector<std::pair<int, double>>& data, const std::vector<int>& indices);
+  double val);

 #if defined(_MSC_VER)
 // exception handle and error msg

--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -324,14 +324,8 @@ public:
    }
  }

-  inline void PushOneColumn(int tid, data_size_t col_idx, const std::vector<std::pair<int, double>>& feature_values) {
-    if (col_idx >= num_total_features_) { return; }
-    int feature_idx = used_feature_map_[col_idx];
-    if (feature_idx >= 0) {
-      for (auto& inner_data : feature_values) {
-        features_[feature_idx]->PushData(tid, inner_data.first, inner_data.second);
-      }
-    }
+  inline int GetInnerFeatureIndex(int col_idx) const {
+    return used_feature_map_[col_idx];
  }

  Dataset* Subset(const data_size_t* used_indices, data_size_t num_used_indices, bool is_enable_sparse) const;
@@ -358,7 +352,7 @@ public:
  * \param i Index for feature
  * \return Pointer of feature
  */
-  inline const Feature* FeatureAt(int i) const { return features_[i].get(); }
+  inline Feature* FeatureAt(int i) const { return features_[i].get(); }

  /*!
  * \brief Get meta data pointer

--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -288,10 +288,13 @@ class _InnerPredictor(object):
                lines = tmp_file.readlines()
                nrow = len(lines)
                preds = [float(token) for line in lines for token in line.split('\t')]
-                preds = np.array(preds, dtype=np.float32, copy=False)
+                preds = np.array(preds, dtype=np.float64, copy=False)
        elif isinstance(data, scipy.sparse.csr_matrix):
            preds, nrow = self.__pred_for_csr(data, num_iteration,
                                              predict_type)
+        elif isinstance(data, scipy.sparse.csc_matrix):
+            preds, nrow = self.__pred_for_csc(data, num_iteration,
+                                              predict_type)
        elif isinstance(data, np.ndarray):
            preds, nrow = self.__pred_for_np2d(data, num_iteration,
                                               predict_type)
@@ -319,13 +322,14 @@ class _InnerPredictor(object):
        """
        Get size of prediction result
        """
-        n_preds = self.num_class * nrow
-        if predict_type == C_API_PREDICT_LEAF_INDEX:
-            if num_iteration > 0:
-                n_preds *= min(num_iteration, self.num_total_iteration)
-            else:
-                n_preds *= self.num_total_iteration
-        return n_preds
+        n_preds = ctypes.c_int64(0)
+        _safe_call(_LIB.LGBM_BoosterCalcNumPredict(
+            self.handle,
+            nrow,
+            predict_type,
+            num_iteration,
+            ctypes.byref(n_preds)))
+        return n_preds.value

    def __pred_for_np2d(self, mat, num_iteration, predict_type):
        """
@@ -342,7 +346,7 @@ class _InnerPredictor(object):
        ptr_data, type_ptr_data = c_float_array(data)
        n_preds = self.__get_num_preds(num_iteration, mat.shape[0],
                                       predict_type)
-        preds = np.zeros(n_preds, dtype=np.float32)
+        preds = np.zeros(n_preds, dtype=np.float64)
        out_num_preds = ctypes.c_int64(0)
        _safe_call(_LIB.LGBM_BoosterPredictForMat(
            self.handle,
@@ -354,7 +358,7 @@ class _InnerPredictor(object):
            predict_type,
            num_iteration,
            ctypes.byref(out_num_preds),
-            preds.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
+            preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
            ))
        if n_preds != out_num_preds.value:
            raise ValueError("Wrong length for predict results")
@@ -366,7 +370,7 @@ class _InnerPredictor(object):
        """
        nrow = len(csr.indptr) - 1
        n_preds = self.__get_num_preds(num_iteration, nrow, predict_type)
-        preds = np.zeros(n_preds, dtype=np.float32)
+        preds = np.zeros(n_preds, dtype=np.float64)
        out_num_preds = ctypes.c_int64(0)

        ptr_indptr, type_ptr_indptr = c_int_array(csr.indptr)
@@ -385,7 +389,38 @@ class _InnerPredictor(object):
            predict_type,
            num_iteration,
            ctypes.byref(out_num_preds),
-            preds.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
+            preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
+            ))
+        if n_preds != out_num_preds.value:
+            raise ValueError("Wrong length for predict results")
+        return preds, nrow
+
+    def __pred_for_csc(self, csc, num_iteration, predict_type):
+        """
+        Predict for a CSC matrix
+        """
+        nrow = csc.shape[0]
+        n_preds = self.__get_num_preds(num_iteration, nrow, predict_type)
+        preds = np.zeros(n_preds, dtype=np.float64)
+        out_num_preds = ctypes.c_int64(0)
+
+        ptr_indptr, type_ptr_indptr = c_int_array(csc.indptr)
+        ptr_data, type_ptr_data = c_float_array(csc.data)
+
+        _safe_call(_LIB.LGBM_BoosterPredictForCSC(
+            self.handle,
+            ptr_indptr,
+            type_ptr_indptr,
+            csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
+            ptr_data,
+            type_ptr_data,
+            len(csc.indptr),
+            len(csc.data),
+            csc.shape[0],
+            predict_type,
+            num_iteration,
+            ctypes.byref(out_num_preds),
+            preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
            ))
        if n_preds != out_num_preds.value:
            raise ValueError("Wrong length for predict results")
@@ -511,6 +546,8 @@ class _InnerDataset(object):
                ctypes.byref(self.handle)))
        elif isinstance(data, scipy.sparse.csr_matrix):
            self.__init_from_csr(data, params_str, ref_dataset)
+        elif isinstance(data, scipy.sparse.csc_matrix):
+            self.__init_from_csc(data, params_str, ref_dataset)
        elif isinstance(data, np.ndarray):
            self.__init_from_np2d(data, params_str, ref_dataset)
        else:
@@ -541,6 +578,7 @@ class _InnerDataset(object):
                    for j in range(self.predictor.num_class):
                        new_init_score[j * num_data + i] = init_score[i * self.predictor.num_class + j]
                init_score = new_init_score
+            init_score = init_score.astype(dtype=np.float32, copy=False)
            self.set_init_score(init_score)
        elif self.predictor is not None:
            raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__))
@@ -655,6 +693,30 @@ class _InnerDataset(object):
            ref_dataset,
            ctypes.byref(self.handle)))

+    def __init_from_csc(self, csc, params_str, ref_dataset):
+        """
+        Initialize data from a csc matrix.
+        """
+        if len(csc.indices) != len(csc.data):
+            raise ValueError('Length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
+        self.handle = ctypes.c_void_p()
+
+        ptr_indptr, type_ptr_indptr = c_int_array(csc.indptr)
+        ptr_data, type_ptr_data = c_float_array(csc.data)
+
+        _safe_call(_LIB.LGBM_DatasetCreateFromCSC(
+            ptr_indptr,
+            type_ptr_indptr,
+            csc.indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
+            ptr_data,
+            type_ptr_data,
+            len(csc.indptr),
+            len(csc.data),
+            csc.shape[0],
+            c_str(params_str),
+            ref_dataset,
+            ctypes.byref(self.handle)))
+
    def __del__(self):
        _safe_call(_LIB.LGBM_DatasetFree(self.handle))

@@ -1498,7 +1560,7 @@ class Booster(object):
            self.handle,
            buffer_len,
            ctypes.byref(tmp_out_len),
-            ctypes.byref(ptr_string_buffer)))
+            ptr_string_buffer))
        actual_len = tmp_out_len.value
        '''if buffer length is not long enough, reallocate a buffer'''
        if actual_len > buffer_len:
@@ -1577,13 +1639,13 @@ class Booster(object):
        self.__get_eval_info()
        ret = []
        if self.__num_inner_eval > 0:
-            result = np.array([0.0 for _ in range(self.__num_inner_eval)], dtype=np.float32)
+            result = np.array([0.0 for _ in range(self.__num_inner_eval)], dtype=np.float64)
            tmp_out_len = ctypes.c_int64(0)
            _safe_call(_LIB.LGBM_BoosterGetEval(
                self.handle,
                data_idx,
                ctypes.byref(tmp_out_len),
-                result.ctypes.data_as(ctypes.POINTER(ctypes.c_float))))
+                result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
            if tmp_out_len.value != self.__num_inner_eval:
                raise ValueError("Wrong length of eval results")
            for i in range(self.__num_inner_eval):
@@ -1614,11 +1676,11 @@ class Booster(object):
            else:
                n_preds = self.valid_sets[data_idx - 1].num_data() * self.__num_class
            self.__inner_predict_buffer[data_idx] = \
-                np.array([0.0 for _ in range(n_preds)], dtype=np.float32, copy=False)
+                np.array([0.0 for _ in range(n_preds)], dtype=np.float64, copy=False)
        """avoid to predict many time in one iteration"""
        if not self.__is_predicted_cur_iter[data_idx]:
            tmp_out_len = ctypes.c_int64(0)
-            data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_float))
+            data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_double))
            _safe_call(_LIB.LGBM_BoosterGetPredict(
                self.handle,
                data_idx,

--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -350,7 +350,7 @@ std::string GBDT::OutputMetric(int iter) {

 /*! \brief Get eval result */
 std::vector<double> GBDT::GetEvalAt(int data_idx) const {
-  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
+  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
  std::vector<double> ret;
  if (data_idx == 0) {
    for (auto& sub_metric : training_metrics_) {
@@ -378,8 +378,8 @@ const score_t* GBDT::GetTrainingScore(int64_t* out_len) {
  return train_score_updater_->score();
 }

-void GBDT::GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
-  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
+void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
+  CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));

  const score_t* raw_scores = nullptr;
  data_size_t num_data = 0;
@@ -401,18 +401,18 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
      }
      Common::Softmax(&tmp_result);
      for (int j = 0; j < num_class_; ++j) {
-        out_result[j * num_data + i] = static_cast<score_t>(tmp_result[j]);
+        out_result[j * num_data + i] = static_cast<double>(tmp_result[j]);
      }
    }
  } else if(sigmoid_ > 0.0f){
 #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data; ++i) {
-      out_result[i] = static_cast<score_t>(1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * raw_scores[i])));
+      out_result[i] = static_cast<double>(1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * raw_scores[i])));
    }
  } else {
 #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data; ++i) {
-      out_result[i] = raw_scores[i];
+      out_result[i] = static_cast<double>(raw_scores[i]);
    }
  }


--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -107,13 +107,21 @@ public:
  */
  virtual const score_t* GetTrainingScore(int64_t* out_len) override;

+  virtual int64_t GetNumPredictAt(int data_idx) const override {
+    CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size()));
+    data_size_t num_data = train_data_->num_data();
+    if (data_idx > 0) {
+      num_data = valid_score_updater_[data_idx - 1]->num_data();
+    }
+    return num_data * num_class_;
+  }
  /*!
  * \brief Get prediction result at data_idx data
  * \param data_idx 0: training data, 1: 1st validation data
  * \param result used to store prediction result, should allocate memory before call this function
  * \param out_len lenght of returned score
  */
-  void GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) override;
+  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) override;

  /*!
  * \brief Prediction for one record without sigmoid transformation

--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -2,6 +2,7 @@

 #include <LightGBM/utils/common.h>
 #include <LightGBM/utils/random.h>
+#include <LightGBM/utils/threading.h>
 #include <LightGBM/c_api.h>
 #include <LightGBM/dataset_loader.h>
 #include <LightGBM/dataset.h>
@@ -17,6 +18,7 @@
 #include <memory>
 #include <stdexcept>
 #include <mutex>
+#include <functional>

 #include "./application/predictor.hpp"
 #include "./boosting/gbdt.h"
@@ -171,7 +173,7 @@ public:
    return Predictor(boosting_.get(), is_raw_score, is_predict_leaf);
  }

-  void GetPredictAt(int data_idx, score_t* out_result, int64_t* out_len) {
+  void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
    boosting_->GetPredictAt(data_idx, out_result, out_len);
  }

@@ -233,6 +235,38 @@ private:

 using namespace LightGBM;

+// some helper functions used to convert data
+
+std::function<std::vector<double>(int row_idx)>
+RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
+
+std::function<std::vector<std::pair<int, double>>(int row_idx)>
+RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
+
+std::function<std::vector<std::pair<int, double>>(int idx)>
+RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
+  const void* data, int data_type, int64_t nindptr, int64_t nelem);
+
+// Row iterator of one column for CSC matrix
+class CSC_RowIterator {
+public:
+  CSC_RowIterator(const void* col_ptr, int col_ptr_type, const int32_t* indices,
+    const void* data, int data_type, int64_t ncol_ptr, int64_t nelem, int col_idx);
+  ~CSC_RowIterator() {}
+  // return value at idx, can only be accessed in ascending order of idx
+  double Get(int idx);
+  // return the next non-zero (index, value) pair; index < 0 means no more data
+  std::pair<int, double> NextNonZero();
+private:
+  int nonzero_idx_ = 0;
+  int cur_idx_ = -1;
+  double cur_val_ = 0.0f;
+  bool is_end_ = false;
+  std::function<std::pair<int, double>(int idx)> iter_fun_;
+};
+
+// start of c_api functions
+
 DllExport const char* LGBM_GetLastError() {
  return LastErrorMsg();
 }
@@ -382,10 +416,8 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
  IOConfig io_config;
  io_config.Set(param);
  std::unique_ptr<Dataset> ret;
-  auto get_col_fun = ColumnFunctionFromCSC(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem);
  int32_t nrow = static_cast<int32_t>(num_row);
  if (reference == nullptr) {
-    Log::Warning("Construct from CSC format is not efficient");
    // sample data first
    Random rand(io_config.data_random_seed);
    const int sample_cnt = static_cast<int>(nrow < io_config.bin_construct_sample_cnt ? nrow : io_config.bin_construct_sample_cnt);
@@ -393,8 +425,13 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
    std::vector<std::vector<double>> sample_values(ncol_ptr - 1);
 #pragma omp parallel for schedule(guided)
    for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
-      auto cur_col = get_col_fun(i);
-      sample_values[i] = SampleFromOneColumn(cur_col, sample_indices);
+      CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
+      for (int j = 0; j < sample_cnt; j++) {
+        auto val = col_it.Get(sample_indices[j]);
+        if (std::fabs(val) > kEpsilon) {
+          sample_values[i].push_back(val);
+        }
+      }
    }
    DatasetLoader loader(io_config, nullptr, 1, nullptr);
    ret.reset(loader.CostructFromSampleData(sample_values, sample_cnt, nrow));
@@ -408,8 +445,17 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
 #pragma omp parallel for schedule(guided)
  for (int i = 0; i < ncol_ptr - 1; ++i) {
    const int tid = omp_get_thread_num();
-    auto one_col = get_col_fun(i);
-    ret->PushOneColumn(tid, i, one_col);
+    int feature_idx = ret->GetInnerFeatureIndex(i);
+    if (feature_idx < 0) { continue; }
+    CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
+    int row_idx = 0;
+    while (row_idx < nrow) {
+      auto pair = col_it.NextNonZero();
+      row_idx = pair.first;
+      // no more data
+      if (row_idx < 0) { break; }
+      ret->FeatureAt(feature_idx)->PushData(tid, row_idx, pair.second);
+    }
  }
  ret->FinishLoad();
  *out = ret.release();
@@ -517,7 +563,6 @@ DllExport int LGBM_DatasetGetNumFeature(DatasetHandle handle,
  API_END();
 }

-
 // ---- start of booster

 DllExport int LGBM_BoosterCreate(const DatasetHandle train_data,
@@ -627,10 +672,7 @@ DllExport int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int64_t* out
  *out_iteration = ref_booster->GetBoosting()->GetCurrentIteration();
  API_END();
 }
-/*!
-* \brief Get number of eval
-* \return total number of eval result
-*/
+
 DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
@@ -638,10 +680,6 @@ DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len)
  API_END();
 }

-/*!
-* \brief Get number of eval
-* \return total number of eval result
-*/
 DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
@@ -649,26 +687,34 @@ DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, c
  API_END();
 }

-
 DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
  int data_idx,
  int64_t* out_len,
-  float* out_results) {
+  double* out_results) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  auto boosting = ref_booster->GetBoosting();
  auto result_buf = boosting->GetEvalAt(data_idx);
  *out_len = static_cast<int64_t>(result_buf.size());
  for (size_t i = 0; i < result_buf.size(); ++i) {
-    (out_results)[i] = static_cast<float>(result_buf[i]);
+    (out_results)[i] = static_cast<double>(result_buf[i]);
  }
  API_END();
 }

+DllExport int LGBM_BoosterGetNumPredict(BoosterHandle handle,
+  int data_idx,
+  int64_t* out_len) {
+  API_BEGIN();
+  auto boosting = reinterpret_cast<Booster*>(handle)->GetBoosting();
+  *out_len = boosting->GetNumPredictAt(data_idx);
+  API_END();
+}
+
 DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
  int data_idx,
  int64_t* out_len,
-  float* out_result) {
+  double* out_result) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  ref_booster->GetPredictAt(data_idx, out_result, out_len);
@@ -689,6 +735,30 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
  API_END();
 }

+int GetNumPredOneRow(const Booster* ref_booster, int predict_type, int64_t num_iteration) {
+  int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
+  if (predict_type == C_API_PREDICT_LEAF_INDEX) {
+    int64_t max_iteration = ref_booster->GetBoosting()->GetCurrentIteration();
+    if (num_iteration > 0) {
+      num_preb_in_one_row *= static_cast<int>(std::min(max_iteration, num_iteration));
+    } else {
+      num_preb_in_one_row *= max_iteration;
+    }
+  }
+  return num_preb_in_one_row;
+}
+
+DllExport int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
+  int64_t num_row,
+  int predict_type,
+  int64_t num_iteration,
+  int64_t* out_len) {
+  API_BEGIN();
+  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
+  *out_len = static_cast<int64_t>(num_row * GetNumPredOneRow(ref_booster, predict_type, num_iteration));
+  API_END();
+}
+
 DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
  const void* indptr,
  int indptr_type,
@@ -701,32 +771,70 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len,
-  float* out_result) {
+  double* out_result) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  auto predictor = ref_booster->NewPredictor(static_cast<int>(num_iteration), predict_type);
  auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
-  int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
-  if (predict_type == C_API_PREDICT_LEAF_INDEX) {
-    if (num_iteration > 0) {
-      num_preb_in_one_row *= static_cast<int>(num_iteration);
-    } else {
-      num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
-    }
-  }
+  int num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
  int nrow = static_cast<int>(nindptr - 1);
 #pragma omp parallel for schedule(guided)
  for (int i = 0; i < nrow; ++i) {
    auto one_row = get_row_fun(i);
    auto predicton_result = predictor.GetPredictFunction()(one_row);
    for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
-      out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
+      out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
    }
  }
  *out_len = nrow * num_preb_in_one_row;
  API_END();
 }

+DllExport int LGBM_BoosterPredictForCSC(BoosterHandle handle,
+  const void* col_ptr,
+  int col_ptr_type,
+  const int32_t* indices,
+  const void* data,
+  int data_type,
+  int64_t ncol_ptr,
+  int64_t nelem,
+  int64_t num_row,
+  int predict_type,
+  int64_t num_iteration,
+  int64_t* out_len,
+  double* out_result) {
+  API_BEGIN();
+  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
+  auto predictor = ref_booster->NewPredictor(static_cast<int>(num_iteration), predict_type);
+  int num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
+  int ncol = static_cast<int>(ncol_ptr - 1);
+
+  Threading::For<int64_t>(0, num_row,
+    [&predictor, &out_result, num_preb_in_one_row, ncol, col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem]
+  (int, data_size_t start, data_size_t end) {
+    std::vector<CSC_RowIterator> iterators;
+    for (int j = 0; j < ncol; ++j) {
+      iterators.emplace_back(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, j);
+    }
+    std::vector<std::pair<int, double>> one_row;
+    for (int64_t i = start; i < end; ++i) {
+      one_row.clear();
+      for (int j = 0; j < ncol; ++j) {
+        auto val = iterators[j].Get(static_cast<int>(i));
+        if (std::fabs(val) > kEpsilon) {
+          one_row.emplace_back(j, val);
+        }
+      }
+      auto predicton_result = predictor.GetPredictFunction()(one_row);
+      for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
+        out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
+      }
+    }
+  });
+  *out_len = num_row * num_preb_in_one_row;
+  API_END();
+}
+
 DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
  const void* data,
  int data_type,
@@ -736,25 +844,18 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
  int predict_type,
  int64_t num_iteration,
  int64_t* out_len,
-  float* out_result) {
+  double* out_result) {
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  auto predictor = ref_booster->NewPredictor(static_cast<int>(num_iteration), predict_type);
  auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
-  int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
-  if (predict_type == C_API_PREDICT_LEAF_INDEX) {
-    if (num_iteration > 0) {
-      num_preb_in_one_row *= static_cast<int>(num_iteration);
-    } else {
-      num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
-    }
-  }
+  int num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
 #pragma omp parallel for schedule(guided)
  for (int i = 0; i < nrow; ++i) {
    auto one_row = get_row_fun(i);
    auto predicton_result = predictor.GetPredictFunction()(one_row);
    for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
-      out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
+      out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
    }
  }
  *out_len = nrow * num_preb_in_one_row;
@@ -773,37 +874,34 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
 DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
  int buffer_len,
  int64_t* out_len,
-  char** out_str) {
+  char* out_str) {  // destination buffer for the dumped model text; its capacity is compared against buffer_len below
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
  std::string model = ref_booster->DumpModel();
  *out_len = static_cast<int64_t>(model.size()) + 1;
  if (*out_len <= buffer_len) {
-    std::strcpy(*out_str, model.c_str());
+    std::strcpy(out_str, model.c_str());  // copied only when model.size() + 1 (terminator included) fits; otherwise out_str is untouched and *out_len carries the required size
  }
  API_END();
 }

-
-
 DllExport int LGBM_BoosterGetLeafValue(BoosterHandle handle,
  int tree_idx,
  int leaf_idx,
-  float* out_val) {
+  double* out_val) {  // receives the value of leaf leaf_idx in tree tree_idx
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
-  *out_val = static_cast<float>(ref_booster->GetLeafValue(tree_idx, leaf_idx));
+  *out_val = static_cast<double>(ref_booster->GetLeafValue(tree_idx, leaf_idx));  // result of Booster::GetLeafValue is written out as double
  API_END();
 }

-
 DllExport int LGBM_BoosterSetLeafValue(BoosterHandle handle,
  int tree_idx,
  int leaf_idx,
-  float val) {
+  double val) {  // new value for leaf leaf_idx in tree tree_idx
  API_BEGIN();
  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
-  ref_booster->SetLeafValue(tree_idx, leaf_idx, static_cast<double>(val));
+  ref_booster->SetLeafValue(tree_idx, leaf_idx, val);  // val is already double, so it is forwarded without a cast
  API_END();
 }

@@ -929,72 +1027,103 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
  throw std::runtime_error("unknown data type in RowFunctionFromCSR");
 }

-std::function<std::vector<std::pair<int, double>>(int idx)>
-ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indices, const void* data, int data_type, int64_t ncol_ptr, int64_t nelem) {
+std::function<std::pair<int, double>(int idx)>  // returned accessor: given k, yields the k-th stored (index, value) pair of column col_idx, or (-1, 0.0) past the column's end
+IterateFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indices, const void* data, int data_type, int64_t ncol_ptr, int64_t nelem, int col_idx) {
+  CHECK(col_idx < ncol_ptr && col_idx >= 0);  // NOTE(review): col_ptr[col_idx + 1] is read below; if ncol_ptr is the length of col_ptr this bound should presumably be ncol_ptr - 1 — confirm
  if (data_type == C_API_DTYPE_FLOAT32) {
    const float* data_ptr = reinterpret_cast<const float*>(data);
    if (col_ptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
-      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
-        std::vector<std::pair<int, double>> ret;
-        int64_t start = ptr_col_ptr[idx];
-        int64_t end = ptr_col_ptr[idx + 1];
-        for (int64_t i = start; i < end; ++i) {
-          ret.emplace_back(indices[i], data_ptr[i]);
-        }
-        return ret;
+      int64_t start = ptr_col_ptr[col_idx];  // [start, end) is this column's slice of indices/data, resolved once up front
+      int64_t end = ptr_col_ptr[col_idx + 1];
+      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem, start, end](int bias) {  // NOTE(review): ptr_col_ptr, ncol_ptr and nelem are captured but not used in the body
+        int64_t i = static_cast<int64_t>(start + bias);  // bias is the position within the column's slice
+        if (i >= end) {
+          return std::make_pair(-1, 0.0);  // sentinel: negative index signals that the column has no more entries
+        }
+        int idx = static_cast<int>(indices[i]);
+        double val = static_cast<double>(data_ptr[i]);  // float32 payload widened to double
+        return std::make_pair(idx, val);
      };
    } else if (col_ptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
-      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
-        std::vector<std::pair<int, double>> ret;
-        int64_t start = ptr_col_ptr[idx];
-        int64_t end = ptr_col_ptr[idx + 1];
-        for (int64_t i = start; i < end; ++i) {
-          ret.emplace_back(indices[i], data_ptr[i]);
-        }
-        return ret;
+      int64_t start = ptr_col_ptr[col_idx];  // same logic as the int32 branch, with col_ptr read as int64
+      int64_t end = ptr_col_ptr[col_idx + 1];
+      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem, start, end](int bias) {
+        int64_t i = static_cast<int64_t>(start + bias);
+        if (i >= end) {
+          return std::make_pair(-1, 0.0);
+        }
+        int idx = static_cast<int>(indices[i]);
+        double val = static_cast<double>(data_ptr[i]);
+        return std::make_pair(idx, val);
      };
    }
  } else if (data_type == C_API_DTYPE_FLOAT64) {
    const double* data_ptr = reinterpret_cast<const double*>(data);
    if (col_ptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
-      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
-        std::vector<std::pair<int, double>> ret;
-        int64_t start = ptr_col_ptr[idx];
-        int64_t end = ptr_col_ptr[idx + 1];
-        for (int64_t i = start; i < end; ++i) {
-          ret.emplace_back(indices[i], data_ptr[i]);
-        }
-        return ret;
+      int64_t start = ptr_col_ptr[col_idx];  // float64 payload, int32 col_ptr
+      int64_t end = ptr_col_ptr[col_idx + 1];
+      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem, start, end](int bias) {
+        int64_t i = static_cast<int64_t>(start + bias);
+        if (i >= end) {
+          return std::make_pair(-1, 0.0);
+        }
+        int idx = static_cast<int>(indices[i]);
+        double val = static_cast<double>(data_ptr[i]);
+        return std::make_pair(idx, val);
      };
    } else if (col_ptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
-      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
-        std::vector<std::pair<int, double>> ret;
-        int64_t start = ptr_col_ptr[idx];
-        int64_t end = ptr_col_ptr[idx + 1];
-        for (int64_t i = start; i < end; ++i) {
-          ret.emplace_back(indices[i], data_ptr[i]);
-        }
-        return ret;
+      int64_t start = ptr_col_ptr[col_idx];  // float64 payload, int64 col_ptr
+      int64_t end = ptr_col_ptr[col_idx + 1];
+      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem, start, end](int bias) {
+        int64_t i = static_cast<int64_t>(start + bias);
+        if (i >= end) {
+          return std::make_pair(-1, 0.0);
+        }
+        int idx = static_cast<int>(indices[i]);
+        double val = static_cast<double>(data_ptr[i]);
+        return std::make_pair(idx, val);
      };
    }
  }
-  throw std::runtime_error("unknown data type in ColumnFunctionFromCSC");
+  throw std::runtime_error("unknown data type in CSC matrix");  // reached when data_type or col_ptr_type matches none of the constants handled above
+}
+
+CSC_RowIterator::CSC_RowIterator(const void* col_ptr, int col_ptr_type, const int32_t* indices,
+  const void* data, int data_type, int64_t ncol_ptr, int64_t nelem, int col_idx) {
+  iter_fun_ = IterateFunctionFromCSC(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, col_idx);  // all arguments are forwarded unchanged; iter_fun_ becomes the accessor for column col_idx
 }

-std::vector<double> SampleFromOneColumn(const std::vector<std::pair<int, double>>& data, const std::vector<int>& indices) {
-  size_t j = 0;
-  std::vector<double> ret;
-  for (auto row_idx : indices) {
-    while (j < data.size() && data[j].first < static_cast<int>(row_idx)) {
-      ++j;
+double CSC_RowIterator::Get(int idx) {  // value stored at index idx, or 0 when the column has no entry there; the cursor only moves forward, so callers presumably pass non-decreasing idx — confirm
+  while (idx > cur_idx_ && !is_end_) {  // advance until the cursor reaches or passes idx, or the column runs out
+    auto ret = iter_fun_(nonzero_idx_);
+    if (ret.first < 0) {  // negative index is the end-of-column sentinel from iter_fun_
+      is_end_ = true;
+      break;
    }
-    if (j < data.size() && data[j].first == static_cast<int>(row_idx)) {
-      ret.push_back(data[j].second);
+    cur_idx_ = ret.first;  // remember the entry just read so a later call can match it without re-reading
+    cur_val_ = ret.second;
+    ++nonzero_idx_;
  }
+  if (idx == cur_idx_) {
+    return cur_val_;
+  } else {
+    return 0.0f;  // float literal; converted to double 0.0 by the return type
+  }
+}
+
+std::pair<int, double> CSC_RowIterator::NextNonZero() {  // returns the next stored (index, value) pair and advances; yields a pair with index -1 once the column is exhausted
+  if (!is_end_) {
+    auto ret = iter_fun_(nonzero_idx_);
+    ++nonzero_idx_;  // NOTE(review): nonzero_idx_ is shared with Get(), but cur_idx_/cur_val_ are not updated here — mixing the two calls on one iterator looks unsafe; confirm
+    if (ret.first < 0) {
+      is_end_ = true;  // latch the end state so later calls skip iter_fun_
    }
    return ret;
+  } else {
+    return std::make_pair(-1, 0.0);  // same sentinel shape that iter_fun_ produces at end of column
+  }
 }
--- a/tests/c_api_test/test.py
+++ b/tests/c_api_test/test.py
@@ -175,9 +175,9 @@ def test_booster():
    is_finished = ctypes.c_int(0)
    for i in range(100):
        LIB.LGBM_BoosterUpdateOneIter(booster,ctypes.byref(is_finished))
-        result = np.array([0.0], dtype=np.float32)
+        result = np.array([0.0], dtype=np.float64)
        out_len = ctypes.c_ulong(0)
-        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
+        LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
        print ('%d Iteration test AUC %f' %(i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
@@ -192,7 +192,7 @@ def test_booster():
        data.append( [float(x) for x in line.split('\t')[1:]] )
    inp.close()
    mat = np.array(data)
-    preb = np.zeros(mat.shape[0], dtype=np.float32)
+    preb = np.zeros(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_long()
    data = np.array(mat.reshape(mat.size), copy=False)
    LIB.LGBM_BoosterPredictForMat(booster2,