update travis, clean code

a44bc1ca · Guolin Ke · 41c0370b · a44bc1ca · a44bc1ca · a44bc1ca
Commit a44bc1ca authored Nov 09, 2016 by Guolin Ke
5 changed files
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,14 +5,22 @@ dist: trusty
 before_install:
 - test -n $CC  && unset CC
 - test -n $CXX && unset CXX
+- wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+- chmod +x conda.sh
+- bash conda.sh -b -p $HOME/miniconda
+- export PATH="$HOME/miniconda/bin:$PATH"
+- conda config --set always_yes yes --set changeps1 no
+- conda update -q conda

 install:
- sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential
+- sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential 
+- conda install --yes atlas numpy scipy scikit-learn


 script:
 - cd $TRAVIS_BUILD_DIR
 - mkdir build && cd build && cmake .. && make -j
+- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
 - cd $TRAVIS_BUILD_DIR
 - rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j


--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -26,10 +26,14 @@
 typedef void* DatesetHandle;
 typedef void* BoosterHandle;

-#define dtype_float32 (0)
-#define dtype_float64 (1)
-#define dtype_int32   (2)
-#define dtype_int64   (3)
+#define C_API_DTYPE_FLOAT32 (0)
+#define C_API_DTYPE_FLOAT64 (1)
+#define C_API_DTYPE_INT32   (2)
+#define C_API_DTYPE_INT64   (3)
+
+#define C_API_PREDICT_NORMAL     (0)
+#define C_API_PREDICT_RAW_SCORE  (1)
+#define C_API_PREDICT_LEAF_INDEX (2)

 /*!
 * \brief get string message of the last error

--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -45,7 +45,7 @@ public:
  * \brief Training logic
  * \param gradient nullptr for using default objective, otherwise use self-defined boosting
  * \param hessian nullptr for using default objective, otherwise use self-defined boosting
-  * \param is_eval true if need evalulation or early stop
+  * \param is_eval true if need evaluation or early stop
  * \return True if meet early stopping or cannot boosting
  */
  bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;

--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -106,12 +106,12 @@ public:
    if (predictor_ != nullptr) { delete predictor_; }
    bool is_predict_leaf = false;
    bool is_raw_score = false;
-    if (predict_type == 2) {
+    if (predict_type == C_API_PREDICT_LEAF_INDEX) {
      is_predict_leaf = true;
-    } else if (predict_type == 1) {
-      is_raw_score = false;
-    } else {
+    } else if (predict_type == C_API_PREDICT_RAW_SCORE) {
      is_raw_score = true;
+    } else {
+      is_raw_score = false;
    }
    predictor_ = new Predictor(boosting_, is_raw_score, is_predict_leaf);
  }
@@ -362,9 +362,9 @@ DllExport int LGBM_DatasetSetField(DatesetHandle handle,
  int type) {
  auto dataset = reinterpret_cast<Dataset*>(handle);
  bool is_success = false;
-  if (type == dtype_float32) {
+  if (type == C_API_DTYPE_FLOAT32) {
    is_success = dataset->SetFloatField(field_name, reinterpret_cast<const float*>(field_data), static_cast<int32_t>(num_element));
-  } else if (type == dtype_int32) {
+  } else if (type == C_API_DTYPE_INT32) {
    is_success = dataset->SetIntField(field_name, reinterpret_cast<const int*>(field_data), static_cast<int32_t>(num_element));
  }
  if (is_success) { return 0; }
@@ -378,10 +378,10 @@ DllExport int LGBM_DatasetGetField(DatesetHandle handle,
  int* out_type) {
  auto dataset = reinterpret_cast<Dataset*>(handle);
  if (dataset->GetFloatField(field_name, out_len, reinterpret_cast<const float**>(out_ptr))) {
-    *out_type = dtype_float32;
+    *out_type = C_API_DTYPE_FLOAT32;
    return 0;
  } else if (dataset->GetIntField(field_name, out_len, reinterpret_cast<const int**>(out_ptr))) {
-    *out_type = dtype_int32;
+    *out_type = C_API_DTYPE_INT32;
    return 0;
  }
  return -1;
@@ -582,7 +582,7 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,

 std::function<std::vector<double>(int row_idx)>
 RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major) {
-  if (data_type == dtype_float32) {
+  if (data_type == C_API_DTYPE_FLOAT32) {
    const float* data_ptr = reinterpret_cast<const float*>(data);
    if (is_row_major) {
      return [data_ptr, num_col, num_row](int row_idx) {
@@ -604,7 +604,7 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_
        return ret;
      };
    }
-  } else if (data_type == dtype_float64) {
+  } else if (data_type == C_API_DTYPE_FLOAT64) {
    const double* data_ptr = reinterpret_cast<const double*>(data);
    if (is_row_major) {
      return [data_ptr, num_col, num_row](int row_idx) {
@@ -634,61 +634,27 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_

 std::function<std::vector<std::pair<int, double>>(int row_idx)>
 RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major) {
-  if (data_type == dtype_float32) {
-    const float* data_ptr = reinterpret_cast<const float*>(data);
-    if (is_row_major) {
-      return [data_ptr, num_col, num_row](int row_idx) {
-        CHECK(row_idx < num_row);
-        std::vector<std::pair<int, double>> ret;
-        auto tmp_ptr = data_ptr + num_col * row_idx;
-        for (int i = 0; i < num_col; ++i) {
-          ret.emplace_back(i, static_cast<double>(*(tmp_ptr + i)));
+  auto inner_function = RowFunctionFromDenseMatric(data, num_row, num_col, data_type, is_row_major);
+  if (inner_function != nullptr) {
+    return [inner_function](int row_idx) {
+      auto raw_values = inner_function(row_idx);
+      std::vector<std::pair<int, double>> ret;
+      for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
+        if (std::fabs(raw_values[i]) > 1e-15) {
+          ret.emplace_back(i, raw_values[i]);
        }
-        return ret;
-      };
-    } else {
-      return [data_ptr, num_col, num_row](int row_idx) {
-        CHECK(row_idx < num_row);
-        std::vector<std::pair<int, double>> ret;
-        for (int i = 0; i < num_col; ++i) {
-          ret.emplace_back(i, static_cast<double>(*(data_ptr + num_row * i + row_idx)));
-        }
-        return ret;
-      };
-    }
-  } else if (data_type == dtype_float64) {
-    const double* data_ptr = reinterpret_cast<const double*>(data);
-    if (is_row_major) {
-      return [data_ptr, num_col, num_row](int row_idx) {
-        CHECK(row_idx < num_row);
-        std::vector<std::pair<int, double>> ret;
-        auto tmp_ptr = data_ptr + num_col * row_idx;
-        for (int i = 0; i < num_col; ++i) {
-          ret.emplace_back(i, static_cast<double>(*(tmp_ptr + i)));
-        }
-        return ret;
-      };
-    } else {
-      return [data_ptr, num_col, num_row](int row_idx) {
-        CHECK(row_idx < num_row);
-        std::vector<std::pair<int, double>> ret;
-        for (int i = 0; i < num_col; ++i) {
-          ret.emplace_back(i, static_cast<double>(*(data_ptr + num_row * i + row_idx)));
-        }
-        return ret;
-      };
-    }
-  } else {
-    Log::Fatal("unknown data type in RowPairFunctionFromDenseMatric");
+      }
+      return ret;
+    };
  }
  return nullptr;
 }

 std::function<std::vector<std::pair<int, double>>(int idx)>
 RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, const void* data, int data_type, int64_t nindptr, int64_t nelem) {
-  if (data_type == dtype_float32) {
+  if (data_type == C_API_DTYPE_FLOAT32) {
    const float* data_ptr = reinterpret_cast<const float*>(data);
-    if (indptr_type == dtype_int32) {
+    if (indptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_indptr = reinterpret_cast<const int32_t*>(indptr);
      return [ptr_indptr, indices, data_ptr, nindptr, nelem](int idx) {
        CHECK(idx + 1 < nindptr);
@@ -701,7 +667,7 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
        }
        return ret;
      };
-    } else if (indptr_type == dtype_int64) {
+    } else if (indptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_indptr = reinterpret_cast<const int64_t*>(indptr);
      return [ptr_indptr, indices, data_ptr, nindptr, nelem](int idx) {
        CHECK(idx + 1 < nindptr);
@@ -717,9 +683,9 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
    } else {
      Log::Fatal("unknown data type in RowFunctionFromCSR");
    }
-  } else if (data_type == dtype_float64) {
+  } else if (data_type == C_API_DTYPE_FLOAT64) {
    const double* data_ptr = reinterpret_cast<const double*>(data);
-    if (indptr_type == dtype_int32) {
+    if (indptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_indptr = reinterpret_cast<const int32_t*>(indptr);
      return [ptr_indptr, indices, data_ptr, nindptr, nelem](int idx) {
        CHECK(idx + 1 < nindptr);
@@ -732,7 +698,7 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
        }
        return ret;
      };
-    } else if (indptr_type == dtype_int64) {
+    } else if (indptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_indptr = reinterpret_cast<const int64_t*>(indptr);
      return [ptr_indptr, indices, data_ptr, nindptr, nelem](int idx) {
        CHECK(idx + 1 < nindptr);
@@ -756,9 +722,9 @@ RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,

 std::function<std::vector<std::pair<int, double>>(int idx)>
 ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indices, const void* data, int data_type, int64_t ncol_ptr, int64_t nelem) {
-  if (data_type == dtype_float32) {
+  if (data_type == C_API_DTYPE_FLOAT32) {
    const float* data_ptr = reinterpret_cast<const float*>(data);
-    if (col_ptr_type == dtype_int32) {
+    if (col_ptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
        CHECK(idx + 1 < ncol_ptr);
@@ -771,7 +737,7 @@ ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indi
        }
        return ret;
      };
-    } else if (col_ptr_type == dtype_int64) {
+    } else if (col_ptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
        CHECK(idx + 1 < ncol_ptr);
@@ -787,9 +753,9 @@ ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indi
    } else {
      Log::Fatal("unknown data type in ColumnFunctionFromCSC");
    }
-  } else if (data_type == dtype_float64) {
+  } else if (data_type == C_API_DTYPE_FLOAT64) {
    const double* data_ptr = reinterpret_cast<const double*>(data);
-    if (col_ptr_type == dtype_int32) {
+    if (col_ptr_type == C_API_DTYPE_INT32) {
      const int32_t* ptr_col_ptr = reinterpret_cast<const int32_t*>(col_ptr);
      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
        CHECK(idx + 1 < ncol_ptr);
@@ -802,7 +768,7 @@ ColumnFunctionFromCSC(const void* col_ptr, int col_ptr_type, const int32_t* indi
        }
        return ret;
      };
-    } else if (col_ptr_type == dtype_int64) {
+    } else if (col_ptr_type == C_API_DTYPE_INT64) {
      const int64_t* ptr_col_ptr = reinterpret_cast<const int64_t*>(col_ptr);
      return [ptr_col_ptr, indices, data_ptr, ncol_ptr, nelem](int idx) {
        CHECK(idx + 1 < ncol_ptr);

--- a/tests/c_api_test/test.py
+++ b/tests/c_api_test/test.py
@@ -7,7 +7,10 @@ import numpy as np
 from scipy import sparse

 def LoadDll():
-    lib_path = '../../windows/x64/DLL/lib_lightgbm.dll'
+    if os.name == 'nt':
+        lib_path = '../../windows/x64/DLL/lib_lightgbm.dll'
+    else:
+        lib_path = '../../lib_lightgbm.so'
    lib = ctypes.cdll.LoadLibrary(lib_path)
    return lib

@@ -23,7 +26,7 @@ def c_array(ctype, values):
    return (ctype * len(values))(*values)

 def c_str(string):
-    return ctypes.c_char_p(string.encode('utf-8'))
+    return ctypes.c_char_p(string.encode('ascii'))

 def test_load_from_file(filename, reference):
    ref = None
@@ -37,7 +40,7 @@ def test_load_from_file(filename, reference):
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
-    print '#data:%d #feature:%d' %(num_data.value, num_feature.value)
+    print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) ) 
    return handle

 def test_save_to_binary(handle, filename):
@@ -50,7 +53,7 @@ def test_load_from_binary(filename):
    LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
-    print '#data:%d #feature:%d' %(num_data.value, num_feature.value)
+    print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) ) 
    return handle

 def test_load_from_csr(filename, reference):
@@ -77,7 +80,7 @@ def test_load_from_csr(filename, reference):
        len(csr.indptr), 
        len(csr.data),
        csr.shape[1], 
-        ctypes.c_char_p('max_bin=15'), 
+        c_str('max_bin=15'), 
        ref, 
        ctypes.byref(handle) )
    num_data = ctypes.c_long()
@@ -85,7 +88,7 @@ def test_load_from_csr(filename, reference):
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
-    print '#data:%d #feature:%d' %(num_data.value, num_feature.value)
+    print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) ) 
    return handle

 def test_load_from_csc(filename, reference):
@@ -112,7 +115,7 @@ def test_load_from_csc(filename, reference):
        len(csr.indptr), 
        len(csr.data),
        csr.shape[0], 
-        ctypes.c_char_p('max_bin=15'), 
+        c_str('max_bin=15'), 
        ref, 
        ctypes.byref(handle) )
    num_data = ctypes.c_long()
@@ -120,7 +123,7 @@ def test_load_from_csc(filename, reference):
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
-    print '#data:%d #feature:%d' %(num_data.value, num_feature.value)
+    print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) ) 
    return handle

 def test_load_from_mat(filename, reference):
@@ -144,7 +147,7 @@ def test_load_from_mat(filename, reference):
        mat.shape[0],
        mat.shape[1],
        1,
-        ctypes.c_char_p('max_bin=15'), 
+        c_str('max_bin=15'), 
        ref, 
        ctypes.byref(handle) )
    num_data = ctypes.c_long()
@@ -152,7 +155,7 @@ def test_load_from_mat(filename, reference):
    num_feature = ctypes.c_long()
    LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
    LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
-    print '#data:%d #feature:%d' %(num_data.value, num_feature.value)
+    print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) ) 
    return handle
 def test_free_dataset(handle):
    LIB.LGBM_DatasetFree(handle)
@@ -175,14 +178,14 @@ def test_booster():
    name = [c_str('test')]
    booster = ctypes.c_void_p()
    LIB.LGBM_BoosterCreate(train, c_array(ctypes.c_void_p, test), c_array(ctypes.c_char_p, name), 
-        len(test), "app=binary metric=auc num_leaves=31 verbose=0", ctypes.byref(booster))
+        len(test), c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
    is_finished = ctypes.c_int(0)
-    for i in xrange(100):
+    for i in range(100):
        LIB.LGBM_BoosterUpdateOneIter(booster,ctypes.byref(is_finished))
        result = np.array([0.0], dtype=np.float32)
        out_len = ctypes.c_ulong(0)
-        LIB.LGBM_BoosterEval(booster, 1, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
-        print '%d Iteration test AUC %f' %(i, result[0])
+        LIB.LGBM_BoosterEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_float)))
+        print ('%d Iteration test AUC %f' %(i, result[0]))
    LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
    LIB.LGBM_BoosterFree(booster)
    test_free_dataset(train)