Unverified Commit 17d4e007 authored by shiyu1994's avatar shiyu1994 Committed by GitHub
Browse files

Load initial scores with binary data files in CLI version (#4807)

parent c991b2bc
...@@ -1269,6 +1269,8 @@ The initial score file corresponds with data file line by line, and has per scor ...@@ -1269,6 +1269,8 @@ The initial score file corresponds with data file line by line, and has per scor
And if the name of data file is ``train.txt``, the initial score file should be named as ``train.txt.init`` and placed in the same folder as the data file. And if the name of data file is ``train.txt``, the initial score file should be named as ``train.txt.init`` and placed in the same folder as the data file.
In this case, LightGBM will auto load initial score file if it exists. In this case, LightGBM will auto load initial score file if it exists.
If a binary data file exists for the raw data file ``train.txt``, for example one named ``train.txt.bin``, then the initial score file should be named ``train.txt.bin.init``.
Weight Data Weight Data
~~~~~~~~~~~ ~~~~~~~~~~~
......
...@@ -147,6 +147,9 @@ class Metadata { ...@@ -147,6 +147,9 @@ class Metadata {
queries_[idx] = static_cast<data_size_t>(value); queries_[idx] = static_cast<data_size_t>(value);
} }
/*! \brief Load initial scores from file */
void LoadInitialScore(const std::string& data_filename);
/*! /*!
* \brief Get weights, if not exists, will return nullptr * \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights * \return Pointer of weights
...@@ -223,8 +226,6 @@ class Metadata { ...@@ -223,8 +226,6 @@ class Metadata {
#endif // USE_CUDA_EXP #endif // USE_CUDA_EXP
private: private:
/*! \brief Load initial scores from file */
void LoadInitialScore();
/*! \brief Load weights from file */ /*! \brief Load weights from file */
void LoadWeights(); void LoadWeights();
/*! \brief Load query boundaries from file */ /*! \brief Load query boundaries from file */
......
...@@ -272,6 +272,11 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac ...@@ -272,6 +272,11 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
is_load_from_binary = true; is_load_from_binary = true;
Log::Info("Load from binary file %s", bin_filename.c_str()); Log::Info("Load from binary file %s", bin_filename.c_str());
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices)); dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices));
// check whether there is an initial score file when loading from a binary data file
// the initial score file should have the suffix ".bin.init"
dataset->metadata_.LoadInitialScore(bin_filename);
dataset->device_type_ = config_.device_type; dataset->device_type_ = config_.device_type;
dataset->gpu_device_id_ = config_.gpu_device_id; dataset->gpu_device_id_ = config_.gpu_device_id;
#ifdef USE_CUDA_EXP #ifdef USE_CUDA_EXP
...@@ -338,6 +343,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, ...@@ -338,6 +343,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
} else { } else {
// load data from binary file // load data from binary file
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices)); dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices));
// check whether there is an initial score file when loading from a binary data file
// the initial score file should have the suffix ".bin.init"
dataset->metadata_.LoadInitialScore(bin_filename);
} }
// not need to check validation data // not need to check validation data
// check meta data // check meta data
......
...@@ -29,7 +29,7 @@ void Metadata::Init(const char* data_filename) { ...@@ -29,7 +29,7 @@ void Metadata::Init(const char* data_filename) {
LoadQueryBoundaries(); LoadQueryBoundaries();
LoadWeights(); LoadWeights();
LoadQueryWeights(); LoadQueryWeights();
LoadInitialScore(); LoadInitialScore(data_filename_);
} }
Metadata::~Metadata() { Metadata::~Metadata() {
...@@ -418,10 +418,10 @@ void Metadata::LoadWeights() { ...@@ -418,10 +418,10 @@ void Metadata::LoadWeights() {
weight_load_from_file_ = true; weight_load_from_file_ = true;
} }
void Metadata::LoadInitialScore() { void Metadata::LoadInitialScore(const std::string& data_filename) {
num_init_score_ = 0; num_init_score_ = 0;
std::string init_score_filename(data_filename_); std::string init_score_filename(data_filename);
init_score_filename = std::string(data_filename_); init_score_filename = std::string(data_filename);
// default init_score file name // default init_score file name
init_score_filename.append(".init"); init_score_filename.append(".init");
TextReader<size_t> reader(init_score_filename.c_str(), false); TextReader<size_t> reader(init_score_filename.c_str(), false);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment