Commit ab5ab8a7 authored by Qiwei Ye's avatar Qiwei Ye
Browse files

Merge branch 'master' of https://github.com/Microsoft/LightGBM

Conflicts:
	src/io/dataset.cpp
parents 7d25c253 0241d323
......@@ -146,7 +146,7 @@ Tree::Tree(const std::string& str) {
|| key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
|| key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
|| key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0) {
Log::Fatal("tree model string format error");
Log::Fatal("Tree model string format error");
}
Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
......@@ -164,19 +164,19 @@ Tree::Tree(const std::string& str) {
leaf_depth_ = nullptr;
Common::StringToIntArray(key_vals["split_feature"], ' ',
num_leaves_ - 1, split_feature_real_);
num_leaves_ - 1, split_feature_real_);
Common::StringToDoubleArray(key_vals["split_gain"], ' ',
num_leaves_ - 1, split_gain_);
num_leaves_ - 1, split_gain_);
Common::StringToDoubleArray(key_vals["threshold"], ' ',
num_leaves_ - 1, threshold_);
num_leaves_ - 1, threshold_);
Common::StringToIntArray(key_vals["left_child"], ' ',
num_leaves_ - 1, left_child_);
num_leaves_ - 1, left_child_);
Common::StringToIntArray(key_vals["right_child"], ' ',
num_leaves_ - 1, right_child_);
num_leaves_ - 1, right_child_);
Common::StringToIntArray(key_vals["leaf_parent"], ' ',
num_leaves_ , leaf_parent_);
num_leaves_ , leaf_parent_);
Common::StringToDoubleArray(key_vals["leaf_value"], ' ',
num_leaves_ , leaf_value_);
num_leaves_ , leaf_value_);
}
} // namespace LightGBM
......@@ -21,7 +21,7 @@ public:
explicit BinaryMetric(const MetricConfig& config) {
  // Cache the sigmoid transform parameter used when converting raw scores
  // to probabilities; it must be strictly positive to be meaningful.
  sigmoid_ = static_cast<score_t>(config.sigmoid);
  if (sigmoid_ <= 0.0f) {
    // NOTE(review): the diff left both the old and new message lines in
    // place, which would emit the fatal message twice; keep a single,
    // grammatical message consistent with BinaryLogloss ("should be greater").
    Log::Fatal("Sigmoid parameter %f should be greater than zero", sigmoid_);
  }
}
......
......@@ -60,7 +60,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
std::vector<data_size_t> label_cnt(label_gain_.size(), 0);
// counts for all labels
for (data_size_t i = 0; i < num_data; ++i) {
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("label excel %d", label[i]); }
if (static_cast<size_t>(label[i]) >= label_cnt.size()) { Log::Fatal("Label excel %d", label[i]); }
++label_cnt[static_cast<int>(label[i])];
}
score_t cur_result = 0.0f;
......
......@@ -45,7 +45,7 @@ public:
// get query boundaries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("For NDCG metric, there should be query information");
Log::Fatal("The NDCG metric requires query information");
}
num_queries_ = metadata.num_queries();
// get query weights
......
......@@ -44,7 +44,7 @@ Linkers::Linkers(NetworkConfig config) {
}
}
if (rank_ == -1) {
Log::Fatal("Machine list file doesn't contain local machine");
Log::Fatal("Machine list file doesn't contain the local machine");
}
// construct listener
listener_ = new TcpSocket();
......@@ -53,7 +53,7 @@ Linkers::Linkers(NetworkConfig config) {
for (int i = 0; i < num_machines_; ++i) {
linkers_.push_back(nullptr);
}
// construct communication topo
bruck_map_ = BruckMap::Construct(rank_, num_machines_);
recursive_halving_map_ = RecursiveHalvingMap::Construct(rank_, num_machines_);
......@@ -73,14 +73,14 @@ Linkers::~Linkers() {
}
}
TcpSocket::Finalize();
Log::Info("Network using %f seconds", network_time_ * 1e-3);
Log::Info("Finished linking network in %f seconds", network_time_ * 1e-3);
}
void Linkers::ParseMachineList(const char * filename) {
TextReader<size_t> machine_list_reader(filename, false);
machine_list_reader.ReadAllLines();
if (machine_list_reader.Lines().size() <= 0) {
Log::Fatal("Machine list file:%s doesn't exist", filename);
Log::Fatal("Machine list file %s doesn't exist", filename);
}
for (auto& line : machine_list_reader.Lines()) {
......@@ -95,7 +95,7 @@ void Linkers::ParseMachineList(const char * filename) {
continue;
}
if (client_ips_.size() >= static_cast<size_t>(num_machines_)) {
Log::Warning("The #machine in machine_list is larger than parameter num_machines, the redundant will ignored");
Log::Warning("machine_list size is larger than the parameter num_machines, ignoring redundant entries");
break;
}
str_after_split[0] = Common::Trim(str_after_split[0]);
......@@ -104,17 +104,17 @@ void Linkers::ParseMachineList(const char * filename) {
client_ports_.push_back(atoi(str_after_split[1].c_str()));
}
if (client_ips_.size() != static_cast<size_t>(num_machines_)) {
Log::Warning("The world size is bigger the #machine in machine list, change world size to %d .", client_ips_.size());
Log::Warning("World size is larger than the machine_list size, change world size to %d", client_ips_.size());
num_machines_ = static_cast<int>(client_ips_.size());
}
}
void Linkers::TryBind(int port) {
  // Attempt to bind the listener socket to `port`; logs the outcome and
  // aborts (Log::Fatal) if the bind fails.
  // NOTE(review): the diff artifact left both old and new versions of each
  // log line, which would print every message twice; keep only the new ones.
  Log::Info("Trying to bind port %d...", port);
  if (listener_->Bind(port)) {
    Log::Info("Binding port %d succeeded", port);
  } else {
    Log::Fatal("Binding port %d failed", port);
  }
}
......@@ -192,7 +192,7 @@ void Linkers::Construct() {
if (cur_socket.Connect(client_ips_[out_rank].c_str(), client_ports_[out_rank])) {
break;
} else {
Log::Warning("Connect to rank %d failed, wait for %d milliseconds", out_rank, connect_fail_delay_time);
Log::Warning("Connecting to rank %d failed, waiting for %d milliseconds", out_rank, connect_fail_delay_time);
std::this_thread::sleep_for(std::chrono::milliseconds(connect_fail_delay_time));
}
}
......@@ -217,7 +217,7 @@ bool Linkers::CheckLinker(int rank) {
void Linkers::PrintLinkers() {
  // Log every rank this machine currently holds a verified link to.
  // NOTE(review): the diff artifact left both the old and new log lines,
  // which would print each connection twice; keep only the new message.
  for (int i = 0; i < num_machines_; ++i) {
    if (CheckLinker(i)) {
      Log::Info("Connected to rank %d", i);
    }
  }
}
......
......@@ -30,7 +30,7 @@ void Network::Init(NetworkConfig config) {
block_len_ = new int[num_machines_];
buffer_size_ = 1024 * 1024;
buffer_ = new char[buffer_size_];
Log::Info("local rank %d, total number of machines %d", rank_, num_machines_);
Log::Info("Local rank: %d, total number of machines: %d", rank_, num_machines_);
}
void Network::Dispose() {
......
......@@ -60,7 +60,7 @@ public:
TcpSocket() {
sockfd_ = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sockfd_ == INVALID_SOCKET) {
Log::Fatal("Socket construct error");
Log::Fatal("Socket construction error");
return;
}
ConfigSocket();
......@@ -97,7 +97,7 @@ public:
#if defined(_WIN32)
WSADATA wsa_data;
if (WSAStartup(MAKEWORD(2, 2), &wsa_data) == -1) {
Log::Fatal("Socket error: WSAStart up error");
Log::Fatal("Socket error: WSAStartup error");
}
if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) {
WSACleanup();
......@@ -128,7 +128,7 @@ public:
char buffer[512];
// get hostName
if (gethostname(buffer, sizeof(buffer)) == SOCKET_ERROR) {
Log::Fatal("Error code: %d, when getting local host name.", WSAGetLastError());
Log::Fatal("Error code %d, when getting local host name", WSAGetLastError());
}
// push local ip
PIP_ADAPTER_INFO pAdapterInfo;
......@@ -137,7 +137,7 @@ public:
ULONG ulOutBufLen = sizeof(IP_ADAPTER_INFO);
pAdapterInfo = (IP_ADAPTER_INFO *)MALLOC(sizeof(IP_ADAPTER_INFO));
if (pAdapterInfo == NULL) {
Log::Fatal("GetAdaptersinfo error: allocating memory ");
Log::Fatal("GetAdaptersinfo error: allocating memory");
}
// Make an initial call to GetAdaptersInfo to get
// the necessary size into the ulOutBufLen variable
......@@ -145,7 +145,7 @@ public:
FREE(pAdapterInfo);
pAdapterInfo = (IP_ADAPTER_INFO *)MALLOC(ulOutBufLen);
if (pAdapterInfo == NULL) {
Log::Fatal("GetAdaptersinfo error: allocating memory ");
Log::Fatal("GetAdaptersinfo error: allocating memory");
}
}
if ((dwRetVal = GetAdaptersInfo(pAdapterInfo, &ulOutBufLen)) == NO_ERROR) {
......@@ -155,7 +155,7 @@ public:
pAdapter = pAdapter->Next;
}
} else {
Log::Fatal("GetAdaptersinfo error: code %d ", dwRetVal);
Log::Fatal("GetAdaptersinfo error: code %d", dwRetVal);
}
if (pAdapterInfo)
FREE(pAdapterInfo);
......
......@@ -16,7 +16,7 @@ public:
is_unbalance_ = config.is_unbalance;
sigmoid_ = static_cast<score_t>(config.sigmoid);
if (sigmoid_ <= 0.0) {
Log::Fatal("Sigmoid parameter %f :should greater than zero", sigmoid_);
Log::Fatal("Sigmoid parameter %f should be greater than zero", sigmoid_);
}
}
~BinaryLogloss() {}
......@@ -34,10 +34,10 @@ public:
++cnt_negative;
}
}
Log::Info("Number of postive:%d, number of negative:%d", cnt_positive, cnt_negative);
Log::Info("Number of postive: %d, number of negative: %d", cnt_positive, cnt_negative);
// cannot continue if all sample are same class
if (cnt_positive == 0 || cnt_negative == 0) {
Log::Fatal("Input training data only contains one class");
Log::Fatal("Training data only contains one class");
}
// use -1 for negative class, and 1 for positive class
label_val_[0] = -1;
......@@ -47,8 +47,13 @@ public:
label_weights_[1] = 1.0f;
// if using unbalance, change the labels weight
if (is_unbalance_) {
label_weights_[1] = 1.0f;
label_weights_[0] = static_cast<score_t>(cnt_positive) / cnt_negative;
if (cnt_positive > cnt_negative) {
label_weights_[1] = 1.0f;
label_weights_[0] = static_cast<score_t>(cnt_positive) / cnt_negative;
} else {
label_weights_[1] = static_cast<score_t>(cnt_negative) / cnt_positive;
label_weights_[0] = 1.0f;
}
}
}
......
......@@ -16,20 +16,20 @@ public:
:label_int_(nullptr) {
num_class_ = config.num_class;
}
~MulticlassLogloss() {
  // Release the label buffer allocated in Init(). As written in the diff
  // view the guarded delete appears twice; since the guard never nulls the
  // pointer, the second statement would be a double delete — keep exactly
  // one delete and null the pointer afterwards for safety.
  if (label_int_ != nullptr) {
    delete[] label_int_;
    label_int_ = nullptr;
  }
}
void Init(const Metadata& metadata, data_size_t num_data) override {
  // Cache the dataset size, label and weight pointers, then convert the
  // float labels to ints, validating each is a legal class id in [0, num_class_).
  num_data_ = num_data;
  label_ = metadata.label();
  weights_ = metadata.weights();
  label_int_ = new int[num_data_];
  for (int i = 0; i < num_data_; ++i) {
    // NOTE(review): the diff artifact duplicated the assignment and left
    // both old and new Fatal messages; keep one assignment and the new message.
    label_int_[i] = static_cast<int>(label_[i]);
    if (label_int_[i] < 0 || label_int_[i] >= num_class_) {
      Log::Fatal("Label must be in [0, %d), but found %d in label", num_class_, label_int_[i]);
    }
  }
}
......@@ -42,7 +42,7 @@ public:
for (int k = 0; k < num_class_; ++k){
rec[k] = static_cast<double>(score[k * num_data_ + i]);
}
Common::Softmax(&rec);
Common::Softmax(&rec);
for (int k = 0; k < num_class_; ++k) {
score_t p = static_cast<score_t>(rec[k]);
if (label_int_[i] == k) {
......@@ -51,7 +51,7 @@ public:
gradients[k * num_data_ + i] = p;
}
hessians[k * num_data_ + i] = 2.0f * p * (1.0f - p);
}
}
}
} else {
#pragma omp parallel for schedule(static)
......@@ -59,7 +59,7 @@ public:
std::vector<double> rec(num_class_);
for (int k = 0; k < num_class_; ++k){
rec[k] = static_cast<double>(score[k * num_data_ + i]);
}
}
Common::Softmax(&rec);
for (int k = 0; k < num_class_; ++k) {
score_t p = static_cast<score_t>(rec[k]);
......
......@@ -31,7 +31,7 @@ public:
optimize_pos_at_ = config.max_position;
sigmoid_table_ = nullptr;
if (sigmoid_ <= 0.0) {
Log::Fatal("sigmoid param %f should greater than zero", sigmoid_);
Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
}
}
~LambdarankNDCG() {
......@@ -47,7 +47,7 @@ public:
// get boundries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("For lambdarank tasks, should have query information");
Log::Fatal("Lambdarank tasks require query information");
}
num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid computation many times
......
......@@ -112,7 +112,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
if (has_ordered_bin_) {
is_data_in_leaf_ = new char[num_data_];
}
Log::Info("Number of data:%d, Number of features:%d", num_data_, num_features_);
Log::Info("Number of data: %d, number of features: %d", num_data_, num_features_);
}
......@@ -142,7 +142,7 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
const SplitInfo& best_leaf_SplitInfo = best_split_per_leaf_[best_leaf];
// cannot split, quit
if (best_leaf_SplitInfo.gain <= 0.0) {
Log::Info("cannot find more split with gain = %f , current #leaves=%d",
Log::Info("No further splits with positive gain, best gain: %f, leaves: %d",
best_leaf_SplitInfo.gain, split + 1);
break;
}
......@@ -266,7 +266,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
if (right_leaf < 0) {
histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
larger_leaf_histogram_array_ = nullptr;
} else if (num_data_in_left_child < num_data_in_right_child) {
smaller_leaf = left_leaf;
larger_leaf = right_leaf;
......
......@@ -40,13 +40,13 @@ public:
double right_sum_hessian;
SplitInfo() {
// Default state: no feature chosen (-1) and -inf gain (kMinScore), so any
// real split evaluated later compares as an improvement.
feature = -1;
gain = kMinScore;
}
inline void Reset() {
// Restore the default "no split" state: feature = -1 and -inf gain
// (kMinScore), mirroring the constructor.
feature = -1;
gain = kMinScore;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment