Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
dd5f2b27
Commit
dd5f2b27
authored
Nov 05, 2016
by
Guolin Ke
Browse files
add dataset_loader.h
parent
1c08e71e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
74 additions
and
0 deletions
+74
-0
include/LightGBM/dataset_loader.h
include/LightGBM/dataset_loader.h
+74
-0
No files found.
include/LightGBM/dataset_loader.h
0 → 100644
View file @
dd5f2b27
#ifndef LIGHTGBM_DATASET_LOADER_H_
#define LIGHTGBM_DATASET_LOADER_H_
#include <LightGBM/dataset.h>
namespace
LightGBM
{
/*!
 * \brief Declares the routines used to build a Dataset from a data file.
 *
 * Only declarations live in this header; the behaviour of every member other
 * than the single-argument LoadFromFile overload is defined elsewhere.
 * The class is non-copyable (copy constructor and copy assignment are deleted).
 */
class DatasetLoader {
 public:
  /*!
   * \brief Constructor
   * \param io_config IO configuration, taken by const reference
   * \param predict_fun Prediction function, taken by const reference
   * NOTE(review): the class stores `const IOConfig&` / `const PredictFunction&`
   * members; presumably they are bound to these arguments, in which case both
   * arguments must outlive the loader -- confirm in the implementation.
   */
  DatasetLoader(const IOConfig& io_config, const PredictFunction& predict_fun);

  /*! \brief Destructor */
  ~DatasetLoader();

  /*!
   * \brief Header-related setup for the given file.
   * NOTE(review): the name is a misspelling of "SetHeader"; it is kept as-is
   * because it is part of the public interface. The exact behaviour is defined
   * in the implementation, which is not visible from this header.
   * \param filename Path of the data file
   */
  void SetHeadder(const char* filename);

  /*!
   * \brief Load a dataset from a file
   * \param filename Path of the data file
   * \param rank Rank argument (presumably the index of the local machine -- TODO confirm)
   * \param num_machines Machine-count argument (presumably the total number of machines -- TODO confirm)
   * \return Pointer to the loaded Dataset (ownership is not expressed by the type; verify against callers)
   */
  Dataset* LoadFromFile(const char* filename, int rank, int num_machines);

  /*!
   * \brief Convenience overload: forwards to LoadFromFile(filename, 0, 1)
   * \param filename Path of the data file
   * \return The pointer returned by the three-argument overload
   */
  Dataset* LoadFromFile(const char* filename) {
    // The result must be returned explicitly: flowing off the end of a
    // non-void function is undefined behaviour.
    return LoadFromFile(filename, 0, 1);
  }

  /*!
   * \brief Load a dataset from a file using another dataset as reference
   * NOTE(review): presumably reuses the feature layout of `other` -- confirm
   * in the implementation.
   * \param filename Path of the data file
   * \param other Existing dataset passed as reference
   * \return Pointer to the loaded Dataset
   */
  Dataset* LoadFromFileLikeOthers(const char* filename, const Dataset* other);

  /*!
   * \brief Load a dataset from a binary file
   * \param bin_filename Path of the binary file
   * \param rank Rank argument (see LoadFromFile)
   * \param num_machines Machine-count argument (see LoadFromFile)
   * \return Pointer to the loaded Dataset
   */
  Dataset* LoadFromBinFile(const char* bin_filename, int rank, int num_machines);

  /*! \brief Disable copy */
  DatasetLoader& operator=(const DatasetLoader&) = delete;
  /*! \brief Disable copy */
  DatasetLoader(const DatasetLoader&) = delete;

 private:
  /*! \brief Check the given dataset (validation rules are defined in the implementation) */
  void CheckDataset(const Dataset* dataset);

  /*!
   * \brief Load text data into memory, one string per entry
   * \param num_global_data Output pointer (presumably receives the global row count -- TODO confirm)
   * \param used_data_indices Output pointer (presumably receives the indices of locally used rows -- TODO confirm)
   * \return The loaded text entries
   */
  std::vector<std::string> LoadTextDataToMemory(const char* filename,
                                                const Metadata& metadata,
                                                int rank,
                                                int num_machines,
                                                int* num_global_data,
                                                std::vector<data_size_t>* used_data_indices);

  /*! \brief Sample entries from text data that is already in memory */
  std::vector<std::string> SampleTextDataFromMemory(const std::vector<std::string>& data);

  /*!
   * \brief Sample text entries from a file
   * \param num_global_data Output pointer (see LoadTextDataToMemory)
   * \param used_data_indices Output pointer (see LoadTextDataToMemory)
   * \return The sampled text entries
   */
  std::vector<std::string> SampleTextDataFromFile(const char* filename,
                                                  const Metadata& metadata,
                                                  int rank,
                                                  int num_machines,
                                                  int* num_global_data,
                                                  std::vector<data_size_t>* used_data_indices);

  /*! \brief Construct bin mappers for `dataset` from sampled text data */
  void ConstructBinMappersFromTextData(int rank,
                                       int num_machines,
                                       const std::vector<std::string>& sample_data,
                                       const Parser* parser,
                                       Dataset* dataset);

  /*! \brief Extract local features from memory */
  void ExtractFeaturesFromMemory(std::vector<std::string>& text_data,
                                 const Parser* parser,
                                 Dataset* dataset);

  /*! \brief Extract local features from file */
  void ExtractFeaturesFromFile(const char* filename,
                               const Parser* parser,
                               const std::vector<data_size_t>& used_data_indices,
                               Dataset* dataset);

  /*! \brief Check can load from binary file */
  bool CheckCanLoadFromBin(const char* filename);

  /*! \brief IO configuration, held by const reference (not owned) */
  const IOConfig& io_config_;
  /*! \brief Random generator */
  Random random_;
  /*! \brief prediction function for initial model, held by const reference (not owned) */
  const PredictFunction& predict_fun_;
  /*! \brief index of label column */
  int label_idx_ = 0;
  /*! \brief index of weight column */
  int weight_idx_ = -1;
  /*! \brief index of group column */
  int group_idx_ = -1;
  /*! \brief Set of feature indices to ignore */
  std::unordered_set<int> ignore_features_;
  /*! \brief store feature names */
  std::vector<std::string> feature_names_;
};
}
#endif // LIGHTGBM_DATASET_LOADER_H_
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment