// pipeline_reader.h
#ifndef LIGHTGBM_UTILS_PIPELINE_READER_H_
#define LIGHTGBM_UTILS_PIPELINE_READER_H_

#include <LightGBM/utils/log.h>

#include <cstdio>

#include <algorithm>
#include <functional>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

namespace LightGBM{

/*!
* \brief A pipeline file reader that uses two threads: one reads blocks from the file while the other processes the previously read block
*/
class PipelineReader {
public:
  /*!
  * \brief Read a file block by block, overlapping file reads with processing
  *
  * Two equally sized buffers are used: while process_fun consumes one block on the
  * calling thread, a helper thread fills the other buffer with the next block.
  *
  * \param filename Path of the file to read (opened in binary mode)
  * \param skip_bytes Number of leading bytes to discard before the first block; values <= 0 skip nothing
  * \param process_fun Callback invoked once per non-empty block with (block pointer, block length);
  *        the values it returns are summed into the result
  * \return Sum of the values returned by process_fun, or 0 if the file cannot be opened
  */
  static size_t Read(const char* filename, int skip_bytes, const std::function<size_t (const char*, size_t)>& process_fun) {
    FILE* file = nullptr;

#ifdef _MSC_VER
    fopen_s(&file, filename, "rb");
#else
    file = fopen(filename, "rb");
#endif
    if (file == nullptr) {
      return 0;
    }
    // close the file on every exit path, including an exception thrown by process_fun
    struct FileCloser {
      void operator()(FILE* f) const { fclose(f); }
    };
    std::unique_ptr<FILE, FileCloser> file_guard(file);

    size_t cnt = 0;
    const size_t buffer_size = 16 * 1024 * 1024;
    // buffer used for the process_fun
    auto buffer_process = std::vector<char>(buffer_size);
    // buffer used for the file reading
    auto buffer_read = std::vector<char>(buffer_size);

    // skip first k bytes; read them in chunks so that a skip larger than
    // the buffer cannot write past the end of the buffer
    size_t bytes_to_skip = skip_bytes > 0 ? static_cast<size_t>(skip_bytes) : 0;
    while (bytes_to_skip > 0) {
      const size_t want = std::min(bytes_to_skip, buffer_size);
      const size_t got = fread(buffer_process.data(), 1, want, file);
      if (got < want) {
        // short read: end of file or read error, nothing more to skip
        break;
      }
      bytes_to_skip -= got;
    }

    // read first block
    size_t read_cnt = fread(buffer_process.data(), 1, buffer_size, file);
    size_t last_read_cnt = 0;
    while (read_cnt > 0) {
      // start read thread: it fills buffer_read with the next block
      std::thread read_worker(
        [file, &buffer_read, buffer_size, &last_read_cnt] {
          last_read_cnt = fread(buffer_read.data(), 1, buffer_size, file);
        });
      // start process
      try {
        cnt += process_fun(buffer_process.data(), read_cnt);
      } catch (...) {
        // a joinable std::thread must be joined before it is destroyed,
        // otherwise std::terminate is called while the exception unwinds
        read_worker.join();
        throw;
      }
      // wait for read thread
      read_worker.join();
      // exchange the buffer
      std::swap(buffer_process, buffer_read);
      read_cnt = last_read_cnt;
    }
    // file is closed by file_guard
    return cnt;
  }

};

}  // namespace LightGBM

#endif   // LIGHTGBM_UTILS_PIPELINE_READER_H_