pipeline_reader.h 1.8 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
#ifndef LIGHTGBM_UTILS_PIPELINE_READER_H_
#define LIGHTGBM_UTILS_PIPELINE_READER_H_

#include <LightGBM/utils/log.h>

#include <cstdio>

#include <functional>
#include <thread>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
10
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
11
#include <algorithm>
12
#include "file_io.h"
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17
18
19
20
21
22
23
24
25

namespace LightGBM{

/*!
* \brief A pipeline file reader, use 2 threads, one read block from file, the other process the block
*/
class PipelineReader {
public:
  /*!
  * \brief Read data from a file, use pipeline methods
  * \param filename Filename of data
  * \process_fun Process function
  */
Guolin Ke's avatar
Guolin Ke committed
26
  static size_t Read(const char* filename, int skip_bytes, const std::function<size_t (const char*, size_t)>& process_fun) {
27
28
    auto reader = VirtualFileReader::Make(filename);
    if (!reader->Init()) {
Guolin Ke's avatar
Guolin Ke committed
29
30
31
32
33
      return 0;
    }
    size_t cnt = 0;
    const size_t buffer_size =  16 * 1024 * 1024 ;
    // buffer used for the process_fun
Guolin Ke's avatar
Guolin Ke committed
34
    auto buffer_process = std::vector<char>(buffer_size);
Guolin Ke's avatar
Guolin Ke committed
35
    // buffer used for the file reading
Guolin Ke's avatar
Guolin Ke committed
36
    auto buffer_read = std::vector<char>(buffer_size);
Guolin Ke's avatar
Guolin Ke committed
37
    size_t read_cnt = 0;
Guolin Ke's avatar
Guolin Ke committed
38
39
    if (skip_bytes > 0) {
      // skip first k bytes
40
      read_cnt = reader->Read(buffer_process.data(), skip_bytes);
Guolin Ke's avatar
Guolin Ke committed
41
    }
Guolin Ke's avatar
Guolin Ke committed
42
    // read first block
43
44
    read_cnt = reader->Read(buffer_process.data(), buffer_size);

Guolin Ke's avatar
Guolin Ke committed
45
46
    size_t last_read_cnt = 0;
    while (read_cnt > 0) {
47
      // start read thread
Guolin Ke's avatar
Guolin Ke committed
48
      std::thread read_worker = std::thread(
49
        [&] {
50
        last_read_cnt = reader->Read(buffer_read.data(), buffer_size);
Guolin Ke's avatar
Guolin Ke committed
51
52
53
      }
      );
      // start process
Guolin Ke's avatar
Guolin Ke committed
54
      cnt += process_fun(buffer_process.data(), read_cnt);
Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
60
61
62
63
64
65
66
67
      // wait for read thread
      read_worker.join();
      // exchange the buffer
      std::swap(buffer_process, buffer_read);
      read_cnt = last_read_cnt;
    }
    return cnt;
  }

};

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
68
#endif   // LIGHTGBM_UTILS_PIPELINE_READER_H_