deepspeed_aio_thread.h 1.39 KB
Newer Older
aiss's avatar
aiss committed
1
2
3
4
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0

// DeepSpeed Team
aiss's avatar
aiss committed
5

aiss's avatar
aiss committed
6
/*
aiss's avatar
aiss committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/

#include <condition_variable>
#include <memory>
#include <queue>
#include "deepspeed_py_aio.h"

struct io_op_desc_t {
    const bool _read_op;
    torch::Tensor _buffer;
    int _fd;
    const std::string _filename;
    const long long int _num_bytes;
    torch::Tensor _cpu_buffer;
    torch::Tensor _contiguous_buffer;
    const bool _validate;

    io_op_desc_t(const bool read_op,
                 const torch::Tensor& buffer,
                 const int fd,
                 const char* filename,
                 const long long int num_bytes,
                 const bool validate);

    char* data_ptr() const;
    void fini();
};

struct thread_sync_t {
    std::mutex _mutex;
    std::condition_variable _cond_var;
};

struct deepspeed_aio_thread_t {
    const int _tid;
    deepspeed_aio_config_t& _aio_config;

    std::unique_ptr<struct aio_context> _aio_ctxt;
    std::queue<std::shared_ptr<struct io_op_desc_t>> _work_queue;
    std::queue<std::shared_ptr<struct io_op_desc_t>> _complete_queue;

    bool _time_to_exit;

    struct thread_sync_t _work_sync;
    struct thread_sync_t _complete_sync;

    deepspeed_aio_thread_t(const int tid, deepspeed_aio_config_t& aio_config);

    ~deepspeed_aio_thread_t();

    void run();
};