// fmoe_cuda.cpp: pybind11 bindings for the FastMoE CUDA extension.
#include <iostream>
#include <vector>
#include <torch/extension.h>

// global_exchange (declared only when FMOE_USE_NCCL is defined)
#ifdef FMOE_USE_NCCL
#include <c10d/ProcessGroupNCCL.hpp>
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `expert_exchange` by the module definition below.
//   local_expert_count: tensor argument; presumably the per-expert counts
//                       held by this worker -- TODO confirm at the definition.
//   n_expert, n_workers: integer sizes; presumably experts per worker and
//                        number of workers -- TODO confirm.
// Returns a vector of tensors whose layout is not visible from here.
std::vector<torch::Tensor> _expert_exchange(
        torch::Tensor local_expert_count,
        long n_expert, long n_workers);
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `global_scatter` by the module definition below.
//   input_buf:           tensor argument; presumably the data to send -- TODO confirm.
//   local_expert_count,
//   global_expert_count: tensor arguments; presumably the count tensors
//                        associated with the expert exchange -- TODO confirm.
//   batch_size, n_workers: integer sizes; exact meaning to be confirmed
//                          against the definition.
// Returns a vector of tensors whose layout is not visible from here.
std::vector<torch::Tensor> _global_scatter(
        torch::Tensor input_buf,
        torch::Tensor local_expert_count,
        torch::Tensor global_expert_count,
        long batch_size, long n_workers);
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `global_gather` by the module definition below.
// The parameter list mirrors _global_scatter except that the first tensor is
// named output_buf.
// NOTE(review): presumably the inverse of the scatter -- confirm against
// the definition.
// Returns a vector of tensors whose layout is not visible from here.
std::vector<torch::Tensor> _global_gather(
        torch::Tensor output_buf,
        torch::Tensor local_expert_count,
        torch::Tensor global_expert_count,
        long batch_size, long n_workers);
// Prototype only; exposed to Python as `ensure_nccl`
// ("FastMoE ensure torch nccl comm"). Takes the process group by non-const
// reference plus a tensor; what it does with them is not visible from here.
void _ensure_nccl(c10d::ProcessGroupNCCL& p, torch::Tensor t);
#endif  // FMOE_USE_NCCL

// local_exchange
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `assign_pos_` ("FastMoE assign pos by gate(CUDA)").
//   cum_count, gate, pos: tensor arguments.
// NOTE(review): the function returns void, so presumably `pos` is filled in
// place from `cum_count` and `gate` -- confirm against the definition.
void _assign_pos(
        torch::Tensor cum_count,
        torch::Tensor gate,
        torch::Tensor pos);

// parallel_linear
std::vector<torch::Tensor> _linear_forward(
        torch::Tensor input_buf,
33
        torch::Tensor expert_count,
Rick Ho's avatar
Rick Ho committed
34
        torch::Tensor weight,
35
36
        at::optional<torch::Tensor> bias
        );
Rick Ho's avatar
Rick Ho committed
37
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `linear_backward` ("FastMoE backward (CUDA)").
// Takes the same arguments as _linear_forward, preceded by grad_output_buf.
//   grad_output_buf: tensor argument; presumably the gradient with respect to
//                    the forward output -- TODO confirm.
//   bias:            optional tensor; may be empty.
// Returns a vector of tensors; presumably the gradients for the inputs and
// parameters -- confirm order and contents against the definition.
std::vector<torch::Tensor> _linear_backward(
        torch::Tensor grad_output_buf,
        torch::Tensor input_buf,
        torch::Tensor expert_count,
        torch::Tensor weight,
        at::optional<torch::Tensor> bias
        );

// balancing
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `limit_by_capacity`
// ("FastMoE limit experts by capacity(CUDA)").
//   expert_count, capacity: tensor arguments.
//   n_expert, n_worker:     integer sizes, named to match
//                           _prune_gate_by_capacity.
// NOTE(review): the second integer is assumed to be the worker count, as in
// the sibling declaration; parameter names in a prototype do not affect
// linkage or the Python binding -- confirm against the definition.
torch::Tensor _limit_by_capacity(
        torch::Tensor expert_count, torch::Tensor capacity,
        long n_expert, long n_worker);
// Prototype only; the definition is not in this section of the file.
// Exposed to Python as `prune_gate_by_capacity`
// ("FastMoE prune gate by capacity(CUDA)").
//   gate_idx, expert_count: tensor arguments.
//   n_expert, n_worker:     integer sizes; presumably experts per worker and
//                           number of workers -- TODO confirm.
// Returns a tensor whose contents are not visible from here.
torch::Tensor _prune_gate_by_capacity(
        torch::Tensor gate_idx, torch::Tensor expert_count,
        long n_expert, long n_worker);

// python module definition
// Registers the native FastMoE entry points on the Python extension module.
// Each m.def maps a Python-visible name to one of the functions declared in
// this file, together with its doc string.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
#ifdef FMOE_USE_NCCL
    // global_exchange: registered only when built with FMOE_USE_NCCL,
    // matching the guard around the corresponding declarations.
    m.def("expert_exchange", &_expert_exchange, "FastMoE expert exchange (CUDA)");
    m.def("global_scatter", &_global_scatter, "FastMoE global scatter (CUDA)");
    m.def("global_gather", &_global_gather, "FastMoE global gather (CUDA)");
    m.def("ensure_nccl", &_ensure_nccl, "FastMoE ensure torch nccl comm");
#endif  // FMOE_USE_NCCL

    // local_exchange
    m.def("assign_pos_", &_assign_pos, "FastMoE assign pos by gate(CUDA)");

    // parallel_linear
    m.def("linear_forward", &_linear_forward, "FastMoE forward (CUDA)");
    m.def("linear_backward", &_linear_backward, "FastMoE backward (CUDA)");

    // balancing
    m.def("limit_by_capacity", &_limit_by_capacity,
          "FastMoE limit experts by capacity(CUDA)");
    m.def("prune_gate_by_capacity", &_prune_gate_by_capacity,
          "FastMoE prune gate by capacity(CUDA)");
}