parallel_linear.cu 3.09 KB
Newer Older
1
#include "parallel_linear.cuh"
Rick Ho's avatar
Rick Ho committed
2
3
4
#include "utils/fmoe_utils.h"
#include <torch/extension.h>

5
// Forward pass of the per-expert linear layer (host-side entry point).
//
// Reads the problem sizes from the tensors, prepares the output buffer and
// dispatches on the dtype of `input_buf` to fmoe_cuda_linear_forward_impl.
//
// Parameters
//   input_buf     dim 0 is taken as the batch size.
//   expert_count  accessed through data_ptr<long>(), so it must hold `long`
//                 elements. NOTE(review): the impl presumably reads it as
//                 per-expert row counts -- confirm in parallel_linear.cuh.
//   weight        indexed as (num_expert, out_feat, in_feat).
//   bias          optional; when present its rows are repeated according to
//                 expert_count to pre-fill the output buffer.
//
// Returns the output tensor handed to the impl. Without a bias it has shape
// (batch_size, out_feat) and starts uninitialized.
torch::Tensor _linear_forward(
        torch::Tensor input_buf,
        torch::Tensor expert_count,
        torch::Tensor weight,
        at::optional<torch::Tensor> bias
        ) {
    // Stream manager is looked up for the device that holds the input.
    auto stream_mgr = getCudaStreamManager(input_buf.device().index());

    const auto n_rows = input_buf.size(0);
    const auto n_expert = weight.size(0);
    const auto n_out = weight.size(1);
    const auto n_in = weight.size(2);

#ifdef MOE_DEBUG
    printf("[forward] expert=%ld, in_feat (d_model)=%ld, out_feat (d_ffn)=%ld\n",
            n_expert, n_in, n_out);
#endif

    const bool with_bias = bias.has_value();

    torch::Tensor output;
    if (!with_bias) {
        // No bias: allocate an uninitialized buffer matching the input's
        // device and dtype.
        const auto alloc_opts = torch::TensorOptions()
            .device(input_buf.device())
            .dtype(input_buf.dtype());
        output = torch::empty({n_rows, n_out}, alloc_opts);
    } else {
        // Bias present: seed the output with bias rows, each repeated by the
        // matching entry of expert_count (moved to the bias device first).
        const auto& bias_tensor = bias.value();
        output = bias_tensor.repeat_interleave(
                expert_count.to(bias_tensor.device()), 0);
    }

    AT_DISPATCH_FLOATING_TYPES_AND2(
            at::ScalarType::Half, at::ScalarType::BFloat16,
            input_buf.scalar_type(), "moe_forward_cuda", ([&] {
        fmoe_cuda_linear_forward_impl<scalar_t>(
            input_buf.data_ptr<scalar_t>(),
            weight.data_ptr<scalar_t>(),
            expert_count.data_ptr<long>(),
            output.data_ptr<scalar_t>(),
            with_bias,
            n_in,
            n_out,
            n_expert,
            stream_mgr
        );
    }));

    return output;
}

52

Rick Ho's avatar
Rick Ho committed
53
// Backward pass of the per-expert linear layer (host-side entry point).
//
// Allocates the three gradient buffers and dispatches on the dtype of
// `input_buf` to fmoe_cuda_linear_backward_impl, which fills them.
//
// Parameters
//   grad_output_buf  gradient w.r.t. the forward output; the gradient buffers
//                    are created from it with new_empty, so they inherit its
//                    dtype and device.
//   input_buf        forward input; dim 0 is taken as the batch size.
//   expert_count     accessed through data_ptr<long>(), so it must hold
//                    `long` elements.
//   weight           indexed as (num_expert, out_feat, in_feat).
//   bias             optional; only its presence is forwarded to the impl.
//
// Returns {grad_input (batch_size, in_feat),
//          grad_weight (num_expert, out_feat, in_feat),
//          grad_bias (num_expert, out_feat)}.
// NOTE(review): grad_bias is allocated with new_empty even when `bias` is
// absent; whether the impl writes it in that case is not visible here, so
// callers should presumably ignore it when there is no bias -- confirm.
std::vector<torch::Tensor> _linear_backward(
    torch::Tensor grad_output_buf,
    torch::Tensor input_buf,
    torch::Tensor expert_count,
    torch::Tensor weight,
    at::optional<torch::Tensor> bias
) {
    // Stream manager is looked up for the device that holds the input.
    auto stream_mgr = getCudaStreamManager(input_buf.device().index());

    const auto n_rows = input_buf.size(0);
    const auto n_expert = weight.size(0);
    const auto n_out = weight.size(1);
    const auto n_in = weight.size(2);

#ifdef MOE_DEBUG
    printf("[backward] b=%ld, expert=%ld, in_feat (d_model)=%ld, "
            "out_feat (d_ffn)=%ld\n",
            n_rows, n_expert, n_in, n_out);
#endif

    // Uninitialized gradient buffers.
    auto d_input = grad_output_buf.new_empty({n_rows, n_in});
    auto d_weight = grad_output_buf.new_empty({n_expert, n_out, n_in});
    auto d_bias = grad_output_buf.new_empty({n_expert, n_out});

    const bool with_bias = bias.has_value();

    AT_DISPATCH_FLOATING_TYPES_AND2(
            at::ScalarType::Half, at::ScalarType::BFloat16,
            input_buf.scalar_type(), "moe_cuda_backward", ([&] {
        fmoe_cuda_linear_backward_impl<scalar_t>(
            grad_output_buf.data_ptr<scalar_t>(),
            input_buf.data_ptr<scalar_t>(),
            weight.data_ptr<scalar_t>(),
            expert_count.data_ptr<long>(),
            d_input.data_ptr<scalar_t>(),
            d_weight.data_ptr<scalar_t>(),
            d_bias.data_ptr<scalar_t>(),
            with_bias,
            n_rows,
            n_in,
            n_out,
            n_expert,
            stream_mgr
        );
    }));

    return std::vector<torch::Tensor>{d_input, d_weight, d_bias};
}