spatial_cuda_layers.h 915 Bytes
Newer Older
aiss's avatar
aiss committed
1
2
3
4
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0

// DeepSpeed Team
aiss's avatar
aiss committed
5
6
7
8
9
10
11

#pragma once

// Feature flag for code paths that rely on native half-precision arithmetic.
// The flag is set only when compiling device code for compute capability 5.3
// or newer; in passes where __CUDA_ARCH__ is not defined (e.g. host
// compilation) it stays undefined.
//
// The replacement list is the plain value 1 (no '='), so the macro is usable
// both as `#ifdef HALF_PRECISION_AVAILABLE` and as `#if HALF_PRECISION_AVAILABLE`.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#define HALF_PRECISION_AVAILABLE 1
#endif

aiss's avatar
aiss committed
12
#ifdef __HIP_PLATFORM_HCC__
aiss's avatar
aiss committed
13
14
15
#include <hip/hip_cooperative_groups.h>
#else
#include <cooperative_groups.h>
aiss's avatar
aiss committed
16
#endif
aiss's avatar
aiss committed
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

#include <cuda.h>
#include <cuda_fp16.h>

/*********** Group Norm Kernels, Structs, and Helpers ************/

// Dimensions of a problem instance: three 64-bit extents, stored in the
// order batch_size, seq_len, channels.
typedef struct {
    int64_t batch_size;  // extent of the batch dimension
    int64_t seq_len;     // extent of the sequence dimension
    int64_t channels;    // extent of the channel dimension
} ChannelsLastProblem;

// Host-side declaration of the bias-add launcher; the definition is not in
// this header.
//
// NOTE(review): judging by the names only, this presumably combines
// `activation` with `bias` and, optionally, `other` with `other_bias`, writing
// the outcome to `result` -- confirm the exact formula and whether any of the
// pointers may be null against the definition.
//
// Parameters:
//   result      - only non-const pointer; presumably the output buffer.
//   activation  - read-only half-precision input.
//   bias        - read-only half-precision input.
//   other       - read-only half-precision input.
//   other_bias  - read-only half-precision input.
//   batch_size, seq_len, channels
//               - problem extents, passed as plain `int` (note that
//                 ChannelsLastProblem stores the same-named fields as int64_t).
//   stream      - CUDA stream handle; presumably the stream the work is
//                 enqueued on -- TODO confirm.
void launch_opt_bias_add(__half* result,
                         const __half* activation,
                         const __half* bias,
                         const __half* other,
                         const __half* other_bias,
                         int batch_size,
                         int seq_len,
                         int channels,
                         cudaStream_t stream);