"platforms/cuda/vscode:/vscode.git/clone" did not exist on "ca4c03c362446abaf66b832ddd500ec049b2ec60"
config.h 655 Bytes
Newer Older
1
2
3
#pragma once

#include <cutlass/numeric_types.h>
zhanghj2's avatar
zhanghj2 committed
4
// #include <cutlass/arch/barrier.h>
5
#include <cute/tensor.hpp>
6
#include "defines.h"
7
8
9

using namespace cute;

10
namespace sm90::decode::sparse_fp8 {

// Compile-time configuration constants shared by the code in this namespace.
//
// NOTE(review): the meanings attached to the names below (K/V head dims,
// "NOPE" = part without positional encoding, "ROPE" = rotary part) are
// inferred from the identifiers only -- confirm against the kernels that
// consume them.

// Head dimensions. HEAD_DIM_NOPE aliases HEAD_DIM_V, and HEAD_DIM_ROPE is
// whatever remains of HEAD_DIM_K after HEAD_DIM_V is removed.
static constexpr int HEAD_DIM_K = 576;
static constexpr int HEAD_DIM_V = 512;
static constexpr int HEAD_DIM_NOPE = HEAD_DIM_V;
static constexpr int HEAD_DIM_ROPE = HEAD_DIM_K - HEAD_DIM_V;

// A negative HEAD_DIM_ROPE would silently produce a nonsensical byte count
// below, so reject it at compile time.
static_assert(HEAD_DIM_ROPE >= 0,
              "HEAD_DIM_K must not be smaller than HEAD_DIM_V");

// Number of HEAD_DIM_NOPE elements covered by a single scale value.
static constexpr int QUANT_TILE_SIZE = 128;

// NUM_SCALES is computed with integer division; a non-multiple would drop the
// trailing partial tile without any diagnostic.
static_assert(QUANT_TILE_SIZE > 0 && HEAD_DIM_NOPE % QUANT_TILE_SIZE == 0,
              "HEAD_DIM_NOPE must be a positive multiple of QUANT_TILE_SIZE");

// One scale per QUANT_TILE_SIZE-wide tile of the NOPE part.
static constexpr int NUM_SCALES = HEAD_DIM_NOPE / QUANT_TILE_SIZE;

// Per-token byte count, the sum of three terms:
//   HEAD_DIM_NOPE                    (each element counted as one byte)
//   NUM_SCALES    * sizeof(float)
//   HEAD_DIM_ROPE * sizeof(bf16)
// sizeof() is cast to int so the whole expression is evaluated in signed
// arithmetic instead of mixing int with size_t and narrowing implicitly.
// NOTE(review): bf16 is not declared in this file (presumably it comes from
// "defines.h"); this constant is only as correct as that type's size.
static constexpr int NUM_BYTES_PER_TOKEN =
    HEAD_DIM_NOPE
    + NUM_SCALES * static_cast<int>(sizeof(float))
    + HEAD_DIM_ROPE * static_cast<int>(sizeof(bf16));

// NOTE(review): presumably the number of tokens per page block -- confirm
// with the code that uses it.
static constexpr int PAGE_BLOCK_SIZE = 64;

}  // namespace sm90::decode::sparse_fp8