config.h 652 Bytes
Newer Older
1
2
3
4
5
#pragma once

#include <cutlass/numeric_types.h>
#include <cutlass/arch/barrier.h>
#include <cute/tensor.hpp>
6
#include "defines.h"
7
8
9

// Pulls every name from namespace `cute` into the global namespace.
// NOTE(review): this header is `#pragma once`-guarded and meant to be included,
// so the directive also applies to every translation unit that includes it.
// Consider scoping it inside the project namespace once all includers are
// audited; left untouched here because code outside this file may rely on it.
using namespace cute;

10
namespace sm90::decode::sparse_fp8 {
11
12
13
14
15

// Head-dimension constants.
//   HEAD_DIM_V    - 512
//   HEAD_DIM_K    - 576
//   HEAD_DIM_NOPE - alias of HEAD_DIM_V (512)
//   HEAD_DIM_ROPE - what is left of HEAD_DIM_K after the NOPE part (576 - 512 = 64)
// NOTE(review): presumably K/V are the key/value head widths and NOPE/ROPE the
// slices without/with rotary position embedding; confirm against the kernels.
static constexpr int HEAD_DIM_V = 512;
static constexpr int HEAD_DIM_K = 576;
static constexpr int HEAD_DIM_NOPE = HEAD_DIM_V;
static constexpr int HEAD_DIM_ROPE = HEAD_DIM_K - HEAD_DIM_NOPE;
16
// Width of one quantization tile, in elements. The scale count in this file is
// derived by dividing the NOPE width by this value.
static constexpr int QUANT_TILE_SIZE{128};
17
18
// NUM_SCALES is computed with integer division, so a tile size that does not
// divide the NOPE width would silently drop the trailing partial tile and make
// NUM_BYTES_PER_TOKEN too small. Reject such a configuration at compile time.
static_assert(QUANT_TILE_SIZE > 0 && HEAD_DIM_NOPE % QUANT_TILE_SIZE == 0,
              "QUANT_TILE_SIZE must be positive and divide HEAD_DIM_NOPE evenly");

// One scale per QUANT_TILE_SIZE-wide tile of the NOPE part.
static constexpr int NUM_SCALES = HEAD_DIM_NOPE / QUANT_TILE_SIZE;

// Per-token byte count, summed from three terms:
//   HEAD_DIM_NOPE                 - NOPE part, counted at one byte per element
//   NUM_SCALES * sizeof(float)    - one float per quantization tile
//   HEAD_DIM_ROPE * sizeof(bf16)  - ROPE part, one bf16 per element
// NOTE(review): the one-byte NOPE elements are presumably FP8 (per the namespace
// name) and `bf16` presumably comes from "defines.h"; confirm both.
// `sizeof` yields std::size_t, so the narrowing to int is made explicit.
static constexpr int NUM_BYTES_PER_TOKEN = static_cast<int>(
    HEAD_DIM_NOPE + NUM_SCALES * sizeof(float) + HEAD_DIM_ROPE * sizeof(bf16));
19
// Page block size used by this configuration (64).
// NOTE(review): presumably the number of tokens held by one page block of the
// cache; confirm the unit against the code that consumes it.
static constexpr int PAGE_BLOCK_SIZE{64};
20

21
}