common.cpp 1.3 KB
Newer Older
Max Ryabinin's avatar
Max Ryabinin committed
1
2
3
#include <common.h>
#include <float.h>

4
void quantize_block(const quantize_block_args& args) {
Max Ryabinin's avatar
Max Ryabinin committed
5
6
7
8
9
10
11
12
    // 1. find absmax in block
    // 2. divide input value by absmax to normalize into [-1.0, 1.0]
    // 3. do binary search to find the closest value
    // 4. check minimal distance
    // 5. store index

    // 1. find absmax in block
    float absmax_block = -FLT_MAX;
13
14
    for (long long i = args.block_idx; i < args.block_end; i++)
        absmax_block = fmax(absmax_block, fabs(args.A[i]));
Max Ryabinin's avatar
Max Ryabinin committed
15

16
    args.absmax[args.block_idx / args.blocksize] = absmax_block;
Max Ryabinin's avatar
Max Ryabinin committed
17

18
    for (long long i = args.block_idx; i < args.block_end; i++) {
Max Ryabinin's avatar
Max Ryabinin committed
19
20
        // 2. divide input value by absmax to normalize into [-1.0, 1.0]
        // 3. do binary search to find the closest value
21
22
        float normed_value = args.A[i] / absmax_block;
        long long idx = args.bin_searcher->scalar(normed_value);
Max Ryabinin's avatar
Max Ryabinin committed
23
24
25
26

        // 4. check minimal distance
        // The binary search returns always the value to the left, which might not be the closest value
        if (idx < 255) {
27
28
            float dist_left = fabs(normed_value - (args.code[idx]));
            float dist_right = fabs(normed_value - (args.code[idx + 1]));
Max Ryabinin's avatar
Max Ryabinin committed
29
30
31
32
            if (dist_right < dist_left) { idx += 1; }
        }

        // 5. store index
33
        args.out[i] = (unsigned char) idx;
Max Ryabinin's avatar
Max Ryabinin committed
34
35
    }
}