// common.js — k6 load-test helpers (exports get_options and run).
import { check, randomSeed } from 'k6';
import http from 'k6/http';
import { Trend, Counter } from 'k6/metrics';
import { randomItem } from 'https://jslib.k6.io/k6-utils/1.2.0/index.js';

// Fixed seed passed to k6's randomSeed() below.
const seed = 0;

// Target "host:port"; taken from the HOST environment variable when set.
const host = __ENV.HOST || '127.0.0.1:8000';

// Custom metrics fed by run(). The second Trend argument (`true`) is k6's
// isTime flag, so samples are reported as time values.
const timePerToken = new Trend('time_per_token', true);
const tokens = new Counter('tokens');
const new_tokens = new Counter('new_tokens');
const input_tokens = new Counter('input_tokens');

randomSeed(seed);

// Prompt pool sampled by run(). The commented-out line is the alternative
// dataset file; "small.json" is the one actually loaded.
// const shareGPT = JSON.parse(open("ShareGPT_V3_unfiltered_cleaned_split.json"))
const shareGPT = JSON.parse(open("small.json"));


/**
 * Build the k6 `options` object for the load test.
 *
 * Thresholds:
 *  - `http_req_failed` must stay at a rate of exactly 0.
 *  - the median (`p(50)`) of the `time_per_token` metric must stay below
 *    five times `reference_latency_ms`; on failure the run is aborted, with
 *    abort evaluation delayed by 10s.
 *
 * Scenario `load_test`: constant arrival rate of 10 iterations per 1s for
 * 60s, with 10 pre-allocated VUs.
 *
 * @param {number} reference_latency_ms - Reference per-token latency in
 *   milliseconds; the threshold is set to 5x this value.
 * @returns {object} Options object with `thresholds` and `scenarios`.
 */
export function get_options(reference_latency_ms){
    return {
        thresholds: {
            http_req_failed: ['rate==0'],
            time_per_token: [{
                threshold: `p(50)<${5 * reference_latency_ms}`,
                abortOnFail: true,
                delayAbortEval: '10s'
            }],
        },
        scenarios: {
            load_test: {
                executor: 'constant-arrival-rate',
                duration: '60s',
                preAllocatedVUs: 10,
                rate: 10,
                timeUnit: '1s',
            },
        },
    };
}


/**
 * Execute one load-test iteration: pick a random dataset entry, POST it to
 * `http://<host>/generate`, and record token metrics from the response.
 *
 * Responses with a 4xx status are dropped silently (no check, no metrics).
 * Any other response is checked for status 200; only 200 responses
 * contribute to the `time_per_token`, `tokens`, `input_tokens` and
 * `new_tokens` metrics.
 *
 * @param {string} host - "host:port" of the server under test.
 * @param {function(object): object} generate_payload - Maps a dataset entry
 *   to the request body; its result is JSON-serialised.
 * @param {number} max_new_tokens - Not used by this function; kept so
 *   existing call sites keep working.
 * @returns {void}
 */
export function run(host, generate_payload, max_new_tokens) {
    const headers = {'Content-Type': 'application/json'};
    const query = randomItem(shareGPT);
    const payload = JSON.stringify(generate_payload(query));
    const res = http.post(`http://${host}/generate`, payload, {
        headers,
    });

    // Client errors (4xx) are ignored entirely: neither checked nor measured.
    if (res.status >= 400 && res.status < 500) {
        return;
    }

    check(res, {
        'Post status is 200': (r) => r.status === 200,
    });

    if (res.status !== 200) {
        return;
    }

    // The response body is expected to carry `details.tokens` (generated
    // tokens) and `details.prefill` (input tokens).
    const body = res.json();
    const n_tokens = body.details.tokens.length;

    // Skip the latency sample when nothing was generated: dividing by zero
    // would push Infinity into the Trend.
    if (n_tokens > 0) {
        timePerToken.add(res.timings.duration / n_tokens);
    }

    const n_input_tokens = body.details.prefill.length;
    tokens.add(n_tokens + n_input_tokens);
    input_tokens.add(n_input_tokens);
    new_tokens.add(n_tokens);
}