"vllm/vscode:/vscode.git/clone" did not exist on "47532cd9f4bb751955d10989eda2078966deb0aa"
utils.py 661 Bytes
Newer Older
lizhigong's avatar
lizhigong committed
1
2
3
4
5


import os

# Feature flags, read from the environment once when this module is imported.
# A flag is enabled only when its variable is set to the exact string '1';
# any other value, or an unset variable, leaves it disabled.
zero_overhead = os.environ.get('VLLM_ZERO_OVERHEAD') == '1'
zero_no_thread = os.environ.get('VLLM_ZERO_NO_THREAD') == '1'

def is_zero_overhead() -> bool:
    """Return whether the ``VLLM_ZERO_OVERHEAD`` flag is enabled.

    The value is the module-level ``zero_overhead`` flag, which is computed
    once at import time; changing the environment afterwards has no effect.
    """
    return zero_overhead

def is_zero_no_thread() -> bool:
    """Return whether the ``VLLM_ZERO_NO_THREAD`` flag is in effect.

    The no-thread flag only counts when ``VLLM_ZERO_OVERHEAD`` is enabled as
    well, so this is True only if both module-level flags are True.
    """
    return zero_no_thread and zero_overhead

def UpdateInputTokens(input_tokens, last_sample, indices):
    """Launch ``_update_input_tokens`` over ``input_tokens`` and cache the handle.

    The launch grid is ``[input_tokens.shape[0], 1, 1]``. On the first call
    (while the module-level ``_update_input_tokens_ptr`` is ``None``) the
    callable ``_update_input_tokens`` is invoked and whatever it returns is
    stored in ``_update_input_tokens_ptr``; every later call goes through that
    stored object instead.

    NOTE(review): this looks like a JIT kernel launch whose return value is a
    reusable compiled handle -- confirm against the definition of
    ``_update_input_tokens``, which is not visible from here.

    Args:
        input_tokens: Object exposing ``shape``; its first dimension sets the
            grid size and is also passed as the last launch argument.
        last_sample: Passed through as the first launch argument.
        indices: Passed through as the third launch argument.

    Returns:
        None.
    """
    global _update_input_tokens_ptr
    num_rows = input_tokens.shape[0]
    launch_grid = [num_rows, 1, 1]
    launch_args = (last_sample, input_tokens, indices, num_rows)

    # Subsequent calls: reuse the handle saved by the first launch.
    if _update_input_tokens_ptr is not None:
        _update_input_tokens_ptr[launch_grid](*launch_args)
        return

    # First call: launch through the original callable and keep its result.
    _update_input_tokens_ptr = _update_input_tokens[launch_grid](*launch_args)