import os

# Feature switches, read once at import time. A switch counts as enabled
# only when its environment variable is set to exactly the string '1'.
zero_overhead = os.getenv('VLLM_ZERO_OVERHEAD') == '1'
zero_no_thread = os.getenv('VLLM_ZERO_NO_THREAD') == '1'


def is_zero_overhead() -> bool:
    """Return whether the ``VLLM_ZERO_OVERHEAD`` switch was enabled at import."""
    return zero_overhead


def is_zero_no_thread() -> bool:
    """Return whether no-thread mode is active.

    ``VLLM_ZERO_NO_THREAD`` only takes effect when ``VLLM_ZERO_OVERHEAD`` is
    enabled as well; with zero-overhead off this is always falsy.
    """
    return zero_no_thread and zero_overhead


def UpdateInputTokens(input_tokens, last_sample, indices):
    """Launch the ``_update_input_tokens`` kernel over ``input_tokens``.

    The launch grid is ``[input_tokens.shape[0], 1, 1]`` and the kernel is
    called with ``(last_sample, input_tokens, indices,
    input_tokens.shape[0])``.

    The first call goes through ``_update_input_tokens[grid](...)`` and keeps
    whatever that launch returns in the module-level
    ``_update_input_tokens_ptr``. Every later call launches through that
    stored handle instead.

    NOTE(review): neither ``_update_input_tokens`` nor the initial
    ``_update_input_tokens_ptr = None`` is defined in the visible source;
    both must exist at module level for this function to run. Presumably the
    former is a JIT-compiled GPU kernel whose launch returns a reusable
    compiled handle -- confirm against the kernel definition.

    Args:
        input_tokens: Object exposing ``.shape``; its first dimension sets
            the grid size and is passed as the kernel's last argument.
        last_sample: Forwarded to the kernel as its first argument.
        indices: Forwarded to the kernel as its third argument.

    Returns:
        None.
    """
    global _update_input_tokens_ptr

    row_count = input_tokens.shape[0]
    launch_grid = [row_count, 1, 1]
    kernel_args = (last_sample, input_tokens, indices, row_count)

    if _update_input_tokens_ptr is not None:
        # Handle already cached by an earlier call: launch through it.
        _update_input_tokens_ptr[launch_grid](*kernel_args)
        return

    # First call: launch via the kernel object and remember what it returns.
    _update_input_tokens_ptr = _update_input_tokens[launch_grid](*kernel_args)