"""Process-wide zero-overhead flags and shared speculative-step state.

The two flags are read from the environment once, at import time.  The
module also owns a single ``ZeroOverheadSpecContext`` instance
(``spec_context``) plus small accessor functions that read and write it.
"""
from enum import Enum
import os

# Each flag is True only when its environment variable is exactly '1'.
# They are evaluated once, when this module is first imported.
zero_overhead = os.environ.get('VLLM_ZERO_OVERHEAD') == '1'
zero_no_thread = os.environ.get('VLLM_ZERO_NO_THREAD') == '1'


def is_zero_overhead() -> bool:
    """Return the ``VLLM_ZERO_OVERHEAD`` flag captured at import time."""
    return zero_overhead


def is_zero_no_thread() -> bool:
    """Return True only when both the no-thread and zero-overhead flags are set.

    ``VLLM_ZERO_NO_THREAD`` on its own has no effect: it is honoured only
    if ``VLLM_ZERO_OVERHEAD`` is enabled as well.
    """
    return zero_no_thread and zero_overhead


class SpecStepKind(Enum):
    """Kinds of step tracked by ``ZeroOverheadSpecContext``.

    NOTE(review): the member names suggest phases of speculative decoding
    (prefill, draft proposals, scoring) -- confirm against the callers.
    """

    KIND_DEFAULT = 0
    PREFILL = 1
    FIRST_PROPOSAL = 2
    OTHER_PROPOSAL = 3
    SCORE_DECODE = 4


class ZeroOverheadSpecContext:
    """Mutable holder for the current/previous step kind and recorded data."""

    def __init__(self):
        # Step kind set most recently, and the one that was current before it.
        self.step_kind = SpecStepKind.KIND_DEFAULT
        self.last_step = SpecStepKind.KIND_DEFAULT
        # Values stored by the record_* helpers below; None until recorded.
        self.proposal_lens_list = None
        self.proposal_token_ids = None
        self.accepted_token_ids = None
        self.accepted_seq_ids = None


# Single module-level instance shared by every accessor below.  The
# accessors only mutate its attributes and never rebind the name, so no
# ``global`` declaration is needed inside them.
spec_context = ZeroOverheadSpecContext()


def set_spec_step(_step) -> None:
    """Make ``_step`` the current step kind, remembering the previous one.

    The value that was current before this call becomes ``last_step``.
    """
    spec_context.last_step = spec_context.step_kind
    spec_context.step_kind = _step


def get_spec_step():
    """Return the step kind most recently passed to ``set_spec_step``."""
    return spec_context.step_kind


def get_spec_last_step():
    """Return the step kind that was current before the latest change."""
    return spec_context.last_step


def record_proposal_lens_list(list) -> None:
    """Store ``list`` as the shared proposal-lengths value.

    The parameter name shadows the builtin ``list``; it is kept unchanged
    so that existing keyword callers keep working.
    """
    spec_context.proposal_lens_list = list


def get_proposal_lens_list():
    """Return the value stored by ``record_proposal_lens_list`` (or None)."""
    return spec_context.proposal_lens_list


def record_proposal_token_ids(tensor) -> None:
    """Store ``tensor`` as the shared proposal token ids."""
    spec_context.proposal_token_ids = tensor


def get_proposal_token_ids():
    """Return the value stored by ``record_proposal_token_ids`` (or None)."""
    return spec_context.proposal_token_ids


def record_accepted_token_ids(tensor, seq_ids) -> None:
    """Store the accepted token ids together with their sequence ids."""
    spec_context.accepted_token_ids = tensor
    spec_context.accepted_seq_ids = seq_ids


def get_accepted_token_ids():
    """Return ``(accepted_token_ids, accepted_seq_ids)`` as a 2-tuple.

    Both elements are None until ``record_accepted_token_ids`` is called.
    """
    return spec_context.accepted_token_ids, spec_context.accepted_seq_ids