Unverified commit 93e53f6e, authored by fzyzcjy and committed by GitHub
Browse files

Logging and minor fixes to two batch overlap and EPLB (#6595)

parent a191a0e4
...@@ -152,8 +152,10 @@ class ExpertLocationMetadata: ...@@ -152,8 +152,10 @@ class ExpertLocationMetadata:
return ExpertLocationMetadata._init_raw( return ExpertLocationMetadata._init_raw(
ep_size=common["ep_size"], ep_size=common["ep_size"],
physical_to_logical_map=physical_to_logical_map, physical_to_logical_map=physical_to_logical_map.to(server_args.device),
logical_to_all_physical_map=logical_to_all_physical_map, logical_to_all_physical_map=logical_to_all_physical_map.to(
server_args.device
),
) )
@staticmethod @staticmethod
......
import dataclasses import dataclasses
import logging
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
import torch import torch
...@@ -11,11 +12,15 @@ from sglang.srt.managers.schedule_batch import global_server_args_dict ...@@ -11,11 +12,15 @@ from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.operations import execute_operations, execute_overlapped_operations from sglang.srt.operations import execute_operations, execute_overlapped_operations
from sglang.srt.operations_strategy import OperationsStrategy from sglang.srt.operations_strategy import OperationsStrategy
from sglang.srt.utils import BumpAllocator, DeepEPMode from sglang.srt.utils import BumpAllocator, DeepEPMode, get_bool_env_var
if TYPE_CHECKING: if TYPE_CHECKING:
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
_tbo_debug = get_bool_env_var("SGLANG_TBO_DEBUG")
logger = logging.getLogger(__name__)
# -------------------------------- Compute Basic Info --------------------------------------- # -------------------------------- Compute Basic Info ---------------------------------------
...@@ -178,6 +183,14 @@ class TboForwardBatchPreparer: ...@@ -178,6 +183,14 @@ class TboForwardBatchPreparer:
extend_seq_lens=batch.extend_seq_lens_cpu, extend_seq_lens=batch.extend_seq_lens_cpu,
) )
if _tbo_debug:
logger.info(
f"TboForwardBatchPreparer.prepare "
f"tbo_split_seq_index={batch.tbo_split_seq_index} "
f"tbo_split_token_index={tbo_split_token_index} "
f"extend_seq_lens={batch.extend_seq_lens_cpu}"
)
assert isinstance(batch.attn_backend, TboAttnBackend) assert isinstance(batch.attn_backend, TboAttnBackend)
attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children
......
...@@ -44,6 +44,7 @@ from functools import lru_cache ...@@ -44,6 +44,7 @@ from functools import lru_cache
from importlib.metadata import PackageNotFoundError, version from importlib.metadata import PackageNotFoundError, version
from importlib.util import find_spec from importlib.util import find_spec
from io import BytesIO from io import BytesIO
from json import JSONDecodeError
from multiprocessing.reduction import ForkingPickler from multiprocessing.reduction import ForkingPickler
from pathlib import Path from pathlib import Path
from typing import ( from typing import (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment