Unverified commit d334dd26, authored by Harry Mellor, committed by GitHub
Browse files

Move decode context parallel validation to `ParallelConfig` (#33239)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 070c811d
...@@ -348,6 +348,17 @@ class ParallelConfig: ...@@ -348,6 +348,17 @@ class ParallelConfig:
"num_redundant_experts." "num_redundant_experts."
) )
# Note(hc): In the current implementation of decode context
# parallel(DCP), tp_size needs to be divisible by dcp_size,
# because the world size does not change by dcp, it simply
# reuses the GPUs of TP group, and split one TP group into
# tp_size//dcp_size DCP groups.
if self.tensor_parallel_size % self.decode_context_parallel_size != 0:
raise ValueError(
f"tp_size={self.tensor_parallel_size} must be divisible by"
f"dcp_size={self.decode_context_parallel_size}."
)
return self return self
@property @property
......
...@@ -1389,16 +1389,6 @@ class EngineArgs: ...@@ -1389,16 +1389,6 @@ class EngineArgs:
# global layers in interleaved sliding window models. # global layers in interleaved sliding window models.
sliding_window = model_config.get_sliding_window() sliding_window = model_config.get_sliding_window()
# Note(hc): In the current implementation of decode context
# parallel(DCP), tp_size needs to be divisible by dcp_size,
# because the world size does not change by dcp, it simply
# reuses the GPUs of TP group, and split one TP group into
# tp_size//dcp_size DCP groups.
assert self.tensor_parallel_size % self.decode_context_parallel_size == 0, (
f"tp_size={self.tensor_parallel_size} must be divisible by"
f"dcp_size={self.decode_context_parallel_size}."
)
# Resolve "auto" kv_cache_dtype to actual value from model config # Resolve "auto" kv_cache_dtype to actual value from model config
resolved_cache_dtype = resolve_kv_cache_dtype_string( resolved_cache_dtype = resolve_kv_cache_dtype_string(
self.kv_cache_dtype, model_config self.kv_cache_dtype, model_config
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment