Commit 472f8d5d authored by guanyu1
Browse files

增加mrope的1d环境变量 (Add an environment variable to enable 1D M-RoPE positions)

parent 341b1b91
...@@ -396,6 +396,7 @@ class GPUModelRunner( ...@@ -396,6 +396,7 @@ class GPUModelRunner(
self.mm_registry = MULTIMODAL_REGISTRY self.mm_registry = MULTIMODAL_REGISTRY
self.uses_mrope = model_config.uses_mrope self.uses_mrope = model_config.uses_mrope
self.uses_xdrope_dim = model_config.uses_xdrope_dim self.uses_xdrope_dim = model_config.uses_xdrope_dim
self.use_1d_mrope = self.uses_mrope and envs.VLLM_1D_MROPE
self.supports_mm_inputs = self.mm_registry.supports_multimodal_inputs( self.supports_mm_inputs = self.mm_registry.supports_multimodal_inputs(
model_config model_config
) )
...@@ -610,10 +611,14 @@ class GPUModelRunner( ...@@ -610,10 +611,14 @@ class GPUModelRunner(
# identical position IDs, making M-RoPE functionally equivalent to # identical position IDs, making M-RoPE functionally equivalent to
# 1D-RoPE. # 1D-RoPE.
# See page 5 of https://arxiv.org/abs/2409.12191 # See page 5 of https://arxiv.org/abs/2409.12191
self.mrope_positions = self._make_buffer( if self.use_1d_mrope:
(3, self.max_num_tokens + 1), dtype=torch.int64 self.mrope_positions = self._make_buffer(
) 3 * (self.max_num_tokens + 1), dtype=torch.int64
)
else:
self.mrope_positions = self._make_buffer(
(3, self.max_num_tokens + 1), dtype=torch.int64
)
# Only relevant for models using XD-RoPE (e.g, HunYuan-VL) # Only relevant for models using XD-RoPE (e.g, HunYuan-VL)
if self.uses_xdrope_dim > 0: if self.uses_xdrope_dim > 0:
# Similar to mrope but use assigned dimension number for RoPE, 4 as default. # Similar to mrope but use assigned dimension number for RoPE, 4 as default.
...@@ -789,6 +794,7 @@ class GPUModelRunner( ...@@ -789,6 +794,7 @@ class GPUModelRunner(
pin_memory=self.pin_memory, pin_memory=self.pin_memory,
with_numpy=numpy, with_numpy=numpy,
) )
def _copy_mrope_positions_to_gpu(self, num_tokens: int) -> None: def _copy_mrope_positions_to_gpu(self, num_tokens: int) -> None:
if not self.uses_mrope: if not self.uses_mrope:
return return
...@@ -1616,6 +1622,7 @@ class GPUModelRunner( ...@@ -1616,6 +1622,7 @@ class GPUModelRunner(
total_num_scheduled_tokens, total_num_scheduled_tokens,
cu_num_tokens, cu_num_tokens,
) )
print(f'======================={total_num_scheduled_tokens=}')
if self.uses_mrope: if self.uses_mrope:
# Only relevant for models using M-RoPE (e.g, Qwen2-VL) # Only relevant for models using M-RoPE (e.g, Qwen2-VL)
self._copy_mrope_positions_to_gpu(total_num_scheduled_tokens) self._copy_mrope_positions_to_gpu(total_num_scheduled_tokens)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment