Unverified commit de28f8e7, authored by Kevin Xiang Li, committed by GitHub
Browse files

vlm: remove redundant d2h movement of mm feature tensors (#9987)


Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
parent 56405076
...@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC): ...@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC):
return_tensors="pt", return_tensors="pt",
**kwargs, **kwargs,
) )
# move feature tensors to cpu if not self.server_args.keep_mm_feature_on_device:
for feature_name in self.FEATURE_NAMES: # move feature tensors to cpu
if feature_name in result and isinstance( for feature_name in self.FEATURE_NAMES:
result[feature_name], torch.Tensor if feature_name in result and isinstance(
): result[feature_name], torch.Tensor
result[feature_name] = result[feature_name].to("cpu") ):
result[feature_name] = result[feature_name].to("cpu")
return result return result
......
...@@ -381,6 +381,7 @@ class ServerArgs: ...@@ -381,6 +381,7 @@ class ServerArgs:
disable_shared_experts_fusion: bool = False disable_shared_experts_fusion: bool = False
disable_chunked_prefix_cache: bool = False disable_chunked_prefix_cache: bool = False
disable_fast_image_processor: bool = False disable_fast_image_processor: bool = False
keep_mm_feature_on_device: bool = False
enable_return_hidden_states: bool = False enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1 scheduler_recv_interval: int = 1
numa_node: Optional[List[int]] = None numa_node: Optional[List[int]] = None
...@@ -2213,6 +2214,11 @@ class ServerArgs: ...@@ -2213,6 +2214,11 @@ class ServerArgs:
action="store_true", action="store_true",
help="Adopt base image processor instead of fast image processor.", help="Adopt base image processor instead of fast image processor.",
) )
parser.add_argument(
"--keep-mm-feature-on-device",
action="store_true",
help="Keep multimodal feature tensors on device after processing to save D2H copy.",
)
parser.add_argument( parser.add_argument(
"--enable-return-hidden-states", "--enable-return-hidden-states",
action="store_true", action="store_true",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment