Unverified commit de28f8e7, authored by Kevin Xiang Li, committed by GitHub
Browse files

vlm: remove redundant d2h movement of mm feature tensors (#9987)


Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
parent 56405076
......@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC):
return_tensors="pt",
**kwargs,
)
# move feature tensors to cpu
for feature_name in self.FEATURE_NAMES:
if feature_name in result and isinstance(
result[feature_name], torch.Tensor
):
result[feature_name] = result[feature_name].to("cpu")
if not self.server_args.keep_mm_feature_on_device:
# move feature tensors to cpu
for feature_name in self.FEATURE_NAMES:
if feature_name in result and isinstance(
result[feature_name], torch.Tensor
):
result[feature_name] = result[feature_name].to("cpu")
return result
......
......@@ -381,6 +381,7 @@ class ServerArgs:
disable_shared_experts_fusion: bool = False
disable_chunked_prefix_cache: bool = False
disable_fast_image_processor: bool = False
keep_mm_feature_on_device: bool = False
enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1
numa_node: Optional[List[int]] = None
......@@ -2213,6 +2214,11 @@ class ServerArgs:
action="store_true",
help="Adopt base image processor instead of fast image processor.",
)
parser.add_argument(
"--keep-mm-feature-on-device",
action="store_true",
help="Keep multimodal feature tensors on device after processing to save D2H copy.",
)
parser.add_argument(
"--enable-return-hidden-states",
action="store_true",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment