Unverified commit 6317c5c6, authored by HAI and committed by GitHub
Browse files

Address performance regression: disable multiple streams on ROCm (#6412)

parent cba1cdbc
@@ -1315,7 +1315,8 @@ class DeepseekV2Model(nn.Module):
             config.hidden_size,
             enable_tp=not global_server_args_dict["enable_dp_attention"],
         )
-        self.alt_stream = torch.cuda.Stream()
+        # TODO(haishaw): multi-stream performance on ROCm
+        self.alt_stream = None if _is_hip else torch.cuda.Stream()
         self.layers = nn.ModuleList(
             [
                 DeepseekV2DecoderLayer(
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment