"cacheflow/model_executor/model_loader.py" did not exist on "e9d3f2ff7772c8efe41dc805cec71c223ec18ec8"
Commit 4a7d8ab8 authored by zhuwenwen
Browse files

Merge branch 'v0.8.4-dev-wm' into 'v0.8.4-dev'

[fix]修复多卡eager模式精度问题

See merge request dcutoolkit/deeplearing/vllm!102
parents f9a784a7 9f29bc2d
...@@ -1028,8 +1028,11 @@ def current_stream() -> torch.cuda.Stream: ...@@ -1028,8 +1028,11 @@ def current_stream() -> torch.cuda.Stream:
# On ROCm using the default 0 stream in combination with RCCL # On ROCm using the default 0 stream in combination with RCCL
# is hurting performance. Therefore creating a dedicated stream # is hurting performance. Therefore creating a dedicated stream
# per process # per process
_current_stream = torch.cuda.Stream() if current_platform.is_rocm(
) else torch.cuda.current_stream() # fix computational precision issue in eager mode
# _current_stream = torch.cuda.Stream() if current_platform.is_rocm(
# ) else torch.cuda.current_stream()
_current_stream = torch.cuda.current_stream()
return _current_stream return _current_stream
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment