Unverified commit 7e0ec5a8, authored by Edenzzzz, committed by GitHub
Browse files

fix incorrect sharding without zero (#5545)


Co-authored-by: Edenzzzz <wtan45@wisc.edu>
parent e614aa34
...@@ -74,8 +74,10 @@ class ShardConfig: ...@@ -74,8 +74,10 @@ class ShardConfig:
self.enable_fused_normalization = True self.enable_fused_normalization = True
self.enable_flash_attention = True self.enable_flash_attention = True
self.enable_jit_fused = True self.enable_jit_fused = True
self.enable_sequence_parallelism = True # This can cause non-in-place param sharding when used without ZeRO.
self.enable_sequence_overlap = True # It may also slow down training when seq len is small. Plz enable manually.
# self.enable_sequence_parallelism = True
# self.enable_sequence_overlap = True
def _infer(self): def _infer(self):
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment