"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "66394bf6c798a93a1e9536dac4999f77b690174c"
Unverified Commit 6f28ea30 authored by Haibin Lin, committed by GitHub

fix typos (#446)

parent 5412a334
@@ -14,7 +14,7 @@ def init_process_groups(grid):
     global _groups, _grid
     _grid = grid
-    assert _grid.pipe_parallel_size > 1, "There is no model parallelism"
+    assert _grid.pipe_parallel_size > 1, "There is no pipeline parallelism"
     _groups = [dist.new_group(ranks=group) for group in _grid.p2p_groups]
...
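For context on this hunk: `_grid.p2p_groups` holds the rank pairs used for point-to-point communication between adjacent pipeline stages, and each pair gets its own communicator via `dist.new_group`. A minimal sketch of how such groups could be built, assuming one process per stage with ranks laid out 0..pipe_parallel_size-1 (an illustrative assumption, not this repository's actual grid logic):

import torch.distributed as dist

def make_p2p_groups(pipe_parallel_size):
    # dist.new_group must be called by every rank, in the same order,
    # even for groups the calling rank does not belong to.
    pairs = []
    for stage in range(pipe_parallel_size):
        pair = sorted([stage, (stage + 1) % pipe_parallel_size])
        if pair not in pairs:  # avoid a duplicate group when size == 2
            pairs.append(pair)
    return [dist.new_group(ranks=pair) for pair in pairs]

The `pipe_parallel_size > 1` assertion in the hunk above guards exactly this construction: with a single stage there are no adjacent pairs to connect.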
@@ -260,7 +260,7 @@ class FP16_DeepSpeedZeroOptimizer(object):
             for p, q in zip(self.fp16_groups[i], updated_params):
                 p.data = q.data
-            #divide the flat weights into near equal paritition equal to the data parallel degree
+            #divide the flat weights into near equal partition equal to the data parallel degree
             #each process will compute on a different part of the partition
             data_parallel_partitions = self.get_data_parallel_partitions(
                 self.fp16_groups_flat[i])
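The comment fixed in this hunk describes the core ZeRO move: the flattened fp16 parameter group is cut into near-equal contiguous slices, one per data-parallel rank, so each rank only works on its own slice. A hedged sketch of such a partitioner (`get_partitions` is an illustrative stand-in; the real `get_data_parallel_partitions` is not shown in this diff):

import torch

def get_partitions(flat_tensor, dp_world_size):
    # Near-equal contiguous slices: the first (numel % world_size) ranks
    # get one extra element each, so sizes differ by at most one.
    base, rem = divmod(flat_tensor.numel(), dp_world_size)
    partitions, start = [], 0
    for rank in range(dp_world_size):
        size = base + (1 if rank < rem else 0)
        partitions.append(flat_tensor.narrow(0, start, size))
        start += size
    return partitions

For example, a 10-element tensor split across 3 ranks yields views of sizes 4, 3, and 3; `narrow` returns views, so no weight memory is copied.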
@@ -367,10 +367,10 @@ class FP16_DeepSpeedZeroOptimizer(object):
         #stores the offset at which a parameter gradient needs to be inserted in a partition
         self.grad_partition_insertion_offset = {}
-        #the offset in the gradient at which it must be inserted at the beginning of the paritition
+        #the offset in the gradient at which it must be inserted at the beginning of the partition
         self.grad_start_offset = {}
-        #will store the averaged gradients required by this parititon
+        #will store the averaged gradients required by this partition
         self.averaged_gradients = {}
         # store index of first parameter in each partition
...
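The dictionaries initialized in this hunk track, for each parameter, where its averaged gradient must be copied within a rank's partition: a parameter's flat range may straddle a partition boundary, so both an offset into the partition and an offset into the gradient are needed. A hedged sketch of that interval arithmetic (the function and argument names are illustrative; only the two dictionary key names come from the diff):

def gradient_placement(param_offset, param_numel, partition_start, partition_size):
    # Overlap of the parameter's flat range [param_offset, param_offset + param_numel)
    # with the partition's range [partition_start, partition_start + partition_size).
    begin = max(param_offset, partition_start)
    end = min(param_offset + param_numel, partition_start + partition_size)
    if begin >= end:
        return None  # this parameter's gradient never touches the partition
    return {
        # offset inside the partition where the copy lands
        "grad_partition_insertion_offset": begin - partition_start,
        # offset inside the parameter's gradient where the copy starts
        "grad_start_offset": begin - param_offset,
        "numel": end - begin,
    }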