"projects/GLM/tokenizer/glm_tokenizer.py" did not exist on "478602ba59c0bfe7ab9a094b9f1b7b33cfeecba4"
Commit 99a0c39e authored by xingjinliang
Browse files

同步最新代码

parent 50fe58fa
Pipeline #2152 passed with stage
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -270,13 +270,12 @@ class _ParamAndGradBucketGroup:
if self.ddp_config.average_in_collective:
reduce_op = torch.distributed.ReduceOp.AVG
# Stream synchronization logic of the CUDA streams that is
# implemented below for the gradient reduction within and across
# distributed optimizer instances.
# We use the following stream synchronization for the gradient reduction
# within and across DistOpt instances.
# Compute Stream - -------------Gradient Compute-------------------
# Comm. Stream - ------(wait for nccl)-----(wait for nccl)-------
# NCCL Stream - -------RS------ -------AR------
# Compute Stream: -------------Gradient compute-------------------
# Comm. Stream: ------(wait for NCCL)-----(wait for NCCL)-------
# NCCL Stream: -------RS------ -------AR------
# Use async communications only when overlap_grad_reduce is True.
async_op = (
......@@ -287,13 +286,13 @@ class _ParamAndGradBucketGroup:
self.ddp_config.num_distributed_optimizer_instances > 1
and self.ddp_config.overlap_grad_reduce
):
# Assign a communication stream if we use partial DP DistOpt and we
# need to overlap communication
# Assign a communication stream if we have multiple DistOpt instances and we
# need to overlap communication.
stream_context = torch.cuda.stream(self.communication_stream)
# The RS/AR communication stream needs to wait for the default stream
# to complete its gradient computation before launching the next
# gradient reduction collective
# gradient reduction collective.
self.communication_stream.wait_stream(torch.cuda.default_stream())
else:
stream_context = nullcontext()
......@@ -314,24 +313,21 @@ class _ParamAndGradBucketGroup:
local_data_view,
bucket.grad_data,
op=reduce_op,
group=self.intra_distributed_optimizer_instance_group,
group=communication_group,
async_op=async_op,
)
else:
torch.distributed.all_reduce(
bucket.grad_data,
op=reduce_op,
group=self.data_parallel_group,
async_op=async_op,
bucket.grad_data, op=reduce_op, group=communication_group, async_op=async_op
)
# When enabling partial DP domain DistOpt, we need to All-Reduce across all partial domains
# With multiple DistOpt instances, we need to all-reduce across instances.
if (
self.ddp_config.use_distributed_optimizer
and self.ddp_config.num_distributed_optimizer_instances > 1
):
# Create a new coalescing facility for the inter partial DP-AllReduce here
# Create a new coalescing manager for the inter-instance all-reduce.
with stream_context, _coalescing_manager(
self.inter_distributed_optimizer_instance_group, async_ops=async_op
) as cm:
......@@ -366,13 +362,13 @@ class _ParamAndGradBucketGroup:
communication call to complete. When ddp_config.overlap_grad_reduce is set to False,
makes synchronous call.
"""
# If overlap_grad_reduce is False, start (and finish) synchronous communication call here.
self.param_gather_dispatched = False
# If overlap_grad_reduce is False, start (and finish) synchronous communication call here.
if not self.ddp_config.overlap_grad_reduce:
self.start_grad_sync()
return
# When using partial DP DistOpt, we don't need to sync as we launch comms on a separate
# communication stream
# When using multiple DistOpt instances, we don't need to sync here as we launch
# communications on a separate communication stream.
if self.ddp_config.num_distributed_optimizer_instances > 1:
torch.cuda.default_stream().wait_stream(self.communication_stream)
return
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment