Unverified commit e2d81eba, authored by digger yu, committed by GitHub
Browse files

[nfc] fix typo colossalai/ applications/ (#3831)

* fix typo colossalai/autochunk auto_parallel amp

* fix typo colossalai/auto_parallel nn utils etc.

* fix typo colossalai/auto_parallel autochunk fx/passes  etc.

* fix typo docs/

* change placememt_policy to placement_policy in docs/ and examples/

* fix typo colossalai/ applications/
parent a64df3fa
...@@ -34,7 +34,7 @@ class DetachedReplayBuffer: ...@@ -34,7 +34,7 @@ class DetachedReplayBuffer:
''' '''
Workers in the same tp group share this buffer and need same sample for one step. Workers in the same tp group share this buffer and need same sample for one step.
Therefore a held_sample should be returned tp_world_size times before it could be dropped. Therefore a held_sample should be returned tp_world_size times before it could be dropped.
worker_state records wheter a worker got the held_sample worker_state records whether a worker got the held_sample
''' '''
self.tp_world_size = tp_world_size self.tp_world_size = tp_world_size
self.worker_state = [False] * self.tp_world_size self.worker_state = [False] * self.tp_world_size
......
...@@ -22,7 +22,7 @@ from .utils import is_rank_0, get_strategy_from_args, set_dist_env ...@@ -22,7 +22,7 @@ from .utils import is_rank_0, get_strategy_from_args, set_dist_env
class ExperienceMakerHolder: class ExperienceMakerHolder:
''' '''
Args: Args:
detached_trainer_name_list: str list to get ray actor handleskkk detached_trainer_name_list: str list to get ray actor handles
strategy: strategy:
experience_batch_size: batch size of generated experience experience_batch_size: batch size of generated experience
kl_coef: the coefficient of kl divergence loss kl_coef: the coefficient of kl divergence loss
......
...@@ -26,7 +26,7 @@ rpc_is_initialized = _is_current_rpc_agent_set ...@@ -26,7 +26,7 @@ rpc_is_initialized = _is_current_rpc_agent_set
class PipelineModel(torch.nn.Module): class PipelineModel(torch.nn.Module):
''' '''
Actor has 2 kinds of jobs: forward and generate. Actor has 2 kinds of jobs: forward and generate.
better to just pipelinize the inner model better to just pipeline the inner model
''' '''
def __init__(self, def __init__(self,
model: torch.nn.Module, model: torch.nn.Module,
......
...@@ -119,7 +119,7 @@ class Evaluator(object): ...@@ -119,7 +119,7 @@ class Evaluator(object):
jdump(all_evaluations, jdump(all_evaluations,
os.path.join(evaluation_results_save_path, f"{model_name_list[0]}_evaluation_results.json")) os.path.join(evaluation_results_save_path, f"{model_name_list[0]}_evaluation_results.json"))
# Start to calculate scores and save statictics. # Start to calculate scores and save statistics.
evaluation_statistics_save_path = os.path.join(base_save_path, "evaluation_statistics") evaluation_statistics_save_path = os.path.join(base_save_path, "evaluation_statistics")
gpt_evaluate.save_gpt35_evaluation_statistics(model_name_list[0], all_evaluations, gpt_evaluate.save_gpt35_evaluation_statistics(model_name_list[0], all_evaluations,
evaluation_statistics_save_path) evaluation_statistics_save_path)
......
...@@ -111,7 +111,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict: ...@@ -111,7 +111,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict:
The calculation of precision, recall and f1-score is realized by counting The calculation of precision, recall and f1-score is realized by counting
the number of overlaps between the preds and target. The comparison length the number of overlaps between the preds and target. The comparison length
limited by the shorter one of preds and targets. This design is mainly limited by the shorter one of preds and targets. This design is mainly
considered for classifiction and extraction categories. considered for classification and extraction categories.
""" """
precision_recall_f1 = {"precision": 0, "recall": 0, "f1_score": 0} precision_recall_f1 = {"precision": 0, "recall": 0, "f1_score": 0}
precision_scores = [] precision_scores = []
...@@ -138,7 +138,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict: ...@@ -138,7 +138,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict:
def precision(preds: list, targets: list) -> dict: def precision(preds: list, targets: list) -> dict:
"""Calculate Precision Metric """Calculate Precision Metric
(design for classifiction and extraction categories) (design for classification and extraction categories)
Calculating precision by counting the number of overlaps between the preds and target. Calculating precision by counting the number of overlaps between the preds and target.
""" """
...@@ -149,7 +149,7 @@ def precision(preds: list, targets: list) -> dict: ...@@ -149,7 +149,7 @@ def precision(preds: list, targets: list) -> dict:
def recall(preds: list, targets: list) -> dict: def recall(preds: list, targets: list) -> dict:
"""Calculate Recall Metric """Calculate Recall Metric
(design for classifiction and extraction categories) (design for classification and extraction categories)
Calculating recall by counting the number of overlaps between the preds and target. Calculating recall by counting the number of overlaps between the preds and target.
""" """
...@@ -160,7 +160,7 @@ def recall(preds: list, targets: list) -> dict: ...@@ -160,7 +160,7 @@ def recall(preds: list, targets: list) -> dict:
def F1_score(preds: list, targets: list) -> dict: def F1_score(preds: list, targets: list) -> dict:
"""Calculate F1-score Metric """Calculate F1-score Metric
(design for classifiction and extraction categories) (design for classification and extraction categories)
Calculating f1-score by counting the number of overlaps between the preds and target. Calculating f1-score by counting the number of overlaps between the preds and target.
""" """
......
...@@ -206,7 +206,7 @@ class Broadcaster(BmmTransform): ...@@ -206,7 +206,7 @@ class Broadcaster(BmmTransform):
# e.g. [1, 2, 4] x [4, 4, 8] -> [4, 2, 8] # e.g. [1, 2, 4] x [4, 4, 8] -> [4, 2, 8]
# the dim 0 of [1, 2, 4] is multiplied to 4 # the dim 0 of [1, 2, 4] is multiplied to 4
tensor_shape[dim_idx] = 1 tensor_shape[dim_idx] = 1
elif broadcast_type == BroadcastType.PADDDING: elif broadcast_type == BroadcastType.PADDING:
# if the dim is padded # if the dim is padded
# we remove its sharding # we remove its sharding
tensor_shape[dim_idx] = None tensor_shape[dim_idx] = None
......
...@@ -21,7 +21,7 @@ __all__ = [ ...@@ -21,7 +21,7 @@ __all__ = [
class BroadcastType(Enum): class BroadcastType(Enum):
EQUAL = auto() EQUAL = auto()
PADDDING = auto() PADDING = auto()
MULTIPLE = auto() MULTIPLE = auto()
...@@ -69,18 +69,18 @@ def get_broadcast_dim_info(logical_shape, physical_shape): ...@@ -69,18 +69,18 @@ def get_broadcast_dim_info(logical_shape, physical_shape):
for i in range(logical_num_dims): for i in range(logical_num_dims):
# get the trailing dim size # get the trailing dim size
logical_dim_idx = logical_num_dims - i - 1 logical_dim_idx = logical_num_dims - i - 1
phyiscal_dim_idx = physical_num_dims - i - 1 physical_dim_idx = physical_num_dims - i - 1
logical_dim_size = logical_shape[logical_dim_idx] logical_dim_size = logical_shape[logical_dim_idx]
if phyiscal_dim_idx >= 0: if physical_dim_idx >= 0:
physical_dim_size = physical_shape[phyiscal_dim_idx] physical_dim_size = physical_shape[physical_dim_idx]
if physical_dim_size == logical_dim_size: if physical_dim_size == logical_dim_size:
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.EQUAL logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.EQUAL
elif physical_dim_size == 1 and physical_dim_size != logical_dim_size: elif physical_dim_size == 1 and physical_dim_size != logical_dim_size:
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.MULTIPLE logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.MULTIPLE
else: else:
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.PADDDING logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.PADDING
return logical_dim_broadcast_info return logical_dim_broadcast_info
...@@ -117,7 +117,7 @@ def recover_sharding_spec_for_broadcast_shape(logical_sharding_spec: ShardingSpe ...@@ -117,7 +117,7 @@ def recover_sharding_spec_for_broadcast_shape(logical_sharding_spec: ShardingSpe
for shape_dim, mesh_dim in logical_dim_partition.items(): for shape_dim, mesh_dim in logical_dim_partition.items():
logical_broadcast_type = logical_dim_broadcast_info[shape_dim] logical_broadcast_type = logical_dim_broadcast_info[shape_dim]
if logical_broadcast_type == BroadcastType.PADDDING or logical_broadcast_type == BroadcastType.MULTIPLE: if logical_broadcast_type == BroadcastType.PADDING or logical_broadcast_type == BroadcastType.MULTIPLE:
removed_dims.extend(mesh_dim) removed_dims.extend(mesh_dim)
else: else:
# get the corresponding physical dim # get the corresponding physical dim
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment