"docs/vscode:/vscode.git/clone" did not exist on "9fc661f8b325142323b1925109ccf87ebb5904f2"
Unverified commit 62d3272e, authored by kouml and committed by GitHub
Browse files

removed session_params from deepspeed_constants.py (#162)

* remove session_params in deepspeed_constants.py

* add constants info at README.md
parent 1496247a
...@@ -278,8 +278,9 @@ the `step` value is stored as part of the `client_sd`. ...@@ -278,8 +278,9 @@ the `step` value is stored as part of the `client_sd`.
## DeepSpeed Configuration ## DeepSpeed Configuration
DeepSpeed features can be enabled, disabled, or configured using a config JSON DeepSpeed features can be enabled, disabled, or configured using a config JSON
file that should be specified as `args.deepspeed_config`. A sample config file file that should be specified as `args.deepspeed_config`. Available configs are at
is shown below. For a full set of features see [core API [deepspeed/pt/deepspeed_constants.py](deepspeed/pt/deepspeed_constants.py).
A sample config file is shown below. For a full set of features see [core API
doc](https://deepspeed.readthedocs.io/en/latest/). doc](https://deepspeed.readthedocs.io/en/latest/).
```json ```json
......
...@@ -46,12 +46,10 @@ STEPS_PER_PRINT_DEFAULT = 10 ...@@ -46,12 +46,10 @@ STEPS_PER_PRINT_DEFAULT = 10
# Batch size for one training step. This is used when the # Batch size for one training step. This is used when the
# TRAIN_BATCH_SIZE cannot fit in GPU memory to determine # TRAIN_BATCH_SIZE cannot fit in GPU memory to determine
# the number of gradient accumulation steps. By default, this # the number of gradient accumulation steps. By default, this
# is set to None. Users can configure in params.json as below example: # is set to None. Users can configure in ds_config.json as below example:
TRAIN_MICRO_BATCH_SIZE_PER_GPU = ''' TRAIN_MICRO_BATCH_SIZE_PER_GPU = '''
TRAIN_MICRO_BATCH_SIZE_PER_GPU is defined in this format: TRAIN_MICRO_BATCH_SIZE_PER_GPU is defined in this format:
"session_params": { "train_micro_batch_size_per_gpu": 1
"train_micro_batch_size_per_gpu": 1
}
''' '''
TRAIN_MICRO_BATCH_SIZE_PER_GPU = "train_micro_batch_size_per_gpu" TRAIN_MICRO_BATCH_SIZE_PER_GPU = "train_micro_batch_size_per_gpu"
TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
...@@ -60,12 +58,10 @@ TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None ...@@ -60,12 +58,10 @@ TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
# Gradient Accumulation # Gradient Accumulation
######################################### #########################################
# Gradient accumulation feature. By default, this feature is not enabled. # Gradient accumulation feature. By default, this feature is not enabled.
# Users have to configure in params.json in section "session_params" as below example: # Users can configure in ds_config.json as below example:
GRADIENT_ACCUMULATION_FORMAT = ''' GRADIENT_ACCUMULATION_FORMAT = '''
Gradient Accumulation should be of the format: Gradient Accumulation should be of the format:
"session_params": { "gradient_accumulation_steps": 1
"gradient_accumulation_steps": 1
}
''' '''
GRADIENT_ACCUMULATION_STEPS = "gradient_accumulation_steps" GRADIENT_ACCUMULATION_STEPS = "gradient_accumulation_steps"
GRADIENT_ACCUMULATION_STEPS_DEFAULT = None GRADIENT_ACCUMULATION_STEPS_DEFAULT = None
...@@ -78,18 +74,16 @@ SPARSE_GRADIENTS_DEFAULT = False ...@@ -78,18 +74,16 @@ SPARSE_GRADIENTS_DEFAULT = False
# FP16 support # FP16 support
######################################### #########################################
# FP16 feature. By default, this feature is not enabled. # FP16 feature. By default, this feature is not enabled.
# Users have to configure in params.json in section "session_params" as below example: # Users can configure in ds_config.json as below example:
FP16_FORMAT = ''' FP16_FORMAT = '''
FP16 parameters should be of the format: FP16 parameters should be of the format:
"session_params": { "fp16": {
"fp16": {
"enabled": true, "enabled": true,
"loss_scale": 0, "loss_scale": 0,
"initial_scale_power": 32, "initial_scale_power": 32,
"loss_scale_window": 1000, "loss_scale_window": 1000,
"hysteresis": 2, "hysteresis": 2,
"min_loss_scale": 1 "min_loss_scale": 1
}
} }
''' '''
FP16 = "fp16" FP16 = "fp16"
...@@ -121,12 +115,10 @@ FP16_MIN_LOSS_SCALE_DEFAULT = 1 ...@@ -121,12 +115,10 @@ FP16_MIN_LOSS_SCALE_DEFAULT = 1
# Gradient clipping # Gradient clipping
######################################### #########################################
# Gradient clipping. By default, this feature is not enabled. # Gradient clipping. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
GRADIENT_CLIPPING_FORMAT = ''' GRADIENT_CLIPPING_FORMAT = '''
Dump state should be enabled as: Dump state should be enabled as:
"session_params": { "gradient_clipping": 1.0
"gradient_clipping": 1.0
}
''' '''
GRADIENT_CLIPPING = 'gradient_clipping' GRADIENT_CLIPPING = 'gradient_clipping'
GRADIENT_CLIPPING_DEFAULT = 0. GRADIENT_CLIPPING_DEFAULT = 0.
...@@ -135,13 +127,11 @@ GRADIENT_CLIPPING_DEFAULT = 0. ...@@ -135,13 +127,11 @@ GRADIENT_CLIPPING_DEFAULT = 0.
# ZeRO optimization # ZeRO optimization
######################################### #########################################
# ZeRO optimization. By default, this optimization is not enabled. # ZeRO optimization. By default, this optimization is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
ZERO_FORMAT = ''' ZERO_FORMAT = '''
ZeRO optimization should be enabled as: ZeRO optimization should be enabled as:
"session_params": { "zero_optimization": true,
"zero_optimization": true, "zero_all_gather_size": 200
"zero_all_gather_size": 200
}
''' '''
ZERO_OPTIMIZATION = 'zero_optimization' ZERO_OPTIMIZATION = 'zero_optimization'
ZERO_OPTIMIZATION_DEFAULT = False ZERO_OPTIMIZATION_DEFAULT = False
...@@ -153,12 +143,10 @@ ALLGATHER_SIZE_DEFAULT = 500000000 ...@@ -153,12 +143,10 @@ ALLGATHER_SIZE_DEFAULT = 500000000
# FP32 AllReduce # FP32 AllReduce
######################################### #########################################
# FP32 All reduce. By default, this feature is not enabled. # FP32 All reduce. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
FP32_ALLREDUCE_FORMAT = ''' FP32_ALLREDUCE_FORMAT = '''
FP32 Allreduce should be enabled as: FP32 Allreduce should be enabled as:
"session_params": { "fp32_allreduce": true
"fp32_allreduce": true
}
''' '''
FP32_ALLREDUCE = "fp32_allreduce" FP32_ALLREDUCE = "fp32_allreduce"
FP32_ALLREDUCE_DEFAULT = False FP32_ALLREDUCE_DEFAULT = False
...@@ -167,12 +155,10 @@ FP32_ALLREDUCE_DEFAULT = False ...@@ -167,12 +155,10 @@ FP32_ALLREDUCE_DEFAULT = False
# Scale gradients before allreduce # Scale gradients before allreduce
######################################### #########################################
# Prescale gradients. By default, this feature is not enabled. # Prescale gradients. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
PRESCALE_GRADIENTS_FORMAT = ''' PRESCALE_GRADIENTS_FORMAT = '''
Gradient prescaling should be enabled as: Gradient prescaling should be enabled as:
"session_params": { "prescale_gradients": true
"prescale_gradients": true
}
''' '''
PRESCALE_GRADIENTS = "prescale_gradients" PRESCALE_GRADIENTS = "prescale_gradients"
PRESCALE_GRADIENTS_DEFAULT = False PRESCALE_GRADIENTS_DEFAULT = False
...@@ -181,12 +167,10 @@ PRESCALE_GRADIENTS_DEFAULT = False ...@@ -181,12 +167,10 @@ PRESCALE_GRADIENTS_DEFAULT = False
# Disable AllGather # Disable AllGather
######################################### #########################################
# Disable AllGather. By default, this feature is not enabled. # Disable AllGather. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
DISABLE_ALLGATHER_FORMAT = ''' DISABLE_ALLGATHER_FORMAT = '''
Disable AllGather should be enabled as: Disable AllGather should be enabled as:
"session_params": { "disable_allgather": true
"disable_allgather": true
}
''' '''
DISABLE_ALLGATHER = "disable_allgather" DISABLE_ALLGATHER = "disable_allgather"
DISABLE_ALLGATHER_DEFAULT = False DISABLE_ALLGATHER_DEFAULT = False
...@@ -195,12 +179,10 @@ DISABLE_ALLGATHER_DEFAULT = False ...@@ -195,12 +179,10 @@ DISABLE_ALLGATHER_DEFAULT = False
# Dump DeepSpeed state # Dump DeepSpeed state
######################################### #########################################
# Dump State. By default, this feature is not enabled. # Dump State. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
DUMP_STATE_FORMAT = ''' DUMP_STATE_FORMAT = '''
Dump state should be enabled as: Dump state should be enabled as:
"session_params": { "dump_state": true
"dump_state": true
}
''' '''
DUMP_STATE = 'dump_state' DUMP_STATE = 'dump_state'
DUMP_STATE_DEFAULT = False DUMP_STATE_DEFAULT = False
...@@ -209,12 +191,10 @@ DUMP_STATE_DEFAULT = False ...@@ -209,12 +191,10 @@ DUMP_STATE_DEFAULT = False
# Vocabulary size # Vocabulary size
######################################### #########################################
# Vocabulary size. # Vocabulary size.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
VOCABULARY_SIZE_FORMAT = ''' VOCABULARY_SIZE_FORMAT = '''
Vocabulary size can be specified as: Vocabulary size can be specified as:
"session_params": { "vocabulary_size": 1024
"vocabulary_size": 1024
}
''' '''
VOCABULARY_SIZE = 'vocabulary_size' VOCABULARY_SIZE = 'vocabulary_size'
VOCABULARY_SIZE_DEFAULT = None VOCABULARY_SIZE_DEFAULT = None
...@@ -223,12 +203,10 @@ VOCABULARY_SIZE_DEFAULT = None ...@@ -223,12 +203,10 @@ VOCABULARY_SIZE_DEFAULT = None
# Wall clock breakdown # Wall clock breakdown
######################################### #########################################
# Wall clock breakdown. By default, this feature is not enabled. # Wall clock breakdown. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
WALL_CLOCK_BREAKDOWN_FORMAT = ''' WALL_CLOCK_BREAKDOWN_FORMAT = '''
Wall block breakdown should be enabled as: Wall block breakdown should be enabled as:
"session_params": { "wall_clock_breakdown": true
"wall_clock_breakdown": true
}
''' '''
WALL_CLOCK_BREAKDOWN = 'wall_clock_breakdown' WALL_CLOCK_BREAKDOWN = 'wall_clock_breakdown'
WALL_CLOCK_BREAKDOWN_DEFAULT = False WALL_CLOCK_BREAKDOWN_DEFAULT = False
...@@ -237,15 +215,13 @@ WALL_CLOCK_BREAKDOWN_DEFAULT = False ...@@ -237,15 +215,13 @@ WALL_CLOCK_BREAKDOWN_DEFAULT = False
# Tensorboard # Tensorboard
######################################### #########################################
# Tensorboard. By default, this feature is not enabled. # Tensorboard. By default, this feature is not enabled.
# Users have to configure params.json as below example: # Users can configure in ds_config.json as below example:
TENSORBOARD_FORMAT = ''' TENSORBOARD_FORMAT = '''
Tensorboard can be specified as: Tensorboard can be specified as:
"session_params": { "tensorboard": {
"tensorboard": {
"enabled": true, "enabled": true,
"output_path": "/home/myname/foo", "output_path": "/home/myname/foo",
"job_name": "model_lr2e-5_epoch3_seed2_seq64" "job_name": "model_lr2e-5_epoch3_seed2_seq64"
}
} }
''' '''
TENSORBOARD = "tensorboard" TENSORBOARD = "tensorboard"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment