"docs/vscode:/vscode.git/clone" did not exist on "6f88bd9390a3edce1dfec025a526d6c2849effa4"
Unverified Commit 62d3272e authored by kouml's avatar kouml Committed by GitHub
Browse files

removed session_params from deepspeed_constants.py (#162)

* remove session_params in deepspeed_constants.py

* add constants info at README.md
parent 1496247a
......@@ -278,8 +278,9 @@ the `step` value is stored as part of the `client_sd`.
## DeepSpeed Configuration
DeepSpeed features can be enabled, disabled, or configured using a config JSON
file that should be specified as `args.deepspeed_config`. A sample config file
is shown below. For a full set of features see [core API
file that should be specified as `args.deepspeed_config`. Available configs are at
[deepspeed/pt/deepspeed_constants.py](deepspeed/pt/deepspeed_constants.py).
A sample config file is shown below. For a full set of features see [core API
doc](https://deepspeed.readthedocs.io/en/latest/).
```json
......
......@@ -46,12 +46,10 @@ STEPS_PER_PRINT_DEFAULT = 10
# Batch size for one training step. This is used when the
# TRAIN_BATCH_SIZE cannot fit in GPU memory to determine
# the number of gradient accumulation steps. By default, this
# is set to None. Users can configure in params.json as below example:
# is set to None. Users can configure in ds_config.json as below example:
TRAIN_MICRO_BATCH_SIZE_PER_GPU = '''
TRAIN_MICRO_BATCH_SIZE_PER_GPU is defined in this format:
"session_params": {
"train_micro_batch_size_per_gpu": 1
}
"train_micro_batch_size_per_gpu": 1
'''
TRAIN_MICRO_BATCH_SIZE_PER_GPU = "train_micro_batch_size_per_gpu"
TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
......@@ -60,12 +58,10 @@ TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
# Gradient Accumulation
#########################################
# Gradient accumulation feature. By default, this feature is not enabled.
# Users have to configure in params.json in section "session_params" as below example:
# Users can configure in ds_config.json as below example:
GRADIENT_ACCUMULATION_FORMAT = '''
Gradient Accumulation should be of the format:
"session_params": {
"gradient_accumulation_steps": 1
}
"gradient_accumulation_steps": 1
'''
GRADIENT_ACCUMULATION_STEPS = "gradient_accumulation_steps"
GRADIENT_ACCUMULATION_STEPS_DEFAULT = None
......@@ -78,18 +74,16 @@ SPARSE_GRADIENTS_DEFAULT = False
# FP16 support
#########################################
# FP16 feature. By default, this feature is not enabled.
# Users have to configure in params.json in section "session_params" as below example:
# Users can configure in ds_config.json as below example:
FP16_FORMAT = '''
FP16 parameters should be of the format:
"session_params": {
"fp16": {
"enabled": true,
"loss_scale": 0,
"initial_scale_power": 32,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
}
"fp16": {
"enabled": true,
"loss_scale": 0,
"initial_scale_power": 32,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
}
'''
FP16 = "fp16"
......@@ -121,12 +115,10 @@ FP16_MIN_LOSS_SCALE_DEFAULT = 1
# Gradient clipping
#########################################
# Gradient clipping. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
GRADIENT_CLIPPING_FORMAT = '''
Gradient clipping should be enabled as:
"session_params": {
"gradient_clipping": 1.0
}
"gradient_clipping": 1.0
'''
GRADIENT_CLIPPING = 'gradient_clipping'
GRADIENT_CLIPPING_DEFAULT = 0.
......@@ -135,13 +127,11 @@ GRADIENT_CLIPPING_DEFAULT = 0.
# ZeRO optimization
#########################################
# ZeRO optimization. By default, this optimization is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
ZERO_FORMAT = '''
ZeRO optimization should be enabled as:
"session_params": {
"zero_optimization": true,
"zero_all_gather_size": 200
}
"zero_optimization": true,
"zero_all_gather_size": 200
'''
ZERO_OPTIMIZATION = 'zero_optimization'
ZERO_OPTIMIZATION_DEFAULT = False
......@@ -153,12 +143,10 @@ ALLGATHER_SIZE_DEFAULT = 500000000
# FP32 AllReduce
#########################################
# FP32 All reduce. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
FP32_ALLREDUCE_FORMAT = '''
FP32 Allreduce should be enabled as:
"session_params": {
"fp32_allreduce": true
}
"fp32_allreduce": true
'''
FP32_ALLREDUCE = "fp32_allreduce"
FP32_ALLREDUCE_DEFAULT = False
......@@ -167,12 +155,10 @@ FP32_ALLREDUCE_DEFAULT = False
# Scale gradients before allreduce
#########################################
# Prescale gradients. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
PRESCALE_GRADIENTS_FORMAT = '''
Gradient prescaling should be enabled as:
"session_params": {
"prescale_gradients": true
}
"prescale_gradients": true
'''
PRESCALE_GRADIENTS = "prescale_gradients"
PRESCALE_GRADIENTS_DEFAULT = False
......@@ -181,12 +167,10 @@ PRESCALE_GRADIENTS_DEFAULT = False
# Disable AllGather
#########################################
# Disable AllGather. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
DISABLE_ALLGATHER_FORMAT = '''
Disable AllGather should be enabled as:
"session_params": {
"disable_allgather": true
}
"disable_allgather": true
'''
DISABLE_ALLGATHER = "disable_allgather"
DISABLE_ALLGATHER_DEFAULT = False
......@@ -195,12 +179,10 @@ DISABLE_ALLGATHER_DEFAULT = False
# Dump DeepSpeed state
#########################################
# Dump State. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
DUMP_STATE_FORMAT = '''
Dump state should be enabled as:
"session_params": {
"dump_state": true
}
"dump_state": true
'''
DUMP_STATE = 'dump_state'
DUMP_STATE_DEFAULT = False
......@@ -209,12 +191,10 @@ DUMP_STATE_DEFAULT = False
# Vocabulary size
#########################################
# Vocabulary size.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
VOCABULARY_SIZE_FORMAT = '''
Vocabulary size can be specified as:
"session_params": {
"vocabulary_size": 1024
}
"vocabulary_size": 1024
'''
VOCABULARY_SIZE = 'vocabulary_size'
VOCABULARY_SIZE_DEFAULT = None
......@@ -223,12 +203,10 @@ VOCABULARY_SIZE_DEFAULT = None
# Wall clock breakdown
#########################################
# Wall clock breakdown. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
WALL_CLOCK_BREAKDOWN_FORMAT = '''
Wall clock breakdown should be enabled as:
"session_params": {
"wall_clock_breakdown": true
}
"wall_clock_breakdown": true
'''
WALL_CLOCK_BREAKDOWN = 'wall_clock_breakdown'
WALL_CLOCK_BREAKDOWN_DEFAULT = False
......@@ -237,15 +215,13 @@ WALL_CLOCK_BREAKDOWN_DEFAULT = False
# Tensorboard
#########################################
# Tensorboard. By default, this feature is not enabled.
# Users have to configure params.json as below example:
# Users can configure in ds_config.json as below example:
TENSORBOARD_FORMAT = '''
Tensorboard can be specified as:
"session_params": {
"tensorboard": {
"enabled": true,
"output_path": "/home/myname/foo",
"job_name": "model_lr2e-5_epoch3_seed2_seq64"
}
"tensorboard": {
"enabled": true,
"output_path": "/home/myname/foo",
"job_name": "model_lr2e-5_epoch3_seed2_seq64"
}
'''
TENSORBOARD = "tensorboard"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment