removed session_params from deepspeed_constants.py (#162)

* remove session_params in deepspeed_constants.py
* add constants info at README.md

removed session_params from deepspeed_constants.py (#162)
* remove session_params in deepspeed_constants.py
* add constants info at README.md
62d3272e · kouml · GitHub · 1496247a · 62d3272e · 62d3272e
Unverified Commit 62d3272e authored Mar 23, 2020 by kouml Committed by GitHub Mar 22, 2020
Show whitespace changes
Inline Side-by-side

Showing with 37 additions and 60 deletions

README.md README.md +3 -2

deepspeed/pt/deepspeed_constants.py deepspeed/pt/deepspeed_constants.py +34 -58

No files found.
--- a/README.md
+++ b/README.md
@@ -278,8 +278,9 @@ the `step` value is stored as part of the `client_sd`.
 ## DeepSpeed Configuration
 DeepSpeed features can be enabled, disabled, or configured using a config JSON
-file that should be specified as `args.deepspeed_config`. A sample config file
+file that should be specified as `args.deepspeed_config`. Available configs are at
-is shown below. For a full set of features see [core API
+[deepspeed/pt/deepspeed_constants.py](deepspeed/pt/deepspeed_constants.py).
+A sample config file is shown below. For a full set of features see [core API
 doc](https://deepspeed.readthedocs.io/en/latest/).
 ```json

--- a/deepspeed/pt/deepspeed_constants.py
+++ b/deepspeed/pt/deepspeed_constants.py
@@ -46,12 +46,10 @@ STEPS_PER_PRINT_DEFAULT = 10
 # Batch size for one training step. This is used when the
 # TRAIN_BATCH_SIZE cannot fit in GPU memory to determine
 # the number of gradient accumulation steps. By default, this
-# is set to None. Users can configure in params.json as below example:
+# is set to None. Users can configure in ds_config.json as below example:
 TRAIN_MICRO_BATCH_SIZE_PER_GPU = '''
 TRAIN_MICRO_BATCH_SIZE_PER_GPU is defined in this format:
-"session_params": {
+"train_micro_batch_size_per_gpu": 1
-  "train_micro_batch_size_per_gpu": 1
-}
 '''
 TRAIN_MICRO_BATCH_SIZE_PER_GPU = "train_micro_batch_size_per_gpu"
 TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
@@ -60,12 +58,10 @@ TRAIN_MICRO_BATCH_SIZE_PER_GPU_DEFAULT = None
 # Gradient Accumulation
 #########################################
 # Gradient accumulation feature. By default, this feature is not enabled.
-# Users have to configure in params.json in section "session_params" as below example:
+# Users can configure in ds_config.json as below example:
 GRADIENT_ACCUMULATION_FORMAT = '''
 Gradient Accumulation should be of the format:
-"session_params": {
+"gradient_accumulation_steps": 1
-  "gradient_accumulation_steps": 1
-}
 '''
 GRADIENT_ACCUMULATION_STEPS = "gradient_accumulation_steps"
 GRADIENT_ACCUMULATION_STEPS_DEFAULT = None
@@ -78,18 +74,16 @@ SPARSE_GRADIENTS_DEFAULT = False
 # FP16 support
 #########################################
 # FP16 feature. By default, this feature is not enabled.
-# Users have to configure in params.json in section "session_params" as below example:
+# Users can configure in ds_config.json as below example:
 FP16_FORMAT = '''
 FP16 parameters should be of the format:
-"session_params": {
+"fp16": {
-  "fp16": {
  "enabled": true,
  "loss_scale": 0,
  "initial_scale_power": 32,
  "loss_scale_window": 1000,
  "hysteresis": 2,
  "min_loss_scale": 1
-  }
 }
 '''
 FP16 = "fp16"
@@ -121,12 +115,10 @@ FP16_MIN_LOSS_SCALE_DEFAULT = 1
 # Gradient clipping
 #########################################
 # Gradient clipping. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 GRADIENT_CLIPPING_FORMAT = '''
 Dump state should be enabled as:
-"session_params": {
+"gradient_clipping": 1.0
-  "gradient_clipping": 1.0
-}
 '''
 GRADIENT_CLIPPING = 'gradient_clipping'
 GRADIENT_CLIPPING_DEFAULT = 0.
@@ -135,13 +127,11 @@ GRADIENT_CLIPPING_DEFAULT = 0.
 # ZeRO optimization
 #########################################
 # ZeRO optimization. By default, this optimization is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 ZERO_FORMAT = '''
 ZeRO optimization should be enabled as:
-"session_params": {
+"zero_optimization": true,
-  "zero_optimization": true,
+"zero_all_gather_size": 200
-  "zero_all_gather_size": 200
-}
 '''
 ZERO_OPTIMIZATION = 'zero_optimization'
 ZERO_OPTIMIZATION_DEFAULT = False
@@ -153,12 +143,10 @@ ALLGATHER_SIZE_DEFAULT = 500000000
 # FP32 AllReduce
 #########################################
 # FP32 All reduce. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 FP32_ALLREDUCE_FORMAT = '''
 FP32 Allreduce should be enabled as:
-"session_params": {
+"fp32_allreduce": true
-  "fp32_allreduce": true
-}
 '''
 FP32_ALLREDUCE = "fp32_allreduce"
 FP32_ALLREDUCE_DEFAULT = False
@@ -167,12 +155,10 @@ FP32_ALLREDUCE_DEFAULT = False
 # Scale gradients before allreduce
 #########################################
 # Prescale gradients. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 PRESCALE_GRADIENTS_FORMAT = '''
 Gradient prescaling should be enabled as:
-"session_params": {
+"prescale_gradients": true
-  "prescale_gradients": true
-}
 '''
 PRESCALE_GRADIENTS = "prescale_gradients"
 PRESCALE_GRADIENTS_DEFAULT = False
@@ -181,12 +167,10 @@ PRESCALE_GRADIENTS_DEFAULT = False
 # Disable AllGather
 #########################################
 # Disable AllGather. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 DISABLE_ALLGATHER_FORMAT = '''
 Disable AllGather should be enabled as:
-"session_params": {
+"disable_allgather": true
-  "disable_allgather": true
-}
 '''
 DISABLE_ALLGATHER = "disable_allgather"
 DISABLE_ALLGATHER_DEFAULT = False
@@ -195,12 +179,10 @@ DISABLE_ALLGATHER_DEFAULT = False
 # Dump DeepSpeed state
 #########################################
 # Dump State. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 DUMP_STATE_FORMAT = '''
 Dump state should be enabled as:
-"session_params": {
+"dump_state": true
-  "dump_state": true
-}
 '''
 DUMP_STATE = 'dump_state'
 DUMP_STATE_DEFAULT = False
@@ -209,12 +191,10 @@ DUMP_STATE_DEFAULT = False
 # Vocabulary size
 #########################################
 # Vocabulary size.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 VOCABULARY_SIZE_FORMAT = '''
 Vocabulary size can be specified as:
-"session_params": {
+"vocabulary_size": 1024
-  "vocabulary_size": 1024
-}
 '''
 VOCABULARY_SIZE = 'vocabulary_size'
 VOCABULARY_SIZE_DEFAULT = None
@@ -223,12 +203,10 @@ VOCABULARY_SIZE_DEFAULT = None
 # Wall clock breakdown
 #########################################
 # Wall clock breakdown. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 WALL_CLOCK_BREAKDOWN_FORMAT = '''
 Wall block breakdown should be enabled as:
-"session_params": {
+"wall_clock_breakdown": true
-  "wall_clock_breakdown": true
-}
 '''
 WALL_CLOCK_BREAKDOWN = 'wall_clock_breakdown'
 WALL_CLOCK_BREAKDOWN_DEFAULT = False
@@ -237,15 +215,13 @@ WALL_CLOCK_BREAKDOWN_DEFAULT = False
 # Tensorboard
 #########################################
 # Tensorboard. By default, this feature is not enabled.
-# Users have to configure params.json as below example:
+# Users can configure in ds_config.json as below example:
 TENSORBOARD_FORMAT = '''
 Tensorboard can be specified as:
-"session_params": {
+"tensorboard": {
-  "tensorboard": {
  "enabled": true,
  "output_path": "/home/myname/foo",
  "job_name": "model_lr2e-5_epoch3_seed2_seq64"
-  }
 }
 '''
 TENSORBOARD = "tensorboard"