# Site navigation data.
#
# Each entry is a mapping with a display `title` and a target `url`. URLs that
# start with `/` are site-relative; full `https://` URLs point off-site.
# Internal URLs are written with a trailing slash throughout.
#
# NOTE(review): nesting below was reconstructed from the `children:` keys and
# URL prefixes — confirm against the rendered menus.

# Primary menu (presumably the top navigation bar — confirm against the theme).
main:
  - title: 'Getting Started'
    url: /getting-started/
  - title: 'Blog'
    url: /posts/
  - title: 'Tutorials'
    url: /tutorials/
  - title: 'Documentation'
    url: https://deepspeed.readthedocs.io/
  - title: 'GitHub'
    url: https://github.com/microsoft/DeepSpeed

# Secondary menu (presumably the left-hand sidebar — confirm against the
# theme). Entries may carry a `children` list of sub-links.
lnav:
  - title: 'Training'
    url: /training/
  - title: 'Inference'
    url: /inference/
  - title: 'Compression'
    url: /compression/
  - title: 'Getting Started'
    url: /getting-started/

  # Children are in-page anchors into the config-json reference page.
  - title: 'ds_config'
    url: /docs/config-json/
    children:
      - title: 'Autotuning'
        url: /docs/config-json/#autotuning
      - title: 'Batch size'
        url: /docs/config-json/#batch-size-related-parameters
      - title: 'Optimizer'
        url: /docs/config-json/#optimizer-parameters
      - title: 'FP16'
        url: /docs/config-json/#fp16-training-options
      - title: 'BFLOAT16'
        url: /docs/config-json/#bfloat16-training-options
      - title: 'ZeRO optimizations'
        url: /docs/config-json/#zero-optimizations-for-fp16-training
      - title: 'Logging'
        url: /docs/config-json/#logging
      - title: 'Flops Profiler'
        url: /docs/config-json/#flops-profiler
      - title: 'Monitoring'
        url: /docs/config-json/#monitoring-module-tensorboard-wandb-csv
      - title: 'Communication Logging'
        url: /docs/config-json/#communication-logging
      - title: 'Model Compression'
        url: /docs/config-json/#compression
      - title: 'Data Efficiency'
        url: /docs/config-json/#data-efficiency

  - title: 'Tutorials'
    url: /tutorials/
    children:
      - title: 'Getting started'
        url: /getting-started/
      - title: 'Getting started on Azure'
        url: /tutorials/azure/
      - title: 'Automatic Tensor Parallelism'
        url: /tutorials/automatic-tensor-parallelism/
      - title: 'Autotuning'
        url: /tutorials/autotuning/
      - title: 'BingBertSQuAD Fine-tuning'
        url: /tutorials/bert-finetuning/
      - title: 'BERT Pre-training'
        url: /tutorials/bert-pretraining/
      - title: 'CIFAR-10'
        url: /tutorials/cifar-10/
      - title: 'Curriculum Learning'
        url: /tutorials/curriculum-learning/
      - title: 'Data Efficiency'
        url: /tutorials/data-efficiency/
      - title: 'Flops Profiler'
        url: /tutorials/flops-profiler/
      - title: 'PyTorch Profiler'
        url: /tutorials/pytorch-profiler/
      - title: 'GAN'
        url: /tutorials/gan/
      - title: 'Inference'
        url: /tutorials/inference-tutorial/
      - title: 'Learning Rate Range Test'
        url: /tutorials/lrrt/
      - title: 'Megatron-LM GPT2'
        url: /tutorials/megatron/
      - title: 'Mixture-of-Experts (MoE)'
        url: /tutorials/mixture-of-experts/
      - title: 'MoE for NLG'
        url: /tutorials/mixture-of-experts-nlg/
      - title: 'MoE Inference'
        url: /tutorials/mixture-of-experts-inference/
      - title: 'Model Compression'
        url: /tutorials/model-compression/
      - title: 'Mixture-of-Quantization'
        url: /tutorials/MoQ-tutorial/
      # Trailing slash added on the next two URLs to match every other
      # internal link in this file.
      - title: 'Monitoring'
        url: /tutorials/monitor/
      - title: 'Communication Logging'
        url: /tutorials/comms-logging/
      - title: 'One-Cycle Schedule'
        url: /tutorials/one-cycle/
      - title: 'One-Bit Adam'
        url: /tutorials/onebit-adam/
      - title: 'Zero-One Adam'
        url: /tutorials/zero-one-adam/
      - title: 'One-Bit LAMB'
        url: /tutorials/onebit-lamb/
      - title: 'Pipeline Parallelism'
        url: /tutorials/pipeline/
      - title: 'Progressive Layer Dropping'
        url: /tutorials/progressive_layer_dropping/
      - title: 'Sparse Attention'
        url: /tutorials/sparse-attention/
      - title: 'Transformer Kernel'
        url: /tutorials/transformer_kernel/
      - title: 'ZeRO-Offload'
        url: /tutorials/zero-offload/
      - title: 'ZeRO'
        url: /tutorials/zero/

  - title: 'Contributing'
    url: /contributing/