Commit c25a91b6 authored by aiss
Browse files

Merge branch 'ds-v0.9.2-rocm' into 'main'

Ds v0.9.2 rocm

See merge request dcutoolkit/deeplearing/deepspeed!2
parents d1596c94 af82b300
""" # Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
...@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'} ...@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'}
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--log_dir', parser.add_argument('--log_dir', type=str, required=True, help='Folder of statistics logs')
type=str,
required=True,
help='Folder of statistics logs')
parser.add_argument('--metric', parser.add_argument('--metric',
type=str, type=str,
...@@ -125,10 +123,7 @@ def get_results(log_files, metric): ...@@ -125,10 +123,7 @@ def get_results(log_files, metric):
def get_sorted_results(log_dir, metric): def get_sorted_results(log_dir, metric):
log_files = [ log_files = [f for f in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir, f))]
f for f in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir,
f))
]
log_files_path = [os.path.join(log_dir, f) for f in log_files] log_files_path = [os.path.join(log_dir, f) for f in log_files]
results = get_results(log_files_path, metric) results = get_results(log_files_path, metric)
......
'''Copyright The Microsoft DeepSpeed Team''' # Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
SCRIPT_PREFIX = '_aio_bench' SCRIPT_PREFIX = '_aio_bench'
WRITE_OP_DESC = 'write' WRITE_OP_DESC = 'write'
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
...@@ -20,46 +21,29 @@ def parse_arguments(): ...@@ -20,46 +21,29 @@ def parse_arguments():
parser.add_argument('--write_file', type=str, default=None, help='Write file.') parser.add_argument('--write_file', type=str, default=None, help='Write file.')
parser.add_argument('--write_size', parser.add_argument('--write_size', type=str, default=None, help='Number of bytes to write.')
type=str,
default=None,
help='Number of bytes to write.')
parser.add_argument('--block_size', type=str, default='1M', help='I/O block size.') parser.add_argument('--block_size', type=str, default='1M', help='I/O block size.')
parser.add_argument('--queue_depth', type=int, default=32, help='I/O queue depth.') parser.add_argument('--queue_depth', type=int, default=32, help='I/O queue depth.')
parser.add_argument('--threads', parser.add_argument('--threads', type=int, default=1, help='Thread parallelism count.')
type=int,
default=1,
help='Thread parallelism count.')
parser.add_argument( parser.add_argument('--single_submit',
'--single_submit', action='store_true',
action='store_true', help='Submit I/O requests in singles (default is submit queue_depth amount at once.).')
help=
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
parser.add_argument('--overlap_events', parser.add_argument('--overlap_events',
action='store_true', action='store_true',
help='Overlap I/O submission and completion requests.') help='Overlap I/O submission and completion requests.')
parser.add_argument('--validate', parser.add_argument('--validate', action='store_true', help='Perform validation in library.')
action='store_true',
help='Perform validation in library.')
parser.add_argument('--handle', action='store_true', help='Use AIO handle.') parser.add_argument('--handle', action='store_true', help='Use AIO handle.')
parser.add_argument('--loops', parser.add_argument('--loops', type=int, default=1, help='Count of operation repetitions')
type=int,
default=1,
help='Count of operation repetitions')
parser.add_argument('--io_parallel', parser.add_argument('--io_parallel', type=int, default=None, help='Per iop parallelism')
type=int,
default=None,
help='Per iop parallelism')
parser.add_argument('--gpu', action='store_true', help='Use GPU memory') parser.add_argument('--gpu', action='store_true', help='Use GPU memory')
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
from deepspeed.ops.op_builder import AsyncIOBuilder from deepspeed.ops.op_builder import AsyncIOBuilder
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifdef __HIPCC__ #ifdef __HIPCC__
#include "custom_hip_layers.h" #include "custom_hip_layers.h"
#else #else
#include "custom_cuda_layers.h" #include "custom_cuda_layers.h"
#endif #endif
__global__ void param_update_kernel(const float* input, __half* output, int size) __global__ void param_update_kernel(const float* input, __half* output, int size)
{ {
int id = blockIdx.x * blockDim.x + threadIdx.x; int id = blockIdx.x * blockDim.x + threadIdx.x;
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
#ifdef _WIN32 #ifdef _WIN32
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifndef __TIMER_H__ #ifndef __TIMER_H__
#define __TIMER_H__ #define __TIMER_H__
......
/* Copyright 2020 The Microsoft DeepSpeed Team // Copyright (c) Microsoft Corporation.
Copyright NVIDIA/apex // SPDX-License-Identifier: Apache-2.0
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/ */
#ifndef TORCH_CHECK #ifndef TORCH_CHECK
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
...@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N) ...@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N)
1); 1);
} }
class Context { class TrainingContext {
public: public:
Context() : _workspace(nullptr), _seed(42), _curr_offset(0) TrainingContext() : _workspace(nullptr), _seed(42), _curr_offset(0)
{ {
curandCreateGenerator(&_gen, CURAND_RNG_PSEUDO_DEFAULT); curandCreateGenerator(&_gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(_gen, 123); curandSetPseudoRandomGeneratorSeed(_gen, 123);
...@@ -56,15 +57,15 @@ public: ...@@ -56,15 +57,15 @@ public:
} }
} }
virtual ~Context() virtual ~TrainingContext()
{ {
cublasDestroy(_cublasHandle); cublasDestroy(_cublasHandle);
cudaFree(_workspace); cudaFree(_workspace);
} }
static Context& Instance() static TrainingContext& Instance()
{ {
static Context _ctx; static TrainingContext _ctx;
return _ctx; return _ctx;
} }
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
...@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val) ...@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val)
#endif #endif
/********************* To Half Conversions *********************/ /********************* To Half Conversions *********************/
//aiss template <>
//template <> DS_D_INLINE __half to(double val)
//DS_D_INLINE __half to(double val) {
//{ #ifdef __HIP_PLATFORM_HCC__
// return __double2half(val); float val_f = __double2float_rn(val);
//} return __float2half(val_f);
#else
return __double2half(val);
#endif
}
template <> template <>
DS_D_INLINE __half to(float val) DS_D_INLINE __half to(float val)
{ {
...@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val) ...@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val)
{ {
return __float22half2_rn(val); return __float22half2_rn(val);
} }
template <>
DS_D_INLINE __half2 to(float val)
{
return __float2half2_rn(val);
}
#ifdef BF16_AVAILABLE #ifdef BF16_AVAILABLE
// No direct conversion // No direct conversion
...@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val) ...@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val)
return __float22bfloat162_rn(val); return __float22bfloat162_rn(val);
} }
template <> template <>
DS_D_INLINE __nv_bfloat162 to(float val)
{
return __float2bfloat162_rn(val);
}
template <>
DS_D_INLINE __nv_bfloat162 to(__half2 val) DS_D_INLINE __nv_bfloat162 to(__half2 val)
{ {
return to<__nv_bfloat162>(to<float2>(val)); return to<__nv_bfloat162>(to<float2>(val));
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
...@@ -38,8 +39,8 @@ public: ...@@ -38,8 +39,8 @@ public:
cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float)); cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float));
cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float)); cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float));
_streams[0] = Context::Instance().GetCurrentStream(); _streams[0] = TrainingContext::Instance().GetCurrentStream();
_streams[1] = Context::Instance().GetNewStream(); _streams[1] = TrainingContext::Instance().GetNewStream();
_buf_index = false; _buf_index = false;
#endif #endif
} }
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
...@@ -53,8 +54,8 @@ public: ...@@ -53,8 +54,8 @@ public:
cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float)); cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float));
cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float)); cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float));
_streams[0] = Context::Instance().GetCurrentStream(); _streams[0] = TrainingContext::Instance().GetCurrentStream();
_streams[1] = Context::Instance().GetNewStream(); _streams[1] = TrainingContext::Instance().GetNewStream();
_buf_index = false; _buf_index = false;
#endif #endif
} }
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#include "conversion_utils.h" #include "conversion_utils.h"
#include "ds_kernel_utils.h" #include "ds_kernel_utils.h"
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Centralized header file for preprocessor macros and constants Centralized header file for preprocessor macros and constants
used throughout the codebase. used throughout the codebase.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once #pragma once
......
/* // Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifndef __FEEDFORWARD_H__ #ifndef __FEEDFORWARD_H__
#define __FEEDFORWARD_H__ #define __FEEDFORWARD_H__
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment