"examples/community/run_tensorrt_controlnet.py" did not exist on "e828232780554d54bdb527d3390fab0be042b72a"
Commit c25a91b6 authored by aiss's avatar aiss
Browse files

Merge branch 'ds-v0.9.2-rocm' into 'main'

Ds v0.9.2 rocm

See merge request dcutoolkit/deeplearing/deepspeed!2
parents d1596c94 af82b300
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
......@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'}
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument('--log_dir',
type=str,
required=True,
help='Folder of statistics logs')
parser.add_argument('--log_dir', type=str, required=True, help='Folder of statistics logs')
parser.add_argument('--metric',
type=str,
......@@ -125,10 +123,7 @@ def get_results(log_files, metric):
def get_sorted_results(log_dir, metric):
log_files = [
f for f in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir,
f))
]
log_files = [f for f in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir, f))]
log_files_path = [os.path.join(log_dir, f) for f in log_files]
results = get_results(log_files_path, metric)
......
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
SCRIPT_PREFIX = '_aio_bench'
WRITE_OP_DESC = 'write'
......
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
......@@ -20,46 +21,29 @@ def parse_arguments():
parser.add_argument('--write_file', type=str, default=None, help='Write file.')
parser.add_argument('--write_size',
type=str,
default=None,
help='Number of bytes to write.')
parser.add_argument('--write_size', type=str, default=None, help='Number of bytes to write.')
parser.add_argument('--block_size', type=str, default='1M', help='I/O block size.')
parser.add_argument('--queue_depth', type=int, default=32, help='I/O queue depth.')
parser.add_argument('--threads',
type=int,
default=1,
help='Thread parallelism count.')
parser.add_argument('--threads', type=int, default=1, help='Thread parallelism count.')
parser.add_argument(
'--single_submit',
action='store_true',
help=
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
parser.add_argument('--single_submit',
action='store_true',
help='Submit I/O requests in singles (default is submit queue_depth amount at once.).')
parser.add_argument('--overlap_events',
action='store_true',
help='Overlap I/O submission and completion requests.')
parser.add_argument('--validate',
action='store_true',
help='Perform validation in library.')
parser.add_argument('--validate', action='store_true', help='Perform validation in library.')
parser.add_argument('--handle', action='store_true', help='Use AIO handle.')
parser.add_argument('--loops',
type=int,
default=1,
help='Count of operation repetitions')
parser.add_argument('--loops', type=int, default=1, help='Count of operation repetitions')
parser.add_argument('--io_parallel',
type=int,
default=None,
help='Per iop parallelism')
parser.add_argument('--io_parallel', type=int, default=None, help='Per iop parallelism')
parser.add_argument('--gpu', action='store_true', help='Use GPU memory')
......
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
......
"""
Copyright 2021 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
from deepspeed.ops.op_builder import AsyncIOBuilder
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifdef __HIPCC__
#include "custom_hip_layers.h"
#else
#include "custom_cuda_layers.h"
#endif
__global__ void param_update_kernel(const float* input, __half* output, int size)
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
#ifdef _WIN32
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifndef __TIMER_H__
#define __TIMER_H__
......
/* Copyright 2020 The Microsoft DeepSpeed Team
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
#ifndef TORCH_CHECK
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N)
1);
}
class Context {
class TrainingContext {
public:
Context() : _workspace(nullptr), _seed(42), _curr_offset(0)
TrainingContext() : _workspace(nullptr), _seed(42), _curr_offset(0)
{
curandCreateGenerator(&_gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(_gen, 123);
......@@ -56,15 +57,15 @@ public:
}
}
virtual ~Context()
virtual ~TrainingContext()
{
cublasDestroy(_cublasHandle);
cudaFree(_workspace);
}
static Context& Instance()
static TrainingContext& Instance()
{
static Context _ctx;
static TrainingContext _ctx;
return _ctx;
}
......
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val)
#endif
/********************* To Half Conversions *********************/
//aiss
//template <>
//DS_D_INLINE __half to(double val)
//{
// return __double2half(val);
//}
// Specialization of to<>() that converts a double to __half.
//
// On HIP/ROCm builds the value is narrowed in two steps (double -> float ->
// half); on all other builds the direct __double2half intrinsic is used.
// NOTE(review): presumably the two-step path exists because __double2half is
// not usable on the HIP toolchain -- confirm against the ROCm headers.
//
// The HIP path is selected by either the legacy __HIP_PLATFORM_HCC__ macro or
// its replacement __HIP_PLATFORM_AMD__, so the choice does not depend on which
// of the two a given ROCm release defines.
template <>
DS_D_INLINE __half to(double val)
{
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // Narrow to float first, then convert the float to half.
    float val_f = __double2float_rn(val);
    return __float2half(val_f);
#else
    return __double2half(val);
#endif
}
template <>
DS_D_INLINE __half to(float val)
{
......@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val)
{
return __float22half2_rn(val);
}
// Specialization of to<>() that builds a __half2 from a single float by
// delegating to the __float2half2_rn intrinsic.
// NOTE(review): going by the intrinsic's name this presumably rounds to
// nearest and writes the same value into both lanes -- confirm against the
// CUDA math API reference.
template <>
DS_D_INLINE __half2 to(float val)
{
    const __half2 packed = __float2half2_rn(val);
    return packed;
}
#ifdef BF16_AVAILABLE
// No direct conversion
......@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val)
return __float22bfloat162_rn(val);
}
// Specialization of to<>() that builds a __nv_bfloat162 from a single float by
// delegating to the __float2bfloat162_rn intrinsic.
// NOTE(review): going by the intrinsic's name this presumably rounds to
// nearest and writes the same value into both lanes -- confirm against the
// CUDA bfloat16 API reference.
template <>
DS_D_INLINE __nv_bfloat162 to(float val)
{
    const __nv_bfloat162 packed = __float2bfloat162_rn(val);
    return packed;
}
template <>
DS_D_INLINE __nv_bfloat162 to(__half2 val)
{
return to<__nv_bfloat162>(to<float2>(val));
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......@@ -38,8 +39,8 @@ public:
cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float));
cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float));
_streams[0] = Context::Instance().GetCurrentStream();
_streams[1] = Context::Instance().GetNewStream();
_streams[0] = TrainingContext::Instance().GetCurrentStream();
_streams[1] = TrainingContext::Instance().GetNewStream();
_buf_index = false;
#endif
}
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......@@ -53,8 +54,8 @@ public:
cudaMallocHost((void**)_doubled_buffer, TILE * sizeof(float));
cudaMallocHost((void**)(_doubled_buffer + 1), TILE * sizeof(float));
_streams[0] = Context::Instance().GetCurrentStream();
_streams[1] = Context::Instance().GetNewStream();
_streams[0] = TrainingContext::Instance().GetCurrentStream();
_streams[1] = TrainingContext::Instance().GetNewStream();
_buf_index = false;
#endif
}
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#include "conversion_utils.h"
#include "ds_kernel_utils.h"
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......
/*
Copyright 2022 The Microsoft DeepSpeed Team
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Centralized header file for preprocessor macros and constants
used throughout the codebase.
*/
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
......
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifndef __FEEDFORWARD_H__
#define __FEEDFORWARD_H__
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment