Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
5bcc463d
Commit
5bcc463d
authored
May 29, 2023
by
aiss
Browse files
update v0.9.2
parent
ac5fbab4
Changes
714
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
127 additions
and
105 deletions
+127
-105
csrc/aio/py_test/parse_aio_stats.py
csrc/aio/py_test/parse_aio_stats.py
+6
-11
csrc/aio/py_test/perf_sweep_utils.py
csrc/aio/py_test/perf_sweep_utils.py
+4
-1
csrc/aio/py_test/test_ds_aio.py
csrc/aio/py_test/test_ds_aio.py
+12
-28
csrc/aio/py_test/test_ds_aio_utils.py
csrc/aio/py_test/test_ds_aio_utils.py
+4
-3
csrc/aio/py_test/validate_async_io.py
csrc/aio/py_test/validate_async_io.py
+4
-3
csrc/common/custom_cuda_kernel.cu
csrc/common/custom_cuda_kernel.cu
+7
-3
csrc/includes/StopWatch.h
csrc/includes/StopWatch.h
+4
-3
csrc/includes/Timer.h
csrc/includes/Timer.h
+4
-3
csrc/includes/compat.h
csrc/includes/compat.h
+8
-3
csrc/includes/context.h
csrc/includes/context.h
+9
-8
csrc/includes/conversion_utils.h
csrc/includes/conversion_utils.h
+24
-9
csrc/includes/cpu_adagrad.h
csrc/includes/cpu_adagrad.h
+6
-5
csrc/includes/cpu_adam.h
csrc/includes/cpu_adam.h
+6
-5
csrc/includes/cublas_wrappers.h
csrc/includes/cublas_wrappers.h
+4
-3
csrc/includes/custom_cuda_layers.h
csrc/includes/custom_cuda_layers.h
+4
-3
csrc/includes/dequantization_utils.h
csrc/includes/dequantization_utils.h
+4
-3
csrc/includes/dropout.h
csrc/includes/dropout.h
+4
-3
csrc/includes/ds_kernel_utils.h
csrc/includes/ds_kernel_utils.h
+5
-2
csrc/includes/ds_transformer_cuda.h
csrc/includes/ds_transformer_cuda.h
+4
-3
csrc/includes/feed_forward.h
csrc/includes/feed_forward.h
+4
-3
No files found.
csrc/aio/py_test/parse_aio_stats.py
View file @
5bcc463d
"""
# Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
...
@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'}
...
@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'}
def
parse_arguments
():
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--log_dir'
,
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
required
=
True
,
help
=
'Folder of statistics logs'
)
type
=
str
,
required
=
True
,
help
=
'Folder of statistics logs'
)
parser
.
add_argument
(
'--metric'
,
parser
.
add_argument
(
'--metric'
,
type
=
str
,
type
=
str
,
...
@@ -125,10 +123,7 @@ def get_results(log_files, metric):
...
@@ -125,10 +123,7 @@ def get_results(log_files, metric):
def
get_sorted_results
(
log_dir
,
metric
):
def
get_sorted_results
(
log_dir
,
metric
):
log_files
=
[
log_files
=
[
f
for
f
in
os
.
listdir
(
log_dir
)
if
os
.
path
.
isfile
(
os
.
path
.
join
(
log_dir
,
f
))]
f
for
f
in
os
.
listdir
(
log_dir
)
if
os
.
path
.
isfile
(
os
.
path
.
join
(
log_dir
,
f
))
]
log_files_path
=
[
os
.
path
.
join
(
log_dir
,
f
)
for
f
in
log_files
]
log_files_path
=
[
os
.
path
.
join
(
log_dir
,
f
)
for
f
in
log_files
]
results
=
get_results
(
log_files_path
,
metric
)
results
=
get_results
(
log_files_path
,
metric
)
...
...
csrc/aio/py_test/perf_sweep_utils.py
View file @
5bcc463d
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
SCRIPT_PREFIX
=
'_aio_bench'
SCRIPT_PREFIX
=
'_aio_bench'
WRITE_OP_DESC
=
'write'
WRITE_OP_DESC
=
'write'
...
...
csrc/aio/py_test/test_ds_aio.py
View file @
5bcc463d
"""
# Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
...
@@ -20,46 +21,29 @@ def parse_arguments():
...
@@ -20,46 +21,29 @@ def parse_arguments():
parser
.
add_argument
(
'--write_file'
,
type
=
str
,
default
=
None
,
help
=
'Write file.'
)
parser
.
add_argument
(
'--write_file'
,
type
=
str
,
default
=
None
,
help
=
'Write file.'
)
parser
.
add_argument
(
'--write_size'
,
parser
.
add_argument
(
'--write_size'
,
type
=
str
,
default
=
None
,
help
=
'Number of bytes to write.'
)
type
=
str
,
default
=
None
,
help
=
'Number of bytes to write.'
)
parser
.
add_argument
(
'--block_size'
,
type
=
str
,
default
=
'1M'
,
help
=
'I/O block size.'
)
parser
.
add_argument
(
'--block_size'
,
type
=
str
,
default
=
'1M'
,
help
=
'I/O block size.'
)
parser
.
add_argument
(
'--queue_depth'
,
type
=
int
,
default
=
32
,
help
=
'I/O queue depth.'
)
parser
.
add_argument
(
'--queue_depth'
,
type
=
int
,
default
=
32
,
help
=
'I/O queue depth.'
)
parser
.
add_argument
(
'--threads'
,
parser
.
add_argument
(
'--threads'
,
type
=
int
,
default
=
1
,
help
=
'Thread parallelism count.'
)
type
=
int
,
default
=
1
,
help
=
'Thread parallelism count.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--single_submit'
,
'--single_submit'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
help
=
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
parser
.
add_argument
(
'--overlap_events'
,
parser
.
add_argument
(
'--overlap_events'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Overlap I/O submission and completion requests.'
)
help
=
'Overlap I/O submission and completion requests.'
)
parser
.
add_argument
(
'--validate'
,
parser
.
add_argument
(
'--validate'
,
action
=
'store_true'
,
help
=
'Perform validation in library.'
)
action
=
'store_true'
,
help
=
'Perform validation in library.'
)
parser
.
add_argument
(
'--handle'
,
action
=
'store_true'
,
help
=
'Use AIO handle.'
)
parser
.
add_argument
(
'--handle'
,
action
=
'store_true'
,
help
=
'Use AIO handle.'
)
parser
.
add_argument
(
'--loops'
,
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
parser
.
add_argument
(
'--io_parallel'
,
parser
.
add_argument
(
'--io_parallel'
,
type
=
int
,
default
=
None
,
help
=
'Per iop parallelism'
)
type
=
int
,
default
=
None
,
help
=
'Per iop parallelism'
)
parser
.
add_argument
(
'--gpu'
,
action
=
'store_true'
,
help
=
'Use GPU memory'
)
parser
.
add_argument
(
'--gpu'
,
action
=
'store_true'
,
help
=
'Use GPU memory'
)
...
...
csrc/aio/py_test/test_ds_aio_utils.py
View file @
5bcc463d
"""
# Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
...
...
csrc/aio/py_test/validate_async_io.py
View file @
5bcc463d
"""
# Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
from
deepspeed.ops.op_builder
import
AsyncIOBuilder
from
deepspeed.ops.op_builder
import
AsyncIOBuilder
...
...
csrc/common/custom_cuda_kernel.cu
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifdef __HIPCC__
#ifdef __HIPCC__
#include "custom_hip_layers.h"
#include "custom_hip_layers.h"
#else
#else
#include "custom_cuda_layers.h"
#include "custom_cuda_layers.h"
#endif
#endif
__global__
void
param_update_kernel
(
const
float
*
input
,
__half
*
output
,
int
size
)
__global__
void
param_update_kernel
(
const
float
*
input
,
__half
*
output
,
int
size
)
{
{
int
id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
csrc/includes/StopWatch.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
#ifdef _WIN32
#ifdef _WIN32
...
...
csrc/includes/Timer.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifndef __TIMER_H__
#ifndef __TIMER_H__
#define __TIMER_H__
#define __TIMER_H__
...
...
csrc/includes/compat.h
View file @
5bcc463d
/* Copyright 2020 The Microsoft DeepSpeed Team
// Copyright (c) Microsoft Corporation.
Copyright NVIDIA/apex
// SPDX-License-Identifier: Apache-2.0
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
*/
#ifndef TORCH_CHECK
#ifndef TORCH_CHECK
...
...
csrc/includes/context.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N)
...
@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N)
1
);
1
);
}
}
class
Context
{
class
Training
Context
{
public:
public:
Context
()
:
_workspace
(
nullptr
),
_seed
(
42
),
_curr_offset
(
0
)
Training
Context
()
:
_workspace
(
nullptr
),
_seed
(
42
),
_curr_offset
(
0
)
{
{
curandCreateGenerator
(
&
_gen
,
CURAND_RNG_PSEUDO_DEFAULT
);
curandCreateGenerator
(
&
_gen
,
CURAND_RNG_PSEUDO_DEFAULT
);
curandSetPseudoRandomGeneratorSeed
(
_gen
,
123
);
curandSetPseudoRandomGeneratorSeed
(
_gen
,
123
);
...
@@ -56,15 +57,15 @@ public:
...
@@ -56,15 +57,15 @@ public:
}
}
}
}
virtual
~
Context
()
virtual
~
Training
Context
()
{
{
cublasDestroy
(
_cublasHandle
);
cublasDestroy
(
_cublasHandle
);
cudaFree
(
_workspace
);
cudaFree
(
_workspace
);
}
}
static
Context
&
Instance
()
static
Training
Context
&
Instance
()
{
{
static
Context
_ctx
;
static
Training
Context
_ctx
;
return
_ctx
;
return
_ctx
;
}
}
...
...
csrc/includes/conversion_utils.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val)
...
@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val)
#endif
#endif
/********************* To Half Conversions *********************/
/********************* To Half Conversions *********************/
//aiss
template
<
>
//template <>
DS_D_INLINE
__half
to
(
double
val
)
//DS_D_INLINE __half to(double val)
{
//{
#ifdef __HIP_PLATFORM_HCC__
// return __double2half(val);
float
val_f
=
__double2float_rn
(
val
);
//}
return
__float2half
(
val_f
);
#else
return
__double2half
(
val
);
#endif
}
template
<
>
template
<
>
DS_D_INLINE
__half
to
(
float
val
)
DS_D_INLINE
__half
to
(
float
val
)
{
{
...
@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val)
...
@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val)
{
{
return
__float22half2_rn
(
val
);
return
__float22half2_rn
(
val
);
}
}
template
<
>
DS_D_INLINE
__half2
to
(
float
val
)
{
return
__float2half2_rn
(
val
);
}
#ifdef BF16_AVAILABLE
#ifdef BF16_AVAILABLE
// No direct conversion
// No direct conversion
...
@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val)
...
@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val)
return
__float22bfloat162_rn
(
val
);
return
__float22bfloat162_rn
(
val
);
}
}
template
<
>
template
<
>
DS_D_INLINE
__nv_bfloat162
to
(
float
val
)
{
return
__float2bfloat162_rn
(
val
);
}
template
<
>
DS_D_INLINE
__nv_bfloat162
to
(
__half2
val
)
DS_D_INLINE
__nv_bfloat162
to
(
__half2
val
)
{
{
return
to
<
__nv_bfloat162
>
(
to
<
float2
>
(
val
));
return
to
<
__nv_bfloat162
>
(
to
<
float2
>
(
val
));
...
...
csrc/includes/cpu_adagrad.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
@@ -38,8 +39,8 @@ public:
...
@@ -38,8 +39,8 @@ public:
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
_streams
[
0
]
=
Context
::
Instance
().
GetCurrentStream
();
_streams
[
0
]
=
Training
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Context
::
Instance
().
GetNewStream
();
_streams
[
1
]
=
Training
Context
::
Instance
().
GetNewStream
();
_buf_index
=
false
;
_buf_index
=
false
;
#endif
#endif
}
}
...
...
csrc/includes/cpu_adam.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
@@ -53,8 +54,8 @@ public:
...
@@ -53,8 +54,8 @@ public:
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
_streams
[
0
]
=
Context
::
Instance
().
GetCurrentStream
();
_streams
[
0
]
=
Training
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Context
::
Instance
().
GetNewStream
();
_streams
[
1
]
=
Training
Context
::
Instance
().
GetNewStream
();
_buf_index
=
false
;
_buf_index
=
false
;
#endif
#endif
}
}
...
...
csrc/includes/cublas_wrappers.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
...
csrc/includes/custom_cuda_layers.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
...
csrc/includes/dequantization_utils.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#include "conversion_utils.h"
#include "conversion_utils.h"
#include "ds_kernel_utils.h"
#include "ds_kernel_utils.h"
...
...
csrc/includes/dropout.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
...
csrc/includes/ds_kernel_utils.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright 2022 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Centralized header file for preprocessor macros and constants
Centralized header file for preprocessor macros and constants
used throughout the codebase.
used throughout the codebase.
*/
*/
...
...
csrc/includes/ds_transformer_cuda.h
100755 → 100644
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#pragma once
#pragma once
...
...
csrc/includes/feed_forward.h
View file @
5bcc463d
/*
// Copyright (c) Microsoft Corporation.
Copyright The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
*/
// DeepSpeed Team
#ifndef __FEEDFORWARD_H__
#ifndef __FEEDFORWARD_H__
#define __FEEDFORWARD_H__
#define __FEEDFORWARD_H__
...
...
Prev
1
2
3
4
5
6
7
8
9
…
36
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment