Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
5bcc463d
Commit
5bcc463d
authored
May 29, 2023
by
aiss
Browse files
update v0.9.2
parent
ac5fbab4
Changes
714
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
127 additions
and
105 deletions
+127
-105
csrc/aio/py_test/parse_aio_stats.py
csrc/aio/py_test/parse_aio_stats.py
+6
-11
csrc/aio/py_test/perf_sweep_utils.py
csrc/aio/py_test/perf_sweep_utils.py
+4
-1
csrc/aio/py_test/test_ds_aio.py
csrc/aio/py_test/test_ds_aio.py
+12
-28
csrc/aio/py_test/test_ds_aio_utils.py
csrc/aio/py_test/test_ds_aio_utils.py
+4
-3
csrc/aio/py_test/validate_async_io.py
csrc/aio/py_test/validate_async_io.py
+4
-3
csrc/common/custom_cuda_kernel.cu
csrc/common/custom_cuda_kernel.cu
+7
-3
csrc/includes/StopWatch.h
csrc/includes/StopWatch.h
+4
-3
csrc/includes/Timer.h
csrc/includes/Timer.h
+4
-3
csrc/includes/compat.h
csrc/includes/compat.h
+8
-3
csrc/includes/context.h
csrc/includes/context.h
+9
-8
csrc/includes/conversion_utils.h
csrc/includes/conversion_utils.h
+24
-9
csrc/includes/cpu_adagrad.h
csrc/includes/cpu_adagrad.h
+6
-5
csrc/includes/cpu_adam.h
csrc/includes/cpu_adam.h
+6
-5
csrc/includes/cublas_wrappers.h
csrc/includes/cublas_wrappers.h
+4
-3
csrc/includes/custom_cuda_layers.h
csrc/includes/custom_cuda_layers.h
+4
-3
csrc/includes/dequantization_utils.h
csrc/includes/dequantization_utils.h
+4
-3
csrc/includes/dropout.h
csrc/includes/dropout.h
+4
-3
csrc/includes/ds_kernel_utils.h
csrc/includes/ds_kernel_utils.h
+5
-2
csrc/includes/ds_transformer_cuda.h
csrc/includes/ds_transformer_cuda.h
+4
-3
csrc/includes/feed_forward.h
csrc/includes/feed_forward.h
+4
-3
No files found.
csrc/aio/py_test/parse_aio_stats.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
...
...
@@ -19,10 +20,7 @@ METRIC_SEARCH = {READ_SPEED: 'E2E Read Speed', WRITE_SPEED: 'E2E Write Speed'}
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
required
=
True
,
help
=
'Folder of statistics logs'
)
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
required
=
True
,
help
=
'Folder of statistics logs'
)
parser
.
add_argument
(
'--metric'
,
type
=
str
,
...
...
@@ -125,10 +123,7 @@ def get_results(log_files, metric):
def
get_sorted_results
(
log_dir
,
metric
):
log_files
=
[
f
for
f
in
os
.
listdir
(
log_dir
)
if
os
.
path
.
isfile
(
os
.
path
.
join
(
log_dir
,
f
))
]
log_files
=
[
f
for
f
in
os
.
listdir
(
log_dir
)
if
os
.
path
.
isfile
(
os
.
path
.
join
(
log_dir
,
f
))]
log_files_path
=
[
os
.
path
.
join
(
log_dir
,
f
)
for
f
in
log_files
]
results
=
get_results
(
log_files_path
,
metric
)
...
...
csrc/aio/py_test/perf_sweep_utils.py
View file @
5bcc463d
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
SCRIPT_PREFIX
=
'_aio_bench'
WRITE_OP_DESC
=
'write'
...
...
csrc/aio/py_test/test_ds_aio.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
...
...
@@ -20,46 +21,29 @@ def parse_arguments():
parser
.
add_argument
(
'--write_file'
,
type
=
str
,
default
=
None
,
help
=
'Write file.'
)
parser
.
add_argument
(
'--write_size'
,
type
=
str
,
default
=
None
,
help
=
'Number of bytes to write.'
)
parser
.
add_argument
(
'--write_size'
,
type
=
str
,
default
=
None
,
help
=
'Number of bytes to write.'
)
parser
.
add_argument
(
'--block_size'
,
type
=
str
,
default
=
'1M'
,
help
=
'I/O block size.'
)
parser
.
add_argument
(
'--queue_depth'
,
type
=
int
,
default
=
32
,
help
=
'I/O queue depth.'
)
parser
.
add_argument
(
'--threads'
,
type
=
int
,
default
=
1
,
help
=
'Thread parallelism count.'
)
parser
.
add_argument
(
'--threads'
,
type
=
int
,
default
=
1
,
help
=
'Thread parallelism count.'
)
parser
.
add_argument
(
'--single_submit'
,
parser
.
add_argument
(
'--single_submit'
,
action
=
'store_true'
,
help
=
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
help
=
'Submit I/O requests in singles (default is submit queue_depth amount at once.).'
)
parser
.
add_argument
(
'--overlap_events'
,
action
=
'store_true'
,
help
=
'Overlap I/O submission and completion requests.'
)
parser
.
add_argument
(
'--validate'
,
action
=
'store_true'
,
help
=
'Perform validation in library.'
)
parser
.
add_argument
(
'--validate'
,
action
=
'store_true'
,
help
=
'Perform validation in library.'
)
parser
.
add_argument
(
'--handle'
,
action
=
'store_true'
,
help
=
'Use AIO handle.'
)
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
parser
.
add_argument
(
'--io_parallel'
,
type
=
int
,
default
=
None
,
help
=
'Per iop parallelism'
)
parser
.
add_argument
(
'--io_parallel'
,
type
=
int
,
default
=
None
,
help
=
'Per iop parallelism'
)
parser
.
add_argument
(
'--gpu'
,
action
=
'store_true'
,
help
=
'Use GPU memory'
)
...
...
csrc/aio/py_test/test_ds_aio_utils.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
...
...
csrc/aio/py_test/validate_async_io.py
View file @
5bcc463d
"""
Copyright 2021 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
from
deepspeed.ops.op_builder
import
AsyncIOBuilder
...
...
csrc/common/custom_cuda_kernel.cu
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifdef __HIPCC__
#include "custom_hip_layers.h"
#else
#include "custom_cuda_layers.h"
#endif
__global__
void
param_update_kernel
(
const
float
*
input
,
__half
*
output
,
int
size
)
{
int
id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
csrc/includes/StopWatch.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
#ifdef _WIN32
...
...
csrc/includes/Timer.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifndef __TIMER_H__
#define __TIMER_H__
...
...
csrc/includes/compat.h
View file @
5bcc463d
/* Copyright 2020 The Microsoft DeepSpeed Team
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
#ifndef TORCH_CHECK
...
...
csrc/includes/context.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
@@ -43,9 +44,9 @@ inline int DS_GET_BLOCKS(const int N)
1
);
}
class
Context
{
class
Training
Context
{
public:
Context
()
:
_workspace
(
nullptr
),
_seed
(
42
),
_curr_offset
(
0
)
Training
Context
()
:
_workspace
(
nullptr
),
_seed
(
42
),
_curr_offset
(
0
)
{
curandCreateGenerator
(
&
_gen
,
CURAND_RNG_PSEUDO_DEFAULT
);
curandSetPseudoRandomGeneratorSeed
(
_gen
,
123
);
...
...
@@ -56,15 +57,15 @@ public:
}
}
virtual
~
Context
()
virtual
~
Training
Context
()
{
cublasDestroy
(
_cublasHandle
);
cudaFree
(
_workspace
);
}
static
Context
&
Instance
()
static
Training
Context
&
Instance
()
{
static
Context
_ctx
;
static
Training
Context
_ctx
;
return
_ctx
;
}
...
...
csrc/includes/conversion_utils.h
View file @
5bcc463d
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
@@ -262,12 +263,16 @@ DS_D_INLINE float2 to(__nv_bfloat162 val)
#endif
/********************* To Half Conversions *********************/
//aiss
//template <>
//DS_D_INLINE __half to(double val)
//{
// return __double2half(val);
//}
template
<
>
DS_D_INLINE
__half
to
(
double
val
)
{
#ifdef __HIP_PLATFORM_HCC__
float
val_f
=
__double2float_rn
(
val
);
return
__float2half
(
val_f
);
#else
return
__double2half
(
val
);
#endif
}
template
<
>
DS_D_INLINE
__half
to
(
float
val
)
{
...
...
@@ -329,6 +334,11 @@ DS_D_INLINE __half2 to(float2 val)
{
return
__float22half2_rn
(
val
);
}
template
<
>
DS_D_INLINE
__half2
to
(
float
val
)
{
return
__float2half2_rn
(
val
);
}
#ifdef BF16_AVAILABLE
// No direct conversion
...
...
@@ -401,6 +411,11 @@ DS_D_INLINE __nv_bfloat162 to(float2 val)
return
__float22bfloat162_rn
(
val
);
}
template
<
>
DS_D_INLINE
__nv_bfloat162
to
(
float
val
)
{
return
__float2bfloat162_rn
(
val
);
}
template
<
>
DS_D_INLINE
__nv_bfloat162
to
(
__half2
val
)
{
return
to
<
__nv_bfloat162
>
(
to
<
float2
>
(
val
));
...
...
csrc/includes/cpu_adagrad.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
@@ -38,8 +39,8 @@ public:
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
_streams
[
0
]
=
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Context
::
Instance
().
GetNewStream
();
_streams
[
0
]
=
Training
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Training
Context
::
Instance
().
GetNewStream
();
_buf_index
=
false
;
#endif
}
...
...
csrc/includes/cpu_adam.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
@@ -53,8 +54,8 @@ public:
cudaMallocHost
((
void
**
)
_doubled_buffer
,
TILE
*
sizeof
(
float
));
cudaMallocHost
((
void
**
)(
_doubled_buffer
+
1
),
TILE
*
sizeof
(
float
));
_streams
[
0
]
=
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Context
::
Instance
().
GetNewStream
();
_streams
[
0
]
=
Training
Context
::
Instance
().
GetCurrentStream
();
_streams
[
1
]
=
Training
Context
::
Instance
().
GetNewStream
();
_buf_index
=
false
;
#endif
}
...
...
csrc/includes/cublas_wrappers.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
csrc/includes/custom_cuda_layers.h
View file @
5bcc463d
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
csrc/includes/dequantization_utils.h
View file @
5bcc463d
/*
Copyright 2022 The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#include "conversion_utils.h"
#include "ds_kernel_utils.h"
...
...
csrc/includes/dropout.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
csrc/includes/ds_kernel_utils.h
View file @
5bcc463d
/*
Copyright 2022 The Microsoft DeepSpeed Team
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Centralized header file for preprocessor macros and constants
used throughout the codebase.
*/
...
...
csrc/includes/ds_transformer_cuda.h
100755 → 100644
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
...
...
csrc/includes/feed_forward.h
View file @
5bcc463d
/*
Copyright The Microsoft DeepSpeed Team
*/
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#ifndef __FEEDFORWARD_H__
#define __FEEDFORWARD_H__
...
...
Prev
1
2
3
4
5
6
7
8
9
…
36
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment