Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
c25a91b6
Commit
c25a91b6
authored
May 30, 2023
by
aiss
Browse files
Merge branch 'ds-v0.9.2-rocm' into 'main'
Ds v0.9.2 rocm See merge request dcutoolkit/deeplearing/deepspeed!2
parents
d1596c94
af82b300
Changes
710
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
145 additions
and
183 deletions
+145
-183
csrc/aio/common/deepspeed_aio_common.h
csrc/aio/common/deepspeed_aio_common.h
+5
-3
csrc/aio/common/deepspeed_aio_types.cpp
csrc/aio/common/deepspeed_aio_types.cpp
+5
-3
csrc/aio/common/deepspeed_aio_types.h
csrc/aio/common/deepspeed_aio_types.h
+5
-3
csrc/aio/common/deepspeed_aio_utils.cpp
csrc/aio/common/deepspeed_aio_utils.cpp
+5
-3
csrc/aio/common/deepspeed_aio_utils.h
csrc/aio/common/deepspeed_aio_utils.h
+5
-3
csrc/aio/py_lib/deepspeed_aio_thread.cpp
csrc/aio/py_lib/deepspeed_aio_thread.cpp
+5
-3
csrc/aio/py_lib/deepspeed_aio_thread.h
csrc/aio/py_lib/deepspeed_aio_thread.h
+5
-3
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
+5
-3
csrc/aio/py_lib/deepspeed_pin_tensor.h
csrc/aio/py_lib/deepspeed_pin_tensor.h
+8
-5
csrc/aio/py_lib/deepspeed_py_aio.cpp
csrc/aio/py_lib/deepspeed_py_aio.cpp
+4
-0
csrc/aio/py_lib/deepspeed_py_aio.h
csrc/aio/py_lib/deepspeed_py_aio.h
+4
-0
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
+4
-0
csrc/aio/py_lib/deepspeed_py_aio_handle.h
csrc/aio/py_lib/deepspeed_py_aio_handle.h
+5
-3
csrc/aio/py_lib/deepspeed_py_copy.cpp
csrc/aio/py_lib/deepspeed_py_copy.cpp
+5
-3
csrc/aio/py_lib/deepspeed_py_copy.h
csrc/aio/py_lib/deepspeed_py_copy.h
+4
-0
csrc/aio/py_lib/py_ds_aio.cpp
csrc/aio/py_lib/py_ds_aio.cpp
+5
-3
csrc/aio/py_test/aio_bench_generate_param.py
csrc/aio/py_test/aio_bench_generate_param.py
+9
-13
csrc/aio/py_test/aio_bench_perf_sweep.py
csrc/aio/py_test/aio_bench_perf_sweep.py
+37
-85
csrc/aio/py_test/ds_aio_basic.py
csrc/aio/py_test/ds_aio_basic.py
+10
-25
csrc/aio/py_test/ds_aio_handle.py
csrc/aio/py_test/ds_aio_handle.py
+10
-22
No files found.
csrc/aio/common/deepspeed_aio_common.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/common/deepspeed_aio_types.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/common/deepspeed_aio_types.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/common/deepspeed_aio_utils.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/common/deepspeed_aio_utils.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_aio_thread.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_aio_thread.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2023 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory.
Functionality for managing CPU tensors occupying page-locked memory.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_pin_tensor.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2023 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory.
Functionality for managing CPU tensors occupying page-locked memory.
TODO: Implement a full-featured manager that
TODO: Implement a full-featured manager that
1. Avoid page-locked memory leaks
1. Avoid page-locked memory leaks
2. Minimize page-locked memory usage by reducing internal fragmentation
2. Minimize page-locked memory usage by reducing internal fragmentation
Functionality for managing CPU tensors occupying page-locked memory.
*/
*/
#include <map>
#include <map>
...
...
csrc/aio/py_lib/deepspeed_py_aio.cpp
View file @
c25a91b6
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
/*
Copyright 2020 The Microsoft DeepSpeed Team
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio.h
View file @
c25a91b6
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
/*
Copyright 2020 The Microsoft DeepSpeed Team
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
View file @
c25a91b6
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
/*
Copyright 2020 The Microsoft DeepSpeed Team
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio_handle.h
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_py_copy.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_lib/deepspeed_py_copy.h
View file @
c25a91b6
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
/*
Copyright 2020 The Microsoft DeepSpeed Team
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/py_ds_aio.cpp
View file @
c25a91b6
/*
// Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
// SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
*/
...
...
csrc/aio/py_test/aio_bench_generate_param.py
View file @
c25a91b6
"""
# Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
import
os
import
os
...
@@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR
...
@@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR
def
parse_arguments
():
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
parser
.
add_argument
(
'--log_dir'
,
'--log_dir'
,
type
=
str
,
type
=
str
,
default
=
BENCH_LOG_DIR
,
default
=
BENCH_LOG_DIR
,
help
=
f
'Folder of performance sweep logs. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
help
=
f
'Folder of performance sweep logs. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
print
(
f
'args =
{
args
}
'
)
print
(
f
'args =
{
args
}
'
)
...
@@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir):
...
@@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir):
optimal_config_read
=
read_results
.
get
(
read_perf_keys
[
optimal_key
],
None
)
optimal_config_read
=
read_results
.
get
(
read_perf_keys
[
optimal_key
],
None
)
optimal_config_write
=
write_results
.
get
(
write_perf_keys
[
optimal_key
],
None
)
optimal_config_write
=
write_results
.
get
(
write_perf_keys
[
optimal_key
],
None
)
print
(
print
(
f
'Best performance (GB/sec): read =
{
optimal_config_read
:
5.2
f
}
, write =
{
optimal_config_write
:
5.2
f
}
'
)
f
'Best performance (GB/sec): read =
{
optimal_config_read
:
5.2
f
}
, write =
{
optimal_config_write
:
5.2
f
}
'
)
print
(
json
.
dumps
(
aio_param
,
indent
=
3
))
print
(
json
.
dumps
(
aio_param
,
indent
=
3
))
...
...
csrc/aio/py_test/aio_bench_perf_sweep.py
View file @
c25a91b6
"""
# Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
import
os
import
os
...
@@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder
...
@@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder
OTHER_OPTIONS
=
'--handle'
OTHER_OPTIONS
=
'--handle'
PERF_SCRIPT
=
'test_ds_aio.py'
PERF_SCRIPT
=
'test_ds_aio.py'
DEFAULT_SWEEP_CONFIG
=
{
DEFAULT_SWEEP_CONFIG
=
{
"block_size"
:
[
"128K"
,
"block_size"
:
[
"128K"
,
"256K"
],
"256K"
],
"queue_depth"
:
[
4
,
16
,
32
],
"queue_depth"
:
[
4
,
"overlap_events"
:
[
True
,
False
],
16
,
"io_parallel"
:
[
2
,
8
],
32
],
"overlap_events"
:
[
True
,
False
],
"io_parallel"
:
[
2
,
8
],
"single_submit"
:
[
False
]
"single_submit"
:
[
False
]
}
}
class
Job
(
object
):
class
Job
(
object
):
def
__init__
(
self
,
cmd_line
,
output_file
=
None
,
work_dir
=
None
):
def
__init__
(
self
,
cmd_line
,
output_file
=
None
,
work_dir
=
None
):
self
.
cmd_line
=
cmd_line
self
.
cmd_line
=
cmd_line
self
.
output_file
=
output_file
self
.
output_file
=
output_file
...
@@ -63,6 +60,7 @@ class Job(object):
...
@@ -63,6 +60,7 @@ class Job(object):
class
SweepConfig
(
object
):
class
SweepConfig
(
object
):
def
__init__
(
self
,
args
):
def
__init__
(
self
,
args
):
self
.
nvme_dir
=
args
.
nvme_dir
self
.
nvme_dir
=
args
.
nvme_dir
self
.
io_size
=
args
.
io_size
self
.
io_size
=
args
.
io_size
...
@@ -78,52 +76,35 @@ class SweepConfig(object):
...
@@ -78,52 +76,35 @@ class SweepConfig(object):
def
parse_arguments
():
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
parser
.
add_argument
(
'--nvme_dir'
,
'--nvme_dir'
,
required
=
True
,
required
=
True
,
type
=
str
,
help
=
'Directory in which to perform I/O tests. A writeable directory on a NVMe device.'
)
parser
.
add_argument
(
'--sweep_config'
,
type
=
str
,
type
=
str
,
default
=
None
,
help
=
'Directory in which to perform I/O tests. A writeable directory on a NVMe device.'
)
help
=
'Performance sweep configuration json file.'
)
parser
.
add_argument
(
'--no_read'
,
parser
.
add_argument
(
'--sweep_config'
,
type
=
str
,
default
=
None
,
help
=
'Performance sweep configuration json file.'
)
action
=
'store_true'
,
help
=
'Disable read performance measurements.'
)
parser
.
add_argument
(
'--no_write'
,
parser
.
add_argument
(
'--no_read'
,
action
=
'store_true'
,
help
=
'Disable read performance measurements.'
)
action
=
'store_true'
,
help
=
'Disable write performance measurements.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--no_write'
,
action
=
'store_true'
,
help
=
'Disable write performance measurements.'
)
'--io_size'
,
type
=
str
,
parser
.
add_argument
(
'--io_size'
,
default
=
"400M"
,
type
=
str
,
help
=
'Number of I/O bytes to read/write for performance measurements.'
)
default
=
"400M"
,
help
=
'Number of I/O bytes to read/write for performance measurements.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--no_sudo'
,
'--no_sudo'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
help
=
'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.'
'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.'
)
)
parser
.
add_argument
(
parser
.
add_argument
(
'--log_dir'
,
'--log_dir'
,
type
=
str
,
type
=
str
,
default
=
BENCH_LOG_DIR
,
default
=
BENCH_LOG_DIR
,
help
=
help
=
f
'Output directory for performance log files. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
f
'Output directory for performance log files. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
parser
.
add_argument
(
'--loops'
,
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
print
(
f
'args =
{
args
}
'
)
print
(
f
'args =
{
args
}
'
)
...
@@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json):
...
@@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json):
def
get_sweep_cmd_lines
(
sweep_config_dict
):
def
get_sweep_cmd_lines
(
sweep_config_dict
):
def
flatten_options
(
key
,
value_list
):
def
flatten_options
(
key
,
value_list
):
flat_list
=
[]
flat_list
=
[]
for
v
in
value_list
:
for
v
in
value_list
:
...
@@ -170,11 +152,7 @@ def run_job(job):
...
@@ -170,11 +152,7 @@ def run_job(job):
args
=
' '
.
join
(
job
.
cmd
())
args
=
' '
.
join
(
job
.
cmd
())
print
(
f
'args =
{
args
}
'
)
print
(
f
'args =
{
args
}
'
)
job
.
open_output_file
()
job
.
open_output_file
()
proc
=
subprocess
.
run
(
args
=
args
,
proc
=
subprocess
.
run
(
args
=
args
,
shell
=
True
,
stdout
=
job
.
get_stdout
(),
stderr
=
job
.
get_stderr
(),
cwd
=
job
.
get_cwd
())
shell
=
True
,
stdout
=
job
.
get_stdout
(),
stderr
=
job
.
get_stderr
(),
cwd
=
job
.
get_cwd
())
job
.
close_output_file
()
job
.
close_output_file
()
assert
proc
.
returncode
==
0
,
\
assert
proc
.
returncode
==
0
,
\
f
"This command failed:
{
job
.
cmd
()
}
"
f
"This command failed:
{
job
.
cmd
()
}
"
...
@@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line):
...
@@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line):
return
tag_key
return
tag_key
return
f
'
{
tag_key
}{
value
}
'
return
f
'
{
tag_key
}{
value
}
'
tag_list
=
[
tag_list
=
[
SINGLE_SUBMIT
,
OVERLAP_EVENTS
,
THREAD_COUNT
,
IO_PARALLEL
,
QUEUE_DEPTH
,
BLOCK_SIZE
]
SINGLE_SUBMIT
,
OVERLAP_EVENTS
,
THREAD_COUNT
,
IO_PARALLEL
,
QUEUE_DEPTH
,
BLOCK_SIZE
]
log_tags
=
[
io_op_desc
]
log_tags
=
[
io_op_desc
]
cmd_tags
=
create_cmd_tags
(
cmd_line
)
cmd_tags
=
create_cmd_tags
(
cmd_line
)
for
tag
in
tag_list
:
for
tag
in
tag_list
:
...
@@ -298,16 +269,10 @@ def create_read_file(sweep_config):
...
@@ -298,16 +269,10 @@ def create_read_file(sweep_config):
os
.
makedirs
(
read_folder
,
exist_ok
=
True
)
os
.
makedirs
(
read_folder
,
exist_ok
=
True
)
read_file_name
=
os
.
path
.
join
(
read_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
read_file_name
=
os
.
path
.
join
(
read_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
block_size
,
block_count
=
get_block_size_and_count
(
refine_integer_value
(
sweep_config
.
io_size
))
block_size
,
block_count
=
get_block_size_and_count
(
refine_integer_value
(
sweep_config
.
io_size
))
dd_job
=
Job
(
cmd_line
=
[
dd_job
=
Job
(
cmd_line
=
[
f
'dd if=/dev/urandom of=
{
read_file_name
}
bs=
{
block_size
}
count=
{
block_count
}
'
])
f
'dd if=/dev/urandom of=
{
read_file_name
}
bs=
{
block_size
}
count=
{
block_count
}
'
print
(
f
'[Start] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
])
print
(
f
'[Start] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
run_job
(
dd_job
)
run_job
(
dd_job
)
print
(
print
(
f
'[Done] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
f
'[Done] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
return
read_folder
,
read_file_name
return
read_folder
,
read_file_name
...
@@ -319,20 +284,15 @@ def remove_folder(folder):
...
@@ -319,20 +284,15 @@ def remove_folder(folder):
def
run_read_sweep
(
sweep_config
,
flush_cache_job
,
sync_job
,
cmd_lines
):
def
run_read_sweep
(
sweep_config
,
flush_cache_job
,
sync_job
,
cmd_lines
):
read_folder
,
read_file_name
=
create_read_file
(
sweep_config
)
read_folder
,
read_file_name
=
create_read_file
(
sweep_config
)
read_option
=
f
'--read_file
{
read_file_name
}
'
read_option
=
f
'--read_file
{
read_file_name
}
'
read_cmd_lines
=
[[
f
'
{
read_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
read_cmd_lines
=
[[
f
'
{
read_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
for
cmd
in
cmd_lines
]
#dump_cmd_lines(read_cmd_lines)
#dump_cmd_lines(read_cmd_lines)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
READ_LOG_DIR
}
'
)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
READ_LOG_DIR
}
'
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
READ_OP_DESC
,
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
READ_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
read_cmd_lines
)
log_dir
=
log_folder
,
cmd_lines
=
read_cmd_lines
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
remove_folder
(
read_folder
)
remove_folder
(
read_folder
)
...
@@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
...
@@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
os
.
makedirs
(
write_folder
,
exist_ok
=
True
)
os
.
makedirs
(
write_folder
,
exist_ok
=
True
)
write_file_name
=
os
.
path
.
join
(
write_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
write_file_name
=
os
.
path
.
join
(
write_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
write_option
=
f
'--write_size
{
sweep_config
.
io_size
}
--write_file
{
write_file_name
}
'
write_option
=
f
'--write_size
{
sweep_config
.
io_size
}
--write_file
{
write_file_name
}
'
write_cmd_lines
=
[[
f
'
{
write_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
write_cmd_lines
=
[[
f
'
{
write_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
for
cmd
in
cmd_lines
]
#dump_cmd_lines(write_cmd_lines)
#dump_cmd_lines(write_cmd_lines)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
WRITE_LOG_DIR
}
'
)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
WRITE_LOG_DIR
}
'
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
WRITE_OP_DESC
,
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
WRITE_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
write_cmd_lines
)
log_dir
=
log_folder
,
cmd_lines
=
write_cmd_lines
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
remove_folder
(
write_folder
)
remove_folder
(
write_folder
)
...
@@ -376,10 +331,7 @@ def main():
...
@@ -376,10 +331,7 @@ def main():
cmd_lines
=
get_sweep_cmd_lines
(
sweep_config
.
search_space
)
cmd_lines
=
get_sweep_cmd_lines
(
sweep_config
.
search_space
)
if
sweep_config
.
flush_cache
:
if
sweep_config
.
flush_cache
:
flush_cache_job
=
Job
(
flush_cache_job
=
Job
(
cmd_line
=
[
'sudo'
,
'bash -c'
,
"'echo 1 > /proc/sys/vm/drop_caches'"
])
cmd_line
=
[
'sudo'
,
'bash -c'
,
"'echo 1 > /proc/sys/vm/drop_caches'"
])
else
:
else
:
flush_cache_job
=
None
flush_cache_job
=
None
...
...
csrc/aio/py_test/ds_aio_basic.py
View file @
c25a91b6
"""
# Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
...
@@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op):
...
@@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op):
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
buffer
=
get_accelerator
().
pin_memory
(
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
torch
.
empty
(
num_bytes
,
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
ctxt
=
{}
ctxt
=
{}
ctxt
[
'file'
]
=
file
ctxt
[
'file'
]
=
file
...
@@ -60,13 +55,8 @@ def post_basic(pool_params):
...
@@ -60,13 +55,8 @@ def post_basic(pool_params):
def
main_basic_read
(
pool_params
):
def
main_basic_read
(
pool_params
):
args
,
tid
,
ctxt
=
pool_params
args
,
tid
,
ctxt
=
pool_params
start_time
=
time
.
time
()
start_time
=
time
.
time
()
AsyncIOBuilder
().
load
().
aio_read
(
ctxt
[
'buffer'
],
AsyncIOBuilder
().
load
().
aio_read
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
ctxt
[
'file'
],
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
...
@@ -76,13 +66,8 @@ def main_basic_read(pool_params):
...
@@ -76,13 +66,8 @@ def main_basic_read(pool_params):
def
main_basic_write
(
pool_params
):
def
main_basic_write
(
pool_params
):
args
,
tid
,
ctxt
=
pool_params
args
,
tid
,
ctxt
=
pool_params
start_time
=
time
.
time
()
start_time
=
time
.
time
()
AsyncIOBuilder
().
load
().
aio_write
(
ctxt
[
'buffer'
],
AsyncIOBuilder
().
load
().
aio_write
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
ctxt
[
'file'
],
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
...
...
csrc/aio/py_test/ds_aio_handle.py
View file @
c25a91b6
"""
# Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team
# SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
"""
...
@@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op):
...
@@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op):
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
io_parallel
=
args
.
io_parallel
if
args
.
io_parallel
else
1
io_parallel
=
args
.
io_parallel
if
args
.
io_parallel
else
1
handle
=
AsyncIOBuilder
().
load
().
aio_handle
(
args
.
block_size
,
handle
=
AsyncIOBuilder
().
load
().
aio_handle
(
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
queue_depth
,
args
.
overlap_events
,
io_parallel
)
args
.
single_submit
,
args
.
overlap_events
,
io_parallel
)
task_log
(
tid
,
f
'Created deepspeed aio handle'
)
task_log
(
tid
,
f
'Created deepspeed aio handle'
)
if
args
.
gpu
:
if
args
.
gpu
:
buffer
=
torch
.
empty
(
num_bytes
,
buffer
=
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
get_accelerator
().
device_name
())
dtype
=
torch
.
uint8
,
device
=
get_accelerator
().
device_name
())
else
:
else
:
if
args
.
use_accelerator_pin_memory
:
if
args
.
use_accelerator_pin_memory
:
buffer
=
get_accelerator
().
pin_memory
(
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
else
:
else
:
buffer
=
handle
.
new_cpu_locked_tensor
(
num_bytes
,
buffer
=
handle
.
new_cpu_locked_tensor
(
num_bytes
,
torch
.
empty
(
0
,
dtype
=
torch
.
uint8
))
torch
.
empty
(
0
,
dtype
=
torch
.
uint8
))
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
...
@@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op):
...
@@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op):
ctxt
[
'buffer'
]
=
buffer
ctxt
[
'buffer'
]
=
buffer
ctxt
[
'elapsed_sec'
]
=
0
ctxt
[
'elapsed_sec'
]
=
0
task_log
(
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
return
ctxt
return
ctxt
...
...
Prev
1
2
3
4
5
6
7
8
…
36
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment