Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
5bcc463d
Commit
5bcc463d
authored
May 29, 2023
by
aiss
Browse files
update v0.9.2
parent
ac5fbab4
Changes
714
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
145 additions
and
183 deletions
+145
-183
csrc/aio/common/deepspeed_aio_common.h
csrc/aio/common/deepspeed_aio_common.h
+5
-3
csrc/aio/common/deepspeed_aio_types.cpp
csrc/aio/common/deepspeed_aio_types.cpp
+5
-3
csrc/aio/common/deepspeed_aio_types.h
csrc/aio/common/deepspeed_aio_types.h
+5
-3
csrc/aio/common/deepspeed_aio_utils.cpp
csrc/aio/common/deepspeed_aio_utils.cpp
+5
-3
csrc/aio/common/deepspeed_aio_utils.h
csrc/aio/common/deepspeed_aio_utils.h
+5
-3
csrc/aio/py_lib/deepspeed_aio_thread.cpp
csrc/aio/py_lib/deepspeed_aio_thread.cpp
+5
-3
csrc/aio/py_lib/deepspeed_aio_thread.h
csrc/aio/py_lib/deepspeed_aio_thread.h
+5
-3
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
+5
-3
csrc/aio/py_lib/deepspeed_pin_tensor.h
csrc/aio/py_lib/deepspeed_pin_tensor.h
+8
-5
csrc/aio/py_lib/deepspeed_py_aio.cpp
csrc/aio/py_lib/deepspeed_py_aio.cpp
+4
-0
csrc/aio/py_lib/deepspeed_py_aio.h
csrc/aio/py_lib/deepspeed_py_aio.h
+4
-0
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
+4
-0
csrc/aio/py_lib/deepspeed_py_aio_handle.h
csrc/aio/py_lib/deepspeed_py_aio_handle.h
+5
-3
csrc/aio/py_lib/deepspeed_py_copy.cpp
csrc/aio/py_lib/deepspeed_py_copy.cpp
+5
-3
csrc/aio/py_lib/deepspeed_py_copy.h
csrc/aio/py_lib/deepspeed_py_copy.h
+4
-0
csrc/aio/py_lib/py_ds_aio.cpp
csrc/aio/py_lib/py_ds_aio.cpp
+5
-3
csrc/aio/py_test/aio_bench_generate_param.py
csrc/aio/py_test/aio_bench_generate_param.py
+9
-13
csrc/aio/py_test/aio_bench_perf_sweep.py
csrc/aio/py_test/aio_bench_perf_sweep.py
+37
-85
csrc/aio/py_test/ds_aio_basic.py
csrc/aio/py_test/ds_aio_basic.py
+10
-25
csrc/aio/py_test/ds_aio_handle.py
csrc/aio/py_test/ds_aio_handle.py
+10
-22
No files found.
csrc/aio/common/deepspeed_aio_common.h
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/common/deepspeed_aio_types.cpp
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/common/deepspeed_aio_types.h
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/common/deepspeed_aio_utils.cpp
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/common/deepspeed_aio_utils.h
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_lib/deepspeed_aio_thread.cpp
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_lib/deepspeed_aio_thread.h
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_lib/deepspeed_pin_tensor.cpp
View file @
5bcc463d
/*
Copyright 2023 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory.
*/
...
...
csrc/aio/py_lib/deepspeed_pin_tensor.h
View file @
5bcc463d
/*
Copyright 2023 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory.
TODO: Implement a full-featured manager that
1. Avoid page-locked memory leaks
2. Minimize page-locked memory usage by reducing internal fragmentation
1. Avoid page-locked memory leaks
2. Minimize page-locked memory usage by reducing internal fragmentation
Functionality for managing CPU tensors occupying page-locked memory.
*/
#include <map>
...
...
csrc/aio/py_lib/deepspeed_py_aio.cpp
View file @
5bcc463d
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio.h
View file @
5bcc463d
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
View file @
5bcc463d
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/deepspeed_py_aio_handle.h
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_lib/deepspeed_py_copy.cpp
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_lib/deepspeed_py_copy.h
View file @
5bcc463d
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright 2020 The Microsoft DeepSpeed Team
...
...
csrc/aio/py_lib/py_ds_aio.cpp
View file @
5bcc463d
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
...
...
csrc/aio/py_test/aio_bench_generate_param.py
View file @
5bcc463d
"""
Copyright 2021 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
import
os
...
...
@@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
default
=
BENCH_LOG_DIR
,
help
=
f
'Folder of performance sweep logs. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
default
=
BENCH_LOG_DIR
,
help
=
f
'Folder of performance sweep logs. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
args
=
parser
.
parse_args
()
print
(
f
'args =
{
args
}
'
)
...
...
@@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir):
optimal_config_read
=
read_results
.
get
(
read_perf_keys
[
optimal_key
],
None
)
optimal_config_write
=
write_results
.
get
(
write_perf_keys
[
optimal_key
],
None
)
print
(
f
'Best performance (GB/sec): read =
{
optimal_config_read
:
5.2
f
}
, write =
{
optimal_config_write
:
5.2
f
}
'
)
print
(
f
'Best performance (GB/sec): read =
{
optimal_config_read
:
5.2
f
}
, write =
{
optimal_config_write
:
5.2
f
}
'
)
print
(
json
.
dumps
(
aio_param
,
indent
=
3
))
...
...
csrc/aio/py_test/aio_bench_perf_sweep.py
View file @
5bcc463d
"""
Copyright 2021 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
import
os
...
...
@@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder
OTHER_OPTIONS
=
'--handle'
PERF_SCRIPT
=
'test_ds_aio.py'
DEFAULT_SWEEP_CONFIG
=
{
"block_size"
:
[
"128K"
,
"256K"
],
"queue_depth"
:
[
4
,
16
,
32
],
"overlap_events"
:
[
True
,
False
],
"io_parallel"
:
[
2
,
8
],
"block_size"
:
[
"128K"
,
"256K"
],
"queue_depth"
:
[
4
,
16
,
32
],
"overlap_events"
:
[
True
,
False
],
"io_parallel"
:
[
2
,
8
],
"single_submit"
:
[
False
]
}
class
Job
(
object
):
def
__init__
(
self
,
cmd_line
,
output_file
=
None
,
work_dir
=
None
):
self
.
cmd_line
=
cmd_line
self
.
output_file
=
output_file
...
...
@@ -63,6 +60,7 @@ class Job(object):
class
SweepConfig
(
object
):
def
__init__
(
self
,
args
):
self
.
nvme_dir
=
args
.
nvme_dir
self
.
io_size
=
args
.
io_size
...
...
@@ -78,52 +76,35 @@ class SweepConfig(object):
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--nvme_dir'
,
required
=
True
,
type
=
str
,
help
=
'Directory in which to perform I/O tests. A writeable directory on a NVMe device.'
)
parser
.
add_argument
(
'--sweep_config'
,
parser
.
add_argument
(
'--nvme_dir'
,
required
=
True
,
type
=
str
,
default
=
None
,
help
=
'Performance sweep configuration json file.'
)
help
=
'Directory in which to perform I/O tests. A writeable directory on a NVMe device.'
)
parser
.
add_argument
(
'--no_read'
,
action
=
'store_true'
,
help
=
'Disable read performance measurements.'
)
parser
.
add_argument
(
'--sweep_config'
,
type
=
str
,
default
=
None
,
help
=
'Performance sweep configuration json file.'
)
parser
.
add_argument
(
'--no_write'
,
action
=
'store_true'
,
help
=
'Disable write performance measurements.'
)
parser
.
add_argument
(
'--no_read'
,
action
=
'store_true'
,
help
=
'Disable read performance measurements.'
)
parser
.
add_argument
(
'--io_size'
,
type
=
str
,
default
=
"400M"
,
help
=
'Number of I/O bytes to read/write for performance measurements.'
)
parser
.
add_argument
(
'--no_write'
,
action
=
'store_true'
,
help
=
'Disable write performance measurements.'
)
parser
.
add_argument
(
'--io_size'
,
type
=
str
,
default
=
"400M"
,
help
=
'Number of I/O bytes to read/write for performance measurements.'
)
parser
.
add_argument
(
'--no_sudo'
,
action
=
'store_true'
,
help
=
'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.'
)
'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.'
)
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
default
=
BENCH_LOG_DIR
,
help
=
f
'Output directory for performance log files. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
help
=
f
'Output directory for performance log files. Default is
{
os
.
path
.
join
(
"."
,
BENCH_LOG_DIR
)
}
'
)
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
parser
.
add_argument
(
'--loops'
,
type
=
int
,
default
=
1
,
help
=
'Count of operation repetitions'
)
args
=
parser
.
parse_args
()
print
(
f
'args =
{
args
}
'
)
...
...
@@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json):
def
get_sweep_cmd_lines
(
sweep_config_dict
):
def
flatten_options
(
key
,
value_list
):
flat_list
=
[]
for
v
in
value_list
:
...
...
@@ -170,11 +152,7 @@ def run_job(job):
args
=
' '
.
join
(
job
.
cmd
())
print
(
f
'args =
{
args
}
'
)
job
.
open_output_file
()
proc
=
subprocess
.
run
(
args
=
args
,
shell
=
True
,
stdout
=
job
.
get_stdout
(),
stderr
=
job
.
get_stderr
(),
cwd
=
job
.
get_cwd
())
proc
=
subprocess
.
run
(
args
=
args
,
shell
=
True
,
stdout
=
job
.
get_stdout
(),
stderr
=
job
.
get_stderr
(),
cwd
=
job
.
get_cwd
())
job
.
close_output_file
()
assert
proc
.
returncode
==
0
,
\
f
"This command failed:
{
job
.
cmd
()
}
"
...
...
@@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line):
return
tag_key
return
f
'
{
tag_key
}{
value
}
'
tag_list
=
[
SINGLE_SUBMIT
,
OVERLAP_EVENTS
,
THREAD_COUNT
,
IO_PARALLEL
,
QUEUE_DEPTH
,
BLOCK_SIZE
]
tag_list
=
[
SINGLE_SUBMIT
,
OVERLAP_EVENTS
,
THREAD_COUNT
,
IO_PARALLEL
,
QUEUE_DEPTH
,
BLOCK_SIZE
]
log_tags
=
[
io_op_desc
]
cmd_tags
=
create_cmd_tags
(
cmd_line
)
for
tag
in
tag_list
:
...
...
@@ -298,16 +269,10 @@ def create_read_file(sweep_config):
os
.
makedirs
(
read_folder
,
exist_ok
=
True
)
read_file_name
=
os
.
path
.
join
(
read_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
block_size
,
block_count
=
get_block_size_and_count
(
refine_integer_value
(
sweep_config
.
io_size
))
dd_job
=
Job
(
cmd_line
=
[
f
'dd if=/dev/urandom of=
{
read_file_name
}
bs=
{
block_size
}
count=
{
block_count
}
'
])
print
(
f
'[Start] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
dd_job
=
Job
(
cmd_line
=
[
f
'dd if=/dev/urandom of=
{
read_file_name
}
bs=
{
block_size
}
count=
{
block_count
}
'
])
print
(
f
'[Start] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
run_job
(
dd_job
)
print
(
f
'[Done] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
print
(
f
'[Done] Create read file of
{
sweep_config
.
io_size
}
bytes by running
{
dd_job
.
cmd
()
}
....'
)
return
read_folder
,
read_file_name
...
...
@@ -319,20 +284,15 @@ def remove_folder(folder):
def
run_read_sweep
(
sweep_config
,
flush_cache_job
,
sync_job
,
cmd_lines
):
read_folder
,
read_file_name
=
create_read_file
(
sweep_config
)
read_option
=
f
'--read_file
{
read_file_name
}
'
read_cmd_lines
=
[[
f
'
{
read_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
read_cmd_lines
=
[[
f
'
{
read_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
#dump_cmd_lines(read_cmd_lines)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
READ_LOG_DIR
}
'
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
READ_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
read_cmd_lines
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
READ_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
read_cmd_lines
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
remove_folder
(
read_folder
)
...
...
@@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
os
.
makedirs
(
write_folder
,
exist_ok
=
True
)
write_file_name
=
os
.
path
.
join
(
write_folder
,
f
'random_
{
sweep_config
.
io_size
}
B.pt'
)
write_option
=
f
'--write_size
{
sweep_config
.
io_size
}
--write_file
{
write_file_name
}
'
write_cmd_lines
=
[[
f
'
{
write_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
write_cmd_lines
=
[[
f
'
{
write_option
}
{
sweep_config
.
other_options
}
'
]
+
cmd
for
cmd
in
cmd_lines
]
#dump_cmd_lines(write_cmd_lines)
log_folder
=
os
.
path
.
join
(
sweep_config
.
log_dir
,
f
'
{
WRITE_LOG_DIR
}
'
)
os
.
makedirs
(
log_folder
,
exist_ok
=
True
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
WRITE_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
write_cmd_lines
)
perf_jobs
=
create_perf_jobs
(
io_op_desc
=
WRITE_OP_DESC
,
log_dir
=
log_folder
,
cmd_lines
=
write_cmd_lines
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
launch_sweep
(
sweep_jobs
=
perf_jobs
,
sync_job
=
sync_job
,
flush_cache_job
=
flush_cache_job
)
remove_folder
(
write_folder
)
...
...
@@ -376,10 +331,7 @@ def main():
cmd_lines
=
get_sweep_cmd_lines
(
sweep_config
.
search_space
)
if
sweep_config
.
flush_cache
:
flush_cache_job
=
Job
(
cmd_line
=
[
'sudo'
,
'bash -c'
,
"'echo 1 > /proc/sys/vm/drop_caches'"
])
flush_cache_job
=
Job
(
cmd_line
=
[
'sudo'
,
'bash -c'
,
"'echo 1 > /proc/sys/vm/drop_caches'"
])
else
:
flush_cache_job
=
None
...
...
csrc/aio/py_test/ds_aio_basic.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
...
...
@@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op):
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
ctxt
=
{}
ctxt
[
'file'
]
=
file
...
...
@@ -60,13 +55,8 @@ def post_basic(pool_params):
def
main_basic_read
(
pool_params
):
args
,
tid
,
ctxt
=
pool_params
start_time
=
time
.
time
()
AsyncIOBuilder
().
load
().
aio_read
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
AsyncIOBuilder
().
load
().
aio_read
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
end_time
=
time
.
time
()
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
...
...
@@ -76,13 +66,8 @@ def main_basic_read(pool_params):
def
main_basic_write
(
pool_params
):
args
,
tid
,
ctxt
=
pool_params
start_time
=
time
.
time
()
AsyncIOBuilder
().
load
().
aio_write
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
AsyncIOBuilder
().
load
().
aio_write
(
ctxt
[
'buffer'
],
ctxt
[
'file'
],
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
args
.
validate
)
end_time
=
time
.
time
()
ctxt
[
'elapsed_sec'
]
+=
end_time
-
start_time
...
...
csrc/aio/py_test/ds_aio_handle.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
"""
...
...
@@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op):
file
=
args
.
read_file
if
read_op
else
f
'
{
args
.
write_file
}
.
{
tid
}
'
io_parallel
=
args
.
io_parallel
if
args
.
io_parallel
else
1
handle
=
AsyncIOBuilder
().
load
().
aio_handle
(
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
io_parallel
)
handle
=
AsyncIOBuilder
().
load
().
aio_handle
(
args
.
block_size
,
args
.
queue_depth
,
args
.
single_submit
,
args
.
overlap_events
,
io_parallel
)
task_log
(
tid
,
f
'Created deepspeed aio handle'
)
if
args
.
gpu
:
buffer
=
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
get_accelerator
().
device_name
())
buffer
=
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
get_accelerator
().
device_name
())
else
:
if
args
.
use_accelerator_pin_memory
:
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
buffer
=
get_accelerator
().
pin_memory
(
torch
.
empty
(
num_bytes
,
dtype
=
torch
.
uint8
,
device
=
'cpu'
))
else
:
buffer
=
handle
.
new_cpu_locked_tensor
(
num_bytes
,
torch
.
empty
(
0
,
dtype
=
torch
.
uint8
))
buffer
=
handle
.
new_cpu_locked_tensor
(
num_bytes
,
torch
.
empty
(
0
,
dtype
=
torch
.
uint8
))
task_log
(
tid
,
f
'Allocate tensor of size
{
num_bytes
}
bytes'
)
...
...
@@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op):
ctxt
[
'buffer'
]
=
buffer
ctxt
[
'elapsed_sec'
]
=
0
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
task_log
(
tid
,
f
'
{
io_string
}
file
{
file
}
of size
{
num_bytes
}
bytes from buffer on device
{
buffer
.
device
}
'
)
return
ctxt
...
...
Prev
1
2
3
4
5
6
7
8
…
36
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment