OpenDAS / deepspeed, Commit c25a91b6
Authored May 30, 2023 by aiss

Merge branch 'ds-v0.9.2-rocm' into 'main'

See merge request dcutoolkit/deeplearing/deepspeed!2

Parents: d1596c94, af82b300
Changes: 710; this page shows 20 changed files with 145 additions and 183 deletions (+145, -183).
  csrc/aio/common/deepspeed_aio_common.h           +5   -3
  csrc/aio/common/deepspeed_aio_types.cpp          +5   -3
  csrc/aio/common/deepspeed_aio_types.h            +5   -3
  csrc/aio/common/deepspeed_aio_utils.cpp          +5   -3
  csrc/aio/common/deepspeed_aio_utils.h            +5   -3
  csrc/aio/py_lib/deepspeed_aio_thread.cpp         +5   -3
  csrc/aio/py_lib/deepspeed_aio_thread.h           +5   -3
  csrc/aio/py_lib/deepspeed_pin_tensor.cpp         +5   -3
  csrc/aio/py_lib/deepspeed_pin_tensor.h           +8   -5
  csrc/aio/py_lib/deepspeed_py_aio.cpp             +4   -0
  csrc/aio/py_lib/deepspeed_py_aio.h               +4   -0
  csrc/aio/py_lib/deepspeed_py_aio_handle.cpp      +4   -0
  csrc/aio/py_lib/deepspeed_py_aio_handle.h        +5   -3
  csrc/aio/py_lib/deepspeed_py_copy.cpp            +5   -3
  csrc/aio/py_lib/deepspeed_py_copy.h              +4   -0
  csrc/aio/py_lib/py_ds_aio.cpp                    +5   -3
  csrc/aio/py_test/aio_bench_generate_param.py     +9   -13
  csrc/aio/py_test/aio_bench_perf_sweep.py         +37  -85
  csrc/aio/py_test/ds_aio_basic.py                 +10  -25
  csrc/aio/py_test/ds_aio_handle.py                +10  -22
csrc/aio/common/deepspeed_aio_common.h

  The license header is replaced. The old block

    /*
    Copyright 2020 The Microsoft DeepSpeed Team
    Licensed under the MIT license.

  becomes the SPDX-style header

    // Copyright (c) Microsoft Corporation.
    // SPDX-License-Identifier: Apache-2.0

    // DeepSpeed Team

  The descriptive comment is unchanged:

    /*
    Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
    */
  ...
csrc/aio/common/deepspeed_aio_types.cpp
csrc/aio/common/deepspeed_aio_types.h
csrc/aio/common/deepspeed_aio_utils.cpp
csrc/aio/common/deepspeed_aio_utils.h
csrc/aio/py_lib/deepspeed_aio_thread.cpp
csrc/aio/py_lib/deepspeed_aio_thread.h

  Same license-header replacement as above in each of these files; the descriptive
  comment "Functionality for swapping optimizer tensors to/from (NVMe) storage
  devices." is unchanged.
  ...
csrc/aio/py_lib/deepspeed_pin_tensor.cpp

  Same license-header replacement (the old header here reads "Copyright 2023 The
  Microsoft DeepSpeed Team / Licensed under the MIT license."); the descriptive
  comment is unchanged:

    /*
    Functionality for managing CPU tensors occupying page-locked memory.
    */
  ...
csrc/aio/py_lib/deepspeed_pin_tensor.h

  Same license-header replacement (old header: "Copyright 2023 The Microsoft
  DeepSpeed Team / Licensed under the MIT license."). The retained comment keeps
  the description "Functionality for managing CPU tensors occupying page-locked
  memory." together with its TODO list:

    TODO: Implement a full-featured manager that
    1. Avoid page-locked memory leaks
    2. Minimize page-locked memory usage by reducing internal fragmentation

  followed by

    #include <map>
  ...
csrc/aio/py_lib/deepspeed_py_aio.cpp
csrc/aio/py_lib/deepspeed_py_aio.h
csrc/aio/py_lib/deepspeed_py_aio_handle.cpp

  In each of these files the SPDX-style header

    // Copyright (c) Microsoft Corporation.
    // SPDX-License-Identifier: Apache-2.0

    // DeepSpeed Team

  is added above the existing "/* Copyright 2020 The Microsoft DeepSpeed Team ..."
  comment (additions only, no lines removed).
  ...
csrc/aio/py_lib/deepspeed_py_aio_handle.h
csrc/aio/py_lib/deepspeed_py_copy.cpp

  Same license-header replacement as the files above; the descriptive comment
  "Functionality for swapping optimizer tensors to/from (NVMe) storage devices."
  is unchanged.
  ...
csrc/aio/py_lib/deepspeed_py_copy.h

  SPDX-style header added above the existing "/* Copyright 2020 The Microsoft
  DeepSpeed Team ..." comment (additions only).
  ...
csrc/aio/py_lib/py_ds_aio.cpp

  Same license-header replacement; the descriptive comment "Functionality for
  swapping optimizer tensors to/from (NVMe) storage devices." is unchanged.
  ...
csrc/aio/py_test/aio_bench_generate_param.py

  The old docstring header

    """
    Copyright 2021 The Microsoft DeepSpeed Team
    Licensed under the MIT license.

  is replaced by

    # Copyright (c) Microsoft Corporation.
    # SPDX-License-Identifier: Apache-2.0

    # DeepSpeed Team

  The module docstring "Functionality of swapping optimizer tensors to/from (NVMe)
  storage devices." and the imports (import os, ...) are unchanged.
  ...
  @@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR

    def parse_arguments():
        parser = argparse.ArgumentParser()
        parser.add_argument('--log_dir',
                            type=str,
                            default=BENCH_LOG_DIR,
                            help=f'Folder of performance sweep logs. Default is {os.path.join(".", BENCH_LOG_DIR)}')
        args = parser.parse_args()
        print(f'args = {args}')

  The add_argument call is only re-wrapped; its arguments are unchanged.
  ...
  @@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir):

        optimal_config_read = read_results.get(read_perf_keys[optimal_key], None)
        optimal_config_write = write_results.get(write_perf_keys[optimal_key], None)

        print(f'Best performance (GB/sec): read = {optimal_config_read:5.2f}, write = {optimal_config_write:5.2f}')
        print(json.dumps(aio_param, indent=3))

  The "Best performance" print is only re-wrapped onto a single line; its content
  is unchanged.
  ...
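  The :5.2f format specifiers in that print render each throughput as a two-decimal
  value padded to a width of 5. A quick, self-contained illustration (the numbers
  below are made up, not benchmark output):

    optimal_config_read, optimal_config_write = 3.14159, 12.5
    print(f'Best performance (GB/sec): read = {optimal_config_read:5.2f}, write = {optimal_config_write:5.2f}')
    # Best performance (GB/sec): read =  3.14, write = 12.50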
csrc/aio/py_test/aio_bench_perf_sweep.py

  Same docstring-to-SPDX header replacement as above (old header: "Copyright 2021
  The Microsoft DeepSpeed Team / Licensed under the MIT license."); the module
  docstring "Functionality of swapping optimizer tensors to/from (NVMe) storage
  devices." and import os are unchanged.
  ...
  @@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder

    OTHER_OPTIONS = '--handle'
    PERF_SCRIPT = 'test_ds_aio.py'
    DEFAULT_SWEEP_CONFIG = {
        "block_size": ["128K", "256K"],
        "queue_depth": [4, 16, 32],
        "overlap_events": [True, False],
        "io_parallel": [2, 8],
        "single_submit": [False]
    }


    class Job(object):

        def __init__(self, cmd_line, output_file=None, work_dir=None):
            self.cmd_line = cmd_line
            self.output_file = output_file

  The DEFAULT_SWEEP_CONFIG dictionary is only re-wrapped; its entries are unchanged.
  ...
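  The sweep takes the cross product of these option lists and turns each combination
  into a command line for PERF_SCRIPT. A minimal sketch of that expansion, for
  orientation only; the script's own get_sweep_cmd_lines/flatten_options code is not
  reproduced here, and the exact flag spelling below is an assumption:

    import itertools

    sweep = {
        "block_size": ["128K", "256K"],
        "queue_depth": [4, 16, 32],
        "overlap_events": [True, False],
        "io_parallel": [2, 8],
        "single_submit": [False],
    }

    def to_flags(key, value):
        # Assumed convention: boolean options become bare flags when True,
        # everything else carries an explicit value.
        if isinstance(value, bool):
            return [f'--{key}'] if value else []
        return [f'--{key}', str(value)]

    cmd_lines = []
    for combo in itertools.product(*sweep.values()):
        cmd = []
        for key, value in zip(sweep.keys(), combo):
            cmd += to_flags(key, value)
        cmd_lines.append(cmd)

    print(len(cmd_lines))   # 2 * 3 * 2 * 2 * 1 = 24 candidate configurations
    print(cmd_lines[0])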
  @@ -63,6 +60,7 @@ class Job(object):

    class SweepConfig(object):

        def __init__(self, args):
            self.nvme_dir = args.nvme_dir
            self.io_size = args.io_size

  The one added line appears to be the blank line after the class statement
  (formatting change only).
  ...
  @@ -78,52 +76,35 @@ class SweepConfig(object):

    def parse_arguments():
        parser = argparse.ArgumentParser()
        parser.add_argument('--nvme_dir',
                            required=True,
                            type=str,
                            help='Directory in which to perform I/O tests. A writeable directory on a NVMe device.')
        parser.add_argument('--sweep_config', type=str, default=None, help='Performance sweep configuration json file.')
        parser.add_argument('--no_read', action='store_true', help='Disable read performance measurements.')
        parser.add_argument('--no_write', action='store_true', help='Disable write performance measurements.')
        parser.add_argument('--io_size',
                            type=str,
                            default="400M",
                            help='Number of I/O bytes to read/write for performance measurements.')
        parser.add_argument(
            '--no_sudo',
            action='store_true',
            help='Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.')
        parser.add_argument('--log_dir',
                            type=str,
                            default=BENCH_LOG_DIR,
                            help=f'Output directory for performance log files. Default is {os.path.join(".", BENCH_LOG_DIR)}')
        parser.add_argument('--loops', type=int, default=1, help='Count of operation repetitions')
        args = parser.parse_args()
        print(f'args = {args}')

  The argument definitions themselves are unchanged; the add_argument calls are
  re-wrapped to the new formatting style.
  ...
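  For reference, a hedged sketch of parsing a typical invocation in-process, assuming
  it is run inside this module so parse_arguments is in scope; the directory path and
  values are illustrative, not taken from the diff:

    import sys

    # Equivalent command line (illustrative):
    #   python aio_bench_perf_sweep.py --nvme_dir /mnt/nvme0/aio_test --io_size 400M --no_sudo --loops 3
    sys.argv = ['aio_bench_perf_sweep.py', '--nvme_dir', '/mnt/nvme0/aio_test',
                '--io_size', '400M', '--no_sudo', '--loops', '3']
    args = parse_arguments()
    print(args.nvme_dir, args.io_size, args.no_sudo, args.loops)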
  @@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json):

    def get_sweep_cmd_lines(sweep_config_dict):

        def flatten_options(key, value_list):
            flat_list = []
            for v in value_list:

  The one added line appears to be the blank line before the nested flatten_options
  definition (formatting change only).
  ...
  @@ -170,11 +152,7 @@ def run_job(job):

        args = ' '.join(job.cmd())
        print(f'args = {args}')
        job.open_output_file()
        proc = subprocess.run(args=args, shell=True, stdout=job.get_stdout(), stderr=job.get_stderr(), cwd=job.get_cwd())
        job.close_output_file()
        assert proc.returncode == 0, \
            f"This command failed: {job.cmd()}"

  The subprocess.run call is only re-wrapped onto a single line; its arguments are
  unchanged.
  ...
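  Every external command in the sweep (dd, sync, the page-cache flush, and each
  PERF_SCRIPT run) goes through this Job/run_job pair, so any failing command aborts
  the sweep via the assert. A minimal hedged usage sketch; the 'sync' command below
  mirrors the sync_job the sweep passes to launch_sweep, but its exact construction
  is not shown in this diff:

    # Illustrative only; Job and run_job come from aio_bench_perf_sweep.py.
    sync_job = Job(cmd_line=['sync'])
    run_job(sync_job)   # runs "sync" through the shell and asserts returncode == 0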
  @@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line):

                return tag_key
            return f'{tag_key}{value}'

        tag_list = [SINGLE_SUBMIT, OVERLAP_EVENTS, THREAD_COUNT, IO_PARALLEL, QUEUE_DEPTH, BLOCK_SIZE]

        log_tags = [io_op_desc]
        cmd_tags = create_cmd_tags(cmd_line)
        for tag in tag_list:

  The tag_list assignment is only re-wrapped onto a single line.
  ...
  @@ -298,16 +269,10 @@ def create_read_file(sweep_config):

        os.makedirs(read_folder, exist_ok=True)
        read_file_name = os.path.join(read_folder, f'random_{sweep_config.io_size}B.pt')
        block_size, block_count = get_block_size_and_count(refine_integer_value(sweep_config.io_size))
        dd_job = Job(cmd_line=[f'dd if=/dev/urandom of={read_file_name} bs={block_size} count={block_count}'])
        print(f'[Start] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....')
        run_job(dd_job)
        print(f'[Done] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....')
        return read_folder, read_file_name

  The dd_job construction and the two progress prints are only re-wrapped; their
  content is unchanged.
  ...
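  In other words, the read benchmark first materializes a file of io_size random
  bytes with dd and then sweeps read configurations against it. A hedged sketch of
  how the size string could translate into dd parameters; the real
  refine_integer_value and get_block_size_and_count helpers are not shown in this
  diff, so the split below is an assumption (note the "_sketch" suffixes):

    def refine_integer_value_sketch(size: str) -> int:
        # Illustration: interpret a trailing K/M/G suffix as KiB/MiB/GiB.
        units = {'K': 2**10, 'M': 2**20, 'G': 2**30}
        return int(size[:-1]) * units[size[-1]] if size[-1] in units else int(size)

    def get_block_size_and_count_sketch(num_bytes: int):
        # Illustration: use 1 MiB dd blocks whenever the size divides evenly.
        block = 2**20 if num_bytes % 2**20 == 0 else 1
        return block, num_bytes // block

    size_bytes = refine_integer_value_sketch("400M")
    bs, count = get_block_size_and_count_sketch(size_bytes)
    print(f'dd if=/dev/urandom of=random_400MB.pt bs={bs} count={count}')
    # dd if=/dev/urandom of=random_400MB.pt bs=1048576 count=400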
  @@ -319,20 +284,15 @@ def remove_folder(folder):

    def run_read_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
        read_folder, read_file_name = create_read_file(sweep_config)

        read_option = f'--read_file {read_file_name}'
        read_cmd_lines = [[f'{read_option} {sweep_config.other_options}'] + cmd for cmd in cmd_lines]
        #dump_cmd_lines(read_cmd_lines)

        log_folder = os.path.join(sweep_config.log_dir, f'{READ_LOG_DIR}')
        os.makedirs(log_folder, exist_ok=True)

        perf_jobs = create_perf_jobs(io_op_desc=READ_OP_DESC, log_dir=log_folder, cmd_lines=read_cmd_lines)

        launch_sweep(sweep_jobs=perf_jobs, sync_job=sync_job, flush_cache_job=flush_cache_job)

        remove_folder(read_folder)

  The list comprehension and the create_perf_jobs/launch_sweep calls are only
  re-wrapped.
  ...
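  Each sweep command line thus gains a read-specific prefix before being handed to
  create_perf_jobs. A tiny illustration of that prefixing; the paths and the shape of
  the cmd_lines entries are made up for the example:

    read_option = '--read_file /mnt/nvme0/aio_test/read/random_400MB.pt'
    other_options = '--handle'   # sweep_config.other_options
    cmd_lines = [['--block_size 128K', '--queue_depth 4'],
                 ['--block_size 256K', '--queue_depth 32']]

    read_cmd_lines = [[f'{read_option} {other_options}'] + cmd for cmd in cmd_lines]
    print(read_cmd_lines[0])
    # ['--read_file /mnt/nvme0/aio_test/read/random_400MB.pt --handle', '--block_size 128K', '--queue_depth 4']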
  @@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):

        os.makedirs(write_folder, exist_ok=True)
        write_file_name = os.path.join(write_folder, f'random_{sweep_config.io_size}B.pt')

        write_option = f'--write_size {sweep_config.io_size} --write_file {write_file_name}'
        write_cmd_lines = [[f'{write_option} {sweep_config.other_options}'] + cmd for cmd in cmd_lines]
        #dump_cmd_lines(write_cmd_lines)

        log_folder = os.path.join(sweep_config.log_dir, f'{WRITE_LOG_DIR}')
        os.makedirs(log_folder, exist_ok=True)

        perf_jobs = create_perf_jobs(io_op_desc=WRITE_OP_DESC, log_dir=log_folder, cmd_lines=write_cmd_lines)

        launch_sweep(sweep_jobs=perf_jobs, sync_job=sync_job, flush_cache_job=flush_cache_job)

        remove_folder(write_folder)

  As in run_read_sweep, only the line wrapping changes.
  ...
  @@ -376,10 +331,7 @@ def main():

        cmd_lines = get_sweep_cmd_lines(sweep_config.search_space)

        if sweep_config.flush_cache:
            flush_cache_job = Job(cmd_line=['sudo', 'bash -c', "'echo 1 > /proc/sys/vm/drop_caches'"])
        else:
            flush_cache_job = None

  The flush_cache_job construction is only re-wrapped onto a single line.
  ...
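  The flush job drops the Linux page cache (sudo bash -c 'echo 1 > /proc/sys/vm/drop_caches')
  so that read measurements hit the device rather than cached data, which is exactly
  what the --no_sudo help text warns about skipping. A hedged sketch of how
  launch_sweep presumably interleaves these jobs; its body is not part of this diff,
  so the loop below is an assumption:

    # Assumption: the per-job loop inside launch_sweep looks roughly like this.
    def launch_sweep_sketch(sweep_jobs, sync_job, flush_cache_job):
        for perf_job in sweep_jobs:
            if flush_cache_job is not None:
                run_job(sync_job)          # flush dirty pages to disk ...
                run_job(flush_cache_job)   # ... then drop the page cache
            run_job(perf_job)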
csrc/aio/py_test/ds_aio_basic.py

  Same docstring-to-SPDX header replacement (old header: "Copyright 2020 The
  Microsoft DeepSpeed Team / Licensed under the MIT license."); the module docstring
  "Functionality of swapping optimizer tensors to/from (NVMe) storage devices." is
  unchanged.
  ...
  @@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op):

        file = args.read_file if read_op else f'{args.write_file}.{tid}'

        task_log(tid, f'Allocate tensor of size {num_bytes} bytes')
        buffer = get_accelerator().pin_memory(torch.empty(num_bytes, dtype=torch.uint8, device='cpu'))
        task_log(tid, f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}')

        ctxt = {}
        ctxt['file'] = file

  The pin_memory allocation and the task_log call are only re-wrapped.
  ...
  @@ -60,13 +55,8 @@ def post_basic(pool_params):

    def main_basic_read(pool_params):
        args, tid, ctxt = pool_params
        start_time = time.time()
        AsyncIOBuilder().load().aio_read(ctxt['buffer'], ctxt['file'], args.block_size, args.queue_depth,
                                         args.single_submit, args.overlap_events, args.validate)
        end_time = time.time()
        ctxt['elapsed_sec'] += end_time - start_time

  The aio_read call is only re-wrapped.
  ...
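  Putting the pieces of this file together, a minimal hedged sketch of a blocking
  aio_read into a pinned CPU buffer; it assumes DeepSpeed is installed with the
  async_io op buildable, and the file path and parameter values are illustrative:

    import os
    import torch
    from deepspeed.ops.op_builder import AsyncIOBuilder
    from deepspeed.accelerator import get_accelerator

    file = '/mnt/nvme0/aio_test/read/random_400MB.pt'   # any existing file
    num_bytes = os.path.getsize(file)

    # Page-locked CPU buffer, as in pre_basic above.
    buffer = get_accelerator().pin_memory(torch.empty(num_bytes, dtype=torch.uint8, device='cpu'))

    # Blocking read with the same signature used in main_basic_read:
    # (buffer, file, block_size, queue_depth, single_submit, overlap_events, validate)
    AsyncIOBuilder().load().aio_read(buffer, file, 1048576, 32, False, True, False)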
  @@ -76,13 +66,8 @@ def main_basic_read(pool_params):

    def main_basic_write(pool_params):
        args, tid, ctxt = pool_params
        start_time = time.time()
        AsyncIOBuilder().load().aio_write(ctxt['buffer'], ctxt['file'], args.block_size, args.queue_depth,
                                          args.single_submit, args.overlap_events, args.validate)
        end_time = time.time()
        ctxt['elapsed_sec'] += end_time - start_time

  The aio_write call is only re-wrapped.
  ...
csrc/aio/py_test/ds_aio_handle.py

  Same docstring-to-SPDX header replacement (old header: "Copyright 2020 The
  Microsoft DeepSpeed Team / Licensed under the MIT license."); the module docstring
  "Functionality of swapping optimizer tensors to/from (NVMe) storage devices." is
  unchanged.
  ...
  @@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op):

        file = args.read_file if read_op else f'{args.write_file}.{tid}'

        io_parallel = args.io_parallel if args.io_parallel else 1
        handle = AsyncIOBuilder().load().aio_handle(args.block_size, args.queue_depth, args.single_submit,
                                                    args.overlap_events, io_parallel)
        task_log(tid, f'Created deepspeed aio handle')

        if args.gpu:
            buffer = torch.empty(num_bytes, dtype=torch.uint8, device=get_accelerator().device_name())
        else:
            if args.use_accelerator_pin_memory:
                buffer = get_accelerator().pin_memory(torch.empty(num_bytes, dtype=torch.uint8, device='cpu'))
            else:
                buffer = handle.new_cpu_locked_tensor(num_bytes, torch.empty(0, dtype=torch.uint8))

        task_log(tid, f'Allocate tensor of size {num_bytes} bytes')

  The aio_handle construction and the three buffer allocations are only re-wrapped;
  the benchmark still chooses between a GPU tensor (args.gpu), an accelerator-pinned
  CPU tensor (args.use_accelerator_pin_memory), and a handle-allocated page-locked
  CPU tensor.
  ...
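  For reference, a minimal hedged sketch that builds an aio handle and a
  handle-managed page-locked buffer with the same calls shown above; the parameter
  values are illustrative, and DeepSpeed's async_io op must be buildable on the
  system:

    import torch
    from deepspeed.ops.op_builder import AsyncIOBuilder

    block_size, queue_depth = 1048576, 32
    single_submit, overlap_events, io_parallel = False, True, 1

    # Same constructor signature as in pre_handle above.
    handle = AsyncIOBuilder().load().aio_handle(block_size, queue_depth, single_submit,
                                                overlap_events, io_parallel)

    num_bytes = 1048576
    # Page-locked CPU tensor allocated and tracked by the handle itself.
    buffer = handle.new_cpu_locked_tensor(num_bytes, torch.empty(0, dtype=torch.uint8))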
  @@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op):

        ctxt['buffer'] = buffer
        ctxt['elapsed_sec'] = 0

        task_log(tid, f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}')

        return ctxt

  The task_log call is only re-wrapped.
  ...
(Changed-file listing continues on pages 2-36.)