Commit c25a91b6 authored by aiss
Browse files

Merge branch 'ds-v0.9.2-rocm' into 'main'

Ds v0.9.2 rocm

See merge request dcutoolkit/deeplearing/deepspeed!2
parents d1596c94 af82b300
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2023 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory. Functionality for managing CPU tensors occupying page-locked memory.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2023 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for managing CPU tensors occupying page-locked memory. Functionality for managing CPU tensors occupying page-locked memory.
TODO: Implement a full-featured manager that TODO: Implement a full-featured manager that
1. Avoid page-locked memory leaks 1. Avoid page-locked memory leaks
2. Minimize page-locked memory usage by reducing internal fragmentation 2. Minimize page-locked memory usage by reducing internal fragmentation
Functionality for managing CPU tensors occupying page-locked memory.
*/ */
#include <map> #include <map>
......
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/* /*
Copyright 2020 The Microsoft DeepSpeed Team Copyright 2020 The Microsoft DeepSpeed Team
......
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/* /*
Copyright 2020 The Microsoft DeepSpeed Team Copyright 2020 The Microsoft DeepSpeed Team
......
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/* /*
Copyright 2020 The Microsoft DeepSpeed Team Copyright 2020 The Microsoft DeepSpeed Team
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/* /*
Copyright 2020 The Microsoft DeepSpeed Team Copyright 2020 The Microsoft DeepSpeed Team
......
/* // Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team // SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices. Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/ */
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
import os import os
...@@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR ...@@ -14,13 +15,10 @@ from perf_sweep_utils import BENCH_LOG_DIR, READ_LOG_DIR, WRITE_LOG_DIR
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument('--log_dir',
'--log_dir', type=str,
type=str, default=BENCH_LOG_DIR,
default=BENCH_LOG_DIR, help=f'Folder of performance sweep logs. Default is {os.path.join(".", BENCH_LOG_DIR)}')
help=
f'Folder of performance sweep logs. Default is {os.path.join(".", BENCH_LOG_DIR)}'
)
args = parser.parse_args() args = parser.parse_args()
print(f'args = {args}') print(f'args = {args}')
...@@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir): ...@@ -75,9 +73,7 @@ def generate_aio_param(read_log_dir, write_log_dir):
optimal_config_read = read_results.get(read_perf_keys[optimal_key], None) optimal_config_read = read_results.get(read_perf_keys[optimal_key], None)
optimal_config_write = write_results.get(write_perf_keys[optimal_key], None) optimal_config_write = write_results.get(write_perf_keys[optimal_key], None)
print( print(f'Best performance (GB/sec): read = {optimal_config_read:5.2f}, write = {optimal_config_write:5.2f}')
f'Best performance (GB/sec): read = {optimal_config_read:5.2f}, write = {optimal_config_write:5.2f}'
)
print(json.dumps(aio_param, indent=3)) print(json.dumps(aio_param, indent=3))
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2021 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
import os import os
...@@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder ...@@ -20,20 +21,16 @@ from deepspeed.ops.op_builder import AsyncIOBuilder
OTHER_OPTIONS = '--handle' OTHER_OPTIONS = '--handle'
PERF_SCRIPT = 'test_ds_aio.py' PERF_SCRIPT = 'test_ds_aio.py'
DEFAULT_SWEEP_CONFIG = { DEFAULT_SWEEP_CONFIG = {
"block_size": ["128K", "block_size": ["128K", "256K"],
"256K"], "queue_depth": [4, 16, 32],
"queue_depth": [4, "overlap_events": [True, False],
16, "io_parallel": [2, 8],
32],
"overlap_events": [True,
False],
"io_parallel": [2,
8],
"single_submit": [False] "single_submit": [False]
} }
class Job(object): class Job(object):
def __init__(self, cmd_line, output_file=None, work_dir=None): def __init__(self, cmd_line, output_file=None, work_dir=None):
self.cmd_line = cmd_line self.cmd_line = cmd_line
self.output_file = output_file self.output_file = output_file
...@@ -63,6 +60,7 @@ class Job(object): ...@@ -63,6 +60,7 @@ class Job(object):
class SweepConfig(object): class SweepConfig(object):
def __init__(self, args): def __init__(self, args):
self.nvme_dir = args.nvme_dir self.nvme_dir = args.nvme_dir
self.io_size = args.io_size self.io_size = args.io_size
...@@ -78,52 +76,35 @@ class SweepConfig(object): ...@@ -78,52 +76,35 @@ class SweepConfig(object):
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument('--nvme_dir',
'--nvme_dir', required=True,
required=True,
type=str,
help=
'Directory in which to perform I/O tests. A writeable directory on a NVMe device.'
)
parser.add_argument('--sweep_config',
type=str, type=str,
default=None, help='Directory in which to perform I/O tests. A writeable directory on a NVMe device.')
help='Performance sweep configuration json file.')
parser.add_argument('--no_read', parser.add_argument('--sweep_config', type=str, default=None, help='Performance sweep configuration json file.')
action='store_true',
help='Disable read performance measurements.')
parser.add_argument('--no_write', parser.add_argument('--no_read', action='store_true', help='Disable read performance measurements.')
action='store_true',
help='Disable write performance measurements.')
parser.add_argument( parser.add_argument('--no_write', action='store_true', help='Disable write performance measurements.')
'--io_size',
type=str, parser.add_argument('--io_size',
default="400M", type=str,
help='Number of I/O bytes to read/write for performance measurements.') default="400M",
help='Number of I/O bytes to read/write for performance measurements.')
parser.add_argument( parser.add_argument(
'--no_sudo', '--no_sudo',
action='store_true', action='store_true',
help= help=
'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.' 'Run without sudo access. Page cache will not be flushed and reported read speeds may be higher than actual.')
)
parser.add_argument( parser.add_argument(
'--log_dir', '--log_dir',
type=str, type=str,
default=BENCH_LOG_DIR, default=BENCH_LOG_DIR,
help= help=f'Output directory for performance log files. Default is {os.path.join(".", BENCH_LOG_DIR)}')
f'Output directory for performance log files. Default is {os.path.join(".", BENCH_LOG_DIR)}'
)
parser.add_argument('--loops', parser.add_argument('--loops', type=int, default=1, help='Count of operation repetitions')
type=int,
default=1,
help='Count of operation repetitions')
args = parser.parse_args() args = parser.parse_args()
print(f'args = {args}') print(f'args = {args}')
...@@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json): ...@@ -147,6 +128,7 @@ def get_sweep_config_dict(sweep_config_json):
def get_sweep_cmd_lines(sweep_config_dict): def get_sweep_cmd_lines(sweep_config_dict):
def flatten_options(key, value_list): def flatten_options(key, value_list):
flat_list = [] flat_list = []
for v in value_list: for v in value_list:
...@@ -170,11 +152,7 @@ def run_job(job): ...@@ -170,11 +152,7 @@ def run_job(job):
args = ' '.join(job.cmd()) args = ' '.join(job.cmd())
print(f'args = {args}') print(f'args = {args}')
job.open_output_file() job.open_output_file()
proc = subprocess.run(args=args, proc = subprocess.run(args=args, shell=True, stdout=job.get_stdout(), stderr=job.get_stderr(), cwd=job.get_cwd())
shell=True,
stdout=job.get_stdout(),
stderr=job.get_stderr(),
cwd=job.get_cwd())
job.close_output_file() job.close_output_file()
assert proc.returncode == 0, \ assert proc.returncode == 0, \
f"This command failed: {job.cmd()}" f"This command failed: {job.cmd()}"
...@@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line): ...@@ -240,14 +218,7 @@ def get_log_file(io_op_desc, cmd_line):
return tag_key return tag_key
return f'{tag_key}{value}' return f'{tag_key}{value}'
tag_list = [ tag_list = [SINGLE_SUBMIT, OVERLAP_EVENTS, THREAD_COUNT, IO_PARALLEL, QUEUE_DEPTH, BLOCK_SIZE]
SINGLE_SUBMIT,
OVERLAP_EVENTS,
THREAD_COUNT,
IO_PARALLEL,
QUEUE_DEPTH,
BLOCK_SIZE
]
log_tags = [io_op_desc] log_tags = [io_op_desc]
cmd_tags = create_cmd_tags(cmd_line) cmd_tags = create_cmd_tags(cmd_line)
for tag in tag_list: for tag in tag_list:
...@@ -298,16 +269,10 @@ def create_read_file(sweep_config): ...@@ -298,16 +269,10 @@ def create_read_file(sweep_config):
os.makedirs(read_folder, exist_ok=True) os.makedirs(read_folder, exist_ok=True)
read_file_name = os.path.join(read_folder, f'random_{sweep_config.io_size}B.pt') read_file_name = os.path.join(read_folder, f'random_{sweep_config.io_size}B.pt')
block_size, block_count = get_block_size_and_count(refine_integer_value(sweep_config.io_size)) block_size, block_count = get_block_size_and_count(refine_integer_value(sweep_config.io_size))
dd_job = Job(cmd_line=[ dd_job = Job(cmd_line=[f'dd if=/dev/urandom of={read_file_name} bs={block_size} count={block_count}'])
f'dd if=/dev/urandom of={read_file_name} bs={block_size} count={block_count}' print(f'[Start] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....')
])
print(
f'[Start] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....'
)
run_job(dd_job) run_job(dd_job)
print( print(f'[Done] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....')
f'[Done] Create read file of {sweep_config.io_size} bytes by running {dd_job.cmd()} ....'
)
return read_folder, read_file_name return read_folder, read_file_name
...@@ -319,20 +284,15 @@ def remove_folder(folder): ...@@ -319,20 +284,15 @@ def remove_folder(folder):
def run_read_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines): def run_read_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
read_folder, read_file_name = create_read_file(sweep_config) read_folder, read_file_name = create_read_file(sweep_config)
read_option = f'--read_file {read_file_name}' read_option = f'--read_file {read_file_name}'
read_cmd_lines = [[f'{read_option} {sweep_config.other_options}'] + cmd read_cmd_lines = [[f'{read_option} {sweep_config.other_options}'] + cmd for cmd in cmd_lines]
for cmd in cmd_lines]
#dump_cmd_lines(read_cmd_lines) #dump_cmd_lines(read_cmd_lines)
log_folder = os.path.join(sweep_config.log_dir, f'{READ_LOG_DIR}') log_folder = os.path.join(sweep_config.log_dir, f'{READ_LOG_DIR}')
os.makedirs(log_folder, exist_ok=True) os.makedirs(log_folder, exist_ok=True)
perf_jobs = create_perf_jobs(io_op_desc=READ_OP_DESC, perf_jobs = create_perf_jobs(io_op_desc=READ_OP_DESC, log_dir=log_folder, cmd_lines=read_cmd_lines)
log_dir=log_folder,
cmd_lines=read_cmd_lines)
launch_sweep(sweep_jobs=perf_jobs, launch_sweep(sweep_jobs=perf_jobs, sync_job=sync_job, flush_cache_job=flush_cache_job)
sync_job=sync_job,
flush_cache_job=flush_cache_job)
remove_folder(read_folder) remove_folder(read_folder)
...@@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines): ...@@ -342,20 +302,15 @@ def run_write_sweep(sweep_config, flush_cache_job, sync_job, cmd_lines):
os.makedirs(write_folder, exist_ok=True) os.makedirs(write_folder, exist_ok=True)
write_file_name = os.path.join(write_folder, f'random_{sweep_config.io_size}B.pt') write_file_name = os.path.join(write_folder, f'random_{sweep_config.io_size}B.pt')
write_option = f'--write_size {sweep_config.io_size} --write_file {write_file_name}' write_option = f'--write_size {sweep_config.io_size} --write_file {write_file_name}'
write_cmd_lines = [[f'{write_option} {sweep_config.other_options}'] + cmd write_cmd_lines = [[f'{write_option} {sweep_config.other_options}'] + cmd for cmd in cmd_lines]
for cmd in cmd_lines]
#dump_cmd_lines(write_cmd_lines) #dump_cmd_lines(write_cmd_lines)
log_folder = os.path.join(sweep_config.log_dir, f'{WRITE_LOG_DIR}') log_folder = os.path.join(sweep_config.log_dir, f'{WRITE_LOG_DIR}')
os.makedirs(log_folder, exist_ok=True) os.makedirs(log_folder, exist_ok=True)
perf_jobs = create_perf_jobs(io_op_desc=WRITE_OP_DESC, perf_jobs = create_perf_jobs(io_op_desc=WRITE_OP_DESC, log_dir=log_folder, cmd_lines=write_cmd_lines)
log_dir=log_folder,
cmd_lines=write_cmd_lines)
launch_sweep(sweep_jobs=perf_jobs, launch_sweep(sweep_jobs=perf_jobs, sync_job=sync_job, flush_cache_job=flush_cache_job)
sync_job=sync_job,
flush_cache_job=flush_cache_job)
remove_folder(write_folder) remove_folder(write_folder)
...@@ -376,10 +331,7 @@ def main(): ...@@ -376,10 +331,7 @@ def main():
cmd_lines = get_sweep_cmd_lines(sweep_config.search_space) cmd_lines = get_sweep_cmd_lines(sweep_config.search_space)
if sweep_config.flush_cache: if sweep_config.flush_cache:
flush_cache_job = Job( flush_cache_job = Job(cmd_line=['sudo', 'bash -c', "'echo 1 > /proc/sys/vm/drop_caches'"])
cmd_line=['sudo',
'bash -c',
"'echo 1 > /proc/sys/vm/drop_caches'"])
else: else:
flush_cache_job = None flush_cache_job = None
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
...@@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op): ...@@ -20,14 +21,8 @@ def pre_basic(args, tid, read_op):
file = args.read_file if read_op else f'{args.write_file}.{tid}' file = args.read_file if read_op else f'{args.write_file}.{tid}'
task_log(tid, f'Allocate tensor of size {num_bytes} bytes') task_log(tid, f'Allocate tensor of size {num_bytes} bytes')
buffer = get_accelerator().pin_memory( buffer = get_accelerator().pin_memory(torch.empty(num_bytes, dtype=torch.uint8, device='cpu'))
torch.empty(num_bytes, task_log(tid, f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}')
dtype=torch.uint8,
device='cpu'))
task_log(
tid,
f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}'
)
ctxt = {} ctxt = {}
ctxt['file'] = file ctxt['file'] = file
...@@ -60,13 +55,8 @@ def post_basic(pool_params): ...@@ -60,13 +55,8 @@ def post_basic(pool_params):
def main_basic_read(pool_params): def main_basic_read(pool_params):
args, tid, ctxt = pool_params args, tid, ctxt = pool_params
start_time = time.time() start_time = time.time()
AsyncIOBuilder().load().aio_read(ctxt['buffer'], AsyncIOBuilder().load().aio_read(ctxt['buffer'], ctxt['file'], args.block_size, args.queue_depth,
ctxt['file'], args.single_submit, args.overlap_events, args.validate)
args.block_size,
args.queue_depth,
args.single_submit,
args.overlap_events,
args.validate)
end_time = time.time() end_time = time.time()
ctxt['elapsed_sec'] += end_time - start_time ctxt['elapsed_sec'] += end_time - start_time
...@@ -76,13 +66,8 @@ def main_basic_read(pool_params): ...@@ -76,13 +66,8 @@ def main_basic_read(pool_params):
def main_basic_write(pool_params): def main_basic_write(pool_params):
args, tid, ctxt = pool_params args, tid, ctxt = pool_params
start_time = time.time() start_time = time.time()
AsyncIOBuilder().load().aio_write(ctxt['buffer'], AsyncIOBuilder().load().aio_write(ctxt['buffer'], ctxt['file'], args.block_size, args.queue_depth,
ctxt['file'], args.single_submit, args.overlap_events, args.validate)
args.block_size,
args.queue_depth,
args.single_submit,
args.overlap_events,
args.validate)
end_time = time.time() end_time = time.time()
ctxt['elapsed_sec'] += end_time - start_time ctxt['elapsed_sec'] += end_time - start_time
......
""" # Copyright (c) Microsoft Corporation.
Copyright 2020 The Microsoft DeepSpeed Team # SPDX-License-Identifier: Apache-2.0
Licensed under the MIT license.
# DeepSpeed Team
"""
Functionality of swapping optimizer tensors to/from (NVMe) storage devices. Functionality of swapping optimizer tensors to/from (NVMe) storage devices.
""" """
...@@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op): ...@@ -20,27 +21,17 @@ def pre_handle(args, tid, read_op):
file = args.read_file if read_op else f'{args.write_file}.{tid}' file = args.read_file if read_op else f'{args.write_file}.{tid}'
io_parallel = args.io_parallel if args.io_parallel else 1 io_parallel = args.io_parallel if args.io_parallel else 1
handle = AsyncIOBuilder().load().aio_handle(args.block_size, handle = AsyncIOBuilder().load().aio_handle(args.block_size, args.queue_depth, args.single_submit,
args.queue_depth, args.overlap_events, io_parallel)
args.single_submit,
args.overlap_events,
io_parallel)
task_log(tid, f'Created deepspeed aio handle') task_log(tid, f'Created deepspeed aio handle')
if args.gpu: if args.gpu:
buffer = torch.empty(num_bytes, buffer = torch.empty(num_bytes, dtype=torch.uint8, device=get_accelerator().device_name())
dtype=torch.uint8,
device=get_accelerator().device_name())
else: else:
if args.use_accelerator_pin_memory: if args.use_accelerator_pin_memory:
buffer = get_accelerator().pin_memory( buffer = get_accelerator().pin_memory(torch.empty(num_bytes, dtype=torch.uint8, device='cpu'))
torch.empty(num_bytes,
dtype=torch.uint8,
device='cpu'))
else: else:
buffer = handle.new_cpu_locked_tensor(num_bytes, buffer = handle.new_cpu_locked_tensor(num_bytes, torch.empty(0, dtype=torch.uint8))
torch.empty(0,
dtype=torch.uint8))
task_log(tid, f'Allocate tensor of size {num_bytes} bytes') task_log(tid, f'Allocate tensor of size {num_bytes} bytes')
...@@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op): ...@@ -51,10 +42,7 @@ def pre_handle(args, tid, read_op):
ctxt['buffer'] = buffer ctxt['buffer'] = buffer
ctxt['elapsed_sec'] = 0 ctxt['elapsed_sec'] = 0
task_log( task_log(tid, f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}')
tid,
f'{io_string} file {file} of size {num_bytes} bytes from buffer on device {buffer.device}'
)
return ctxt return ctxt
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment