Commit c25a91b6 authored by aiss
Browse files

Merge branch 'ds-v0.9.2-rocm' into 'main'

Ds v0.9.2 rocm

See merge request dcutoolkit/deeplearing/deepspeed!2
parents d1596c94 af82b300
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#ifdef __HIPCC__
#include "cpu_adagrad_hip.h"
#else
#include "cpu_adagrad.h"
#endif
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#include "cpu_adagrad.h"
#include <torch/extension.h>
#include <iostream>
#include <memory>
......@@ -178,7 +178,7 @@ int ds_adagrad_step(int optimizer_id,
std::static_pointer_cast<Adagrad_Optimizer>(s_optimizers[optimizer_id]);
opt->IncrementStep(step);
opt->update_state(lr, epsilon, weight_decay);
opt->Step_8(params_ptr, grads_ptr, exp_avg_sq_ptr, params_c.size(0));
opt->Step_8(params_ptr, grads_ptr, exp_avg_sq_ptr, params_c.numel());
#if defined(__ENABLE_CUDA__)
opt->SynchronizeStreams();
......@@ -214,7 +214,7 @@ int ds_adagrad_step_plus_copy(int optimizer_id,
opt->Step_8(params_ptr,
grads_ptr,
exp_avg_sq_ptr,
params_c.size(0),
params_c.numel(),
gpu_params_ptr,
(params.options().dtype() == at::kHalf));
......
/* Copyright 2020 The Microsoft DeepSpeed Team
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
#ifndef TORCH_CHECK
#define TORCH_CHECK AT_CHECK
#endif
#ifdef VERSION_GE_1_3
#define DATA_PTR data_ptr
#else
#define DATA_PTR data
#endif
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#include "cpu_adam.h"
#include <torch/extension.h>
#include <cassert>
......@@ -230,7 +235,7 @@ int ds_adam_step(int optimizer_id,
grads_ptr,
exp_avg_ptr,
exp_avg_sq_ptr,
params_c.size(0),
params_c.numel(),
nullptr,
(params.options().dtype() == at::kHalf));
......@@ -275,7 +280,7 @@ int ds_adam_step_plus_copy(int optimizer_id,
grads_ptr,
exp_avg_ptr,
exp_avg_sq_ptr,
params_c.size(0),
params_c.numel(),
gpu_params_ptr,
(params.options().dtype() == at::kHalf));
......
#include "custom_cuda_layers.h"
// Element-wise float -> half conversion kernel.
//
// Each thread handles at most one element, addressed by its flat 1D global
// index (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is not
// below `size` do nothing, so the grid may cover more threads than elements.
//
//   input  - source values, read at [0, size)
//   output - destination values, written at [0, size)
//   size   - number of elements to convert
__global__ void param_update_kernel(const float* input, __half* output, int size)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Tail guard: the last block may extend past the end of the data.
    if (idx >= size) { return; }

    output[idx] = static_cast<__half>(input[idx]);
}
// Host-side launcher for param_update_kernel.
//
// Enqueues on `stream` a 1D launch of 1024-thread blocks, with enough blocks
// to cover `size` elements, converting `input` (float) into `output` (__half).
// No error checking or stream synchronization is performed here; callers that
// need either must do it themselves.
//
//   input  - source float buffer passed through to the kernel
//   output - destination __half buffer passed through to the kernel
//   size   - number of elements to convert; values <= 0 make this a no-op
//   stream - stream the kernel is launched on
void launch_param_update(const float* input, __half* output, int size, cudaStream_t stream)
{
    // Nothing to convert. This also keeps a negative size from producing a
    // zero or wrapped-around grid dimension, which would make the launch fail
    // with an invalid-configuration error that nobody checks.
    if (size <= 0) { return; }

    const int threads = 1024;

    // Ceil-div so a partially filled last block is still launched.
    dim3 grid_dim((size - 1) / threads + 1);
    dim3 block_dim(threads);

    param_update_kernel<<<grid_dim, block_dim, 0, stream>>>(input, output, size);
}
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#include <torch/extension.h>
void multi_tensor_adam_cuda(int chunk_size,
......
/* Copyright 2020 The Microsoft DeepSpeed Team
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
#include <ATen/ATen.h>
......@@ -12,11 +17,7 @@
#include <assert.h>
#ifdef __HIPCC__
#include "multi_tensor_apply_hip.cuh"
#else
#include "multi_tensor_apply.cuh"
#endif
#include "type_shim.h"
#define BLOCK_SIZE 512
......
/* Copyright 2020 The Microsoft DeepSpeed Team
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright NVIDIA/apex
This file is adapted from fused adam in NVIDIA/apex, commit a109f85
*/
#include <ATen/ATen.h>
......
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
......@@ -262,7 +264,7 @@ void report_file_error(const char* filename, const std::string file_op, const in
int open_file(const char* filename, const bool read_op)
{
const int flags = read_op ? (O_RDONLY | __O_DIRECT) : (O_WRONLY | O_CREAT | __O_DIRECT);
const int flags = read_op ? (O_RDONLY | O_DIRECT) : (O_WRONLY | O_CREAT | O_DIRECT);
const int mode = 0600;
const auto fd = open(filename, flags, mode);
if (fd == -1) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment