Commit 4947639c authored by Jun Liu
Browse files

Merge branch 'amd-develop' into amd-master

parents 17cf8179 d39c3f5d
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "ck_tile/core/config.hpp" #include "ck_tile/core/config.hpp"
#include "ck_tile/core/arch/arch.hpp" #include "ck_tile/core/arch/arch.hpp"
#include "ck_tile/core/arch/amd_buffer_addressing.hpp" #include "ck_tile/core/arch/amd_buffer_addressing.hpp"
#include "ck_tile/core/arch/generic_memory_space_atomic.hpp"
#include "ck_tile/core/container/array.hpp" #include "ck_tile/core/container/array.hpp"
#include "ck_tile/core/numeric/integer.hpp" #include "ck_tile/core/numeric/integer.hpp"
#include "ck_tile/core/numeric/integral_constant.hpp" #include "ck_tile/core/numeric/integral_constant.hpp"
...@@ -507,10 +508,10 @@ struct buffer_view<address_space_enum::global, ...@@ -507,10 +508,10 @@ struct buffer_view<address_space_enum::global,
bool constexpr use_amd_buffer_addressing = false; bool constexpr use_amd_buffer_addressing = false;
#endif #endif
if constexpr(use_amd_buffer_addressing)
{
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
if constexpr(use_amd_buffer_addressing)
{
amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>( amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>(
x, p_data_, i, is_valid_element, buffer_size_); x, p_data_, i, is_valid_element, buffer_size_);
} }
...@@ -518,7 +519,7 @@ struct buffer_view<address_space_enum::global, ...@@ -518,7 +519,7 @@ struct buffer_view<address_space_enum::global,
{ {
if(is_valid_element) if(is_valid_element)
{ {
atomic_add<X>(c_style_pointer_cast<X*>(&p_data_[i]), x); atomic_add_g<remove_cvref_t<T>, t_per_x>(&p_data_[i], x);
} }
} }
} }
...@@ -547,16 +548,16 @@ struct buffer_view<address_space_enum::global, ...@@ -547,16 +548,16 @@ struct buffer_view<address_space_enum::global,
bool constexpr use_amd_buffer_addressing = false; bool constexpr use_amd_buffer_addressing = false;
#endif #endif
if constexpr(use_amd_buffer_addressing)
{
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
if constexpr(use_amd_buffer_addressing)
{
amd_buffer_atomic_max<remove_cvref_t<T>, t_per_x>( amd_buffer_atomic_max<remove_cvref_t<T>, t_per_x>(
x, p_data_, i, is_valid_element, buffer_size_); x, p_data_, i, is_valid_element, buffer_size_);
} }
else if(is_valid_element) else if(is_valid_element)
{ {
atomic_max<X>(c_style_pointer_cast<X*>(&p_data_[i]), x); atomic_max_g<remove_cvref_t<T>, t_per_x>(&p_data_[i], x);
} }
} }
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment