Commit 1b4b640b authored by carlushuang's avatar carlushuang
Browse files

add permute example

parent 7971bb5b
# not using add_example_executable() to add this target, since we don't want this to have
# to be included in "make all/install/check"
# EXCLUDE_FROM_ALL: the target is built only when explicitly requested,
# e.g. `make tile_example_permute`
add_executable(tile_example_permute EXCLUDE_FROM_ALL permute.cpp)
# target_compile_options(tile_example_permute PRIVATE -v --save-temps -Wno-gnu-line-marker)
# permute
This folder contains an example for the permute kernel, which is similar to [torch.permute](https://pytorch.org/docs/stable/generated/torch.permute.html) (combined with [torch.contiguous](https://pytorch.org/docs/stable/generated/torch.Tensor.contiguous.html)). Currently we implement a generic permute kernel that supports up to rank-8 arbitrary permutation with a single kernel instance. Performance is not the first consideration; we prefer a simple and general kernel implementation using `ck_tile` in this example.
```
args:
-v whether to do CPU validation or not (default:1)
-prec data type. fp16/bf16/fp32 (default:fp16)
-shape the shape of the input tensor (default:2,3,4)
-perm permute perm (default:2,1,0)
```
## build
```
# in the root of ck_tile
mkdir build && cd build
sh ../script/cmake-ck-dev.sh ../ <arch> # you can replace this <arch> to gfx90a, gfx942...
make tile_example_permute -j
```
This will result in an executable `build/bin/tile_example_permute`
## some examples
```
# torch
x=torch.randn(2,3,4,6)
y=x.permute(0,3,2,1).contiguous()
# ck_tile
./build/bin/tile_example_permute -shape=2,3,4,6 -perm=0,3,2,1
```
or you can try the smoke_test
```
# in the root of ck_tile, after you build this example
sh example/ck_tile/06_permute/script/smoke_test.sh
```
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "permute.hpp"
#include "ck_tile/host.hpp"

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <numeric>
#include <ostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
namespace detail {
// Maps a byte count (1/2/4) to the signed integer type of that exact width.
// Widths without a specialization fail to compile, which is intentional.
template <int bytes>
struct to_integer_type;

template <> struct to_integer_type<4> { using type = int32_t; };
template <> struct to_integer_type<2> { using type = int16_t; };
template <> struct to_integer_type<1> { using type = int8_t; };
} // namespace detail

// Convenience alias: to_integer_type<sizeof(T)> yields an integer type of T's
// size, used below for bit-exact comparison of results.
template <int bytes>
using to_integer_type = typename detail::to_integer_type<bytes>::type;
// host API (shoule come from codegen)
float permute(permute_traits t, permute_args a, const ck_tile::stream_config& s)
{
if(t.data_type.compare("fp8") == 0)
{
using DataType = ck_tile::fp8_t;
using PipelineProblem = ck_tile::GenericPermuteProblem<DataType>;
using Kernel = ck_tile::GenericPermute<PipelineProblem>;
auto kargs = Kernel::MakeKargs(a);
const dim3 grids = Kernel::GridSize(a);
constexpr dim3 blocks = Kernel::BlockSize();
float ave_time = ck_tile::launch_kernel(
s, ck_tile::make_kernel<blocks.x, 1>(Kernel{}, grids, blocks, 0, kargs));
return ave_time;
}
else if(t.data_type.compare("fp16") == 0)
{
using DataType = ck_tile::half_t;
using PipelineProblem = ck_tile::GenericPermuteProblem<DataType>;
using Kernel = ck_tile::GenericPermute<PipelineProblem>;
auto kargs = Kernel::MakeKargs(a);
const dim3 grids = Kernel::GridSize(a);
constexpr dim3 blocks = Kernel::BlockSize();
float ave_time = ck_tile::launch_kernel(
s, ck_tile::make_kernel<blocks.x, 1>(Kernel{}, grids, blocks, 0, kargs));
return ave_time;
}
else if(t.data_type.compare("fp32") == 0)
{
using DataType = float;
using PipelineProblem = ck_tile::GenericPermuteProblem<DataType>;
using Kernel = ck_tile::GenericPermute<PipelineProblem>;
auto kargs = Kernel::MakeKargs(a);
const dim3 grids = Kernel::GridSize(a);
constexpr dim3 blocks = Kernel::BlockSize();
float ave_time = ck_tile::launch_kernel(
s, ck_tile::make_kernel<blocks.x, 1>(Kernel{}, grids, blocks, 0, kargs));
return ave_time;
}
return 0;
}
// Streams a vector as "[a, b, c]" (empty vector prints "[]").
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    const char* sep = "";
    for(const auto& elem : v)
    {
        os << sep << elem;
        sep = ", "; // no separator before the first element
    }
    return os << "]";
}
// Builds the command-line argument parser for this example and parses argv.
// Returns (parse_ok, parser). Fixes typos in the user-facing help strings
// ("weather" -> "whether", "t to 1" -> "set to 1").
auto create_args(int argc, char* argv[])
{
    ck_tile::ArgParser arg_parser;
    arg_parser.insert("v", "1", "whether to do CPU validation or not")
        .insert("prec", "fp16", "data type. fp8/fp16/fp32 (representing 8/16/32 bit data)")
        .insert("shape", "2,3,4", "the shape of the input tensor")
        .insert("perm", "2,1,0", "permute perm")
        .insert("kname", "0", "set to 1 will print kernel name")
        .insert("seed",
                "11939",
                "random seed used for initializing input tensors. 0 for "
                "non-deterministic seed")
        .insert("warmup", "5", "number of iterations before benchmark the kernel")
        .insert("repeat", "20", "number of iterations to benchmark the kernel");

    bool result = arg_parser.parse(argc, argv);
    return std::make_tuple(result, arg_parser);
}
// different threshold for different dtype
// Default validation tolerances (used for fp16/fp32).
template <typename DataType>
auto get_elimit(std::string /*init_method*/)
{
    return ck_tile::make_tuple(/*rtol=*/1e-3, /*atol=*/1e-3);
}
// Looser tolerances for bf16 validation.
template <>
auto get_elimit<ck_tile::bf16_t>(std::string /*init_method*/)
{
    return ck_tile::make_tuple(/*rtol=*/1e-2, /*atol=*/1e-2);
}
// fp8 tolerances depend on how the inputs were initialized: integer-valued
// initialization ("ui"/"ni") should reproduce almost exactly, otherwise allow
// one rounding-point distance and a coarser absolute tolerance.
template <>
auto get_elimit<ck_tile::fp8_t>(std::string init_method)
{
    const bool integer_init = (init_method == "ui" || init_method == "ni");
    if(integer_init)
    {
        unsigned max_rounding_point_distance = 0;
        double atol                          = 2e-3;
        return ck_tile::make_tuple(max_rounding_point_distance, atol);
    }
    unsigned max_rounding_point_distance = 1;
    double atol                          = 0.0625;
    return ck_tile::make_tuple(max_rounding_point_distance, atol);
}
// "1,2,3,4" -> vector{1,2,3,4}
std::vector<ck_tile::index_t> decode_vec(std::string q_val)
{
#define _S2I_(str_) static_cast<ck_tile::index_t>(std::atoi((str_).c_str()))
std::string::size_type pos = 0;
std::vector<ck_tile::index_t> v;
while(true)
{
auto found = q_val.find(',', pos);
ck_tile::index_t n =
_S2I_(q_val.substr(pos, found == std::string::npos ? found : found - pos));
v.push_back(n);
if(found == std::string::npos)
{
break;
}
pos = found + 1;
}
return v;
#undef _S2I_
}
// Runs one permute benchmark + (optional) CPU validation for DataType.
// Returns true when validation passes (or is skipped).
template <typename DataType>
bool run(const ck_tile::ArgParser& arg_parser)
{
    std::string data_type = arg_parser.get_str("prec");
    int do_validation     = arg_parser.get_int("v");
    auto x_shape          = decode_vec(arg_parser.get_str("shape"));
    auto perm             = decode_vec(arg_parser.get_str("perm"));
    int stream_warmup     = arg_parser.get_int("warmup");
    int stream_repeat     = arg_parser.get_int("repeat");
    bool kname            = arg_parser.get_bool("kname");
    int seed              = arg_parser.get_int("seed");

    // BUG FIX: the original asserted on "shape", which does not exist in this
    // scope (it only compiled because assert() vanishes under NDEBUG).
    assert(x_shape.size() == perm.size());

    ck_tile::index_t rank = perm.size();
    if(rank > ck_tile::GenericPermuteHostArgs::kMaxRanks)
    {
        printf("rank %d permute is not supported yet\n", rank);
        return false;
    }

    // input tensor filled with small integer values so every dtype can
    // represent them exactly (enables bit-exact validation below)
    ck_tile::HostTensor<DataType> x(x_shape);
    ck_tile::FillUniformDistributionIntegerValue<DataType>{-15, 15, seed}(x);

    // output shape: y_shape[i] = x_shape[perm[i]]
    std::vector<ck_tile::index_t> y_shape = [&]() {
        std::vector<ck_tile::index_t> tmp(rank, 0);
        for(int i = 0; i < static_cast<int>(rank); i++)
        {
            tmp[i] = x_shape[perm[i]];
        }
        return tmp;
    }();

    ck_tile::HostTensor<DataType> y(y_shape);

    ck_tile::DeviceMem x_buf(x.get_element_space_size_in_bytes());
    ck_tile::DeviceMem y_buf(y.get_element_space_size_in_bytes());
    x_buf.ToDevice(x.data());

    permute_args args;
    args.p_src = x_buf.GetDeviceBuffer();
    args.p_dst = y_buf.GetDeviceBuffer();
    args.rank  = rank;
    std::copy(x_shape.begin(), x_shape.end(), args.shape);
    std::copy(perm.begin(), perm.end(), args.perm);

    permute_traits trait;
    trait.data_type = data_type;

    std::cout << "[" << data_type << "] shape:" << x_shape << "->" << y_shape
              << ", permute:" << perm << std::flush;

    ck_tile::stream_config stream_config{nullptr,
                                         true,
                                         /* log_level = */ (kname ? 1 : 0),
                                         stream_warmup,
                                         stream_repeat};

    float ave_time = permute(trait, args, stream_config);
    std::cout << ", time:" << ave_time << "ms" << std::flush;

    bool pass = true;
    if(do_validation)
    {
        // CPU reference result into y
        reference_permute(x, y, perm);

        ck_tile::HostTensor<DataType> y_dev(y.get_lengths());
        y_buf.FromDevice(y_dev.data());

        // permute only moves data (no arithmetic), so compare bit-exactly by
        // reinterpreting each element as a same-width integer
        pass = std::equal(
            y_dev.begin(), y_dev.end(), y.begin(), [&](const DataType& d, const DataType& h) {
                using itype = to_integer_type<sizeof(DataType)>;
                itype i_d   = ck_tile::bit_cast<itype>(d);
                itype i_h   = ck_tile::bit_cast<itype>(h);
                return i_d == i_h;
            });
        std::cout << ", valid:" << (pass ? "y" : "n") << std::flush;
    }
    std::cout << std::endl;
    return pass;
}
// Entry point: parse arguments, then dispatch run<> on the precision string.
// Exit codes: 0 = pass, -1 = bad args, -2 = validation failed, -3 = bad dtype.
int main(int argc, char* argv[])
{
    auto [result, arg_parser] = create_args(argc, argv);
    if(!result)
        return -1;

    const std::string data_type = arg_parser.get_str("prec");
    if(data_type == "fp8")
        return run<ck_tile::fp8_t>(arg_parser) ? 0 : -2;
    if(data_type == "fp16")
        return run<ck_tile::half_t>(arg_parser) ? 0 : -2;
    if(data_type == "fp32")
        return run<float>(arg_parser) ? 0 : -2;

    return -3; // unsupported data type string
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/core.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/permute.hpp"
#include <string>
// Runtime traits selecting which kernel instantiation to dispatch to.
struct permute_traits
{
    std::string data_type; // "fp8" / "fp16" / "fp32"
};

// Host-side arguments are the kernel's own host-args struct, unchanged.
using permute_args = ck_tile::GenericPermuteHostArgs;

// host API
float permute(permute_traits, permute_args, const ck_tile::stream_config&);
#!/bin/sh
# TODO: run this script from CK root
BUILD=build
EXE=$BUILD/bin/tile_example_permute
# always validate; warmup=0/repeat=1 keeps the smoke test fast
COMMON_ARGS='-v=1 -warmup=0 -repeat=1'
# mode=0
# export HIP_VISIBLE_DEVICES=4

# pass any argument to this script to echo each command before running it
if [ $# -ge 1 ] ; then
set -x
fi

# exercise every supported precision across a spread of ranks (2..8) and
# permutation patterns, including a size-1 dim and identity-like cases
for prec in "fp8" "fp16" "fp32" ; do
$EXE -prec=$prec -shape=3,8 -perm=1,0 $COMMON_ARGS
$EXE -prec=$prec -shape=48,6,8 -perm=2,1,0 $COMMON_ARGS
$EXE -prec=$prec -shape=24,128,3 -perm=0,2,1 $COMMON_ARGS
$EXE -prec=$prec -shape=4,10,7,6 -perm=0,2,3,1 $COMMON_ARGS
$EXE -prec=$prec -shape=8,24,36,10 -perm=3,1,2,0 $COMMON_ARGS
$EXE -prec=$prec -shape=8,1,36,4 -perm=2,1,0,3 $COMMON_ARGS
$EXE -prec=$prec -shape=5,10,16,2,36,4 -perm=4,5,2,1,0,3 $COMMON_ARGS
$EXE -prec=$prec -shape=2,32,8,3,6,2,5,4 -perm=5,2,4,7,1,6,3,0 $COMMON_ARGS
echo "------------------------------------------------------------------"
done
......@@ -4,3 +4,4 @@ include_directories(AFTER
add_subdirectory(01_fmha)
add_subdirectory(02_layernorm2d)
add_subdirectory(06_permute)
......@@ -19,6 +19,7 @@
#include "ck_tile/host/reference/reference_gemm.hpp"
#include "ck_tile/host/reference/reference_im2col.hpp"
#include "ck_tile/host/reference/reference_layernorm2d.hpp"
#include "ck_tile/host/reference/reference_permute.hpp"
#include "ck_tile/host/reference/reference_reduce.hpp"
#include "ck_tile/host/reference/reference_softmax.hpp"
#include "ck_tile/host/stream_config.hpp"
......
......@@ -10,6 +10,7 @@
namespace ck_tile {
// To be removed, which really does not tell the location of failed HIP functional call
#if 0
CK_TILE_HOST void hip_check_error(hipError_t x)
{
if(x != hipSuccess)
......@@ -20,6 +21,7 @@ CK_TILE_HOST void hip_check_error(hipError_t x)
throw std::runtime_error(ss.str());
}
}
#endif
} // namespace ck_tile
#define HIP_CHECK_ERROR(retval_or_funcall) \
......
......@@ -73,17 +73,17 @@ CK_TILE_HOST float launch_kernel(const stream_config& s, Callables... callables)
{
// clang-format off
if(!s.time_kernel_) {
(callables(s),...); hip_check_error(hipGetLastError());
(callables(s),...); HIP_CHECK_ERROR(hipGetLastError());
return 0;
}
if(s.is_gpu_timer_) {
gpu_timer timer {};
// warmup
for(int i = 0; i < s.cold_niters_; i++) { (callables(s),...); } hip_check_error(hipGetLastError());
for(int i = 0; i < s.cold_niters_; i++) { (callables(s),...); } HIP_CHECK_ERROR(hipGetLastError());
timer.start(s.stream_id_);
for(int i = 0; i < s.nrepeat_; i++) { (callables(s),...); } hip_check_error(hipGetLastError());
for(int i = 0; i < s.nrepeat_; i++) { (callables(s),...); } HIP_CHECK_ERROR(hipGetLastError());
timer.stop(s.stream_id_);
return timer.duration() / s.nrepeat_;
......@@ -92,10 +92,10 @@ CK_TILE_HOST float launch_kernel(const stream_config& s, Callables... callables)
cpu_timer timer {};
// warmup
for(int i = 0; i < s.cold_niters_; i++) { (callables(s),...); } hip_check_error(hipGetLastError());
for(int i = 0; i < s.cold_niters_; i++) { (callables(s),...); } HIP_CHECK_ERROR(hipGetLastError());
timer.start(s.stream_id_);
for(int i = 0; i < s.nrepeat_; i++) { (callables(s),...); } hip_check_error(hipGetLastError());
for(int i = 0; i < s.nrepeat_; i++) { (callables(s),...); } HIP_CHECK_ERROR(hipGetLastError());
timer.stop(s.stream_id_);
return timer.duration() / s.nrepeat_;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/core.hpp"
#include "ck_tile/host/host_tensor.hpp"
#include <thread>
#include <numeric>
#include <functional>
namespace ck_tile {
/*
this will do permute + contiguous like functionality in pytorch:
given output dim i, y reads from x dim dims[i], and y is written packed
(row-major), i.e. y(c0,c1,...) = x(c_{inv}) like x.permute(dims).contiguous().
*/
template <typename DataType>
CK_TILE_HOST void
reference_permute(const HostTensor<DataType>& x, HostTensor<DataType>& y, std::vector<index_t> dims)
{
    const auto x_len = x.mDesc.get_lengths();
    const auto y_len = y.mDesc.get_lengths();
    assert(x_len.size() == y_len.size());
    index_t rank = x_len.size();

    // element counts must match (same data, different layout)
    const auto x_elm = std::accumulate(x_len.begin(), x_len.end(), 1, std::multiplies<index_t>());
    const auto y_elm = std::accumulate(y_len.begin(), y_len.end(), 1, std::multiplies<index_t>());
    assert(x_elm == y_elm);
    (void)y_elm; // only used by the assert above

    // per-element worker: i_element is a linear index over the output tensor
    auto f = [&](auto i_element) {
        // decompose the linear index into an output coordinate, row-major
        // (last dim varies fastest)
        std::vector<size_t> y_coord = [&]() {
            std::vector<size_t> tmp(rank, 0);
            size_t r = i_element;
            for(index_t i = rank - 1; i >= 0; i--)
            {
                tmp[i] = r % y_len[i];
                r = r / y_len[i];
            }
            return tmp;
        }();
        // map to the input coordinate: output dim i corresponds to input
        // dim dims[i]
        std::vector<size_t> x_coord = [&]() {
            std::vector<size_t> tmp(rank, 0);
            for(index_t i = 0; i < rank; i++)
            {
                tmp[dims[i]] = y_coord[i];
            }
            return tmp;
        }();
        // do permute
        y(y_coord) = x(x_coord);
    };
    // run over all elements with one task per hardware thread
    make_ParallelTensorFunctor(f, x_elm)(std::thread::hardware_concurrency());
}
} // namespace ck_tile
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/ops/permute/kernel/generic_permute_kernel.hpp"
#include "ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/core.hpp"
#include "ck_tile/ops/common.hpp"
// #include "ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp"
namespace ck_tile {
/* independent host side argument, no template
 */
struct GenericPermuteHostArgs
{
    static constexpr index_t kMaxRanks = 8; // TODO: hardcoded
    const void* p_src;        // source tensor, device memory
    void* p_dst;              // destination tensor, device memory
    index_t rank;             // number of dims actually used (must be <= kMaxRanks)
    index_t shape[kMaxRanks]; // input shape
    index_t perm[kMaxRanks];  // permute index
};
/*
simulate torch.permute:
x_ = x_.view(x.shape[0],
x.shape[1]//16, 16,
x.shape[2]//32, 4, 8)
x_ = x_.permute(0,1,3,4,2,5)
x_ = x_.contiguous()
x_ = x_.view(x.shape[0], x.shape[1], x.shape[2]);//
this kernel is supposed not to be performant(just OK), with functional support up to kMaxRanks
dim of permutation, with a single kernel
*/
template <typename Problem_>
struct GenericPermute
{
    using Problem  = ck_tile::remove_cvref_t<Problem_>;
    using DataType = remove_cvref_t<typename Problem::DataType>;

    static constexpr index_t kBlockSize = Problem::kBlockSize;
    static constexpr index_t kMaxRanks  = Problem::kMaxRanks;
    static constexpr bool KeepLastDim   = Problem::KeepLastDim;

    // Device-side kernel argument. Packed so the host-built byte layout is
    // exactly what the device reads.
    struct __attribute__((packed)) Kargs
    {
        const void* p_src;
        void* p_dst;
        // index_t rank;
        index_t num_elements;
        index_t perm_length[kMaxRanks]; // tensor length after permutation
        index_t perm_stride[kMaxRanks]; // tensor stride after permutation
    };

    // Total element count of the input tensor (product over the used dims).
    CK_TILE_HOST static constexpr index_t TotalElements(const GenericPermuteHostArgs& h)
    {
        index_t n = 1;
        for(auto i = 0; i < h.rank; i++)
        {
            n *= h.shape[i];
        }
        return n;
    }

    // Converts host args (shape + perm, rank <= kMaxRanks) into device kargs:
    // the permuted lengths and the input strides reordered by perm.
    CK_TILE_HOST static constexpr Kargs MakeKargs(const GenericPermuteHostArgs& h)
    {
        Kargs a;
        a.p_src = h.p_src;
        a.p_dst = h.p_dst;
        // assert rank <= kMaxRanks
        index_t i = 0;
        index_t perm[kMaxRanks];
        index_t x_shape[kMaxRanks];
        index_t x_stride[kMaxRanks];
        // index_t perm_length[kMaxRanks];
        for(; i < h.rank; i++)
        {
            x_shape[i] = h.shape[i];
            perm[i]    = h.perm[i];
        }
        // pad unused ranks with length-1 dims so the kernel always sees
        // exactly kMaxRanks dims
        for(; i < kMaxRanks; i++)
        {
            x_shape[i] = 1;
            perm[i]    = i; // will index to len = 1
        }
        // packed (row-major) strides of the input: last dim is contiguous
        index_t stride = 1;
        for(index_t j = kMaxRanks - 1; j >= 0; j--)
        {
            x_stride[j] = stride;
            stride *= x_shape[j];
        }
        // reorder length/stride into destination (permuted) order, so that
        // indexing in destination order addresses the right source element
        for(index_t j = 0; j < kMaxRanks; j++)
        {
            a.perm_length[j] = x_shape[perm[j]];
            a.perm_stride[j] = x_stride[perm[j]];
        }
        a.num_elements = TotalElements(h);
        return a;
    }

    // One thread per element: grid covers num_elements rounded up to blocks.
    CK_TILE_HOST static constexpr auto GridSize(GenericPermuteHostArgs h)
    {
        auto total = TotalElements(h);
        auto grids = dim3((total + BlockSize() - 1) / BlockSize());
        // printf("### total:%d, grids:%dx%dx%d\n", total, );
        return grids;
    }

    CK_TILE_HOST_DEVICE static constexpr auto BlockSize() { return Problem::kBlockSize; }

    // Device entry: each thread copies exactly one element from the strided
    // source view to the packed destination view.
    CK_TILE_DEVICE void operator()(Kargs kargs) const
    {
        // global linear element id for this thread
        index_t id = blockIdx.x * BlockSize() + threadIdx.x;
        if(id >= kargs.num_elements)
            return;

        // lift the runtime arrays into tuples sized by kMaxRanks
        const auto perm_length =
            generate_tuple([&](auto I) { return kargs.perm_length[I]; }, number<kMaxRanks>{});
        const auto perm_stride =
            generate_tuple([&](auto I) { return kargs.perm_stride[I]; }, number<kMaxRanks>{});

        const DataType* p_src = reinterpret_cast<const DataType*>(kargs.p_src);
        DataType* p_dst       = reinterpret_cast<DataType*>(kargs.p_dst);

        // source: permuted lengths with the original (reordered) strides,
        // then merge all kMaxRanks dims into a single 1-D view
        const auto src_view_0 = make_naive_tensor_view<address_space_enum::global>(
            p_src, perm_length, perm_stride, number<1>{}, number<1>{});
        const auto src_view = transform_tensor_view(
            src_view_0,
            make_tuple(make_merge_transform(perm_length)),
            make_tuple(typename arithmetic_sequence_gen<0, kMaxRanks, 1>::type{}),
            make_tuple(sequence<0>{}));

        // destination: same lengths but packed (contiguous) layout, also
        // merged into a 1-D view — so the same linear id addresses both sides
        auto dst_view_0 = make_naive_tensor_view_packed<address_space_enum::global>(
            p_dst, perm_length, number<1>{});
        auto dst_view = transform_tensor_view(
            dst_view_0,
            make_tuple(make_merge_transform(perm_length)),
            make_tuple(typename arithmetic_sequence_gen<0, kMaxRanks, 1>::type{}),
            make_tuple(sequence<0>{}));

        // TODO: hard code to vector 1
        using vector_t = thread_buffer<DataType, 1>;
        const auto src_coord =
            make_tensor_coordinate(src_view.get_tensor_descriptor(), array<index_t, 1>{id});
        const auto dst_coord =
            make_tensor_coordinate(dst_view.get_tensor_descriptor(), array<index_t, 1>{id});
        // printf("src id:%d, os:%d\n", id, src_coord.get_offset());
        // printf("dst id:%d, os:%d\n", id, dst_coord.get_offset());

        const vector_t x = src_view.template get_vectorized_elements<vector_t>(src_coord);
        dst_view.template set_vectorized_elements<vector_t>(dst_coord, x);
    }
};
} // namespace ck_tile
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck_tile/core/utility/type_traits.hpp"
namespace ck_tile {
// Compile-time problem description consumed by GenericPermute.
template <typename DataType_,
          index_t kBlockSize_ = 256,
          index_t kMaxRanks_  = 8,
          bool KeepLastDim_   = false>
struct GenericPermuteProblem
{
    using DataType = remove_cvref_t<DataType_>;

    static constexpr index_t kBlockSize = kBlockSize_; // threads per block
    static constexpr index_t kMaxRanks  = kMaxRanks_;  // max supported rank

    /* KeepLastDim:
     * if last dim keep the same? this can help enable vector load
     * permute(0, 2, 4, 1, 3, 5) -> true
     * permute(0, 3, 2, 1) -> false
     */
    static constexpr bool KeepLastDim = KeepLastDim_;
    // TODO: not used(?)
};
} // namespace ck_tile
......@@ -27,6 +27,11 @@
} \
} while(0)
/*
TODO:
This is a simple design of scatter/gather through indexing transform, with limitations
We may design a scatter/gather adaptor layer directly inside tile window
*/
template <ck_tile::index_t ROW_TILE_SIZE = 8,
ck_tile::index_t COL_TILE_SIZE = 32 * 8,
ck_tile::index_t BLOCK_SIZE = 256,
......@@ -130,8 +135,8 @@ __global__ void row_scatter_gather(const INDEX_BUF_TYPE* src_row_idx_ptr,
auto data = load_tile(src_tile);
store_tile(dst_tile, data);
move_tile_window(src_tile, {0, COL_TILE_SIZE});
move_tile_window(dst_tile, {0, COL_TILE_SIZE});
move_tile_window(src_tile, {number<0>{}, number<COL_TILE_SIZE>{}});
move_tile_window(dst_tile, {number<0>{}, number<COL_TILE_SIZE>{}});
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment