Commit e305e41e authored by aska-0096's avatar aska-0096
Browse files

Merge branch 'develop' of...

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into e2e_kernellib
parents bee4e344 a35456a3
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <functional> #include <functional>
#include <numeric> #include <numeric>
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip> #include <iomanip>
#include <vector> #include <vector>
...@@ -131,11 +131,12 @@ int main(int argc, char* argv[]) ...@@ -131,11 +131,12 @@ int main(int argc, char* argv[])
} }
} }
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
// run the best intance // run the best intance
if(found)
{ {
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
auto& op_ptr = op_ptrs[best_op_id]; auto& op_ptr = op_ptrs[best_op_id];
std::cout << "Run the best instance without timing: " << op_ptr->GetTypeString() std::cout << "Run the best instance without timing: " << op_ptr->GetTypeString()
<< std::endl; << std::endl;
......
add_executable(client_max_pool2d_fwd max_pool2d_fwd.cpp)
target_link_libraries(client_max_pool2d_fwd PRIVATE composable_kernel::device_operations)
add_executable(client_avg_pool3d_fwd avg_pool3d_fwd.cpp)
target_link_libraries(client_avg_pool3d_fwd PRIVATE composable_kernel::device_operations)
\ No newline at end of file
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
#include <iostream>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_pool_fwd.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp"
using InDataType = ck::half_t;
using OutDataType = ck::half_t;
using IndexDataType = int32_t;
constexpr ck::index_t InOutRank = 5;
constexpr ck::index_t WindowRank = 3;
#if 0
constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;
constexpr bool OutputIndex = false;
#else
constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;
constexpr bool OutputIndex = false;
#endif
struct SimpleDeviceMem
{
SimpleDeviceMem() = delete;
SimpleDeviceMem(std::size_t mem_size) : p_mem_{}
{
(void)hipMalloc(static_cast<void**>(&p_mem_), mem_size);
}
void* GetDeviceBuffer() { return p_mem_; }
~SimpleDeviceMem() { (void)hipFree(p_mem_); }
void* p_mem_;
};
int main(int argc, char* argv[])
{
ck::index_t N = 2;
ck::index_t C = 32;
ck::index_t Z = 2;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Di = 30;
ck::index_t Hi = 30;
ck::index_t Wi = 30;
ck::index_t window_stride_d = 2;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_d = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_d = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t Do = (Di + in_left_pad_d + in_right_pad_d - Z) / window_stride_d + 1;
ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
// Pool API only support the order of NCDHW
std::vector<ck::index_t> in_length = {N, C, Di, Hi, Wi};
std::vector<ck::index_t> out_length = {N, C, Do, Ho, Wo};
std::vector<ck::index_t> window_spatial_lengths = {Z, Y, X};
std::vector<ck::index_t> window_strides = {window_stride_d, window_stride_h, window_stride_w};
std::vector<ck::index_t> input_left_pads = {in_left_pad_d, in_left_pad_h, in_left_pad_w};
std::vector<ck::index_t> input_right_pads = {in_right_pad_d, in_right_pad_h, in_right_pad_w};
std::size_t in_tensor_size = N * C * Di * Hi * Wi;
std::size_t out_tensor_size = N * C * Do * Ho * Wo;
// tensor layout = NDHWC
std::vector<ck::index_t> in_tensor_stride = {Di * C * Hi * Wi, 1, C * Hi * Wi, Wi * C, C};
std::vector<ck::index_t> out_tensor_stride = {Do * C * Ho * Wo, 1, C * Ho * Wo, Wo * C, C};
SimpleDeviceMem in_device_buf(sizeof(InDataType) * in_tensor_size);
SimpleDeviceMem out_device_buf(sizeof(OutDataType) * out_tensor_size);
SimpleDeviceMem out_indices_device_buf(sizeof(IndexDataType) * out_tensor_size);
using DeviceOp = ck::tensor_operation::device::DevicePoolFwd<InOutRank,
WindowRank,
InDataType,
OutDataType,
IndexDataType,
ReduceOpId,
OutputIndex>;
// get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
std::string best_op_name;
bool found = false;
int best_op_id = -1;
float best_ave_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
// profile device operation instances
std::cout << "Run all instances and do timing" << std::endl;
for(int i = 0; i < op_ptrs.size(); ++i)
{
auto& op_ptr = op_ptrs[i];
auto argument_ptr = op_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
in_length,
window_spatial_lengths,
out_length,
in_tensor_stride,
out_tensor_stride,
out_tensor_stride,
window_strides,
input_left_pads,
input_right_pads,
{2, 3, 4});
auto invoker_ptr = op_ptr->MakeInvokerPointer();
std::string op_name = op_ptr->GetTypeString();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});
std::size_t num_bytes =
in_tensor_size * sizeof(InDataType) + out_tensor_size * sizeof(OutDataType);
if constexpr(OutputIndex)
num_bytes += out_tensor_size * sizeof(IndexDataType);
float gb_per_sec = num_bytes / 1.E6 / ave_time;
std::cout << "Perf: " << std::setw(10) << ave_time << " ms, " << gb_per_sec << " GB/s, "
<< op_name << std::endl;
if(ave_time < best_ave_time)
{
found = true;
best_op_id = i;
best_op_name = op_name;
best_ave_time = ave_time;
best_gb_per_sec = gb_per_sec;
}
}
else
{
std::cout << op_name << " does not support this problem" << std::endl;
}
}
// run the best intance
if(found)
{
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
auto& op_ptr = op_ptrs[best_op_id];
std::cout << "Run the best instance without timing: " << op_ptr->GetTypeString()
<< std::endl;
auto argument_ptr = op_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
in_length,
window_spatial_lengths,
out_length,
in_tensor_stride,
out_tensor_stride,
out_tensor_stride,
window_strides,
input_left_pads,
input_right_pads,
{2, 3, 4});
auto invoker_ptr = op_ptr->MakeInvokerPointer();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, false});
}
std::cout << "Done" << std::endl;
}
return 0;
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iomanip>
#include <vector>
#include <iostream>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_pool_fwd.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/pool2d_fwd.hpp"
using InDataType = ck::half_t;
using OutDataType = ck::half_t;
using IndexDataType = int32_t;
constexpr ck::index_t InOutRank = 4;
constexpr ck::index_t WindowRank = 2;
#if 1
constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;
constexpr bool OutputIndex = true;
#else
constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;
constexpr bool OutputIndex = false;
#endif
struct SimpleDeviceMem
{
SimpleDeviceMem() = delete;
SimpleDeviceMem(std::size_t mem_size) : p_mem_{}
{
(void)hipMalloc(static_cast<void**>(&p_mem_), mem_size);
}
void* GetDeviceBuffer() { return p_mem_; }
~SimpleDeviceMem() { (void)hipFree(p_mem_); }
void* p_mem_;
};
int main(int argc, char* argv[])
{
ck::index_t N = 2;
ck::index_t C = 32;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Hi = 30;
ck::index_t Wi = 30;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
// Pool API only support the order of NCHW
std::vector<ck::index_t> in_length = {N, C, Hi, Wi};
std::vector<ck::index_t> out_length = {N, C, Ho, Wo};
std::vector<ck::index_t> window_spatial_lengths = {Y, X};
std::vector<ck::index_t> window_strides = {window_stride_h, window_stride_w};
std::vector<ck::index_t> input_left_pads = {in_left_pad_h, in_left_pad_w};
std::vector<ck::index_t> input_right_pads = {in_right_pad_h, in_right_pad_w};
std::size_t in_tensor_size = N * C * Hi * Wi;
std::size_t out_tensor_size = N * C * Ho * Wo;
// tensor layout = NHWC
std::vector<ck::index_t> in_tensor_stride = {C * Hi * Wi, 1, Wi * C, C};
std::vector<ck::index_t> out_tensor_stride = {C * Ho * Wo, 1, Wo * C, C};
SimpleDeviceMem in_device_buf(sizeof(InDataType) * in_tensor_size);
SimpleDeviceMem out_device_buf(sizeof(OutDataType) * out_tensor_size);
SimpleDeviceMem out_indices_device_buf(sizeof(IndexDataType) * out_tensor_size);
using DeviceOp = ck::tensor_operation::device::DevicePoolFwd<InOutRank,
WindowRank,
InDataType,
OutDataType,
IndexDataType,
ReduceOpId,
OutputIndex>;
// get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
std::string best_op_name;
bool found = false;
int best_op_id = -1;
float best_ave_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
// profile device operation instances
std::cout << "Run all instances and do timing" << std::endl;
for(int i = 0; i < op_ptrs.size(); ++i)
{
auto& op_ptr = op_ptrs[i];
auto argument_ptr = op_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
in_length,
window_spatial_lengths,
out_length,
in_tensor_stride,
out_tensor_stride,
out_tensor_stride,
window_strides,
input_left_pads,
input_right_pads,
{2, 3});
auto invoker_ptr = op_ptr->MakeInvokerPointer();
std::string op_name = op_ptr->GetTypeString();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});
std::size_t num_bytes =
in_tensor_size * sizeof(InDataType) + out_tensor_size * sizeof(OutDataType);
if constexpr(OutputIndex)
num_bytes += out_tensor_size * sizeof(IndexDataType);
float gb_per_sec = num_bytes / 1.E6 / ave_time;
std::cout << "Perf: " << std::setw(10) << ave_time << " ms, " << gb_per_sec << " GB/s, "
<< op_name << std::endl;
if(ave_time < best_ave_time)
{
found = true;
best_op_id = i;
best_op_name = op_name;
best_ave_time = ave_time;
best_gb_per_sec = gb_per_sec;
}
}
else
{
std::cout << op_name << " does not support this problem" << std::endl;
}
}
// run the best intance
if(found)
{
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
auto& op_ptr = op_ptrs[best_op_id];
std::cout << "Run the best instance without timing: " << op_ptr->GetTypeString()
<< std::endl;
auto argument_ptr = op_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
in_length,
window_spatial_lengths,
out_length,
in_tensor_stride,
out_tensor_stride,
out_tensor_stride,
window_strides,
input_left_pads,
input_right_pads,
{2, 3});
auto invoker_ptr = op_ptr->MakeInvokerPointer();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, false});
}
std::cout << "Done" << std::endl;
}
return 0;
}
...@@ -4,10 +4,21 @@ ...@@ -4,10 +4,21 @@
# list see the documentation: # list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html # https://www.sphinx-doc.org/en/master/usage/configuration.html
import subprocess
from rocm_docs import ROCmDocs from rocm_docs import ROCmDocs
docs_core = ROCmDocs("Composable Kernel Documentation")
docs_core.run_doxygen() name = "Composable Kernel"
get_version = r'sed -n -e "s/^rocm_setup_version(.* \([0-9\.]\{1,\}\).*/\1/p" ../CMakeLists.txt'
version = subprocess.getoutput(get_version)
if len(version) > 0:
name = f"{name} {version}"
external_toc_path = "./sphinx/_toc.yml"
docs_core = ROCmDocs(f"{name} Documentation")
docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/docBin/xml")
docs_core.setup() docs_core.setup()
mathjax3_config = { mathjax3_config = {
......
=======
License
=======
.. include:: ../LICENSE
:literal:
# Anywhere {branch} is used, the branch name will be substituted.
# These comments will also be removed.
defaults:
numbered: False
maxdepth: 6
root: index
subtrees:
- caption: About
entries:
- file: license
rocm-docs-core==0.2.0 rocm-docs-core==0.10.3
sphinxcontrib-bibtex==2.5.0 sphinxcontrib-bibtex==2.5.0
# #
# This file is autogenerated by pip-compile with Python 3.10 # This file is autogenerated by pip-compile with Python 3.8
# by the following command: # by the following command:
# #
# pip-compile .sphinx/requirements.in # pip-compile requirements.in
# #
accessible-pygments==0.0.3 accessible-pygments==0.0.3
# via pydata-sphinx-theme # via pydata-sphinx-theme
alabaster==0.7.13 alabaster==0.7.13
# via sphinx # via sphinx
asttokens==2.2.1
# via stack-data
attrs==22.2.0
# via
# jsonschema
# jupyter-cache
babel==2.12.1 babel==2.12.1
# via # via
# pydata-sphinx-theme # pydata-sphinx-theme
# sphinx # sphinx
backcall==0.2.0
# via ipython
beautifulsoup4==4.11.2 beautifulsoup4==4.11.2
# via pydata-sphinx-theme # via pydata-sphinx-theme
breathe==4.34.0 breathe==4.34.0
...@@ -27,19 +19,15 @@ breathe==4.34.0 ...@@ -27,19 +19,15 @@ breathe==4.34.0
certifi==2022.12.7 certifi==2022.12.7
# via requests # via requests
cffi==1.15.1 cffi==1.15.1
# via pynacl # via
# cryptography
# pynacl
charset-normalizer==3.1.0 charset-normalizer==3.1.0
# via requests # via requests
click==8.1.3 click==8.1.3
# via # via sphinx-external-toc
# jupyter-cache cryptography==40.0.2
# sphinx-external-toc # via pyjwt
comm==0.1.2
# via ipykernel
debugpy==1.6.6
# via ipykernel
decorator==5.1.1
# via ipython
deprecated==1.2.13 deprecated==1.2.13
# via pygithub # via pygithub
docutils==0.16 docutils==0.16
...@@ -48,52 +36,26 @@ docutils==0.16 ...@@ -48,52 +36,26 @@ docutils==0.16
# myst-parser # myst-parser
# pybtex-docutils # pybtex-docutils
# pydata-sphinx-theme # pydata-sphinx-theme
# rocm-docs-core
# sphinx # sphinx
# sphinxcontrib-bibtex # sphinxcontrib-bibtex
executing==1.2.0
# via stack-data
fastjsonschema==2.16.3
# via nbformat
gitdb==4.0.10 gitdb==4.0.10
# via gitpython # via gitpython
gitpython==3.1.31 gitpython==3.1.31
# via rocm-docs-core # via rocm-docs-core
greenlet==2.0.2
# via sqlalchemy
idna==3.4 idna==3.4
# via requests # via requests
imagesize==1.4.1 imagesize==1.4.1
# via sphinx # via sphinx
importlib-metadata==6.0.0 importlib-metadata==6.0.0
# via # via
# jupyter-cache # sphinx
# myst-nb # sphinxcontrib-bibtex
ipykernel==6.21.3 importlib-resources==5.12.0
# via myst-nb # via rocm-docs-core
ipython==8.11.0
# via
# ipykernel
# myst-nb
jedi==0.18.2
# via ipython
jinja2==3.1.2 jinja2==3.1.2
# via # via
# myst-parser # myst-parser
# sphinx # sphinx
jsonschema==4.17.3
# via nbformat
jupyter-cache==0.5.0
# via myst-nb
jupyter-client==8.0.3
# via
# ipykernel
# nbclient
jupyter-core==5.3.0
# via
# ipykernel
# jupyter-client
# nbformat
latexcodec==2.0.1 latexcodec==2.0.1
# via pybtex # via pybtex
linkify-it-py==1.0.3 linkify-it-py==1.0.3
...@@ -104,54 +66,16 @@ markdown-it-py==2.2.0 ...@@ -104,54 +66,16 @@ markdown-it-py==2.2.0
# myst-parser # myst-parser
markupsafe==2.1.2 markupsafe==2.1.2
# via jinja2 # via jinja2
matplotlib-inline==0.1.6
# via
# ipykernel
# ipython
mdit-py-plugins==0.3.5 mdit-py-plugins==0.3.5
# via myst-parser # via myst-parser
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
myst-nb==0.17.1 myst-parser[linkify]==1.0.0
# via rocm-docs-core # via rocm-docs-core
myst-parser[linkify]==0.18.1
# via
# myst-nb
# rocm-docs-core
nbclient==0.5.13
# via
# jupyter-cache
# myst-nb
nbformat==5.7.3
# via
# jupyter-cache
# myst-nb
# nbclient
nest-asyncio==1.5.6
# via
# ipykernel
# nbclient
packaging==23.0 packaging==23.0
# via # via
# ipykernel
# pydata-sphinx-theme # pydata-sphinx-theme
# sphinx # sphinx
parso==0.8.3
# via jedi
pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
platformdirs==3.1.1
# via jupyter-core
prompt-toolkit==3.0.38
# via ipython
psutil==5.9.4
# via ipykernel
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
pybtex==0.24.0 pybtex==0.24.0
# via # via
# pybtex-docutils # pybtex-docutils
...@@ -160,57 +84,47 @@ pybtex-docutils==1.0.2 ...@@ -160,57 +84,47 @@ pybtex-docutils==1.0.2
# via sphinxcontrib-bibtex # via sphinxcontrib-bibtex
pycparser==2.21 pycparser==2.21
# via cffi # via cffi
pydata-sphinx-theme==0.13.1 pydata-sphinx-theme==0.13.3
# via sphinx-book-theme # via
pygithub==1.57 # rocm-docs-core
# sphinx-book-theme
pygithub==1.58.2
# via rocm-docs-core # via rocm-docs-core
pygments==2.14.0 pygments==2.14.0
# via # via
# accessible-pygments # accessible-pygments
# ipython
# pydata-sphinx-theme # pydata-sphinx-theme
# sphinx # sphinx
pyjwt==2.6.0 pyjwt[crypto]==2.6.0
# via pygithub # via pygithub
pynacl==1.5.0 pynacl==1.5.0
# via pygithub # via pygithub
pyrsistent==0.19.3 pytz==2023.3
# via jsonschema # via babel
python-dateutil==2.8.2
# via jupyter-client
pyyaml==6.0 pyyaml==6.0
# via # via
# jupyter-cache
# myst-nb
# myst-parser # myst-parser
# pybtex # pybtex
# sphinx-external-toc # sphinx-external-toc
pyzmq==25.0.1
# via
# ipykernel
# jupyter-client
requests==2.28.2 requests==2.28.2
# via # via
# pygithub # pygithub
# sphinx # sphinx
rocm-docs-core==0.2.0 rocm-docs-core==0.10.3
# via -r .sphinx/requirements.in # via -r requirements.in
six==1.16.0 six==1.16.0
# via # via
# asttokens
# latexcodec # latexcodec
# pybtex # pybtex
# python-dateutil
smmap==5.0.0 smmap==5.0.0
# via gitdb # via gitdb
snowballstemmer==2.2.0 snowballstemmer==2.2.0
# via sphinx # via sphinx
soupsieve==2.4 soupsieve==2.4
# via beautifulsoup4 # via beautifulsoup4
sphinx==4.3.1 sphinx==5.3.0
# via # via
# breathe # breathe
# myst-nb
# myst-parser # myst-parser
# pydata-sphinx-theme # pydata-sphinx-theme
# rocm-docs-core # rocm-docs-core
...@@ -220,7 +134,7 @@ sphinx==4.3.1 ...@@ -220,7 +134,7 @@ sphinx==4.3.1
# sphinx-external-toc # sphinx-external-toc
# sphinx-notfound-page # sphinx-notfound-page
# sphinxcontrib-bibtex # sphinxcontrib-bibtex
sphinx-book-theme==1.0.0rc2 sphinx-book-theme==1.0.1
# via rocm-docs-core # via rocm-docs-core
sphinx-copybutton==0.5.1 sphinx-copybutton==0.5.1
# via rocm-docs-core # via rocm-docs-core
...@@ -233,7 +147,7 @@ sphinx-notfound-page==0.8.3 ...@@ -233,7 +147,7 @@ sphinx-notfound-page==0.8.3
sphinxcontrib-applehelp==1.0.4 sphinxcontrib-applehelp==1.0.4
# via sphinx # via sphinx
sphinxcontrib-bibtex==2.5.0 sphinxcontrib-bibtex==2.5.0
# via -r .sphinx/requirements.in # via -r requirements.in
sphinxcontrib-devhelp==1.0.2 sphinxcontrib-devhelp==1.0.2
# via sphinx # via sphinx
sphinxcontrib-htmlhelp==2.0.1 sphinxcontrib-htmlhelp==2.0.1
...@@ -244,40 +158,15 @@ sphinxcontrib-qthelp==1.0.3 ...@@ -244,40 +158,15 @@ sphinxcontrib-qthelp==1.0.3
# via sphinx # via sphinx
sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-serializinghtml==1.1.5
# via sphinx # via sphinx
sqlalchemy==1.4.46
# via jupyter-cache
stack-data==0.6.2
# via ipython
tabulate==0.9.0
# via jupyter-cache
tornado==6.2
# via
# ipykernel
# jupyter-client
traitlets==5.9.0
# via
# comm
# ipykernel
# ipython
# jupyter-client
# jupyter-core
# matplotlib-inline
# nbclient
# nbformat
typing-extensions==4.5.0 typing-extensions==4.5.0
# via # via pydata-sphinx-theme
# myst-nb
# myst-parser
uc-micro-py==1.0.1 uc-micro-py==1.0.1
# via linkify-it-py # via linkify-it-py
urllib3==1.26.15 urllib3==1.26.15
# via requests # via requests
wcwidth==0.2.6
# via prompt-toolkit
wrapt==1.15.0 wrapt==1.15.0
# via deprecated # via deprecated
zipp==3.15.0 zipp==3.15.0
# via importlib-metadata # via
# importlib-metadata
# The following packages are considered to be unsafe in a requirements file: # importlib-resources
# setuptools
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp" #include "common.hpp"
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp" #include "common.hpp"
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
#error Should compile this file with ck::int4_t support #error Should compile this file with ck::int4_t support
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp" #include "common.hpp"
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp" #include "common.hpp"
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp" #include "common.hpp"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment