Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
c6ec6638
Commit
c6ec6638
authored
Nov 29, 2023
by
Khalique Ahmed
Browse files
Merge branch 'develop' of
https://github.com/ROCmSoftwarePlatform/AMDMIGraphX
into auto_contig_fix
parents
b42c7b41
a6d1540f
Changes
152
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1198 additions
and
121 deletions
+1198
-121
.github/workflows/ci.yaml
.github/workflows/ci.yaml
+1
-1
.gitignore
.gitignore
+3
-1
CMakeLists.txt
CMakeLists.txt
+19
-5
Jenkinsfile
Jenkinsfile
+2
-0
cmake/Embed.cmake
cmake/Embed.cmake
+115
-98
docs/.sphinx/requirements.txt
docs/.sphinx/requirements.txt
+2
-2
examples/README.md
examples/README.md
+2
-1
examples/diffusion/README.md
examples/diffusion/README.md
+3
-0
examples/diffusion/python_stable_diffusion_21/README.md
examples/diffusion/python_stable_diffusion_21/README.md
+67
-0
examples/diffusion/python_stable_diffusion_21/example_output.jpg
...s/diffusion/python_stable_diffusion_21/example_output.jpg
+0
-0
examples/diffusion/python_stable_diffusion_21/gradio_app.py
examples/diffusion/python_stable_diffusion_21/gradio_app.py
+54
-0
examples/diffusion/python_stable_diffusion_21/gradio_reqirements.txt
...ffusion/python_stable_diffusion_21/gradio_reqirements.txt
+25
-0
examples/diffusion/python_stable_diffusion_21/requirements.txt
...les/diffusion/python_stable_diffusion_21/requirements.txt
+27
-0
examples/diffusion/python_stable_diffusion_21/sd21.ipynb
examples/diffusion/python_stable_diffusion_21/sd21.ipynb
+532
-0
examples/diffusion/python_stable_diffusion_21/txt2img.py
examples/diffusion/python_stable_diffusion_21/txt2img.py
+257
-0
src/CMakeLists.txt
src/CMakeLists.txt
+60
-10
src/api/include/migraphx/migraphx.h
src/api/include/migraphx/migraphx.h
+2
-1
src/driver/argument_parser.hpp
src/driver/argument_parser.hpp
+2
-0
src/dynamic_loader.cpp
src/dynamic_loader.cpp
+24
-0
src/fuse_pointwise.cpp
src/fuse_pointwise.cpp
+1
-2
No files found.
.github/workflows/ci.yaml
View file @
c6ec6638
...
@@ -465,7 +465,7 @@ jobs:
...
@@ -465,7 +465,7 @@ jobs:
-
name
:
Upload code coverage
-
name
:
Upload code coverage
if
:
"
matrix.configuration
==
'codecov'"
if
:
"
matrix.configuration
==
'codecov'"
env
:
env
:
CODECOV_TOKEN
:
"
8545af1c-f90b-4345-92a5-0d075503ca56
"
CODECOV_TOKEN
:
"
f5d5a10b-3177-4c76-b25f-9b1c2f165e8b
"
run
:
|
run
:
|
sudo apt-get install -y lcov
sudo apt-get install -y lcov
cd build
cd build
...
...
.gitignore
View file @
c6ec6638
...
@@ -81,5 +81,7 @@ cmake-build*/
...
@@ -81,5 +81,7 @@ cmake-build*/
build*/
build*/
# Recommended location to install rbuild dependencies from README.md
# Recommended location to install rbuild dependencies from README.md
depend
depend
*/
# local Python virtual environment
.venv/
CMakeLists.txt
View file @
c6ec6638
...
@@ -41,9 +41,12 @@ if(NOT MIGRAPHX_GENERATOR_IS_MULTI_CONFIG)
...
@@ -41,9 +41,12 @@ if(NOT MIGRAPHX_GENERATOR_IS_MULTI_CONFIG)
set_property
(
CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
${
CMAKE_CONFIGURATION_TYPES
}
)
set_property
(
CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
${
CMAKE_CONFIGURATION_TYPES
}
)
endif
()
endif
()
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm"
CACHE PATH
""
)
if
(
NOT WIN32
)
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm"
CACHE PATH
""
)
set
(
CMAKE_BUILD_RPATH
"
${
CMAKE_BINARY_DIR
}
/lib"
)
endif
()
set
(
CMAKE_BUILD_RPATH
"
${
CMAKE_BINARY_DIR
}
/lib"
)
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/llvm $ENV{ROCM_PATH} $ENV{HIP_PATH}
)
project
(
migraphx LANGUAGES C CXX
)
project
(
migraphx LANGUAGES C CXX
)
include
(
CTest
)
include
(
CTest
)
...
@@ -57,6 +60,9 @@ else()
...
@@ -57,6 +60,9 @@ else()
option
(
MIGRAPHX_ENABLE_PYTHON
"Enable python bindings"
ON
)
option
(
MIGRAPHX_ENABLE_PYTHON
"Enable python bindings"
ON
)
endif
()
endif
()
# By default build shared libraries
option
(
BUILD_SHARED_LIBS
"Create shared libraries"
ON
)
if
(
WIN32
)
# CK is not yet ported to Windows
if
(
WIN32
)
# CK is not yet ported to Windows
option
(
MIGRAPHX_USE_COMPOSABLEKERNEL
"Enable MIGraphX to use composable kernel JIT library"
OFF
)
option
(
MIGRAPHX_USE_COMPOSABLEKERNEL
"Enable MIGraphX to use composable kernel JIT library"
OFF
)
else
()
else
()
...
@@ -102,13 +108,21 @@ set(MIGRAPHX_ENABLE_CPU Off CACHE BOOL "")
...
@@ -102,13 +108,21 @@ set(MIGRAPHX_ENABLE_CPU Off CACHE BOOL "")
# Disable fpga backend by default
# Disable fpga backend by default
set
(
MIGRAPHX_ENABLE_FPGA Off CACHE BOOL
""
)
set
(
MIGRAPHX_ENABLE_FPGA Off CACHE BOOL
""
)
if
(
WIN32
)
add_compile_definitions
(
"$<$<COMPILE_LANGUAGE:C,CXX>:_CRT_SECURE_NO_WARNINGS;_USE_MATH_DEFINES>"
)
endif
()
set
(
CMAKE_CXX_STANDARD_DEFAULT
""
)
set
(
CMAKE_CXX_STANDARD_DEFAULT
""
)
add_compile_options
(
$<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
)
if
(
MSVC
)
add_compile_options
(
$<$<COMPILE_LANGUAGE:CXX>:/std:c++17>
)
else
()
add_compile_options
(
$<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
)
endif
()
list
(
APPEND CMAKE_MODULE_PATH
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake
)
list
(
APPEND CMAKE_MODULE_PATH
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake
)
include
(
EnableCompilerWarnings
)
include
(
EnableCompilerWarnings
)
include
(
ROCMClangTidy
)
include
(
ROCMClangTidy
)
if
(
CMAKE_CXX_COMPILER MATCHES
".*clang
\\
+
\\
+"
)
if
(
CMAKE_CXX_COMPILER MATCHES
".*clang
\\
+
\\
+
.*
"
)
set
(
MIGRAPHX_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
set
(
MIGRAPHX_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
# Enable tidy on hip
# Enable tidy on hip
elseif
(
MIGRAPHX_ENABLE_GPU
)
elseif
(
MIGRAPHX_ENABLE_GPU
)
...
...
Jenkinsfile
View file @
c6ec6638
...
@@ -22,6 +22,8 @@ def rocmtestnode(Map conf) {
...
@@ -22,6 +22,8 @@ def rocmtestnode(Map conf) {
def
cmd
=
"""
def
cmd
=
"""
ulimit -c unlimited
ulimit -c unlimited
echo "leak:dnnl::impl::malloc" > suppressions.txt
echo "leak:dnnl::impl::malloc" > suppressions.txt
echo "leak:libtbb.so" >> suppressions.txt
cat suppressions.txt
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export CXX=${compiler}
export CXX=${compiler}
...
...
cmake/Embed.cmake
100755 → 100644
View file @
c6ec6638
...
@@ -21,17 +21,25 @@
...
@@ -21,17 +21,25 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# THE SOFTWARE.
#####################################################################################
#####################################################################################
find_program
(
EMBED_LD ld
)
find_program
(
EMBED_OBJCOPY objcopy
)
option
(
EMBED_USE_LD
"Use ld to embed data files"
OFF
)
if
(
WIN32
)
set
(
EMBED_USE RC CACHE STRING
"Use RC or CArrays to embed data files"
)
set_property
(
CACHE EMBED_USE PROPERTY STRINGS
"RC;CArrays"
)
else
()
set
(
EMBED_USE CArrays CACHE STRING
"Use LD or CArrays to embed data files"
)
set_property
(
CACHE EMBED_USE PROPERTY STRINGS
"LD;CArrays"
)
endif
()
if
(
EMBED_USE STREQUAL
"LD"
)
find_program
(
EMBED_LD ld REQUIRED
)
find_program
(
EMBED_OBJCOPY objcopy REQUIRED
)
endif
()
function
(
wrap_string
)
function
(
wrap_string
)
set
(
options
)
set
(
options
)
set
(
oneValueArgs VARIABLE AT_COLUMN
)
set
(
oneValueArgs VARIABLE AT_COLUMN
)
set
(
multiValueArgs
)
set
(
multiValueArgs
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
WRAP_STRING
"
${
options
}
"
"
${
oneValueArgs
}
"
""
${
ARGN
}
)
string
(
LENGTH
${${
PARSE_VARIABLE
}}
string_length
)
string
(
LENGTH
${${
PARSE_VARIABLE
}}
string_length
)
math
(
EXPR offset
"0"
)
math
(
EXPR offset
"0"
)
...
@@ -54,97 +62,108 @@ function(wrap_string)
...
@@ -54,97 +62,108 @@ function(wrap_string)
set
(
${
PARSE_VARIABLE
}
"
${
lines
}
"
PARENT_SCOPE
)
set
(
${
PARSE_VARIABLE
}
"
${
lines
}
"
PARENT_SCOPE
)
endfunction
()
endfunction
()
function
(
generate_embed_source EMBED_NAME
)
function
(
generate_embed_source EMBED_NAME
EMBED_DIR BASE_DIRECTORY
)
set
(
options
)
set
(
options
)
set
(
oneValueArgs SRC HEADER RELATIVE
)
set
(
oneValueArgs
)
set
(
multiValueArgs OBJECTS SYMBOLS FILES
)
set
(
multiValueArgs SYMBOLS FILES
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
EXTERNS
)
set
(
RESOURCE_ID 100
)
set
(
INIT_KERNELS
)
foreach
(
SYMBOL FILE IN ZIP_LISTS PARSE_SYMBOLS PARSE_FILES
)
cmake_path
(
RELATIVE_PATH FILE BASE_DIRECTORY
${
BASE_DIRECTORY
}
OUTPUT_VARIABLE BASE_NAME
)
list
(
LENGTH PARSE_SYMBOLS SYMBOLS_LEN
)
if
(
EMBED_USE STREQUAL
"RC"
)
list
(
LENGTH PARSE_OBJECTS OBJECTS_LEN
)
string
(
TOUPPER
"
${
SYMBOL
}
"
SYMBOL
)
if
(
NOT
${
SYMBOLS_LEN
}
EQUAL
${
OBJECTS_LEN
}
)
string
(
APPEND FILE_IDS
"#define IDR_
${
SYMBOL
}
${
RESOURCE_ID
}
\n
"
)
message
(
FATAL_ERROR
"Symbols and objects dont match:
${
SYMBOLS_LEN
}
!=
${
OBJECTS_LEN
}
"
)
cmake_path
(
NATIVE_PATH FILE NORMALIZE NATIVE_FILE
)
endif
()
string
(
REPLACE
"
\\
"
"
\\\\
"
NATIVE_FILE
"
${
NATIVE_FILE
}
"
)
math
(
EXPR LEN
"
${
SYMBOLS_LEN
}
- 1"
)
string
(
APPEND RC_FILE_MAPPING
"IDR_
${
SYMBOL
}
TEXTFILE
\"
${
NATIVE_FILE
}
\"\n
"
)
string
(
APPEND INIT_KERNELS
"
\n
{
\"
${
BASE_NAME
}
\"
, resource::read(IDR_
${
SYMBOL
}
)},"
)
foreach
(
idx RANGE
${
LEN
}
)
math
(
EXPR RESOURCE_ID
"
${
RESOURCE_ID
}
+ 1"
OUTPUT_FORMAT DECIMAL
)
list
(
GET PARSE_SYMBOLS
${
idx
}
SYMBOL
)
else
()
list
(
GET PARSE_OBJECTS
${
idx
}
OBJECT
)
list
(
GET PARSE_FILES
${
idx
}
FILE
)
set
(
START_SYMBOL
"_binary_
${
SYMBOL
}
_start"
)
set
(
START_SYMBOL
"_binary_
${
SYMBOL
}
_start"
)
set
(
LENGTH_SYMBOL
"_binary_
${
SYMBOL
}
_length"
)
set
(
LENGTH_SYMBOL
"_binary_
${
SYMBOL
}
_length"
)
if
(
EMBED_USE
_
LD
)
if
(
EMBED_USE
STREQUAL
"
LD
"
)
string
(
APPEND EXTERNS
"
string
(
APPEND EXTERNS
"
extern const char
${
START_SYMBOL
}
[];
extern const char
${
START_SYMBOL
}
[];
extern const size_t _binary_
${
SYMBOL
}
_size;
extern const size_t _binary_
${
SYMBOL
}
_size;
const auto
${
LENGTH_SYMBOL
}
= reinterpret_cast<size_t>(&_binary_
${
SYMBOL
}
_size);
const auto
${
LENGTH_SYMBOL
}
= reinterpret_cast<size_t>(&_binary_
${
SYMBOL
}
_size);
"
)
"
)
else
()
else
()
string
(
APPEND EXTERNS
"
string
(
APPEND EXTERNS
"
extern const char
${
START_SYMBOL
}
[];
extern const char
${
START_SYMBOL
}
[];
extern const size_t
${
LENGTH_SYMBOL
}
;
extern const size_t
${
LENGTH_SYMBOL
}
;
"
)
"
)
endif
()
if
(
PARSE_RELATIVE
)
file
(
RELATIVE_PATH BASE_NAME
${
PARSE_RELATIVE
}
"
${
FILE
}
"
)
else
()
get_filename_component
(
BASE_NAME
"
${
FILE
}
"
NAME
)
endif
()
endif
()
string
(
APPEND INIT_KERNELS
"
string
(
APPEND INIT_KERNELS
"
{
\"
${
BASE_NAME
}
\"
, {
${
START_SYMBOL
}
,
${
LENGTH_SYMBOL
}
} },"
)
{
\"
${
BASE_NAME
}
\"
, {
${
START_SYMBOL
}
,
${
LENGTH_SYMBOL
}
} },"
)
endif
()
endforeach
()
endforeach
()
if
(
EMBED_USE STREQUAL
"RC"
)
file
(
WRITE
"
${
EMBED_DIR
}
/include/resource.h"
"
#define TEXTFILE 256
${
FILE_IDS
}
"
)
file
(
WRITE
"
${
EMBED_DIR
}
/resource.rc"
"
#include
\"
resource.h
\"
${
RC_FILE_MAPPING
}
"
)
set
(
EXTERNS
"
#include <Windows.h>
#include
\"
resource.h
\"
file
(
WRITE
"
${
PARSE_HEADER
}
"
"
namespace resource {
std::string_view read(int id)
{
HMODULE handle = GetModuleHandle(nullptr);
HRSRC rc = FindResource(handle, MAKEINTRESOURCE(id), MAKEINTRESOURCE(TEXTFILE));
HGLOBAL data = LoadResource(handle, rc);
return {static_cast<const char*>(LockResource(data)), SizeofResource(handle, rc)};
}
}
"
)
set
(
EMBED_FILES
${
EMBED_DIR
}
/include/resource.h
${
EMBED_DIR
}
/resource.rc
)
endif
()
file
(
WRITE
"
${
EMBED_DIR
}
/include/
${
EMBED_NAME
}
.hpp"
"
#include <string_view>
#include <string_view>
#include <unordered_map>
#include <unordered_map>
#include <utility>
#include <utility>
std::unordered_map<std::string_view, std::string_view>
${
EMBED_NAME
}
();
std::unordered_map<std::string_view, std::string_view>
${
EMBED_NAME
}
();
"
)
"
)
file
(
WRITE
"
${
PARSE_SRC
}
"
"
file
(
WRITE
"
${
EMBED_DIR
}
/
${
EMBED_NAME
}
.cpp
"
"
#include <
${
EMBED_NAME
}
.hpp>
#include <
${
EMBED_NAME
}
.hpp>
${
EXTERNS
}
${
EXTERNS
}
std::unordered_map<std::string_view, std::string_view>
${
EMBED_NAME
}
()
std::unordered_map<std::string_view, std::string_view>
${
EMBED_NAME
}
()
{
{
static std::unordered_map<std::string_view, std::string_view> result = {
${
INIT_KERNELS
}
};
static std::unordered_map<std::string_view, std::string_view> result = {
${
INIT_KERNELS
}
};
return result;
return result;
}
}
"
)
"
)
list
(
APPEND EMBED_FILES
${
EMBED_DIR
}
/
${
EMBED_NAME
}
.cpp
${
EMBED_DIR
}
/include/
${
EMBED_NAME
}
.hpp
)
set
(
EMBED_FILES
${
EMBED_FILES
}
PARENT_SCOPE
)
endfunction
()
endfunction
()
function
(
embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE
)
function
(
embed_file FILE BASE_DIRECTORY
)
set
(
WORKING_DIRECTORY
${
CMAKE_CURRENT_SOURCE_DIR
}
)
message
(
STATUS
"
${
FILE
}
"
)
# Glob is used to compute the relative path
cmake_path
(
RELATIVE_PATH FILE BASE_DIRECTORY
"
${
BASE_DIRECTORY
}
"
OUTPUT_VARIABLE REL_FILE
)
file
(
GLOB FILES RELATIVE
${
WORKING_DIRECTORY
}
${
FILE
}
)
string
(
MAKE_C_IDENTIFIER
"
${
REL_FILE
}
"
OUTPUT_SYMBOL
)
foreach
(
REL_FILE
${
FILES
}
)
string
(
MAKE_C_IDENTIFIER
"
${
REL_FILE
}
"
SYMBOL
)
get_filename_component
(
OUTPUT_FILE_DIR
"
${
REL_FILE
}
"
DIRECTORY
)
get_filename_component
(
OUTPUT_FILE_DIR
"
${
REL_FILE
}
"
DIRECTORY
)
file
(
MAKE_DIRECTORY
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
OUTPUT_FILE_DIR
}
"
)
file
(
MAKE_DIRECTORY
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
OUTPUT_FILE_DIR
}
"
)
if
(
EMBED_USE_LD
)
if
(
EMBED_USE STREQUAL
"LD"
)
set
(
OUT_FILE
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
REL_FILE
}
.o"
)
set
(
OUTPUT_FILE
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
REL_FILE
}
.o"
)
else
()
set
(
OUT_FILE
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
REL_FILE
}
.cpp"
)
endif
()
set
(
${
OUTPUT_SYMBOL
}
${
SYMBOL
}
PARENT_SCOPE
)
set
(
${
OUTPUT_FILE
}
"
${
OUT_FILE
}
"
PARENT_SCOPE
)
if
(
EMBED_USE_LD
)
add_custom_command
(
add_custom_command
(
OUTPUT
"
${
OUT_FILE
}
"
OUTPUT
"
${
OUT
PUT
_FILE
}
"
COMMAND
${
EMBED_LD
}
-r -o
"
${
OUT_FILE
}
"
-z noexecstack --format=binary
"
${
REL_FILE
}
"
COMMAND
${
EMBED_LD
}
-r -o
"
${
OUT
PUT
_FILE
}
"
-z noexecstack --format=binary
"
${
REL_FILE
}
"
COMMAND
${
EMBED_OBJCOPY
}
--rename-section .data=.rodata,alloc,load,readonly,data,contents
"
${
OUT_FILE
}
"
COMMAND
${
EMBED_OBJCOPY
}
--rename-section .data=.rodata,alloc,load,readonly,data,contents
"
${
OUT
PUT
_FILE
}
"
WORKING_DIRECTORY
${
WORKING
_DIRECTORY
}
WORKING_DIRECTORY
"
${
BASE
_DIRECTORY
}
"
DEPENDS
${
FILE
}
DEPENDS
"
${
FILE
}
"
VERBATIM
VERBATIM
)
)
set
(
OUTPUT_FILE
${
OUTPUT_FILE
}
PARENT_SCOPE
)
else
(
)
else
if
(
EMBED_USE STREQUAL
"CArrays"
)
set_property
(
DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${
FILE
}
)
set
(
OUTPUT_FILE
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
REL_FILE
}
.cpp"
)
# reads source file contents as hex string
# reads source file contents as hex string
file
(
READ
${
FILE
}
HEX_STRING HEX
)
file
(
READ
${
FILE
}
HEX_STRING HEX
)
# wraps the hex string into multiple lines
# wraps the hex string into multiple lines
...
@@ -153,13 +172,14 @@ function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
...
@@ -153,13 +172,14 @@ function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
string
(
REGEX REPLACE
"([0-9a-f][0-9a-f])"
"0x
\\
1, "
ARRAY_VALUES
${
HEX_STRING
}
)
string
(
REGEX REPLACE
"([0-9a-f][0-9a-f])"
"0x
\\
1, "
ARRAY_VALUES
${
HEX_STRING
}
)
# removes trailing comma
# removes trailing comma
string
(
REGEX REPLACE
", $"
""
ARRAY_VALUES
${
ARRAY_VALUES
}
)
string
(
REGEX REPLACE
", $"
""
ARRAY_VALUES
${
ARRAY_VALUES
}
)
file
(
WRITE
"
${
OUT_FILE
}
"
"
file
(
WRITE
"
${
OUT
PUT
_FILE
}
"
"
#include <cstddef>
#include <cstddef>
extern const char _binary_
${
SYMBOL
}
_start[] = {
${
ARRAY_VALUES
}
};
extern const char _binary_
${
OUTPUT_
SYMBOL
}
_start[] = {
${
ARRAY_VALUES
}
};
extern const size_t _binary_
${
SYMBOL
}
_length = sizeof(_binary_
${
SYMBOL
}
_start);
extern const size_t _binary_
${
OUTPUT_
SYMBOL
}
_length = sizeof(_binary_
${
OUTPUT_
SYMBOL
}
_start);
"
)
"
)
set
(
OUTPUT_FILE
${
OUTPUT_FILE
}
PARENT_SCOPE
)
endif
()
endif
()
endforeach
(
)
set
(
OUTPUT_SYMBOL
${
OUTPUT_SYMBOL
}
PARENT_SCOPE
)
endfunction
()
endfunction
()
function
(
add_embed_library EMBED_NAME
)
function
(
add_embed_library EMBED_NAME
)
...
@@ -168,35 +188,32 @@ function(add_embed_library EMBED_NAME)
...
@@ -168,35 +188,32 @@ function(add_embed_library EMBED_NAME)
set
(
multiValueArgs
)
set
(
multiValueArgs
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
PARSE
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
file
(
MAKE_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/embed
)
file
(
MAKE_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/embed/
${
EMBED_NAME
}
)
set
(
EMBED_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/embed/
${
EMBED_NAME
}
)
set
(
EMBED_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/embed/
${
EMBED_NAME
}
)
set
(
SRC_FILE
"
${
EMBED_DIR
}
/
${
EMBED_NAME
}
.cpp"
)
file
(
MAKE_DIRECTORY
${
EMBED_DIR
}
)
set
(
HEADER_FILE
"
${
EMBED_DIR
}
/include/
${
EMBED_NAME
}
.hpp"
)
message
(
STATUS
"Embedding kernel files:"
)
set
(
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
set
(
OUTPUT_FILES
)
set
(
SYMBOLS
)
message
(
STATUS
"Embedding files"
)
foreach
(
FILE
${
PARSE_UNPARSED_ARGUMENTS
}
)
foreach
(
FILE
${
PARSE_UNPARSED_ARGUMENTS
}
)
embed_file
(
OUTPUT_FILE OUTPUT_SYMBOL
${
FIL
E
}
)
embed_file
(
${
FILE
}
${
PARSE_RELATIV
E
}
)
list
(
APPEND OUTPUT_FILES
${
OUTPUT_FILE
}
)
list
(
APPEND OUTPUT_FILES
${
OUTPUT_FILE
}
)
list
(
APPEND SYMBOLS
${
OUTPUT_SYMBOL
}
)
list
(
APPEND SYMBOLS
${
OUTPUT_SYMBOL
}
)
endforeach
()
endforeach
()
message
(
STATUS
"Generating embedding library
${
EMBED_NAME
}
"
)
message
(
STATUS
"Generating embedding library '
${
EMBED_NAME
}
'"
)
generate_embed_source
(
${
EMBED_NAME
}
SRC
${
SRC_FILE
}
HEADER
${
HEADER_FILE
}
OBJECTS
${
OUTPUT_FILES
}
SYMBOLS
${
SYMBOLS
}
RELATIVE
${
PARSE_RELATIVE
}
FILES
${
PARSE_UNPARSED_ARGUMENTS
}
)
generate_embed_source
(
${
EMBED_NAME
}
${
EMBED_DIR
}
"
${
PARSE_RELATIVE
}
"
SYMBOLS
${
SYMBOLS
}
FILES
${
PARSE_UNPARSED_ARGUMENTS
}
)
set
(
INTERNAL_EMBED_LIB embed_lib_
${
EMBED_NAME
}
)
set
(
INTERNAL_EMBED_LIB embed_lib_
${
EMBED_NAME
}
)
add_library
(
${
INTERNAL_EMBED_LIB
}
OBJECT
"
${
SRC_FILE
}
"
)
add_library
(
${
INTERNAL_EMBED_LIB
}
OBJECT
${
EMBED_FILES
}
)
if
(
EMBED_USE STREQUAL
"CArrays"
)
target_sources
(
${
INTERNAL_EMBED_LIB
}
PRIVATE
${
OUTPUT_FILES
}
)
endif
()
target_include_directories
(
${
INTERNAL_EMBED_LIB
}
PRIVATE
"
${
EMBED_DIR
}
/include"
)
target_include_directories
(
${
INTERNAL_EMBED_LIB
}
PRIVATE
"
${
EMBED_DIR
}
/include"
)
target_compile_options
(
${
INTERNAL_EMBED_LIB
}
PRIVATE -Wno-reserved-identifier -Wno-extern-initializer -Wno-missing-variable-declarations
)
target_compile_options
(
${
INTERNAL_EMBED_LIB
}
PRIVATE -Wno-reserved-identifier -Wno-extern-initializer -Wno-missing-variable-declarations
)
set_target_properties
(
${
INTERNAL_EMBED_LIB
}
PROPERTIES POSITION_INDEPENDENT_CODE On
)
set_target_properties
(
${
INTERNAL_EMBED_LIB
}
PROPERTIES POSITION_INDEPENDENT_CODE On
)
add_library
(
${
EMBED_NAME
}
INTERFACE
)
add_library
(
${
EMBED_NAME
}
INTERFACE
)
if
(
EMBED_USE
_
LD
)
if
(
EMBED_USE
STREQUAL
"
LD
"
)
target_sources
(
${
EMBED_NAME
}
INTERFACE
${
OUTPUT_FILES
}
)
target_sources
(
${
EMBED_NAME
}
INTERFACE
${
OUTPUT_FILES
}
)
else
()
endif
()
target_sources
(
${
INTERNAL_EMBED_LIB
}
PRIVATE
${
OUTPUT_FILES
}
)
if
(
EMBED_USE STREQUAL
"RC"
)
target_link_libraries
(
${
EMBED_NAME
}
INTERFACE $<TARGET_OBJECTS:
${
INTERNAL_EMBED_LIB
}
>
)
endif
()
endif
()
target_sources
(
${
EMBED_NAME
}
INTERFACE $<TARGET_OBJECTS:
${
INTERNAL_EMBED_LIB
}
>
)
target_sources
(
${
EMBED_NAME
}
INTERFACE $<TARGET_OBJECTS:
${
INTERNAL_EMBED_LIB
}
>
)
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
"
${
EMBED_DIR
}
/include"
)
target_include_directories
(
${
EMBED_NAME
}
INTERFACE
"
${
EMBED_DIR
}
/include"
)
endfunction
()
endfunction
()
docs/.sphinx/requirements.txt
View file @
c6ec6638
...
@@ -21,7 +21,7 @@ charset-normalizer==3.1.0
...
@@ -21,7 +21,7 @@ charset-normalizer==3.1.0
# via requests
# via requests
click==8.1.3
click==8.1.3
# via sphinx-external-toc
# via sphinx-external-toc
cryptography==41.0.
4
cryptography==41.0.
6
# via pyjwt
# via pyjwt
deprecated==1.2.13
deprecated==1.2.13
# via pygithub
# via pygithub
...
@@ -89,7 +89,7 @@ requests==2.28.2
...
@@ -89,7 +89,7 @@ requests==2.28.2
# via
# via
# pygithub
# pygithub
# sphinx
# sphinx
rocm-docs-core==0.2
7
.0
rocm-docs-core==0.2
9
.0
# via -r requirements.in
# via -r requirements.in
smmap==5.0.0
smmap==5.0.0
# via gitdb
# via gitdb
...
...
examples/README.md
View file @
c6ec6638
...
@@ -7,3 +7,4 @@ This directory contains examples of common use cases for MIGraphX.
...
@@ -7,3 +7,4 @@ This directory contains examples of common use cases for MIGraphX.
-
[
MIGraphX usage and utilities
](
./migraphx
)
-
[
MIGraphX usage and utilities
](
./migraphx
)
-
[
Vision inference examples
](
./vision
)
-
[
Vision inference examples
](
./vision
)
-
[
Natural language inference examples
](
./nlp
)
-
[
Natural language inference examples
](
./nlp
)
-
[
Diffusion inference examples
](
./diffusion
)
examples/diffusion/README.md
0 → 100644
View file @
c6ec6638
# Diffusion Inference Examples
-
[
Python Stable Diffusion 2.1
](
./python_stable_diffusion_21
)
examples/diffusion/python_stable_diffusion_21/README.md
0 → 100644
View file @
c6ec6638
# Stable Diffusion 2.1
This version was tested with
[
rocm 5.7
](
https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/tree/rocm-5.7.0
)
revision.
## Jupyter notebook
There is a dedicated step-by-step notebook. See
[
sd21.ipynb
](
./sd21.ipynb
)
## Console application
To run the console application, follow the steps below.
Setup python environment
```
bash
# this requires the python venv module to be installed (e.g. apt install python3.8-venv)
python3
-m
venv sd_venv
.
sd_venv/bin/activate
```
Install dependencies
```
bash
pip
install
-r
requirements.txt
```
Use MIGraphX Python Module
```
bash
export
PYTHONPATH
=
/opt/rocm/lib:
$PYTHONPATH
```
Get models with optimum
```
bash
optimum-cli
export
onnx
--model
stabilityai/stable-diffusion-2-1 models/sd21-onnx
```
*Note: `models/sd21-onnx` will be used in the scripts.*
Run the text-to-image script with the following example prompt and seed:
```
bash
python txt2img.py
--prompt
"a photograph of an astronaut riding a horse"
--seed
13
--output
astro_horse.jpg
```
*Note: The first run will compile the models and cache them to make subsequent runs faster.*
The result should look like this:

## Gradio application
Note: requires
`Console application`
to work
Install gradio dependencies
```
bash
pip
install
-r
gradio_requirements.txt
```
Usage
```
bash
python gradio_app.py
```
This will load the models (which can take several minutes), and when the setup is ready, starts a server on
`http://127.0.0.1:7860`
.
examples/diffusion/python_stable_diffusion_21/example_output.jpg
0 → 100644
View file @
c6ec6638
28.8 KB
examples/diffusion/python_stable_diffusion_21/gradio_app.py
0 → 100644
View file @
c6ec6638
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
from
txt2img
import
StableDiffusionMGX
import
gradio
as
gr
def main():
    """Build the Gradio interface for the Stable Diffusion example and launch it."""
    # Note: This will load the models, which can take several minutes
    pipeline = StableDiffusionMGX()

    def gr_wrapper(prompt, negative_prompt, steps, seed, scale):
        # Coerce every widget value to the type handed to the pipeline's run().
        output = pipeline.run(
            str(prompt),
            str(negative_prompt),
            int(steps),
            int(seed),
            float(scale),
        )
        return StableDiffusionMGX.convert_to_rgb_image(output)

    # Input widgets, in the same order as gr_wrapper's parameters.
    input_widgets = [
        gr.Textbox(value="a photograph of an astronaut riding a horse",
                   label="Prompt"),
        gr.Textbox(value="", label="Negative prompt (Optional)"),
        gr.Slider(1, 100, step=1, value=20, label="Number of steps"),
        gr.Textbox(value=13, label="Random seed"),
        gr.Slider(1, 20, step=0.1, value=7.0, label="Guidance scale"),
    ]

    # The single output component is declared by its "image" shorthand.
    app = gr.Interface(gr_wrapper, input_widgets, "image")
    app.launch()


if __name__ == "__main__":
    main()
examples/diffusion/python_stable_diffusion_21/gradio_reqirements.txt
0 → 100644
View file @
c6ec6638
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
-r requirements.txt
gradio
\ No newline at end of file
examples/diffusion/python_stable_diffusion_21/requirements.txt
0 → 100644
View file @
c6ec6638
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
accelerate
diffusers
optimum[onnxruntime]
transformers
\ No newline at end of file
examples/diffusion/python_stable_diffusion_21/sd21.ipynb
0 → 100644
View file @
c6ec6638
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# The MIT License (MIT)\n",
"#\n",
"# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.\n",
"#\n",
"# Permission is hereby granted, free of charge, to any person obtaining a copy\n",
"# of this software and associated documentation files (the 'Software'), to deal\n",
"# in the Software without restriction, including without limitation the rights\n",
"# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n",
"# copies of the Software, and to permit persons to whom the Software is\n",
"# furnished to do so, subject to the following conditions:\n",
"#\n",
"# The above copyright notice and this permission notice shall be included in\n",
"# all copies or substantial portions of the Software.\n",
"#\n",
"# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n",
"# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n",
"# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n",
"# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n",
"# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n",
"# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n",
"# THE SOFTWARE."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stable Diffusion 2.1\n",
"\n",
"The following example will show how to run `Stable Diffusion 2.1` with `MIGraphX`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Install the required dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install dependencies\n",
"!pip install optimum[onnxruntime] transformers diffusers accelerate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will use optimum to generate the onnx files."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# export models\n",
"!optimum-cli export onnx --model stabilityai/stable-diffusion-2-1 models/sd21-onnx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now it is time to load these models with python.\n",
"\n",
"First, we make sure that MIGraphX module is found in the python path."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"mgx_lib_path = \"/opt/rocm/lib/\" # or \"/code/AMDMIGraphX/build/lib/\"\n",
"if mgx_lib_path not in sys.path:\n",
" sys.path.append(mgx_lib_path)\n",
"import migraphx as mgx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, a helper method to load and cache the models.\n",
"\n",
"This will use the `models/sd21-onnx` path. If you changed it, make sure to update here as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# helper for model loading\n",
"def load_mgx_model(name, shapes):\n",
" file = f\"models/sd21-onnx/{name}/model\"\n",
" print(f\"Loading {name} model from {file}\")\n",
" if os.path.isfile(f\"{file}.mxr\"):\n",
" print(f\"Found mxr, loading it...\")\n",
" model = mgx.load(f\"{file}.mxr\", format=\"msgpack\")\n",
" elif os.path.isfile(f\"{file}.onnx\"):\n",
" print(f\"Parsing from onnx file...\")\n",
" model = mgx.parse_onnx(f\"{file}.onnx\", map_input_dims=shapes)\n",
" model.compile(mgx.get_target(\"gpu\"))\n",
" print(f\"Saving {name} model to mxr file...\")\n",
" mgx.save(model, f\"{file}.mxr\", format=\"msgpack\")\n",
" else:\n",
" print(f\"No {name} model found. Please verify the path is correct and re-try, or re-download model.\")\n",
" os.exit(1)\n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With that, we can load the models. This could take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_encoder = load_mgx_model(\"text_encoder\", {\"input_ids\": [1, 77]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"unet = load_mgx_model(\n",
" \"unet\", {\n",
" \"sample\": [1, 4, 64, 64],\n",
" \"encoder_hidden_states\": [1, 77, 1024],\n",
" \"timestep\": [1],\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vae = load_mgx_model(\"vae_decoder\", {\"latent_sample\": [1, 4, 64, 64]})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import the remaining packages."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from diffusers import EulerDiscreteScheduler\n",
"from transformers import CLIPTokenizer\n",
"import torch\n",
"import numpy as np\n",
"from tqdm.auto import tqdm\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Time to load the scheduler and tokenizer from the original source."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_id = \"stabilityai/stable-diffusion-2-1\"\n",
"scheduler = EulerDiscreteScheduler.from_pretrained(model_id,\n",
" subfolder=\"scheduler\")\n",
"tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder=\"tokenizer\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we will define all the steps one by one, to make the last step short and simple.\n",
"\n",
"The first step will be to tokenize the user prompt. It will make a `(1, 77)` shaped `input_ids`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(input):\n",
" return tokenizer([input],\n",
" padding=\"max_length\",\n",
" max_length=tokenizer.model_max_length,\n",
" truncation=True,\n",
" return_tensors=\"np\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_tk = tokenize(\"test tokenizer to see the tokens\")\n",
"test_tk.input_ids.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We run the tokenized prompt through the `Text Encoder` model. It expects the `(1, 77)` data as `int32`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"text_encoder.get_parameter_shapes()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_embeddings(input):\n",
" return np.array(\n",
" text_encoder.run({\"input_ids\": input.input_ids.astype(np.int32)\n",
" })[0]).astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_emb = get_embeddings(tokenize(\"test tokenizer to see the tokens\"))\n",
"test_emb.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The other input of the model is latent representation (pure noise). It will be transformed into a 512x512 image later.\n",
"The last input will be the timestep."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_latents(seed):\n",
" return torch.randn(\n",
" (1, 4, 64, 64),\n",
" generator=torch.manual_seed(seed),\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"test_latents = generate_latents(42)\n",
"latents.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we add two helpers to access and convert from torch to numpy with the proper datatype."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_scaled_sample(latents, t):\n",
" return scheduler.scale_model_input(latents, t).numpy().astype(np.float32)\n",
"\n",
"\n",
"def get_timestep(t):\n",
" return np.atleast_1d(t.numpy().astype(np.int64)) # convert 0D -> 1D"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The UNet model will be run in a loop. It will predict the noise residual."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional\n",
"unet.get_parameter_shapes()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def denoise(sample, embeddings, timestep):\n",
" return np.array(\n",
" unet.run({\n",
" \"sample\": sample,\n",
" \"encoder_hidden_states\": embeddings,\n",
" \"timestep\": timestep\n",
" })[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Helpers to do the classifier-free guidance and computing the previous noisy sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def perform_guidance(noise_pred_uncond, noise_pred_text, scale):\n",
" return noise_pred_uncond + scale * (noise_pred_text - noise_pred_uncond)\n",
"\n",
"def compute_previous(noise_pred, t, latents):\n",
" # compute the previous noisy sample x_t -> x_t-1\n",
" return scheduler.step(noise_pred, t, latents).prev_sample\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Scale and decode the image latents with VAE."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def scale_denoised(latents):\n",
" return 1 / 0.18215 * latents\n",
"\n",
"\n",
"def decode(latents):\n",
" return np.array(\n",
" vae.run({\"latent_sample\": latents.numpy().astype(np.float32)})[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And lastly, we need to convert it to an image to display or save."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convert_to_rgb_image(image):\n",
" image = np.clip(image / 2 + 0.5, 0, 1)\n",
" image = np.transpose(image, (0, 2, 3, 1))\n",
" images = (image * 255).round().astype(\"uint8\")\n",
" return Image.fromarray(images[0])\n",
"\n",
"def save_image(pil_image, filename=\"output.png\"):\n",
" pil_image.save(filename, format=\"png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Feel free to play around with these params."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prompt = \"a photograph of an astronaut riding a horse\"\n",
"negative_prompt = \"\"\n",
"steps = 20\n",
"seed = 13\n",
"scale = 7.0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And now, to put everything together and run the whole pipeline:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scheduler.set_timesteps(steps)\n",
"\n",
"text_input, uncond_input = tokenize(prompt), tokenize(negative_prompt)\n",
"text_embeddings, uncond_embeddings = get_embeddings(\n",
" text_input), get_embeddings(uncond_input)\n",
"latents = generate_latents(seed) * scheduler.init_noise_sigma\n",
"\n",
"for t in tqdm(scheduler.timesteps):\n",
" sample = get_scaled_sample(latents, t)\n",
" timestep = get_timestep(t)\n",
"\n",
" noise_pred_uncond = denoise(sample, uncond_embeddings, timestep)\n",
" noise_pred_text = denoise(sample, text_embeddings, timestep)\n",
"\n",
" noise_pred = perform_guidance(noise_pred_uncond, noise_pred_text, scale)\n",
" latents = compute_previous(torch.from_numpy(noise_pred), t, latents)\n",
"\n",
"latents = scale_denoised(latents)\n",
"result = decode(latents)\n",
"image = convert_to_rgb_image(result)\n",
"\n",
"# show the image\n",
"image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you like the generated image, save it with the following:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_image(image, \"output.png\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sd_venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
examples/diffusion/python_stable_diffusion_21/txt2img.py
0 → 100644
View file @
c6ec6638
# The MIT License (MIT)
#
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the 'Software'), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from
argparse
import
ArgumentParser
from
diffusers
import
EulerDiscreteScheduler
from
transformers
import
CLIPTokenizer
from
PIL
import
Image
import
migraphx
as
mgx
import
numpy
as
np
import
os
import
torch
import
time
from
functools
import
wraps
# timing decorator
def measure(fn):
    """Wrap ``fn`` so that every call prints how long it took.

    The duration is taken from ``time.perf_counter_ns`` and printed in
    milliseconds; the wrapped function's return value is passed through
    unchanged.
    """
    @wraps(fn)
    def measure_ms(*args, **kwargs):
        began_ns = time.perf_counter_ns()
        outcome = fn(*args, **kwargs)
        finished_ns = time.perf_counter_ns()
        # perf_counter_ns reports nanoseconds; scale to milliseconds for display
        elapsed_ms = (finished_ns - began_ns) * 1e-6
        print(f"Elapsed time: {elapsed_ms:.4f} ms\n")
        return outcome

    return measure_ms
def get_args():
    """Parse the command-line options of the text-to-image script.

    Returns:
        The ``argparse`` namespace with ``seed``, ``steps``, ``prompt``,
        ``negative_prompt``, ``scale`` and ``output``. Only ``--prompt`` is
        required; every other option has a default.
    """
    # (flags, add_argument keywords), listed in the order shown by --help
    option_table = (
        (("-s", "--seed"), dict(type=int, default=42, help="Random seed")),
        (("-t", "--steps"), dict(type=int, default=20,
                                 help="Number of steps")),
        (("-p", "--prompt"), dict(type=str, required=True, help="Prompt")),
        (("-n", "--negative-prompt"),
         dict(type=str, default="", help="Negative prompt")),
        (("--scale", ), dict(type=float, default=7.0, help="Guidance scale")),
        (("-o", "--output"), dict(type=str, default=None,
                                  help="Output name")),
    )

    cli = ArgumentParser()
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
class StableDiffusionMGX():
    """Stable Diffusion 2.1 text-to-image pipeline.

    The text encoder, UNet and VAE decoder are run through MIGraphX; the
    scheduler and tokenizer come from ``diffusers`` / ``transformers``.
    """
    def __init__(self):
        """Create the scheduler and tokenizer and load the three models."""
        model_id = "stabilityai/stable-diffusion-2-1"
        print(f"Using {model_id}")

        print("Creating EulerDiscreteScheduler scheduler")
        self.scheduler = EulerDiscreteScheduler.from_pretrained(
            model_id, subfolder="scheduler")

        print("Creating CLIPTokenizer tokenizer...")
        self.tokenizer = CLIPTokenizer.from_pretrained(model_id,
                                                       subfolder="tokenizer")

        print("Load models...")
        # Input shapes are fixed here; they are passed to the ONNX parser
        # when no cached .mxr file exists yet.
        self.vae = StableDiffusionMGX.load_mgx_model(
            "vae_decoder", {"latent_sample": [1, 4, 64, 64]})
        self.text_encoder = StableDiffusionMGX.load_mgx_model(
            "text_encoder", {"input_ids": [1, 77]})
        self.unet = StableDiffusionMGX.load_mgx_model(
            "unet", {
                "sample": [1, 4, 64, 64],
                "encoder_hidden_states": [1, 77, 1024],
                "timestep": [1],
            })

    def run(self, prompt, negative_prompt, steps, seed, scale):
        """Generate one image for ``prompt``.

        Args:
            prompt: Text to generate an image for.
            negative_prompt: Text used for the unconditional branch of the
                guidance.
            steps: Number of scheduler timesteps.
            seed: Seed for the random initial latents.
            scale: Guidance scale applied in ``denoise_step``.

        Returns:
            The array produced by ``decode``; pass it to
            ``convert_to_rgb_image`` to obtain a PIL image.
        """
        # need to set this for each run
        self.scheduler.set_timesteps(steps)

        print("Tokenizing prompt...")
        text_input = self.tokenize(prompt)

        print("Creating text embeddings for prompt...")
        text_embeddings = self.get_embeddings(text_input)

        print("Tokenizing negative prompt...")
        uncond_input = self.tokenize(negative_prompt)

        print("Creating text embeddings for negative prompt...")
        uncond_embeddings = self.get_embeddings(uncond_input)

        print(
            f"Creating random input data ({1}x{4}x{64}x{64}) (latents) with seed={seed}..."
        )
        latents = torch.randn((1, 4, 64, 64),
                              generator=torch.manual_seed(seed))

        print("Apply initial noise sigma\n")
        latents = latents * self.scheduler.init_noise_sigma

        print("Running denoising loop...")
        for step, t in enumerate(self.scheduler.timesteps):
            print(f"#{step}/{len(self.scheduler.timesteps)} step")
            latents = self.denoise_step(text_embeddings, uncond_embeddings,
                                        latents, t, scale)

        print("Scale denoised result...")
        latents = 1 / 0.18215 * latents

        print("Decode denoised result...")
        image = self.decode(latents)

        return image

    @staticmethod
    @measure
    def load_mgx_model(name, shapes):
        """Load the MIGraphX program for the model called ``name``.

        Looks under ``models/sd21-onnx/<name>/model``. A cached ``.mxr`` file
        is loaded directly; otherwise the ``.onnx`` file is parsed with
        ``shapes`` as input dimensions, compiled for the GPU target and saved
        as ``.mxr`` for the next run.

        Args:
            name: Sub-directory name of the model.
            shapes: Mapping of input name to dimensions, forwarded to
                ``mgx.parse_onnx`` as ``map_input_dims``.

        Returns:
            The loaded (and, when parsed from ONNX, compiled) program.

        Raises:
            SystemExit: With status 1 when neither file exists.
        """
        # Local import: only needed on the failure path below.
        import sys

        file = f"models/sd21-onnx/{name}/model"
        print(f"Loading {name} model from {file}")
        if os.path.isfile(f"{file}.mxr"):
            print("Found mxr, loading it...")
            model = mgx.load(f"{file}.mxr", format="msgpack")
        elif os.path.isfile(f"{file}.onnx"):
            print("Parsing from onnx file...")
            model = mgx.parse_onnx(f"{file}.onnx", map_input_dims=shapes)
            model.compile(mgx.get_target("gpu"))
            print(f"Saving {name} model to mxr file...")
            mgx.save(model, f"{file}.mxr", format="msgpack")
        else:
            print(f"No {name} model found. Please download it and re-try.")
            # The os module has no exit(); sys.exit terminates with status 1.
            sys.exit(1)
        return model

    @measure
    def tokenize(self, input):
        """Tokenize ``input``, padded/truncated to the tokenizer's max length.

        Returns the tokenizer output with numpy tensors.
        """
        return self.tokenizer([input],
                              padding="max_length",
                              max_length=self.tokenizer.model_max_length,
                              truncation=True,
                              return_tensors="np")

    @measure
    def get_embeddings(self, input):
        """Run the text encoder on tokenized ``input``.

        ``input.input_ids`` is cast to int32 before the run; the first output
        is returned as a float32 numpy array.
        """
        return np.array(
            self.text_encoder.run(
                {"input_ids":
                 input.input_ids.astype(np.int32)})[0]).astype(np.float32)

    @staticmethod
    def convert_to_rgb_image(image):
        """Convert the decoder output into a PIL image.

        Values are mapped with ``image / 2 + 0.5``, clipped to [0, 1], the
        axes are reordered with ``(0, 2, 3, 1)``, and the result is scaled to
        uint8. Only the first element of the leading axis is converted.
        """
        image = np.clip(image / 2 + 0.5, 0, 1)
        image = np.transpose(image, (0, 2, 3, 1))
        images = (image * 255).round().astype("uint8")
        return Image.fromarray(images[0])

    @staticmethod
    def save_image(pil_image, filename="output.png"):
        """Save ``pil_image`` to ``filename``."""
        pil_image.save(filename)

    @measure
    def denoise_step(self, text_embeddings, uncond_embeddings, latents, t,
                     scale):
        """Run one denoising step and return the scheduler's previous sample.

        The UNet is run twice on the same scaled sample, once with the
        negative-prompt embeddings and once with the prompt embeddings, and
        the two predictions are combined using ``scale``.
        """
        sample = self.scheduler.scale_model_input(latents,
                                                  t).numpy().astype(np.float32)
        timestep = np.atleast_1d(t.numpy().astype(
            np.int64))  # convert 0D -> 1D

        noise_pred_uncond = np.array(
            self.unet.run({
                "sample": sample,
                "encoder_hidden_states": uncond_embeddings,
                "timestep": timestep
            })[0])

        noise_pred_text = np.array(
            self.unet.run({
                "sample": sample,
                "encoder_hidden_states": text_embeddings,
                "timestep": timestep
            })[0])

        # perform guidance
        noise_pred = noise_pred_uncond + scale * (noise_pred_text -
                                                  noise_pred_uncond)

        # compute the previous noisy sample x_t -> x_t-1
        return self.scheduler.step(torch.from_numpy(noise_pred), t,
                                   latents).prev_sample

    @measure
    def decode(self, latents):
        """Run the VAE decoder on ``latents`` (a torch tensor).

        The latents are converted to a float32 numpy array first; the first
        output is returned as a numpy array.
        """
        return np.array(
            self.vae.run({"latent_sample":
                          latents.numpy().astype(np.float32)})[0])
if __name__ == "__main__":
    # Script entry point: parse options, run the pipeline, save the image.
    args = get_args()

    sd = StableDiffusionMGX()
    result = sd.run(args.prompt, args.negative_prompt, args.steps, args.seed,
                    args.scale)

    print("Convert result to rgb image...")
    image = StableDiffusionMGX.convert_to_rgb_image(result)
    # Without -o/--output, derive the file name from the seed and step count.
    filename = args.output if args.output else f"output_s{args.seed}_t{args.steps}.png"
    # Save under the resolved name: args.output is None by default, so it
    # cannot be used as the save target directly.
    StableDiffusionMGX.save_image(image, filename)
    print(f"Image saved to {filename}")
src/CMakeLists.txt
View file @
c6ec6638
...
@@ -28,7 +28,7 @@ include(ROCMInstallTargets)
...
@@ -28,7 +28,7 @@ include(ROCMInstallTargets)
include
(
ROCMPackageConfigHelpers
)
include
(
ROCMPackageConfigHelpers
)
include
(
RegisterOp
)
include
(
RegisterOp
)
include
(
CheckCXXLinkerFlag
)
include
(
CheckCXXLinkerFlag
)
include
(
CheckCXXSourceCompiles
)
add_library
(
migraphx
add_library
(
migraphx
adjust_allocation.cpp
adjust_allocation.cpp
...
@@ -104,6 +104,12 @@ add_library(migraphx
...
@@ -104,6 +104,12 @@ add_library(migraphx
value.cpp
value.cpp
verify_args.cpp
verify_args.cpp
)
)
if
(
WIN32
)
# Due to compilation crashing, we need to use type-erased matchers on Windows.
target_compile_definitions
(
migraphx PUBLIC MIGRAPHX_USE_TYPE_ERASED_MATCHERS=1
)
endif
()
configure_file
(
version.h.in include/migraphx/version.h
)
configure_file
(
version.h.in include/migraphx/version.h
)
rocm_set_soversion
(
migraphx
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx
${
MIGRAPHX_SO_VERSION
}
)
function
(
register_migraphx_ops
)
function
(
register_migraphx_ops
)
...
@@ -215,6 +221,8 @@ register_migraphx_ops(
...
@@ -215,6 +221,8 @@ register_migraphx_ops(
scatternd_add
scatternd_add
scatternd_mul
scatternd_mul
scatternd_none
scatternd_none
scatternd_max
scatternd_min
select_module
select_module
sigmoid
sigmoid
sign
sign
...
@@ -247,17 +255,61 @@ rocm_install_targets(
...
@@ -247,17 +255,61 @@ rocm_install_targets(
${
CMAKE_CURRENT_BINARY_DIR
}
/include
${
CMAKE_CURRENT_BINARY_DIR
}
/include
)
)
if
(
NOT WIN32
)
check_cxx_linker_flag
(
-lstdc++fs HAS_LIB_STD_FILESYSTEM
)
check_cxx_linker_flag
(
-lstdc++fs HAS_LIB_STD_FILESYSTEM
)
if
(
HAS_LIB_STD_FILESYSTEM
)
if
(
HAS_LIB_STD_FILESYSTEM
)
target_link_libraries
(
migraphx PRIVATE -lstdc++fs
)
target_link_libraries
(
migraphx PRIVATE -lstdc++fs
)
endif
()
target_link_libraries
(
migraphx PRIVATE -ldl
)
endif
()
endif
()
target_link_libraries
(
migraphx PRIVATE -ldl
)
target_include_directories
(
migraphx SYSTEM PUBLIC $<BUILD_INTERFACE:
${
HALF_INCLUDE_DIR
}
>
)
target_include_directories
(
migraphx SYSTEM PUBLIC $<BUILD_INTERFACE:
${
HALF_INCLUDE_DIR
}
>
)
target_link_libraries
(
migraphx PUBLIC Threads::Threads
)
target_link_libraries
(
migraphx PUBLIC Threads::Threads
)
# check_execution_par(<RESULT> [<libs>...])
#
# Try-compiles a small program that calls std::sort with std::execution::par
# and stores the outcome in <RESULT> in the caller's scope. Any arguments
# after <RESULT> are used as CMAKE_REQUIRED_LIBRARIES for the check.
function(check_execution_par RESULT)
    set(CMAKE_REQUIRED_LIBRARIES ${ARGN})
    # Start with empty flags; only non-MSVC compilers get -std=c++17.
    set(CMAKE_REQUIRED_FLAGS)
    if(NOT MSVC)
        set(CMAKE_REQUIRED_FLAGS "-std=c++17")
    endif()
    # The result variable name embeds a hash of the flags and libraries,
    # presumably so checks made with different settings keep separate results.
    string(MD5 _flags_hash "${CMAKE_REQUIRED_FLAGS} ${CMAKE_REQUIRED_LIBRARIES}")
    set(_source "
#include <execution>
int main() {
int* i = nullptr;
std::sort(std::execution::par, i, i);
}
")
    check_cxx_source_compiles("${_source}" _has_execution_${_flags_hash})
    # Export the outcome to the caller under the requested name.
    set(${RESULT} ${_has_execution_${_flags_hash}} PARENT_SCOPE)
endfunction()
set
(
MIGRAPHX_HAS_EXECUTORS_DEFAULT Off
)
find_package
(
TBB QUIET
)
if
(
TBB_FOUND
)
check_execution_par
(
TBB_HAS_EXECUTION_PAR TBB::tbb
)
if
(
TBB_HAS_EXECUTION_PAR
)
target_link_libraries
(
migraphx PUBLIC TBB::tbb
)
set
(
MIGRAPHX_HAS_EXECUTORS_DEFAULT On
)
message
(
STATUS
"Using TBB for parallel execution"
)
endif
()
else
()
check_execution_par
(
HAS_EXECUTION_PAR
)
if
(
HAS_EXECUTION_PAR
)
set
(
MIGRAPHX_HAS_EXECUTORS_DEFAULT On
)
endif
()
endif
()
option
(
MIGRAPHX_HAS_EXECUTORS
"C++ supports parallel executors"
${
MIGRAPHX_HAS_EXECUTORS_DEFAULT
}
)
if
(
MIGRAPHX_HAS_EXECUTORS
)
message
(
"Parallel STL enabled"
)
target_compile_definitions
(
migraphx PUBLIC MIGRAPHX_HAS_EXECUTORS=1
)
else
()
message
(
"Parallel STL disabled"
)
target_compile_definitions
(
migraphx PUBLIC MIGRAPHX_HAS_EXECUTORS=0
)
endif
()
find_package
(
nlohmann_json 3.8.0 REQUIRED
)
find_package
(
nlohmann_json 3.8.0 REQUIRED
)
target_link_libraries
(
migraphx PRIVATE nlohmann_json::nlohmann_json
)
target_link_libraries
(
migraphx PRIVATE nlohmann_json::nlohmann_json
)
migraphx_generate_export_header
(
migraphx
)
migraphx_generate_export_header
(
migraphx
)
...
@@ -275,8 +327,6 @@ target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpackc-cxx>)
...
@@ -275,8 +327,6 @@ target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpackc-cxx>)
add_library
(
migraphx_all_targets INTERFACE
)
add_library
(
migraphx_all_targets INTERFACE
)
set
(
PACKAGE_DEPENDS
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
add_subdirectory
(
driver
)
add_subdirectory
(
driver
)
add_subdirectory
(
onnx
)
add_subdirectory
(
onnx
)
...
...
src/api/include/migraphx/migraphx.h
View file @
c6ec6638
...
@@ -44,7 +44,8 @@
...
@@ -44,7 +44,8 @@
m(int32_type, int32_t) \
m(int32_type, int32_t) \
m(int64_type, int64_t) \
m(int64_type, int64_t) \
m(uint32_type, uint32_t) \
m(uint32_type, uint32_t) \
m(uint64_type, uint64_t)
m(uint64_type, uint64_t) \
m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz)
// clang-format on
// clang-format on
#ifdef __cplusplus
#ifdef __cplusplus
...
...
src/driver/argument_parser.hpp
View file @
c6ec6638
...
@@ -105,6 +105,8 @@ inline std::ostream& operator<<(std::ostream& os, const color& c)
...
@@ -105,6 +105,8 @@ inline std::ostream& operator<<(std::ostream& os, const color& c)
static
const
bool
use_color
=
isatty
(
STDOUT_FILENO
)
!=
0
;
static
const
bool
use_color
=
isatty
(
STDOUT_FILENO
)
!=
0
;
if
(
use_color
)
if
(
use_color
)
return
os
<<
"
\033
["
<<
static_cast
<
std
::
size_t
>
(
c
)
<<
"m"
;
return
os
<<
"
\033
["
<<
static_cast
<
std
::
size_t
>
(
c
)
<<
"m"
;
#else
(
void
)
c
;
#endif
#endif
return
os
;
return
os
;
}
}
...
...
src/dynamic_loader.cpp
View file @
c6ec6638
...
@@ -130,6 +130,30 @@ struct dynamic_loader_impl
...
@@ -130,6 +130,30 @@ struct dynamic_loader_impl
tmp_dir
temp
;
tmp_dir
temp
;
};
};
/// Returns the file path of the loaded module that contains `address`.
///
/// Throws (via MIGRAPHX_THROW) when the module handle cannot be obtained,
/// when its file name cannot be read, or when the path does not fit into a
/// buffer of MAX_PATH characters.
fs::path dynamic_loader::path(void* address)
{
    HMODULE module = nullptr;
    // FROM_ADDRESS: `address` is an address inside the module, not a name.
    // UNCHANGED_REFCOUNT: the module's reference count is left untouched.
    if(GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                             GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                         static_cast<LPCSTR>(address),
                         &module) == 0)
    {
        auto err = GetLastError();
        MIGRAPHX_THROW("Unable to obtain module handle, error = " + std::to_string(err));
    }
    TCHAR buffer[MAX_PATH];
    // The size argument is a count of TCHARs, not bytes, so pass the element
    // count rather than sizeof(buffer).
    const DWORD length = GetModuleFileName(module, buffer, MAX_PATH);
    if(length == 0)
    {
        auto err = GetLastError();
        MIGRAPHX_THROW("Unable to read module file path, error = " + std::to_string(err));
    }
    // A return value equal to the buffer size means the path was truncated.
    // Use the return value here: after a successful call the last-error code
    // may be left over from an earlier API call.
    if(length == MAX_PATH)
    {
        MIGRAPHX_THROW("Buffer too small (" + std::to_string(MAX_PATH) + ") to hold the path");
    }
    return {buffer};
}
#endif
#endif
optional
<
dynamic_loader
>
dynamic_loader
::
try_load
(
const
fs
::
path
&
p
)
optional
<
dynamic_loader
>
dynamic_loader
::
try_load
(
const
fs
::
path
&
p
)
...
...
src/fuse_pointwise.cpp
View file @
c6ec6638
...
@@ -219,9 +219,8 @@ struct find_pointwise_reshape_pointwise
...
@@ -219,9 +219,8 @@ struct find_pointwise_reshape_pointwise
auto
reshape_input
=
[
&
](
const
auto
&
ins_to_insert
)
{
auto
reshape_input
=
[
&
](
const
auto
&
ins_to_insert
)
{
return
[
&
](
auto
input
)
{
return
[
&
](
auto
input
)
{
auto
c
=
m
.
insert_instruction
(
ins_to_insert
,
make_op
(
"contiguous"
),
input
);
return
m
.
insert_instruction
(
return
m
.
insert_instruction
(
ins_to_insert
,
make_op
(
"reshape"
,
{{
"dims"
,
cd
.
dims
}}),
c
);
ins_to_insert
,
make_op
(
"reshape"
,
{{
"dims"
,
cd
.
dims
}}),
input
);
};
};
};
};
auto
x_inputs
=
x_ins
->
inputs
();
auto
x_inputs
=
x_ins
->
inputs
();
...
...
Prev
1
2
3
4
5
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment