Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
174c2d06
Commit
174c2d06
authored
Mar 22, 2023
by
Paul
Browse files
Some update and fixes
parent
4f053f22
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
4 deletions
+11
-4
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+3
-2
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+1
-1
tools/tune_ck.py
tools/tune_ck.py
+7
-1
No files found.
src/targets/gpu/fuse_ops.cpp
View file @
174c2d06
...
@@ -680,7 +680,7 @@ struct find_contiguous_tranpose_precompile
...
@@ -680,7 +680,7 @@ struct find_contiguous_tranpose_precompile
auto
matcher
()
const
auto
matcher
()
const
{
{
return
match
::
name
(
"gpu::contiguous"
)(
match
::
arg
(
0
)(
return
match
::
name
(
"gpu::contiguous"
)(
match
::
arg
(
0
)(
match
::
name
(
"transpose"
)(
match
::
name
(
"transpose"
)(
match
::
used_once
(),
match
::
arg
(
0
)(
match
::
name
(
"gpu::precompile_op"
)(
match
::
used_once
()).
bind
(
"op"
)))
match
::
arg
(
0
)(
match
::
name
(
"gpu::precompile_op"
)(
match
::
used_once
()).
bind
(
"op"
)))
.
bind
(
"transpose"
)));
.
bind
(
"transpose"
)));
}
}
...
@@ -694,11 +694,12 @@ struct find_contiguous_tranpose_precompile
...
@@ -694,11 +694,12 @@ struct find_contiguous_tranpose_precompile
auto
perm
=
transpose
->
get_operator
().
to_value
()[
"permutation"
].
to_vector
<
int64_t
>
();
auto
perm
=
transpose
->
get_operator
().
to_value
()[
"permutation"
].
to_vector
<
int64_t
>
();
auto
iperm
=
invert_permutation
(
perm
);
auto
iperm
=
invert_permutation
(
perm
);
auto
s
=
auto
s
=
shape
::
from_permutation
(
op_ins
->
get_shape
().
type
(),
op_ins
->
get_shape
().
lens
(),
i
perm
);
shape
::
from_permutation
(
op_ins
->
get_shape
().
type
(),
op_ins
->
get_shape
().
lens
(),
perm
);
// perm or iperm?
auto
v
=
op_ins
->
get_operator
().
to_value
();
auto
v
=
op_ins
->
get_operator
().
to_value
();
v
[
"output_shape"
]
=
to_value
(
s
);
v
[
"output_shape"
]
=
to_value
(
s
);
auto
new_op
=
make_op
(
"gpu::precompile_op"
,
v
);
auto
new_op
=
make_op
(
"gpu::precompile_op"
,
v
);
m
.
replace_instruction
(
op_ins
,
new_op
,
op_ins
->
inputs
(),
op_ins
->
module_inputs
());
m
.
replace_instruction
(
op_ins
,
new_op
,
op_ins
->
inputs
(),
op_ins
->
module_inputs
());
assert
(
ins
->
get_shape
()
==
transpose
->
get_shape
());
m
.
replace_instruction
(
ins
,
transpose
);
m
.
replace_instruction
(
ins
,
transpose
);
}
}
};
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
View file @
174c2d06
...
@@ -237,7 +237,7 @@ struct index
...
@@ -237,7 +237,7 @@ struct index
template
<
class
F
,
class
N
>
template
<
class
F
,
class
N
>
__device__
void
group_stride
(
N
n
,
F
f
)
const
__device__
void
group_stride
(
N
n
,
F
f
)
const
{
{
for_stride
(
group
,
n
,
ngroup
(),
f
);
for_stride
<
false
>
(
group
,
n
,
ngroup
(),
f
);
}
}
};
};
...
...
tools/tune_ck.py
View file @
174c2d06
...
@@ -21,10 +21,14 @@ def pretty_print(obj):
...
@@ -21,10 +21,14 @@ def pretty_print(obj):
def
run_driver
(
b
):
def
run_driver
(
b
):
print
(
b
)
print
(
b
)
with
tmp_file
(
lambda
tf
:
json
.
dump
(
b
,
tf
))
as
tf
:
with
tmp_file
(
lambda
tf
:
json
.
dump
(
b
,
tf
))
as
tf
:
if
not
os
.
path
.
exists
(
'./bin/gpu-driver'
):
print
(
"./bin/gpu-driver not found"
)
os
.
abort
()
cp
=
subprocess
.
run
(
'./bin/gpu-driver {}'
.
format
(
tf
),
cp
=
subprocess
.
run
(
'./bin/gpu-driver {}'
.
format
(
tf
),
capture_output
=
True
,
capture_output
=
True
,
check
=
True
,
shell
=
True
)
shell
=
True
)
print
(
cp
.
stderr
.
decode
())
cp
.
check_returncode
()
for
line
in
cp
.
stdout
.
decode
().
split
(
"
\n
"
):
for
line
in
cp
.
stdout
.
decode
().
split
(
"
\n
"
):
s
=
line
.
strip
()
s
=
line
.
strip
()
if
not
s
:
if
not
s
:
...
@@ -60,6 +64,8 @@ def benchmark_ck(config, tuning):
...
@@ -60,6 +64,8 @@ def benchmark_ck(config, tuning):
dtime
=
get_device_time
(
line
)
dtime
=
get_device_time
(
line
)
print
(
dtime
)
print
(
dtime
)
return
float
(
dtime
)
return
float
(
dtime
)
print
(
"Failed"
)
sys
.
exit
(
1
)
except
:
except
:
return
sys
.
float_info
.
max
return
sys
.
float_info
.
max
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment