Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
0a72e4df
Unverified
Commit
0a72e4df
authored
Jul 16, 2021
by
Chao Liu
Committed by
GitHub
Jul 16, 2021
Browse files
Change initialization method of tensor for iGEMM (#49)
* change init method
parent
58a80570
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
103 additions
and
81 deletions
+103
-81
driver/conv_bwd_data_driver_v2.cpp
driver/conv_bwd_data_driver_v2.cpp
+31
-19
driver/conv_driver_v2.cpp
driver/conv_driver_v2.cpp
+31
-27
driver/conv_driver_v2_olc.cpp
driver/conv_driver_v2_olc.cpp
+31
-27
driver/include/host_tensor_generator.hpp
driver/include/host_tensor_generator.hpp
+10
-8
No files found.
driver/conv_bwd_data_driver_v2.cpp
View file @
0a72e4df
...
@@ -179,26 +179,38 @@ int main(int argc, char* argv[])
...
@@ -179,26 +179,38 @@ int main(int argc, char* argv[])
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
if
(
do_verification
)
switch
(
init_method
)
{
{
switch
(
init_method
)
case
0
:
{
// no initialization
case
0
:
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
out
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
out
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
2
:
out
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
out
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
2
:
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
3
:
out
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
out
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
default:
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
4
:
out
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
out
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
}
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
case
5
:
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
0.0
,
1.0
},
num_thread
);
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
-
0.5
,
0.5
},
num_thread
);
break
;
default:
out
.
GenerateTensorValue
(
GeneratorTensor_2
{
1
,
5
},
num_thread
);
auto
gen_wei
=
[](
auto
...
is
)
{
return
GeneratorTensor_2
{
1
,
5
}(
is
...)
*
GeneratorTensor_Checkboard
{}(
is
...);
};
wei
.
GenerateTensorValue
(
gen_wei
,
num_thread
);
}
}
auto
f_make_for_device_nchw
=
[
&
]()
{
auto
f_make_for_device_nchw
=
[
&
]()
{
...
...
driver/conv_driver_v2.cpp
View file @
0a72e4df
...
@@ -205,34 +205,38 @@ int main(int argc, char* argv[])
...
@@ -205,34 +205,38 @@ int main(int argc, char* argv[])
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
if
(
do_verification
)
switch
(
init_method
)
{
{
switch
(
init_method
)
case
0
:
{
// no initialization
case
0
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
2
:
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
2
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
3
:
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
3
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
4
:
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
default:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
1
,
5
},
num_thread
);
case
5
:
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
0.0
,
1.0
},
num_thread
);
auto
gen_wei
=
[](
auto
...
is
)
{
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
-
0.5
,
0.5
},
num_thread
);
return
GeneratorTensor_2
{
1
,
5
}(
is
...)
*
GeneratorTensor_Checkboard
{}(
is
...);
break
;
};
default:
wei
.
GenerateTensorValue
(
gen_wei
,
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
1
,
5
},
num_thread
);
}
auto
gen_wei
=
[](
auto
...
is
)
{
return
GeneratorTensor_2
{
1
,
5
}(
is
...)
*
GeneratorTensor_Checkboard
{}(
is
...);
};
wei
.
GenerateTensorValue
(
gen_wei
,
num_thread
);
}
}
auto
f_make_for_device_nchw
=
[
&
]()
{
auto
f_make_for_device_nchw
=
[
&
]()
{
...
...
driver/conv_driver_v2_olc.cpp
View file @
0a72e4df
...
@@ -152,34 +152,38 @@ int main(int argc, char* argv[])
...
@@ -152,34 +152,38 @@ int main(int argc, char* argv[])
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
if
(
do_verification
)
switch
(
init_method
)
{
{
switch
(
init_method
)
case
0
:
{
// no initialization
case
0
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
1
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
2
:
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
2
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
3
:
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
case
3
:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
case
4
:
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
break
;
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
default:
break
;
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
1
,
5
},
num_thread
);
case
5
:
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
0.0
,
1.0
},
num_thread
);
auto
gen_wei
=
[](
auto
...
is
)
{
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
float
>
{
-
0.5
,
0.5
},
num_thread
);
return
GeneratorTensor_2
{
1
,
5
}(
is
...)
*
GeneratorTensor_Checkboard
{}(
is
...);
break
;
};
default:
wei
.
GenerateTensorValue
(
gen_wei
,
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
1
,
5
},
num_thread
);
}
auto
gen_wei
=
[](
auto
...
is
)
{
return
GeneratorTensor_2
{
1
,
5
}(
is
...)
*
GeneratorTensor_Checkboard
{}(
is
...);
};
wei
.
GenerateTensorValue
(
gen_wei
,
num_thread
);
}
}
auto
f_make_for_device_nchw
=
[
&
]()
{
auto
f_make_for_device_nchw
=
[
&
]()
{
...
...
driver/include/host_tensor_generator.hpp
View file @
0a72e4df
...
@@ -9,7 +9,7 @@ struct GeneratorTensor_1
...
@@ -9,7 +9,7 @@ struct GeneratorTensor_1
int
value
=
1
;
int
value
=
1
;
template
<
typename
...
Is
>
template
<
typename
...
Is
>
double
operator
()(
Is
...
is
)
float
operator
()(
Is
...
is
)
{
{
return
value
;
return
value
;
}
}
...
@@ -21,29 +21,31 @@ struct GeneratorTensor_2
...
@@ -21,29 +21,31 @@ struct GeneratorTensor_2
int
max_value
=
1
;
int
max_value
=
1
;
template
<
typename
...
Is
>
template
<
typename
...
Is
>
double
operator
()(
Is
...)
float
operator
()(
Is
...)
{
{
return
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
return
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
}
}
};
};
template
<
typename
T
>
struct
GeneratorTensor_3
struct
GeneratorTensor_3
{
{
T
min_value
=
0
;
T
max_value
=
1
;
template
<
typename
...
Is
>
template
<
typename
...
Is
>
double
operator
()(
Is
...
is
)
float
operator
()(
Is
...)
{
{
std
::
array
<
ck
::
index_t
,
sizeof
...(
Is
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
is
)...}};
float
tmp
=
float
(
std
::
rand
())
/
float
(
RAND_MAX
);
auto
f_acc
=
[](
auto
a
,
auto
b
)
{
return
10
*
a
+
b
;
};
return
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
ck
::
index_t
(
0
),
f_acc
);
return
min_value
+
tmp
*
(
max_value
-
min_value
);
}
}
};
};
struct
GeneratorTensor_Checkboard
struct
GeneratorTensor_Checkboard
{
{
template
<
typename
...
Ts
>
template
<
typename
...
Ts
>
double
operator
()(
Ts
...
Xs
)
const
float
operator
()(
Ts
...
Xs
)
const
{
{
std
::
array
<
ck
::
index_t
,
sizeof
...(
Ts
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
Xs
)...}};
std
::
array
<
ck
::
index_t
,
sizeof
...(
Ts
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
Xs
)...}};
return
std
::
accumulate
(
dims
.
begin
(),
return
std
::
accumulate
(
dims
.
begin
(),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment