Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
4f1244be
Commit
4f1244be
authored
Apr 07, 2025
by
xgqdut2016
Browse files
issue/130: use collapse
parent
23524591
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
26 deletions
+24
-26
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
+24
-26
No files found.
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
View file @
4f1244be
...
@@ -42,34 +42,32 @@ void calculate(
...
@@ -42,34 +42,32 @@ void calculate(
if
(
info
.
is_transed
)
{
if
(
info
.
is_transed
)
{
std
::
swap
(
a
,
b
);
std
::
swap
(
a
,
b
);
}
}
#pragma omp parallel for
#pragma omp parallel for collapse(3)
for
(
ptrdiff_t
index
=
0
;
index
<
ptrdiff_t
(
info
.
batch
*
info
.
m
*
info
.
n
);
++
index
)
{
for
(
size_t
i
=
0
;
i
<
info
.
batch
;
++
i
)
{
size_t
ind
=
index
;
for
(
size_t
m_
=
0
;
m_
<
info
.
m
;
++
m_
)
{
size_t
n_
=
ind
%
info
.
n
;
for
(
size_t
n_
=
0
;
n_
<
info
.
n
;
++
n_
)
{
ind
/=
info
.
n
;
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
info
.
c_matrix
.
stride
+
m_
*
info
.
c_matrix
.
row_stride
+
n_
*
info
.
c_matrix
.
col_stride
;
size_t
m_
=
ind
%
info
.
m
;
float
sum
=
0
;
ind
/=
info
.
m
;
for
(
size_t
k_
=
0
;
k_
<
info
.
k
;
++
k_
)
{
size_t
i
=
ind
;
auto
a_
=
reinterpret_cast
<
const
Tdata
*>
(
a
)
+
i
*
info
.
a_matrix
.
stride
+
m_
*
info
.
a_matrix
.
row_stride
+
k_
*
info
.
a_matrix
.
col_stride
;
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
info
.
c_matrix
.
stride
+
m_
*
info
.
c_matrix
.
row_stride
+
n_
*
info
.
c_matrix
.
col_stride
;
auto
b_
=
reinterpret_cast
<
const
Tdata
*>
(
b
)
+
i
*
info
.
b_matrix
.
stride
+
n_
*
info
.
b_matrix
.
col_stride
+
k_
*
info
.
b_matrix
.
row_stride
;
float
sum
=
0
;
if
constexpr
(
std
::
is_same
<
Tdata
,
fp16_t
>::
value
)
{
for
(
size_t
k_
=
0
;
k_
<
info
.
k
;
++
k_
)
{
sum
+=
utils
::
cast
<
float
>
(
*
a_
)
*
utils
::
cast
<
float
>
(
*
b_
);
auto
a_
=
reinterpret_cast
<
const
Tdata
*>
(
a
)
+
i
*
info
.
a_matrix
.
stride
+
m_
*
info
.
a_matrix
.
row_stride
+
k_
*
info
.
a_matrix
.
col_stride
;
}
else
{
auto
b_
=
reinterpret_cast
<
const
Tdata
*>
(
b
)
+
i
*
info
.
b_matrix
.
stride
+
n_
*
info
.
b_matrix
.
col_stride
+
k_
*
info
.
b_matrix
.
row_stride
;
sum
+=
*
a_
*
(
*
b_
);
if
constexpr
(
std
::
is_same
<
Tdata
,
fp16_t
>::
value
)
{
}
sum
+=
utils
::
cast
<
float
>
(
*
a_
)
*
utils
::
cast
<
float
>
(
*
b_
);
}
}
else
{
if
constexpr
(
std
::
is_same
<
Tdata
,
fp16_t
>::
value
)
{
sum
+=
*
a_
*
(
*
b_
);
if
(
beta
==
0
)
{
*
c_
=
utils
::
cast
<
fp16_t
>
(
alpha
*
sum
);
}
else
{
*
c_
=
utils
::
cast
<
fp16_t
>
(
beta
*
utils
::
cast
<
float
>
(
*
c_
)
+
alpha
*
sum
);
}
}
else
{
*
c_
=
beta
*
(
*
c_
)
+
alpha
*
sum
;
}
}
}
}
}
if
constexpr
(
std
::
is_same
<
Tdata
,
fp16_t
>::
value
)
{
if
(
beta
==
0
)
{
*
c_
=
utils
::
cast
<
fp16_t
>
(
alpha
*
sum
);
}
else
{
*
c_
=
utils
::
cast
<
fp16_t
>
(
beta
*
utils
::
cast
<
float
>
(
*
c_
)
+
alpha
*
sum
);
}
}
else
{
*
c_
=
beta
*
(
*
c_
)
+
alpha
*
sum
;
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment