Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
fengzch-das
nunchaku
Commits
08204531
Commit
08204531
authored
Nov 10, 2024
by
sxtyzhangzk
Committed by
Zhekai Zhang
Nov 10, 2024
Browse files
[major] fix build on windows
parent
b1fec976
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
37 additions
and
36 deletions
+37
-36
src/kernels/gemm_w4a4.cu
src/kernels/gemm_w4a4.cu
+37
-36
No files found.
src/kernels/gemm_w4a4.cu
View file @
08204531
...
@@ -1449,6 +1449,7 @@ public:
...
@@ -1449,6 +1449,7 @@ public:
const
int
laneId
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
laneId
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
warpId
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
warpId
=
threadIdx
.
x
/
WARP_SIZE
;
if
constexpr
(
rank
>
0
)
{
lora_act16_warp
lora_act
=
load_lora_act
(
act
+
warpId
*
(
LORA_M_TILES
*
LORA_R_TILES
*
8
*
WARP_SIZE
),
scales
);
lora_act16_warp
lora_act
=
load_lora_act
(
act
+
warpId
*
(
LORA_M_TILES
*
LORA_R_TILES
*
8
*
WARP_SIZE
),
scales
);
lora_wgt_warp
lora_wgt
=
load_lora_wgt
(
wgt
);
lora_wgt_warp
lora_wgt
=
load_lora_wgt
(
wgt
);
for
(
int
m
=
0
;
m
<
LORA_M_TILES
;
m
++
)
{
for
(
int
m
=
0
;
m
<
LORA_M_TILES
;
m
++
)
{
...
@@ -1463,6 +1464,7 @@ public:
...
@@ -1463,6 +1464,7 @@ public:
}
}
}
}
}
}
}
__device__
__forceinline__
__device__
__forceinline__
void
operator
()(
const
BlockInfo
binfo
,
fpsum_warp
&
fpsum
,
half_t
*
out
,
int
M
,
int
N
,
int
K
,
Arguments
args
)
{
void
operator
()(
const
BlockInfo
binfo
,
fpsum_warp
&
fpsum
,
half_t
*
out
,
int
M
,
int
N
,
int
K
,
Arguments
args
)
{
...
@@ -1498,6 +1500,7 @@ public:
...
@@ -1498,6 +1500,7 @@ public:
const
int
laneId
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
laneId
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
warpId
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
warpId
=
threadIdx
.
x
/
WARP_SIZE
;
if
constexpr
(
rank
>
0
)
{
lora_act_warp
lora_act
;
lora_act_warp
lora_act
;
lora_act
.
fill
(
packed_f32psum_t
::
zeros
());
lora_act
.
fill
(
packed_f32psum_t
::
zeros
());
...
@@ -1528,12 +1531,10 @@ public:
...
@@ -1528,12 +1531,10 @@ public:
// }
// }
}
}
reduce_lora_act
(
act
+
warpId
*
(
LORA_M_TILES
*
LORA_R_TILES
*
8
*
WARP_SIZE
),
lora_act
);
reduce_lora_act
(
act
+
warpId
*
(
LORA_M_TILES
*
LORA_R_TILES
*
8
*
WARP_SIZE
),
lora_act
);
// unused_var(dummy, alwaysfalse);
// unused_var(dummy, alwaysfalse);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment