Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
094b7d94
Unverified
Commit
094b7d94
authored
Feb 25, 2025
by
Henry Tsang
Committed by
GitHub
Feb 25, 2025
Browse files
[Kernel][Build/CI] Bump CUTLASS to 3.8 and add initializers for cutlass epilogues (#13797)
parent
e1fe7591
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
33 additions
and
29 deletions
+33
-29
CMakeLists.txt
CMakeLists.txt
+4
-4
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
+14
-12
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
+15
-13
No files found.
CMakeLists.txt
View file @
094b7d94
...
@@ -266,7 +266,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
...
@@ -266,7 +266,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
cutlass
cutlass
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
# Please keep this in sync with CUTLASS_REVISION line above.
# Please keep this in sync with CUTLASS_REVISION line above.
GIT_TAG v3.7.0
GIT_TAG v3.8.0
GIT_PROGRESS TRUE
GIT_PROGRESS TRUE
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
...
...
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
View file @
094b7d94
...
@@ -122,8 +122,8 @@ struct ScaledEpilogue
...
@@ -122,8 +122,8 @@ struct ScaledEpilogue
auto
a_args
=
SUPER
::
template
args_from_tensor
<
ScaleA
,
float
>(
a_scales
);
auto
a_args
=
SUPER
::
template
args_from_tensor
<
ScaleA
,
float
>(
a_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
};
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
,
{},
{}
};
return
ArgumentType
{
a_args
,
evt0_args
};
return
ArgumentType
{
a_args
,
evt0_args
,
{}
};
}
}
};
};
...
@@ -167,8 +167,8 @@ struct ScaledEpilogueBias
...
@@ -167,8 +167,8 @@ struct ScaledEpilogueBias
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
};
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
,
{},
{}
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
,
{}
};
}
}
};
};
...
@@ -230,9 +230,10 @@ struct ScaledEpilogueBiasAzp
...
@@ -230,9 +230,10 @@ struct ScaledEpilogueBiasAzp
auto
azp_adj_args
=
auto
azp_adj_args
=
SUPER
::
template
args_from_tensor
<
AzpWithAdj
,
int32_t
>(
azp_adj
);
SUPER
::
template
args_from_tensor
<
AzpWithAdj
,
int32_t
>(
azp_adj
);
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{{},
azp_adj_args
};
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{{},
azp_adj_args
,
{}};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
b_args
,
evt_azp_args
};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
};
b_args
,
evt_azp_args
,
{}};
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
,
{}};
}
}
};
};
...
@@ -309,10 +310,11 @@ struct ScaledEpilogueBiasAzpToken
...
@@ -309,10 +310,11 @@ struct ScaledEpilogueBiasAzpToken
auto
azp_adj_args
=
auto
azp_adj_args
=
SUPER
::
template
args_from_tensor
<
AzpAdj
,
int32_t
>(
azp_adj
);
SUPER
::
template
args_from_tensor
<
AzpAdj
,
int32_t
>(
azp_adj
);
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{
azp_args
,
azp_adj_args
};
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{
azp_args
,
azp_adj_args
,
{}};
typename
EVTComputeAcc
::
Arguments
evt_acc_args
{{},
evt_azp_args
};
typename
EVTComputeAcc
::
Arguments
evt_acc_args
{{},
evt_azp_args
,
{}};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
b_args
,
evt_acc_args
};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
};
b_args
,
evt_acc_args
,
{}};
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
,
{}};
}
}
};
};
...
...
csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
View file @
094b7d94
...
@@ -146,8 +146,8 @@ struct ScaledEpilogue
...
@@ -146,8 +146,8 @@ struct ScaledEpilogue
auto
a_args
=
SUPER
::
template
args_from_tensor
<
ScaleA
,
float
>(
a_scales
);
auto
a_args
=
SUPER
::
template
args_from_tensor
<
ScaleA
,
float
>(
a_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
};
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
,
{},
{}
};
return
ArgumentType
{
a_args
,
evt0_args
};
return
ArgumentType
{
a_args
,
evt0_args
,
{}
};
}
}
};
};
...
@@ -193,8 +193,8 @@ struct ScaledEpilogueBias
...
@@ -193,8 +193,8 @@ struct ScaledEpilogueBias
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
};
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
,
{},
{}
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
,
{}
};
}
}
};
};
...
@@ -236,8 +236,8 @@ struct ScaledEpilogueColumnBias
...
@@ -236,8 +236,8 @@ struct ScaledEpilogueColumnBias
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
b_args
=
SUPER
::
template
args_from_tensor
<
ScaleB
,
float
>(
b_scales
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
auto
bias_args
=
SUPER
::
template
args_from_tensor
<
Bias
,
ElementD
>(
bias
);
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
};
typename
EVTCompute0
::
Arguments
evt0_args
{
b_args
,
{},
{}
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
};
return
ArgumentType
{
a_args
,
evt0_args
,
bias_args
,
{}
};
}
}
};
};
...
@@ -297,9 +297,10 @@ struct ScaledEpilogueBiasAzp
...
@@ -297,9 +297,10 @@ struct ScaledEpilogueBiasAzp
auto
azp_adj_args
=
auto
azp_adj_args
=
SUPER
::
template
args_from_tensor
<
AzpWithAdj
,
int32_t
>(
azp_adj
);
SUPER
::
template
args_from_tensor
<
AzpWithAdj
,
int32_t
>(
azp_adj
);
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{{},
azp_adj_args
};
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{{},
azp_adj_args
,
{}};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
b_args
,
evt_azp_args
};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
};
b_args
,
evt_azp_args
,
{}};
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
,
{}};
}
}
};
};
...
@@ -374,10 +375,11 @@ struct ScaledEpilogueBiasAzpToken
...
@@ -374,10 +375,11 @@ struct ScaledEpilogueBiasAzpToken
auto
azp_adj_args
=
auto
azp_adj_args
=
SUPER
::
template
args_from_tensor
<
AzpAdj
,
int32_t
>(
azp_adj
);
SUPER
::
template
args_from_tensor
<
AzpAdj
,
int32_t
>(
azp_adj
);
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{
azp_args
,
azp_adj_args
};
typename
EVTComputeAzp
::
Arguments
evt_azp_args
{
azp_args
,
azp_adj_args
,
{}};
typename
EVTComputeAcc
::
Arguments
evt_acc_args
{{},
evt_azp_args
};
typename
EVTComputeAcc
::
Arguments
evt_acc_args
{{},
evt_azp_args
,
{}};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
b_args
,
evt_acc_args
};
typename
EVTComputeScaleB
::
Arguments
evt_scale_b_args
{
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
};
b_args
,
evt_acc_args
,
{}};
return
ArgumentType
{
a_args
,
evt_scale_b_args
,
bias_args
,
{}};
}
}
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment