Unverified commit e0db6333, authored by drbh and committed by GitHub
Browse files

fix: avoid setting use_sgmv if no kernels present (#2796)

parent b57f3703
@@ -24,6 +24,7 @@ from text_generation_server.utils.sgmv import (
     orient_for_rank,
     pad_rank,
     use_cutlass_shrink,
+    has_sgmv,
 )
@@ -325,8 +326,10 @@ class BatchLoraWeights(BatchAdapterWeights):
             default=0,
         )
+        use_sgmv = False
         if prefill or max_rank > BGMV_MAX_RANK:
-            use_sgmv = True
+            if has_sgmv():
+                use_sgmv = True
             lora_a_ptr = torch.tensor(
                 [
                     (
@@ -352,7 +355,6 @@ class BatchLoraWeights(BatchAdapterWeights):
                 device=device,
             )
         else:
-            use_sgmv = False
             lora_a_ptr = torch.tensor(
                 [
                     (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment