Commit a4cf2561 (unverified), authored by Jee Jee Li, committed by GitHub
Browse files

[Bugfix] Fix QKVParallelLinearWithShardedLora bias bug (#10844)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent d746268e
......@@ -481,7 +481,6 @@ steps:
- label: LoRA TP Test (Distributed)
num_gpus: 4
- soft_fail: true
source_file_dependencies:
- vllm/lora
- tests/lora
......
......@@ -77,13 +77,6 @@ class ColumnParallelLinearWithShardedLoRA(ColumnParallelLinearWithLoRA):
add_input=True)
# now have column partitioned output
- if self.bias_stacked is not None:
- self.bias_stacked = self.bias_stacked.view(
- -1, self.bias_stacked.shape[-1])
- self.bias_stacked = self.bias_stacked[
- self.punica_wrapper.token_lora_indices]
- output += self.bias_stacked
output = output.view(*out_orig_shape)
return output
......@@ -222,7 +215,7 @@ class QKVParallelLinearWithShardedLora(QKVParallelLinearWithLora):
self.punica_wrapper.add_expand(output,
buffer,
self.lora_b_stacked,
- self.bias_all,
+ self.bias_stacked,
add_input=True)
# now have column partitioned output
output = output.view(*out_orig_shape)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment