Commit e3936a44 authored by Casper Hansen's avatar Casper Hansen
Browse files

Avoid fusing for larger Falcon models

parent 3256ffec
...@@ -7,7 +7,10 @@ class FalconAWQForCausalLM(BaseAWQForCausalLM): ...@@ -7,7 +7,10 @@ class FalconAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def fuse_layers(model: FalconForCausalLM, quant_config: dict):
    """Fuse transformer modules for supported Falcon variants.

    Fusion is applied only when the model has 71 attention heads
    (the Falcon-7B configuration); larger variants are left unfused.
    # TODO: Implement correctly fused modules for Falcon 40B and Falcon 180B
    """
    fuser = FalconFuser(model)
    # NOTE(review): 71 heads identifies Falcon-7B — larger models (40B/180B)
    # are skipped until correctly fused modules exist for them.
    if model.config.num_attention_heads == 71:
        fuser.fuse_transformer()
@staticmethod @staticmethod
def get_model_layers(model: FalconForCausalLM): def get_model_layers(model: FalconForCausalLM):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment