Unverified commit 57b92bbf, authored by Zizhao Chen and committed by GitHub
Browse files

disable use_cache if using gradient checkpointing (#30320)

parent 68be1d3c
......@@ -1581,6 +1581,12 @@ class Idefics2Model(Idefics2PreTrainedModel):
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
if self.training and self.text_model.gradient_checkpointing and use_cache:
logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache = False
# retrieve input_ids and inputs_embeds
if input_ids is not None:
batch_size, seq_length = input_ids.shape
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment