Unverified Commit e49c3852 authored by YQ, committed by GitHub

use logger.warning_once to avoid massive outputs (#27428)

* use logger.warning_once to avoid massive outputs when training/fine-tuning Longformer

* apply the same change to BigBird, BigBirdPegasus, LED, TF LED, TF Longformer, and Reformer
parent 6ff10922
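
For context: `warning_once` on `transformers` loggers emits a given message only the first time it is called with that message (the call is memoized), so a notice raised on every forward pass no longer floods the output during training. Below is a minimal sketch of the deduplication idea, assuming plain `functools.lru_cache` memoization rather than the library's exact implementation:

    import functools
    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)


    @functools.lru_cache(None)
    def warning_once(message: str):
        # lru_cache memoizes on the message, so identical calls after the
        # first one are served from the cache and never reach the handler.
        logger.warning(message)


    for step in range(1000):
        # Logged exactly once instead of once per training step.
        warning_once("Input ids are automatically padded to be a multiple of `config.block_size`")
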
@@ -2223,7 +2223,7 @@ class BigBirdModel(BigBirdPreTrainedModel):
         padding_len = (block_size - seq_len % block_size) % block_size
         if padding_len > 0:
-            logger.info(
+            logger.warning_once(
                 f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
                 f"`config.block_size`: {block_size}"
             )

@@ -2014,7 +2014,7 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
         padding_len = (block_size - seq_len % block_size) % block_size
         if padding_len > 0:
-            logger.info(
+            logger.warning_once(
                 f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
                 f"`config.block_size`: {block_size}"
             )

@@ -1706,7 +1706,7 @@ class LEDEncoder(LEDPreTrainedModel):
         padding_len = (attention_window - seq_len % attention_window) % attention_window
         if padding_len > 0:
-            logger.info(
+            logger.warning_once(
                 f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
                 f"`config.attention_window`: {attention_window}"
             )

@@ -1859,7 +1859,7 @@ class TFLEDEncoder(tf.keras.layers.Layer):
         padding_len = (attention_window - seq_len % attention_window) % attention_window
         if padding_len > 0:
-            logger.info(
+            logger.warning_once(
                 f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
                 f"`config.attention_window`: {attention_window}"
             )

@@ -1598,7 +1598,7 @@ class LongformerModel(LongformerPreTrainedModel):
         # this path should be recorded in the ONNX export, it is fine with padding_len == 0 as well
         if padding_len > 0:
-            logger.info(
+            logger.warning_once(
                 f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
                 f"`config.attention_window`: {attention_window}"
             )
@@ -1917,7 +1917,7 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         if global_attention_mask is None:
-            logger.info("Initializing global attention on CLS token...")
+            logger.warning_once("Initializing global attention on CLS token...")
             global_attention_mask = torch.zeros_like(input_ids)
             # global attention on cls token
             global_attention_mask[:, 0] = 1
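
For callers who want to silence this notice entirely, the same mask the model would build by default can be constructed up front and passed in, so the `global_attention_mask is None` branch above is never taken. A small sketch with hypothetical shapes (the model call itself is commented out):

    import torch

    # Hypothetical already-tokenized batch: batch_size=2, seq_len=16.
    input_ids = torch.zeros((2, 16), dtype=torch.long)

    # Same default the model builds: global attention on the first (CLS) token only.
    global_attention_mask = torch.zeros_like(input_ids)
    global_attention_mask[:, 0] = 1

    # outputs = model(input_ids, global_attention_mask=global_attention_mask)
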
@@ -2270,7 +2270,7 @@ class LongformerForMultipleChoice(LongformerPreTrainedModel):
         # set global attention on question tokens
         if global_attention_mask is None and input_ids is not None:
-            logger.info("Initializing global attention on multiple choice...")
+            logger.warning_once("Initializing global attention on multiple choice...")
             # put global attention on all tokens after `config.sep_token_id`
             global_attention_mask = torch.stack(
                 [

@@ -2213,7 +2213,7 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn
                 )
                 global_attention_mask = tf.cast(tf.fill(shape_list(input_ids), value=0), tf.int64)
             else:
-                logger.info("Initializing global attention on question tokens...")
+                logger.warning_once("Initializing global attention on question tokens...")
                 # put global attention on all tokens until `config.sep_token_id` is reached
                 sep_token_indices = tf.where(input_ids == self.config.sep_token_id)
                 sep_token_indices = tf.cast(sep_token_indices, dtype=tf.int64)
@@ -2341,7 +2341,7 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque
             global_attention_mask = tf.cast(global_attention_mask, tf.int64)
         if global_attention_mask is None and input_ids is not None:
-            logger.info("Initializing global attention on CLS token...")
+            logger.warning_once("Initializing global attention on CLS token...")
             # global attention on cls token
             global_attention_mask = tf.zeros_like(input_ids)
             updates = tf.ones(shape_list(input_ids)[0], dtype=tf.int64)

@@ -2139,7 +2139,7 @@ class ReformerModel(ReformerPreTrainedModel):
         padded_seq_length=None,
         device=None,
     ):
-        logger.info(
+        logger.warning_once(
             f"Input ids are automatically padded from {input_shape[-1]} to {input_shape[-1] + padding_length} to be a "
             f"multiple of `config.chunk_length`: {padded_seq_length}"
         )
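
As a quick usage check (assuming the `transformers` logging utilities expose `get_logger` and the patched `warning_once` method this commit relies on), a message repeated across iterations now shows up in the logs a single time:

    from transformers.utils import logging

    logging.set_verbosity_warning()
    logger = logging.get_logger(__name__)

    for _ in range(3):
        # With warning_once, this line is emitted only on the first iteration.
        logger.warning_once("Initializing global attention on CLS token...")
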