Unverified Commit 4185b115 authored by Teven, committed by GitHub

Change the XLNet default from not using memories to a 512-token context size, following the paper (#8417)

* Move XLNet memory length FutureWarning

* isort

* style

* Changed default XLNet memory length
parent 190df585
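For context, a minimal sketch of what this change means for downstream code, assuming the transformers API of this era (v3.x); `XLNetConfig` is the class edited in the diff below:

```python
from transformers import XLNetConfig

# After this commit, a fresh config enables attention memories by default,
# matching the 512-token memory used in the XLNet paper.
config = XLNetConfig()
print(config.mem_len)  # 512 (previously None, i.e. memories disabled)

# The old behavior remains available as an explicit opt-out.
no_mem_config = XLNetConfig(mem_len=None)
```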
src/transformers/configuration_xlnet.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 """ XLNet configuration """
-import warnings
 from .configuration_utils import PretrainedConfig
 from .utils import logging
@@ -144,7 +142,7 @@ class XLNetConfig(PretrainedConfig):
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         dropout=0.1,
-        mem_len=None,
+        mem_len=512,
         reuse_len=None,
         bi_data=False,
         clamp_len=-1,
@@ -198,17 +196,6 @@
         self.pad_token_id = pad_token_id
         self.eos_token_id = eos_token_id

-        if mem_len is None or mem_len == 0:
-            warnings.warn(
-                "This config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )

     @property
     def max_position_embeddings(self):
         return -1
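The removed FutureWarning pointed users at a load-time override. A hedged sketch of that usage, with the `mem_len=1024` value taken from the warning text itself and the real `xlnet-base-cased` checkpoint name:

```python
from transformers import XLNetLMHeadModel

# Keyword arguments passed to from_pretrained that match config fields
# override the stored config, so mem_len can still be set per model.
xlnet = XLNetLMHeadModel.from_pretrained("xlnet-base-cased", mem_len=1024)
print(xlnet.config.mem_len)  # 1024, overriding the new default of 512
```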
src/transformers/modeling_xlnet.py
@@ -16,8 +16,6 @@
 """
 PyTorch XLNet model.
 """
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -1087,6 +1085,7 @@ class XLNetModel(XLNetPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
     ):
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
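For completeness, a sketch of the feature the new default turns on, assuming transformers v3.x with PyTorch: with `mem_len > 0`, the model returns per-layer `mems` (cached hidden states) that can be fed back in so the next segment attends to the previous one, extending the effective context.

```python
import torch
from transformers import XLNetLMHeadModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")  # mem_len now 512

first = tokenizer("XLNet caches hidden states as memories.", return_tensors="pt")
with torch.no_grad():
    out = model(**first, return_dict=True)
print(len(out.mems))  # one cached memory tensor per layer

# Feeding the memories back in is the core mechanism this default enables:
# the second segment can attend to cached states from the first.
second = tokenizer("They extend the usable context.", return_tensors="pt")
with torch.no_grad():
    out2 = model(**second, mems=out.mems, return_dict=True)
```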