Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
e3fb4310
Commit
e3fb4310
authored
Jul 11, 2019
by
LysandreJik
Browse files
From pretrained correct initialization. Unknown token handling for gpt2.
parent
50e62a4c
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
4 deletions
+4
-4
pytorch_transformers/modeling_gpt2.py
pytorch_transformers/modeling_gpt2.py
+1
-1
pytorch_transformers/modeling_openai.py
pytorch_transformers/modeling_openai.py
+1
-1
pytorch_transformers/tokenization_gpt2.py
pytorch_transformers/tokenization_gpt2.py
+2
-2
No files found.
pytorch_transformers/modeling_gpt2.py
View file @
e3fb4310
...
@@ -423,7 +423,7 @@ class GPT2PreTrainedModel(PreTrainedModel):
...
@@ -423,7 +423,7 @@ class GPT2PreTrainedModel(PreTrainedModel):
"""
"""
num_special_tokens
=
kwargs
.
pop
(
'num_special_tokens'
,
None
)
num_special_tokens
=
kwargs
.
pop
(
'num_special_tokens'
,
None
)
model
=
super
().
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
model
=
super
(
GPT2PreTrainedModel
,
cls
).
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
# Add additional embeddings for special tokens if needed
# Add additional embeddings for special tokens if needed
# This step also make sure we are still sharing the output and input embeddings after loading weights
# This step also make sure we are still sharing the output and input embeddings after loading weights
...
...
pytorch_transformers/modeling_openai.py
View file @
e3fb4310
...
@@ -431,7 +431,7 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
...
@@ -431,7 +431,7 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
num_special_tokens
=
kwargs
.
get
(
'num_special_tokens'
,
None
)
num_special_tokens
=
kwargs
.
get
(
'num_special_tokens'
,
None
)
kwargs
.
pop
(
'num_special_tokens'
,
None
)
kwargs
.
pop
(
'num_special_tokens'
,
None
)
model
=
super
(
PreTrainedModel
,
cls
).
from_pretrained
(
pretrained_model_name_or_path
,
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
model
=
super
(
OpenAIGPT
PreTrainedModel
,
cls
).
from_pretrained
(
pretrained_model_name_or_path
,
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
# Add additional embeddings for special tokens if needed
# Add additional embeddings for special tokens if needed
# This step also make sure we are still sharing the output and input embeddings after loading weights
# This step also make sure we are still sharing the output and input embeddings after loading weights
...
...
pytorch_transformers/tokenization_gpt2.py
View file @
e3fb4310
...
@@ -177,11 +177,11 @@ class GPT2Tokenizer(PreTrainedTokenizer):
...
@@ -177,11 +177,11 @@ class GPT2Tokenizer(PreTrainedTokenizer):
def
_convert_token_to_id
(
self
,
token
):
def
_convert_token_to_id
(
self
,
token
):
""" Converts a token (str/unicode) in an id using the vocab. """
""" Converts a token (str/unicode) in an id using the vocab. """
return
self
.
encoder
.
get
(
token
,
self
.
encoder
.
get
(
self
.
unk_token
)
)
return
self
.
encoder
.
get
(
token
)
def
_convert_id_to_token
(
self
,
index
):
def
_convert_id_to_token
(
self
,
index
):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
return
self
.
decoder
.
get
(
index
,
self
.
unk_token
)
return
self
.
decoder
.
get
(
index
)
def
_convert_ids_to_string
(
self
,
tokens_ids
):
def
_convert_ids_to_string
(
self
,
tokens_ids
):
"""Converts a sequence of ids in a string."""
"""Converts a sequence of ids in a string."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment