Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
50e15c82
Unverified
Commit
50e15c82
authored
Apr 01, 2020
by
Julien Chaumond
Committed by
GitHub
Apr 01, 2020
Browse files
Tokenizers: Start cleaning examples a little (#3455)
* Start cleaning examples * Fixup
parent
b38d552a
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
14 additions
and
14 deletions
+14
-14
examples/glue/run_pl_glue.py
examples/glue/run_pl_glue.py
+1
-1
examples/hans/test_hans.py
examples/hans/test_hans.py
+2
-2
examples/ner/run_ner.py
examples/ner/run_ner.py
+2
-2
examples/ner/run_pl_ner.py
examples/ner/run_pl_ner.py
+2
-2
examples/ner/run_tf_ner.py
examples/ner/run_tf_ner.py
+2
-2
examples/run_glue.py
examples/run_glue.py
+2
-2
examples/run_multiple_choice.py
examples/run_multiple_choice.py
+1
-1
examples/run_xnli.py
examples/run_xnli.py
+2
-2
No files found.
examples/glue/run_pl_glue.py
View file @
50e15c82
...
@@ -68,7 +68,7 @@ class GLUETransformer(BaseTransformer):
...
@@ -68,7 +68,7 @@ class GLUETransformer(BaseTransformer):
output_mode
=
args
.
glue_output_mode
,
output_mode
=
args
.
glue_output_mode
,
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_token
=
self
.
tokenizer
.
convert_tokens_to_ids
([
self
.
tokenizer
.
pad_token
])[
0
],
pad_token
=
self
.
tokenizer
.
convert_tokens_to_ids
([
self
.
tokenizer
.
pad_token
])[
0
],
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
self
.
tokenizer
.
pad_token_type_id
,
)
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
torch
.
save
(
features
,
cached_features_file
)
torch
.
save
(
features
,
cached_features_file
)
...
...
examples/hans/test_hans.py
View file @
50e15c82
...
@@ -342,8 +342,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
...
@@ -342,8 +342,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
max_length
=
args
.
max_seq_length
,
max_length
=
args
.
max_seq_length
,
output_mode
=
output_mode
,
output_mode
=
output_mode
,
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
tokenizer
.
pad_token_id
,
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
)
)
if
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
local_rank
in
[
-
1
,
0
]:
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
examples/ner/run_ner.py
View file @
50e15c82
...
@@ -348,8 +348,8 @@ def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode):
...
@@ -348,8 +348,8 @@ def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode):
# roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
# roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
# pad on the left for xlnet
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
tokenizer
.
pad_token_id
,
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
pad_token_label_id
=
pad_token_label_id
,
pad_token_label_id
=
pad_token_label_id
,
)
)
if
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
local_rank
in
[
-
1
,
0
]:
...
...
examples/ner/run_pl_ner.py
View file @
50e15c82
...
@@ -64,8 +64,8 @@ class NERTransformer(BaseTransformer):
...
@@ -64,8 +64,8 @@ class NERTransformer(BaseTransformer):
sep_token
=
self
.
tokenizer
.
sep_token
,
sep_token
=
self
.
tokenizer
.
sep_token
,
sep_token_extra
=
bool
(
args
.
model_type
in
[
"roberta"
]),
sep_token_extra
=
bool
(
args
.
model_type
in
[
"roberta"
]),
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
pad_token
=
self
.
tokenizer
.
convert_tokens_to_ids
([
self
.
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
self
.
tokenizer
.
pad_token
_id
,
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
self
.
tokenizer
.
pad_token_type_id
,
pad_token_label_id
=
self
.
pad_token_label_id
,
pad_token_label_id
=
self
.
pad_token_label_id
,
)
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
examples/ner/run_tf_ner.py
View file @
50e15c82
...
@@ -434,8 +434,8 @@ def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, batch_s
...
@@ -434,8 +434,8 @@ def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, batch_s
# roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
# roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
pad_on_left
=
bool
(
args
[
"model_type"
]
in
[
"xlnet"
]),
pad_on_left
=
bool
(
args
[
"model_type"
]
in
[
"xlnet"
]),
# pad on the left for xlnet
# pad on the left for xlnet
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
tokenizer
.
pad_token_id
,
pad_token_segment_id
=
4
if
args
[
"model_type"
]
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
pad_token_label_id
=
pad_token_label_id
,
pad_token_label_id
=
pad_token_label_id
,
)
)
logging
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logging
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
examples/run_glue.py
View file @
50e15c82
...
@@ -360,8 +360,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
...
@@ -360,8 +360,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
max_length
=
args
.
max_seq_length
,
max_length
=
args
.
max_seq_length
,
output_mode
=
output_mode
,
output_mode
=
output_mode
,
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
tokenizer
.
pad_token_id
,
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
)
)
if
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
local_rank
in
[
-
1
,
0
]:
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
examples/run_multiple_choice.py
View file @
50e15c82
...
@@ -361,7 +361,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False, test=False):
...
@@ -361,7 +361,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False, test=False):
args
.
max_seq_length
,
args
.
max_seq_length
,
tokenizer
,
tokenizer
,
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_on_left
=
bool
(
args
.
model_type
in
[
"xlnet"
]),
# pad on the left for xlnet
pad_token_segment_id
=
4
if
args
.
model_type
in
[
"xlnet"
]
else
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
)
)
if
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
local_rank
in
[
-
1
,
0
]:
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
examples/run_xnli.py
View file @
50e15c82
...
@@ -350,8 +350,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
...
@@ -350,8 +350,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
max_length
=
args
.
max_seq_length
,
max_length
=
args
.
max_seq_length
,
output_mode
=
output_mode
,
output_mode
=
output_mode
,
pad_on_left
=
False
,
pad_on_left
=
False
,
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
]
,
pad_token
=
tokenizer
.
pad_token_id
,
pad_token_segment_id
=
0
,
pad_token_segment_id
=
tokenizer
.
pad_token_type_id
,
)
)
if
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
local_rank
in
[
-
1
,
0
]:
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment