Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
1ab25c49
Commit
1ab25c49
authored
Dec 21, 2019
by
thomwolf
Browse files
Merge branch 'master' into pr/2115
parents
df396112
18601c3b
Changes
143
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
81 additions
and
0 deletions
+81
-0
transformers/tokenization_xlnet.py
transformers/tokenization_xlnet.py
+2
-0
try.py
try.py
+0
-0
utils/link_tester.py
utils/link_tester.py
+79
-0
No files found.
transformers/tokenization_xlnet.py
View file @
1ab25c49
...
@@ -60,6 +60,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
...
@@ -60,6 +60,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
vocab_files_names
=
VOCAB_FILES_NAMES
vocab_files_names
=
VOCAB_FILES_NAMES
pretrained_vocab_files_map
=
PRETRAINED_VOCAB_FILES_MAP
pretrained_vocab_files_map
=
PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes
=
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
max_model_input_sizes
=
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
padding_side
=
"left"
def
__init__
(
self
,
vocab_file
,
def
__init__
(
self
,
vocab_file
,
do_lower_case
=
False
,
remove_space
=
True
,
keep_accents
=
False
,
do_lower_case
=
False
,
remove_space
=
True
,
keep_accents
=
False
,
...
@@ -74,6 +75,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
...
@@ -74,6 +75,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
self
.
max_len_single_sentence
=
self
.
max_len
-
2
# take into account special tokens
self
.
max_len_single_sentence
=
self
.
max_len
-
2
# take into account special tokens
self
.
max_len_sentences_pair
=
self
.
max_len
-
3
# take into account special tokens
self
.
max_len_sentences_pair
=
self
.
max_len
-
3
# take into account special tokens
self
.
_pad_token_type_id
=
3
try
:
try
:
import
sentencepiece
as
spm
import
sentencepiece
as
spm
...
...
try.py
0 → 100644
View file @
1ab25c49
utils/link_tester.py
0 → 100644
View file @
1ab25c49
""" Link tester.
This little utility reads all the python files in the repository,
scans for links pointing to S3 and tests the links one by one. Raises an error
at the end of the scan if at least one link was reported broken.
"""
import
os
import
re
import
sys
import
requests
# Matches a quoted string literal that starts with "https://s3".
#   group 1: the opening quote; the back-reference \1 requires the same
#            character to close the literal
#   group 2: the "https://s3" prefix
#   group 3: the remainder of the URL
# Group 3 is non-greedy so that the match stops at the first closing quote
# rather than swallowing other string literals found later on the same line.
REGEXP_FIND_S3_LINKS = r"""([\"'])(https:\/\/s3)(.*?)\1"""
def list_python_files_in_repository():
    """ List all python files in the repository.

    This function assumes that the script is executed in the root folder.

    The current directory is walked recursively. Every directory whose path
    contains "templates" is ignored, and only files whose name ends with
    ".py" are kept.

    Returns:
        A list of file paths, each one built by joining the walked directory
        path (starting at ".") with the file name.
    """
    source_code_files = []
    for path, _subdirs, files in os.walk("."):
        if "templates" in path:
            continue

        for name in files:
            # Test the extension itself: a substring test on ".py" would also
            # accept names such as "module.pyx", "setup.py.bak" or ".pylintrc".
            if name.endswith(".py"):
                source_code_files.append(os.path.join(path, name))

    return source_code_files
def find_all_links(file_paths):
    """ Gather the links found in every file listed in `file_paths`.

    Each path is passed to `scan_code_for_links` and the per-file results are
    concatenated, in the order the paths are given, into a single flat list.
    """
    return [
        link
        for file_path in file_paths
        for link in scan_code_for_links(file_path)
    ]
def scan_code_for_links(source):
    """ Extract the links contained in the file located at `source`.

    The whole file is read as text and matched against REGEXP_FIND_S3_LINKS.
    Every match yields three captured groups; the first one (the opening
    quote) is discarded and the other two are glued together to rebuild the
    link.

    Returns a list of links.
    """
    with open(source, 'r') as source_file:
        text = source_file.read()

    links = []
    for match in re.findall(REGEXP_FIND_S3_LINKS, text):
        _quote, prefix, suffix = match
        links.append(prefix + suffix)
    return links
def check_all_links(links, timeout=10):
    """ Check that the provided links are valid.

    Links are considered valid if a HEAD request to the server
    returns a 200 status code. A link whose request cannot be completed
    (the request raises `requests.RequestException`, e.g. on a connection
    failure or when `timeout` expires) is reported as broken instead of
    aborting the whole scan.

    Args:
        links: iterable of URLs to test.
        timeout: number of seconds passed to `requests.head` as its timeout,
            so that an unresponsive server cannot block the scan forever.

    Returns:
        The list of links that are not valid, in the order they were given.
    """
    broken_links = []
    for link in links:
        try:
            response = requests.head(link, timeout=timeout)
        except requests.RequestException:
            # No usable answer from the server: the link "did not respond".
            broken_links.append(link)
            continue

        if response.status_code != 200:
            broken_links.append(link)

    return broken_links
if __name__ == "__main__":
    # Announce the scan before any work is done: checking every link involves
    # one network request per link, so the message must not come afterwards.
    print("Looking for broken links to pre-trained models/configs/tokenizers...")

    file_paths = list_python_files_in_repository()
    links = find_all_links(file_paths)
    broken_links = check_all_links(links)

    if broken_links:
        print("The following links did not respond:")
        for link in broken_links:
            print("- {}".format(link))
        # Exit with a non-zero status to signal the failure to the caller.
        sys.exit(1)
    print("All links are ok.")
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment