Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
df4594a9
Unverified
Commit
df4594a9
authored
Sep 10, 2020
by
Stas Bekman
Committed by
GitHub
Sep 10, 2020
Browse files
[xlm tok] config dict: fix str into int to match definition (#7034)
parent
d6c08b07
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
152 additions
and
152 deletions
+152
-152
src/transformers/tokenization_xlm.py
src/transformers/tokenization_xlm.py
+152
-152
No files found.
src/transformers/tokenization_xlm.py
View file @
df4594a9
...
@@ -79,37 +79,37 @@ PRETRAINED_INIT_CONFIGURATION = {
...
@@ -79,37 +79,37 @@ PRETRAINED_INIT_CONFIGURATION = {
"xlm-mlm-en-2048"
:
{
"do_lowercase_and_remove_accent"
:
True
},
"xlm-mlm-en-2048"
:
{
"do_lowercase_and_remove_accent"
:
True
},
"xlm-mlm-ende-1024"
:
{
"xlm-mlm-ende-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"0"
:
"de"
,
"1"
:
"en"
},
"id2lang"
:
{
0
:
"de"
,
1
:
"en"
},
"lang2id"
:
{
"de"
:
0
,
"en"
:
1
},
"lang2id"
:
{
"de"
:
0
,
"en"
:
1
},
},
},
"xlm-mlm-enfr-1024"
:
{
"xlm-mlm-enfr-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"0"
:
"en"
,
"1"
:
"fr"
},
"id2lang"
:
{
0
:
"en"
,
1
:
"fr"
},
"lang2id"
:
{
"en"
:
0
,
"fr"
:
1
},
"lang2id"
:
{
"en"
:
0
,
"fr"
:
1
},
},
},
"xlm-mlm-enro-1024"
:
{
"xlm-mlm-enro-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"0"
:
"en"
,
"1"
:
"ro"
},
"id2lang"
:
{
0
:
"en"
,
1
:
"ro"
},
"lang2id"
:
{
"en"
:
0
,
"ro"
:
1
},
"lang2id"
:
{
"en"
:
0
,
"ro"
:
1
},
},
},
"xlm-mlm-tlm-xnli15-1024"
:
{
"xlm-mlm-tlm-xnli15-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"id2lang"
:
{
"0"
:
"ar"
,
0
:
"ar"
,
"1"
:
"bg"
,
1
:
"bg"
,
"2"
:
"de"
,
2
:
"de"
,
"3"
:
"el"
,
3
:
"el"
,
"4"
:
"en"
,
4
:
"en"
,
"5"
:
"es"
,
5
:
"es"
,
"6"
:
"fr"
,
6
:
"fr"
,
"7"
:
"hi"
,
7
:
"hi"
,
"8"
:
"ru"
,
8
:
"ru"
,
"9"
:
"sw"
,
9
:
"sw"
,
"
10
"
:
"th"
,
10
:
"th"
,
"
11
"
:
"tr"
,
11
:
"tr"
,
"
12
"
:
"ur"
,
12
:
"ur"
,
"
13
"
:
"vi"
,
13
:
"vi"
,
"
14
"
:
"zh"
,
14
:
"zh"
,
},
},
"lang2id"
:
{
"lang2id"
:
{
"ar"
:
0
,
"ar"
:
0
,
...
@@ -132,21 +132,21 @@ PRETRAINED_INIT_CONFIGURATION = {
...
@@ -132,21 +132,21 @@ PRETRAINED_INIT_CONFIGURATION = {
"xlm-mlm-xnli15-1024"
:
{
"xlm-mlm-xnli15-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"id2lang"
:
{
"0"
:
"ar"
,
0
:
"ar"
,
"1"
:
"bg"
,
1
:
"bg"
,
"2"
:
"de"
,
2
:
"de"
,
"3"
:
"el"
,
3
:
"el"
,
"4"
:
"en"
,
4
:
"en"
,
"5"
:
"es"
,
5
:
"es"
,
"6"
:
"fr"
,
6
:
"fr"
,
"7"
:
"hi"
,
7
:
"hi"
,
"8"
:
"ru"
,
8
:
"ru"
,
"9"
:
"sw"
,
9
:
"sw"
,
"
10
"
:
"th"
,
10
:
"th"
,
"
11
"
:
"tr"
,
11
:
"tr"
,
"
12
"
:
"ur"
,
12
:
"ur"
,
"
13
"
:
"vi"
,
13
:
"vi"
,
"
14
"
:
"zh"
,
14
:
"zh"
,
},
},
"lang2id"
:
{
"lang2id"
:
{
"ar"
:
0
,
"ar"
:
0
,
...
@@ -168,34 +168,34 @@ PRETRAINED_INIT_CONFIGURATION = {
...
@@ -168,34 +168,34 @@ PRETRAINED_INIT_CONFIGURATION = {
},
},
"xlm-clm-enfr-1024"
:
{
"xlm-clm-enfr-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"0"
:
"en"
,
"1"
:
"fr"
},
"id2lang"
:
{
0
:
"en"
,
1
:
"fr"
},
"lang2id"
:
{
"en"
:
0
,
"fr"
:
1
},
"lang2id"
:
{
"en"
:
0
,
"fr"
:
1
},
},
},
"xlm-clm-ende-1024"
:
{
"xlm-clm-ende-1024"
:
{
"do_lowercase_and_remove_accent"
:
True
,
"do_lowercase_and_remove_accent"
:
True
,
"id2lang"
:
{
"0"
:
"de"
,
"1"
:
"en"
},
"id2lang"
:
{
0
:
"de"
,
1
:
"en"
},
"lang2id"
:
{
"de"
:
0
,
"en"
:
1
},
"lang2id"
:
{
"de"
:
0
,
"en"
:
1
},
},
},
"xlm-mlm-17-1280"
:
{
"xlm-mlm-17-1280"
:
{
"do_lowercase_and_remove_accent"
:
False
,
"do_lowercase_and_remove_accent"
:
False
,
"id2lang"
:
{
"id2lang"
:
{
"0"
:
"ar"
,
0
:
"ar"
,
"1"
:
"de"
,
1
:
"de"
,
"2"
:
"en"
,
2
:
"en"
,
"3"
:
"es"
,
3
:
"es"
,
"4"
:
"fr"
,
4
:
"fr"
,
"5"
:
"hi"
,
5
:
"hi"
,
"6"
:
"it"
,
6
:
"it"
,
"7"
:
"ja"
,
7
:
"ja"
,
"8"
:
"ko"
,
8
:
"ko"
,
"9"
:
"nl"
,
9
:
"nl"
,
"
10
"
:
"pl"
,
10
:
"pl"
,
"
11
"
:
"pt"
,
11
:
"pt"
,
"
12
"
:
"ru"
,
12
:
"ru"
,
"
13
"
:
"sv"
,
13
:
"sv"
,
"
14
"
:
"tr"
,
14
:
"tr"
,
"
15
"
:
"vi"
,
15
:
"vi"
,
"
16
"
:
"zh"
,
16
:
"zh"
,
},
},
"lang2id"
:
{
"lang2id"
:
{
"ar"
:
0
,
"ar"
:
0
,
...
@@ -220,106 +220,106 @@ PRETRAINED_INIT_CONFIGURATION = {
...
@@ -220,106 +220,106 @@ PRETRAINED_INIT_CONFIGURATION = {
"xlm-mlm-100-1280"
:
{
"xlm-mlm-100-1280"
:
{
"do_lowercase_and_remove_accent"
:
False
,
"do_lowercase_and_remove_accent"
:
False
,
"id2lang"
:
{
"id2lang"
:
{
"0"
:
"af"
,
0
:
"af"
,
"1"
:
"als"
,
1
:
"als"
,
"2"
:
"am"
,
2
:
"am"
,
"3"
:
"an"
,
3
:
"an"
,
"4"
:
"ang"
,
4
:
"ang"
,
"5"
:
"ar"
,
5
:
"ar"
,
"6"
:
"arz"
,
6
:
"arz"
,
"7"
:
"ast"
,
7
:
"ast"
,
"8"
:
"az"
,
8
:
"az"
,
"9"
:
"bar"
,
9
:
"bar"
,
"
10
"
:
"be"
,
10
:
"be"
,
"
11
"
:
"bg"
,
11
:
"bg"
,
"
12
"
:
"bn"
,
12
:
"bn"
,
"
13
"
:
"br"
,
13
:
"br"
,
"
14
"
:
"bs"
,
14
:
"bs"
,
"
15
"
:
"ca"
,
15
:
"ca"
,
"
16
"
:
"ceb"
,
16
:
"ceb"
,
"
17
"
:
"ckb"
,
17
:
"ckb"
,
"
18
"
:
"cs"
,
18
:
"cs"
,
"
19
"
:
"cy"
,
19
:
"cy"
,
"
20
"
:
"da"
,
20
:
"da"
,
"
21
"
:
"de"
,
21
:
"de"
,
"
22
"
:
"el"
,
22
:
"el"
,
"
23
"
:
"en"
,
23
:
"en"
,
"
24
"
:
"eo"
,
24
:
"eo"
,
"
25
"
:
"es"
,
25
:
"es"
,
"
26
"
:
"et"
,
26
:
"et"
,
"
27
"
:
"eu"
,
27
:
"eu"
,
"
28
"
:
"fa"
,
28
:
"fa"
,
"
29
"
:
"fi"
,
29
:
"fi"
,
"
30
"
:
"fr"
,
30
:
"fr"
,
"
31
"
:
"fy"
,
31
:
"fy"
,
"
32
"
:
"ga"
,
32
:
"ga"
,
"
33
"
:
"gan"
,
33
:
"gan"
,
"
34
"
:
"gl"
,
34
:
"gl"
,
"
35
"
:
"gu"
,
35
:
"gu"
,
"
36
"
:
"he"
,
36
:
"he"
,
"
37
"
:
"hi"
,
37
:
"hi"
,
"
38
"
:
"hr"
,
38
:
"hr"
,
"
39
"
:
"hu"
,
39
:
"hu"
,
"
40
"
:
"hy"
,
40
:
"hy"
,
"
41
"
:
"ia"
,
41
:
"ia"
,
"
42
"
:
"id"
,
42
:
"id"
,
"
43
"
:
"is"
,
43
:
"is"
,
"
44
"
:
"it"
,
44
:
"it"
,
"
45
"
:
"ja"
,
45
:
"ja"
,
"
46
"
:
"jv"
,
46
:
"jv"
,
"
47
"
:
"ka"
,
47
:
"ka"
,
"
48
"
:
"kk"
,
48
:
"kk"
,
"
49
"
:
"kn"
,
49
:
"kn"
,
"
50
"
:
"ko"
,
50
:
"ko"
,
"
51
"
:
"ku"
,
51
:
"ku"
,
"
52
"
:
"la"
,
52
:
"la"
,
"
53
"
:
"lb"
,
53
:
"lb"
,
"
54
"
:
"lt"
,
54
:
"lt"
,
"
55
"
:
"lv"
,
55
:
"lv"
,
"
56
"
:
"mk"
,
56
:
"mk"
,
"
57
"
:
"ml"
,
57
:
"ml"
,
"
58
"
:
"mn"
,
58
:
"mn"
,
"
59
"
:
"mr"
,
59
:
"mr"
,
"
60
"
:
"ms"
,
60
:
"ms"
,
"
61
"
:
"my"
,
61
:
"my"
,
"
62
"
:
"nds"
,
62
:
"nds"
,
"
63
"
:
"ne"
,
63
:
"ne"
,
"
64
"
:
"nl"
,
64
:
"nl"
,
"
65
"
:
"nn"
,
65
:
"nn"
,
"
66
"
:
"no"
,
66
:
"no"
,
"
67
"
:
"oc"
,
67
:
"oc"
,
"
68
"
:
"pl"
,
68
:
"pl"
,
"
69
"
:
"pt"
,
69
:
"pt"
,
"
70
"
:
"ro"
,
70
:
"ro"
,
"
71
"
:
"ru"
,
71
:
"ru"
,
"
72
"
:
"scn"
,
72
:
"scn"
,
"
73
"
:
"sco"
,
73
:
"sco"
,
"
74
"
:
"sh"
,
74
:
"sh"
,
"
75
"
:
"si"
,
75
:
"si"
,
"
76
"
:
"simple"
,
76
:
"simple"
,
"
77
"
:
"sk"
,
77
:
"sk"
,
"
78
"
:
"sl"
,
78
:
"sl"
,
"
79
"
:
"sq"
,
79
:
"sq"
,
"
80
"
:
"sr"
,
80
:
"sr"
,
"
81
"
:
"sv"
,
81
:
"sv"
,
"
82
"
:
"sw"
,
82
:
"sw"
,
"
83
"
:
"ta"
,
83
:
"ta"
,
"
84
"
:
"te"
,
84
:
"te"
,
"
85
"
:
"th"
,
85
:
"th"
,
"
86
"
:
"tl"
,
86
:
"tl"
,
"
87
"
:
"tr"
,
87
:
"tr"
,
"
88
"
:
"tt"
,
88
:
"tt"
,
"
89
"
:
"uk"
,
89
:
"uk"
,
"
90
"
:
"ur"
,
90
:
"ur"
,
"
91
"
:
"uz"
,
91
:
"uz"
,
"
92
"
:
"vi"
,
92
:
"vi"
,
"
93
"
:
"war"
,
93
:
"war"
,
"
94
"
:
"wuu"
,
94
:
"wuu"
,
"
95
"
:
"yi"
,
95
:
"yi"
,
"
96
"
:
"zh"
,
96
:
"zh"
,
"
97
"
:
"zh_classical"
,
97
:
"zh_classical"
,
"
98
"
:
"zh_min_nan"
,
98
:
"zh_min_nan"
,
"
99
"
:
"zh_yue"
,
99
:
"zh_yue"
,
},
},
"lang2id"
:
{
"lang2id"
:
{
"af"
:
0
,
"af"
:
0
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment