Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
8678ff8d
"git@developer.sourcefind.cn:OpenDAS/torch-scatter.git" did not exist on "da9c7466c1dabd566e45fbe721ac1d0d49767174"
Commit
8678ff8d
authored
Aug 30, 2019
by
thomwolf
Browse files
adding 17 and 100 xlm models
parent
82462c5c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
244 additions
and
3 deletions
+244
-3
pytorch_transformers/tokenization_xlm.py
pytorch_transformers/tokenization_xlm.py
+244
-3
No files found.
pytorch_transformers/tokenization_xlm.py
View file @
8678ff8d
...
@@ -49,7 +49,7 @@ PRETRAINED_VOCAB_FILES_MAP = {
...
@@ -49,7 +49,7 @@ PRETRAINED_VOCAB_FILES_MAP = {
'xlm-clm-ende-1024'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-vocab.json"
,
'xlm-clm-ende-1024'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-vocab.json"
,
'xlm-mlm-17-1280'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-vocab.json"
,
'xlm-mlm-17-1280'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-vocab.json"
,
'xlm-mlm-100-1280'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-vocab.json"
,
'xlm-mlm-100-1280'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-vocab.json"
,
}
}
,
'merges_file'
:
'merges_file'
:
{
{
'xlm-mlm-en-2048'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-merges.txt"
,
'xlm-mlm-en-2048'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-merges.txt"
,
...
@@ -167,8 +167,249 @@ PRETRAINED_INIT_CONFIGURATION = {
...
@@ -167,8 +167,249 @@ PRETRAINED_INIT_CONFIGURATION = {
"1"
:
"en"
},
"1"
:
"en"
},
"lang2id"
:
{
"de"
:
0
,
"lang2id"
:
{
"de"
:
0
,
"en"
:
1
}},
"en"
:
1
}},
'xlm-mlm-17-1280'
:
{
"do_lowercase_and_remove_accent"
:
False
},
'xlm-mlm-17-1280'
:
{
"do_lowercase_and_remove_accent"
:
False
,
'xlm-mlm-100-1280'
:
{
"do_lowercase_and_remove_accent"
:
False
},
"id2lang"
:
{
"0"
:
"ar"
,
"1"
:
"de"
,
"2"
:
"en"
,
"3"
:
"es"
,
"4"
:
"fr"
,
"5"
:
"hi"
,
"6"
:
"it"
,
"7"
:
"ja"
,
"8"
:
"ko"
,
"9"
:
"nl"
,
"10"
:
"pl"
,
"11"
:
"pt"
,
"12"
:
"ru"
,
"13"
:
"sv"
,
"14"
:
"tr"
,
"15"
:
"vi"
,
"16"
:
"zh"
},
"lang2id"
:
{
"ar"
:
0
,
"de"
:
1
,
"en"
:
2
,
"es"
:
3
,
"fr"
:
4
,
"hi"
:
5
,
"it"
:
6
,
"ja"
:
7
,
"ko"
:
8
,
"nl"
:
9
,
"pl"
:
10
,
"pt"
:
11
,
"ru"
:
12
,
"sv"
:
13
,
"tr"
:
14
,
"vi"
:
15
,
"zh"
:
16
}},
'xlm-mlm-100-1280'
:
{
"do_lowercase_and_remove_accent"
:
False
,
"id2lang"
:
{
"0"
:
"af"
,
"1"
:
"als"
,
"2"
:
"am"
,
"3"
:
"an"
,
"4"
:
"ang"
,
"5"
:
"ar"
,
"6"
:
"arz"
,
"7"
:
"ast"
,
"8"
:
"az"
,
"9"
:
"bar"
,
"10"
:
"be"
,
"11"
:
"bg"
,
"12"
:
"bn"
,
"13"
:
"br"
,
"14"
:
"bs"
,
"15"
:
"ca"
,
"16"
:
"ceb"
,
"17"
:
"ckb"
,
"18"
:
"cs"
,
"19"
:
"cy"
,
"20"
:
"da"
,
"21"
:
"de"
,
"22"
:
"el"
,
"23"
:
"en"
,
"24"
:
"eo"
,
"25"
:
"es"
,
"26"
:
"et"
,
"27"
:
"eu"
,
"28"
:
"fa"
,
"29"
:
"fi"
,
"30"
:
"fr"
,
"31"
:
"fy"
,
"32"
:
"ga"
,
"33"
:
"gan"
,
"34"
:
"gl"
,
"35"
:
"gu"
,
"36"
:
"he"
,
"37"
:
"hi"
,
"38"
:
"hr"
,
"39"
:
"hu"
,
"40"
:
"hy"
,
"41"
:
"ia"
,
"42"
:
"id"
,
"43"
:
"is"
,
"44"
:
"it"
,
"45"
:
"ja"
,
"46"
:
"jv"
,
"47"
:
"ka"
,
"48"
:
"kk"
,
"49"
:
"kn"
,
"50"
:
"ko"
,
"51"
:
"ku"
,
"52"
:
"la"
,
"53"
:
"lb"
,
"54"
:
"lt"
,
"55"
:
"lv"
,
"56"
:
"mk"
,
"57"
:
"ml"
,
"58"
:
"mn"
,
"59"
:
"mr"
,
"60"
:
"ms"
,
"61"
:
"my"
,
"62"
:
"nds"
,
"63"
:
"ne"
,
"64"
:
"nl"
,
"65"
:
"nn"
,
"66"
:
"no"
,
"67"
:
"oc"
,
"68"
:
"pl"
,
"69"
:
"pt"
,
"70"
:
"ro"
,
"71"
:
"ru"
,
"72"
:
"scn"
,
"73"
:
"sco"
,
"74"
:
"sh"
,
"75"
:
"si"
,
"76"
:
"simple"
,
"77"
:
"sk"
,
"78"
:
"sl"
,
"79"
:
"sq"
,
"80"
:
"sr"
,
"81"
:
"sv"
,
"82"
:
"sw"
,
"83"
:
"ta"
,
"84"
:
"te"
,
"85"
:
"th"
,
"86"
:
"tl"
,
"87"
:
"tr"
,
"88"
:
"tt"
,
"89"
:
"uk"
,
"90"
:
"ur"
,
"91"
:
"uz"
,
"92"
:
"vi"
,
"93"
:
"war"
,
"94"
:
"wuu"
,
"95"
:
"yi"
,
"96"
:
"zh"
,
"97"
:
"zh_classical"
,
"98"
:
"zh_min_nan"
,
"99"
:
"zh_yue"
},
"lang2id"
:
{
"af"
:
0
,
"als"
:
1
,
"am"
:
2
,
"an"
:
3
,
"ang"
:
4
,
"ar"
:
5
,
"arz"
:
6
,
"ast"
:
7
,
"az"
:
8
,
"bar"
:
9
,
"be"
:
10
,
"bg"
:
11
,
"bn"
:
12
,
"br"
:
13
,
"bs"
:
14
,
"ca"
:
15
,
"ceb"
:
16
,
"ckb"
:
17
,
"cs"
:
18
,
"cy"
:
19
,
"da"
:
20
,
"de"
:
21
,
"el"
:
22
,
"en"
:
23
,
"eo"
:
24
,
"es"
:
25
,
"et"
:
26
,
"eu"
:
27
,
"fa"
:
28
,
"fi"
:
29
,
"fr"
:
30
,
"fy"
:
31
,
"ga"
:
32
,
"gan"
:
33
,
"gl"
:
34
,
"gu"
:
35
,
"he"
:
36
,
"hi"
:
37
,
"hr"
:
38
,
"hu"
:
39
,
"hy"
:
40
,
"ia"
:
41
,
"id"
:
42
,
"is"
:
43
,
"it"
:
44
,
"ja"
:
45
,
"jv"
:
46
,
"ka"
:
47
,
"kk"
:
48
,
"kn"
:
49
,
"ko"
:
50
,
"ku"
:
51
,
"la"
:
52
,
"lb"
:
53
,
"lt"
:
54
,
"lv"
:
55
,
"mk"
:
56
,
"ml"
:
57
,
"mn"
:
58
,
"mr"
:
59
,
"ms"
:
60
,
"my"
:
61
,
"nds"
:
62
,
"ne"
:
63
,
"nl"
:
64
,
"nn"
:
65
,
"no"
:
66
,
"oc"
:
67
,
"pl"
:
68
,
"pt"
:
69
,
"ro"
:
70
,
"ru"
:
71
,
"scn"
:
72
,
"sco"
:
73
,
"sh"
:
74
,
"si"
:
75
,
"simple"
:
76
,
"sk"
:
77
,
"sl"
:
78
,
"sq"
:
79
,
"sr"
:
80
,
"sv"
:
81
,
"sw"
:
82
,
"ta"
:
83
,
"te"
:
84
,
"th"
:
85
,
"tl"
:
86
,
"tr"
:
87
,
"tt"
:
88
,
"uk"
:
89
,
"ur"
:
90
,
"uz"
:
91
,
"vi"
:
92
,
"war"
:
93
,
"wuu"
:
94
,
"yi"
:
95
,
"zh"
:
96
,
"zh_classical"
:
97
,
"zh_min_nan"
:
98
,
"zh_yue"
:
99
}},
}
}
def
get_pairs
(
word
):
def
get_pairs
(
word
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment