paddlenlp.data.tokenizer.po 2.93 KB
Newer Older
yuguo-Jack's avatar
yuguo-Jack committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Translation catalog for the PaddleNLP documentation page paddlenlp.data.tokenizer.
# Copyright (C) 2021, PaddleNLP
# This file is distributed under the same license as the PaddleNLP package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2022.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PaddleNLP \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-03-18 21:31+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.9.0\n"

#: ../source/paddlenlp.data.tokenizer.rst:2
msgid "tokenizer"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer:1
msgid "基类::class:`paddlenlp.data.tokenizer.BaseTokenizer`"
msgstr ""

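# NOTE(review): the source string below contains a typo ("covert" should be
# "convert"). The msgid must stay identical to the extracted docstring, so fix
# the typo in the upstream docstring and regenerate this catalog.
#: of paddlenlp.data.tokenizer.JiebaTokenizer:1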
msgid ""
"Constructs a tokenizer based on `jieba "
"<https://github.com/fxsjy/jieba>`__. It supports :meth:`cut` method to "
"split the text to tokens, and :meth:`encode` method to covert text to "
"token ids."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer
#: paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "参数"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer:5
msgid "An instance of :class:`paddlenlp.data.Vocab`."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:1
msgid "The method used to cut the text to tokens."
msgstr ""

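# NOTE(review): the source string below contains a typo ("cuted" should be
# "cut"). The msgid must stay identical to the extracted docstring, so fix the
# typo in the upstream docstring and regenerate this catalog.
#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:3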
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:5
msgid "The text that needs to be cuted."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:5
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:7
msgid ""
"Whether to use the full mode. If True, using full mode that gets all the "
"possible words from the sentence, which is fast but not accurate. If "
"False, using accurate mode that attempts to cut the sentence into the "
"most accurate segmentations, which is suitable for text analysis. "
"Default: False."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:12
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:14
msgid "Whether to use the HMM model. Default: True."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "返回"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:15
msgid "A list of tokens."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "返回类型"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:19
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:21
msgid "示例"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.encode:1
msgid ""
"The method used to convert the text to ids. It will firstly call "
":meth:`cut` method to cut the text to tokens. Then, convert tokens to ids"
" using `vocab`."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.encode:17
msgid "A list of ids."
msgstr ""