# paddlenlp.data.tokenizer.po

# Translation catalog for the PaddleNLP documentation: paddlenlp.data.tokenizer.
# Copyright (C) 2021, PaddleNLP
# This file is distributed under the same license as the PaddleNLP package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2022.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PaddleNLP \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-03-18 21:31+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.9.0\n"

#: ../source/paddlenlp.data.tokenizer.rst:2
msgid "tokenizer"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer:1
msgid "基类：:class:`paddlenlp.data.tokenizer.BaseTokenizer`"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer:1
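msgid ""
"Constructs a tokenizer based on `jieba "
"<https://github.com/fxsjy/jieba>`__. It supports :meth:`cut` method to "
"split the text to tokens, and :meth:`encode` method to covert text to "
"token ids."
msgstr ""
"Constructs a tokenizer based on `jieba "
"<https://github.com/fxsjy/jieba>`__. It supports the :meth:`cut` method "
"to split the text into tokens, and the :meth:`encode` method to convert "
"text to token ids."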

#: of paddlenlp.data.tokenizer.JiebaTokenizer
#: paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "参数"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer:5
msgid "An instance of :class:`paddlenlp.data.Vocab`."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:1
msgid "The method used to cut the text to tokens."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:3
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:5
msgid "The text that needs to be cuted."
msgstr "The text that needs to be cut."

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:5
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:7
msgid ""
"Whether to use the full mode. If True, using full mode that gets all the "
"possible words from the sentence, which is fast but not accurate. If "
"False, using accurate mode that attempts to cut the sentence into the "
"most accurate segmentations, which is suitable for text analysis. "
"Default: False."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:12
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:14
msgid "Whether to use the HMM model. Default: True."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "返回"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:15
msgid "A list of tokens."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode
msgid "返回类型"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.cut:19
#: paddlenlp.data.tokenizer.JiebaTokenizer.encode:21
msgid "示例"
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.encode:1
msgid ""
"The method used to convert the text to ids. It will firstly call "
":meth:`cut` method to cut the text to tokens. Then, convert tokens to ids"
" using `vocab`."
msgstr ""

#: of paddlenlp.data.tokenizer.JiebaTokenizer.encode:17
msgid "A list of ids."
msgstr ""