Commit a52e53db authored by chenzk

v1.0
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/framework/qwen_agent.rst:2
#: aaed24d3edd64e6ab1f20188f3d5ba24
msgid "Qwen-Agent"
msgstr "Qwen-Agent"
#: ../../Qwen/source/framework/qwen_agent.rst:5
#: 1cbbb8d342f243c58e0d66a3e44daac8
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/framework/qwen_agent.rst:7
#: 3e1dbee121bc4a6c91a26618e27c0d86
msgid "`Qwen-Agent <https://github.com/QwenLM/Qwen-Agent>`__ is a framework for developing LLM applications based on the instruction following, tool usage, planning, and memory capabilities of Qwen. It also comes with example applications such as Browser Assistant, Code Interpreter, and Custom Assistant."
msgstr "`Qwen-Agent <https://github.com/QwenLM/Qwen-Agent>`__ 是一个基于 Qwen 的指令跟随、工具使用、计划和记忆能力来开发 LLM 应用程序的框架。它还附带了一些示例应用程序,例如浏览器助手、代码解释器和自定义助手。"
#: ../../Qwen/source/framework/qwen_agent.rst:14
#: f180730da09640169fb93950a2e8cb5f
msgid "Installation"
msgstr "安装"
#: ../../Qwen/source/framework/qwen_agent.rst:23
#: 89f39ac4160d49fba7f9d52dce6527c3
msgid "Developing Your Own Agent"
msgstr "开发您自己的智能体"
#: ../../Qwen/source/framework/qwen_agent.rst:25
#: 307456721ed7469eb7b8f636483188f4
msgid "Qwen-Agent provides atomic components such as LLMs and prompts, as well as high-level components such as Agents. The example below uses the Assistant component as an illustration, demonstrating how to add custom tools and quickly develop an agent that uses tools."
msgstr "Qwen-Agent 提供包括语言模型和提示词等原子级组件,及智能体等高级组件在内的多种组件。以下示例选取助理组件进行展示,阐述了如何整合自定义工具以及如何迅速开发出一个能够应用这些工具的代理程序。"
#: ../../Qwen/source/framework/qwen_agent.rst:94
#: 13034806dd414e19a5f53ece31d0fa16
msgid "The framework also provides more atomic components for developers to combine. For additional showcases, please refer to `examples <https://github.com/QwenLM/Qwen-Agent/tree/main/examples>`__."
msgstr "该框架还为开发者提供了更多的原子组件以供组合使用。欲了解更多示例,请参见 `examples <https://github.com/QwenLM/Qwen-Agent/tree/main/examples>`__。"
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/getting_started/concepts.md:1
#: 581ec8a4d8dd4b5a99caf167b796a6e9
msgid "Key Concepts"
msgstr "核心概念"
#: ../../Qwen/source/getting_started/concepts.md:4
#: fc803dd8f02a4caf9be29e42364659a0
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/getting_started/concepts.md:7
#: 834244ff25a040fe91f63682732dd416
msgid "Qwen"
msgstr "通义千问 (Qwen)"
#: ../../Qwen/source/getting_started/concepts.md:9
#: ee9dee3630614908860b2144007186fd
msgid "Qwen (Chinese: 通义千问; pinyin: _Tongyi Qianwen_) is the large language model and large multimodal model series of the Qwen Team, Alibaba Group. Qwen is capable of natural language understanding, text generation, vision understanding, audio understanding, tool use, role play, playing as AI agent, etc. Both language models and multimodal models are pre-trained on large-scale multilingual and multimodal data and post-trained on quality data for aligning to human preferences."
msgstr "通义千问(英文: Qwen ;读作: _kùn_)是由阿里巴巴通义千问团队开发的大规模语言和多模态系列模型。通义千问可以执行自然语言理解、文本生成、视觉理解、音频理解、工具调用、角色扮演、智能体等多种任务。语言和多模态模型均在大规模、多语言、多模态数据上进行预训练,并在高质量语料上后训练以与人类偏好对齐。"
#: ../../Qwen/source/getting_started/concepts.md:13
#: 6a37d9a0b6e2414a9b7ede0e095476af
msgid "There is the proprietary version and the open-weight version."
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:15
#: 4fba11f4661b4e469f88dc3917b27427
msgid "The proprietary versions include"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:16
#: ../../Qwen/source/getting_started/concepts.md:31
#: be8423cea0b447c2b15de596c120f541 d07679ae34d0463f96aeff896a759118
msgid "Qwen: the language models"
msgstr "通义千问 (Qwen):语言模型"
#: ../../Qwen/source/getting_started/concepts.md:17
#: a1461ec445034ba099aa58b1a13375a0
msgid "Qwen Max"
msgstr "Qwen Max"
#: ../../Qwen/source/getting_started/concepts.md:18
#: 19f8d7108d69464a8d1ce2980c1e4e92
msgid "Qwen Plus"
msgstr "Qwen Plus"
#: ../../Qwen/source/getting_started/concepts.md:19
#: ede369bc8dd24052ad674131f4a3b68a
msgid "Qwen Turbo"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:20
#: ../../Qwen/source/getting_started/concepts.md:36
#: ddb0acdec40b4f79a3e6517f86727e4b e4df2227d36a46ee8644ce77f9fc1dc0
msgid "Qwen-VL: the vision-language models"
msgstr "通义千问 VL (Qwen-VL): 视觉语言模型"
#: ../../Qwen/source/getting_started/concepts.md:21
#: f9f5a5b50af44e90999a87661cdf4e5a
msgid "Qwen-VL Max"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:22
#: fd0074955211498c8520ef3405bf312f
msgid "Qwen-VL Plus"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:23
#: 40c8d32d570c4a76a5392c8e296c3793
msgid "Qwen-VL OCR"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:24
#: ../../Qwen/source/getting_started/concepts.md:39
#: c0e45bd6e6b44ac7b18ef6a511c0999e f84666b662ab4d5ea41766d46f34fbc0
msgid "Qwen-Audio: the audio-language models"
msgstr "通义千问 Audio: 音频语言模型"
#: ../../Qwen/source/getting_started/concepts.md:25
#: 0584dbb5e76949ea965661c535e982d7
msgid "Qwen-Audio Turbo"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:26
#: aa78dd31bce94f6db05be93976278455
msgid "Qwen-Audio ASR"
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:28
#: df255434cec04d12b8e2d048d4e5baf8
msgid "You can learn more about them at Alibaba Cloud Model Studio ([China Site](https://help.aliyun.com/zh/model-studio/getting-started/models#9f8890ce29g5u) \\[zh\\], [International Site](https://www.alibabacloud.com/en/product/modelstudio))."
msgstr ""
#: ../../Qwen/source/getting_started/concepts.md:30
#: bc0fbc68d29b49da90efba3358f5013f
msgid "The spectrum for the open-weight models spans over"
msgstr "开源模型包括:"
#: ../../Qwen/source/getting_started/concepts.md:32
#: e3107d97ea1b4e0284c2a33c0da02813
msgid "[Qwen](https://github.com/QwenLM/Qwen): 1.8B, 7B, 14B, and 72B models"
msgstr "[Qwen](https://github.com/QwenLM/Qwen): 1.8B、 7B、 14B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:33
#: 8918b660d015430a8d14c3c62b87b19d
msgid "[Qwen1.5](https://github.com/QwenLM/Qwen1.5/tree/v1.5): 0.5B, 1.8B, 4B, 14BA2.7B, 7B, 14B, 32B, 72B, and 110B models"
msgstr "[Qwen1.5](https://github.com/QwenLM/Qwen1.5/tree/v1.5): 0.5B、 1.8B、 4B、 14BA2.7B、 7B、 14B、 32B、 72B 及 110B 模型"
#: ../../Qwen/source/getting_started/concepts.md:34
#: c5ad94aa9d524a7290d9d0ec35321641
msgid "[Qwen2](https://github.com/QwenLM/Qwen2/tree/v2.0): 0.5B, 1.5B, 7B, 57A14B, and 72B models"
msgstr "[Qwen2](https://github.com/QwenLM/Qwen2/tree/v2.0): 0.5B、 1.5B、 7B、 57A14B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:35
#: 5c38bd713ca847b4bb552971cdd75a99
msgid "[Qwen2.5](https://github.com/QwenLM/Qwen2.5/): 0.5B, 1.5B, 3B, 7B, 14B, 32B, and 72B models"
msgstr "[Qwen2.5](https://github.com/QwenLM/Qwen2.5/): 0.5B、 1.5B、 3B、 7B、 14B、 32B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:37
#: aa36bbcafdf742a9addd2a7b32705a02
msgid "[Qwen-VL](https://github.com/QwenLM/Qwen-VL): 7B-based models"
msgstr "[Qwen-VL](https://github.com/QwenLM/Qwen-VL): 基于 7B 的模型"
#: ../../Qwen/source/getting_started/concepts.md:38
#: 9d1d663950d34fefbfc7df37fa1def7a
msgid "[Qwen2-VL](https://github.com/QwenLM/Qwen2-VL): 2B, 7B, and 72B-based models"
msgstr "[Qwen-VL](https://github.com/QwenLM/Qwen2-VL): 基于 2B 、 7B 和 72B 的模型"
#: ../../Qwen/source/getting_started/concepts.md:40
#: bb8a3431ea1f4cc99b4b8dd78e55d9ad
msgid "[Qwen-Audio](https://github.com/QwenLM/Qwen-Audio): 7B-based model"
msgstr "[Qwen-Audio](https://github.com/QwenLM/Qwen-Audio): 基于 7B 的模型"
#: ../../Qwen/source/getting_started/concepts.md:41
#: 2421a5d0f547440bbf0211274bf44d5d
msgid "[Qwen2-Audio](https://github.com/QwenLM/Qwen2-Audio): 7B-based models"
msgstr "[Qwen2-Audio](https://github.com/QwenLM/Qwen2-Audio): 基于 7B 的模型"
#: ../../Qwen/source/getting_started/concepts.md:42
#: df8610d7dfbf4651a955dc909b727061
msgid "Q*Q: the reasoning models"
msgstr "Q*Q:推理模型"
#: ../../Qwen/source/getting_started/concepts.md:43
#: 67e209da7f5848adab885598a9069f11
msgid "[QwQ-Preview](https://github.com/QwenLM/Qwen2.5/): 32B LLM"
msgstr "[QwQ-Preview](https://github.com/QwenLM/Qwen2.5/): 32B 语言模型"
#: ../../Qwen/source/getting_started/concepts.md:44
#: 4e9ad66b735a4109ab8ec727486c463c
msgid "[QVQ-Preview](https://github.com/QwenLM/Qwen2-VL): 72B VLM"
msgstr "[QVQ-Preview](https://github.com/QwenLM/Qwen2-VL): 72B 视觉语言模型"
#: ../../Qwen/source/getting_started/concepts.md:45
#: 728cd9f1dc9d4502ad9a3702e802fc2e
msgid "CodeQwen/Qwen-Coder: the language models for coding"
msgstr "Code通义千问 / 通义千问Coder:代码语言模型"
#: ../../Qwen/source/getting_started/concepts.md:46
#: 133fd513d7084b54bfe910fda13a42ec
msgid "[CodeQwen1.5](https://github.com/QwenLM/CodeQwen1.5): 7B models"
msgstr "[CodeQwen1.5](https://github.com/QwenLM/CodeQwen1.5): 7B 模型"
#: ../../Qwen/source/getting_started/concepts.md:47
#: a903957acc0d458b8200788144be0b4d
msgid "[Qwen2.5-Coder](https://github.com/QwenLM/Qwen2.5-Coder): 0.5B, 1.5B, 3B, 7B, 14B, and 32B models"
msgstr "[Qwen2.5-Coder](https://github.com/QwenLM/Qwen2.5-Coder): 0.5B、1.5B、3B、7B、14B 及 32B 模型"
#: ../../Qwen/source/getting_started/concepts.md:48
#: 6c47a9310a6945719b35da4bff3e0c9e
msgid "Qwen-Math: the language models for mathematics"
msgstr "通义千问 Math:数学语言模型"
#: ../../Qwen/source/getting_started/concepts.md:49
#: fadbf7de806d4f288fc4355b52bcc060
msgid "[Qwen2-Math](https://github.com/QwenLM/Qwen2-Math): 1.5B, 7B, and 72B models"
msgstr "[Qwen2-Math](https://github.com/QwenLM/Qwen2-Math): 1.5B、 7B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:50
#: 0066352e253345288d16bb1a8df40e1c
msgid "[Qwen2.5-Math](https://github.com/QwenLM/Qwen2.5-Math): 1.5B, 7B, and 72B models"
msgstr "[Qwen2.5-Math](https://github.com/QwenLM/Qwen2.5-Math): 1.5B、 7B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:51
#: b45ed6f1601c41f8a33f6b2b6ff8b47b
msgid "Qwen-Math-RM: the reward models for mathematics"
msgstr "通义千问 Math RM:数学奖励模型"
#: ../../Qwen/source/getting_started/concepts.md:52
#: 286e8dd455ef4bab91821d399dd4a582
msgid "[Qwen2-Math-RM](https://github.com/QwenLM/Qwen2-Math): 72B models"
msgstr "[Qwen2-Math-RM](https://github.com/QwenLM/Qwen2-Math): 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:53
#: 81eb8401de1646309924a74e633b9b45
msgid "[Qwen2.5-Math-RM](https://github.com/QwenLM/Qwen2.5-Math): 72B models"
msgstr "[Qwen2.5-Math-RM](https://github.com/QwenLM/Qwen2.5-Math): 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:54
#: e0cd026299ba4809a86504afbe2dd8d5
msgid "[Qwen2.5-Math-PRM](https://github.com/QwenLM/Qwen2.5-Math): 7B and 72B models"
msgstr "[Qwen2.5-Math-PRM](https://github.com/QwenLM/Qwen2.5-Math): 7B 及 72B 模型"
#: ../../Qwen/source/getting_started/concepts.md:56
#: acec8c22ff094ebe8295cad38ec7a8db
msgid "**In this document, our focus is Qwen, the language models.**"
msgstr "**本文档针对通义千问 (Qwen) 语言模型。**"
#: ../../Qwen/source/getting_started/concepts.md:58
#: e1e6ade4e85b4975bf992ed0a9c99140
msgid "Causal Language Models"
msgstr "因果语言模型 (Causal Language Models)"
#: ../../Qwen/source/getting_started/concepts.md:60
#: 593921d01e7a41caa52eda69db81c908
msgid "Causal language models, also known as autoregressive language models or decoder-only language models, are a type of machine learning model designed to predict the next token in a sequence based on the preceding tokens. In other words, they generate text one token at a time, using the previously generated tokens as context. The \"causal\" aspect refers to the fact that the model only considers the past context (the already generated tokens) when predicting the next token, not any future tokens."
msgstr "因果语言模型 (causal Language Models),也被称为自回归语言模型 (autoregressive language models) 或仅解码器语言模型 (decoder-only language models) ,是一种机器学习模型,旨在根据序列中的前导 token 预测下一个 token 。换句话说,它使用之前生成的 token 作为上下文,一次生成一个 token 的文本。\"因果\"方面指的是模型在预测下一个 token 时只考虑过去的上下文(即已生成的 token ),而不考虑任何未来的 token 。"
#: ../../Qwen/source/getting_started/concepts.md:64
#: 4b31da2c06c54107857edcb2764e0019
msgid "Causal language models are widely used for various natural language processing tasks involving text completion and generation. They have been particularly successful in generating coherent and contextually relevant text, making them a cornerstone of modern natural language understanding and generation systems."
msgstr "因果语言模型被广泛用于涉及文本补全和生成的各种自然语言处理任务。它们在生成连贯且具有上下文关联性的文本方面尤其成功,这使得它们成为现代自然语言理解和生成系统的基础。"
#: ../../Qwen/source/getting_started/concepts.md:67
#: 98f73b1f049641038ec1b310a219b209
msgid "**Takeaway: Qwen models are causal language models suitable for text completion.**"
msgstr "**要点:Qwen 模型是适用于文本补全的因果语言模型。**"
#: ../../Qwen/source/getting_started/concepts.md
#: 2f5c19be905046e1ae669119e3bb6e7c
msgid "Learn more about language models"
msgstr "了解更多关于语言模型的信息"
#: ../../Qwen/source/getting_started/concepts.md:71
#: 557d7c8bafb94a34b76b6d96a3ce46ff
msgid "They are three main kinds of models that are commonly referred to as language models in deep learning:"
msgstr "在深度学习中,被称为语言模型的主要有三类:"
#: ../../Qwen/source/getting_started/concepts.md:72
#: 89ef0f95d0f5492f877ddceb0233d2fc
msgid "Sequence-to-sequence models: T5 and the likes"
msgstr "序列到序列模型 (sequence-to-sequence models):T5及其类似模型"
#: ../../Qwen/source/getting_started/concepts.md:74
#: 80f14b7e5beb41d7920772b053681e24
msgid "Sequence-to-sequence models use both an encoder to capture the entire input sequence and a decoder to generate an output sequence. They are widely used for tasks like machine translation, text summarization, etc."
msgstr "序列到序列模型同时使用编码器来捕获整个输入序列,以及解码器来生成输出序列。它们广泛应用于诸如机器翻译、文本摘要等任务。"
#: ../../Qwen/source/getting_started/concepts.md:77
#: 0b15c87feae5409f80999e86ad5f5942
msgid "Bidirectional models or encoder-only models: BERT and the likes"
msgstr "双向模型 (bidirectional models) 或仅编码器模型 (encoder-only models) :BERT及其类似模型"
#: ../../Qwen/source/getting_started/concepts.md:79
#: 7439fe506ee64fbfaba86bb409cb76ca
msgid "Bidirectional models can access both past and future context in a sequence during training. They cannot generate sequential outputs in real-time due to the need for future context. They are widely used as embedding models and subsequently used for text classification."
msgstr "双向模型在训练期间可以访问序列中的过去和未来上下文。由于需要未来上下文,它们无法实时生成顺序输出。它们广泛用作嵌入模型,并随后用于文本分类。"
#: ../../Qwen/source/getting_started/concepts.md:83
#: c7f7ae809802445bbaafc7d7f783c71a
msgid "Casual language models or decoder-only models: GPT and the likes"
msgstr "因果语言模型 (casual language models) 或仅解码器模型 (decoder-only models) :GPT及其类似模型"
#: ../../Qwen/source/getting_started/concepts.md:85
#: b2825bdbf41c485c849444fc734fde43
msgid "Causal language models operate unidirectionally in a strictly forward direction, predicting each subsequent word based only on the previous words in the sequence. This unidirectional nature ensures that the model's predictions do not rely on future context, making them suitable for tasks like text completion and generation."
msgstr "因果语言模型以严格向前的单向方式运行,仅根据序列中的前导词汇预测每个后续词汇。这种单向性确保了模型的预测不依赖于未来上下文,使它们适合于文本补全和生成等任务。"
#: ../../Qwen/source/getting_started/concepts.md:89
#: 26bfa80a4e224b9ca3494f83fc37b0b6
msgid "Pre-training & Base models"
msgstr "预训练 (Pre-training) 和基模型 (Base models)"
#: ../../Qwen/source/getting_started/concepts.md:91
#: d75a1bc5132a43e8b41ce24b8021e7ab
msgid "Base language models are foundational models trained on extensive corpora of text to predict the next word in a sequence. Their main goal is to capture the statistical patterns and structures of language, enabling them to generate coherent and contextually relevant text. These models are versatile and can be adapted to various natural language processing tasks through fine-tuning. While adept at producing fluent text, they may require in-context learning or additional training to follow specific instructions or perform complex reasoning tasks effectively. For Qwen models, the base models are those without \"-Instruct\" indicators, such as Qwen2.5-7B and Qwen2.5-72B."
msgstr "基础语言模型 (base language models) 是在大量文本语料库上训练的基本模型,用于预测序列中的下一个词。它们的主要目标是捕捉语言的统计模式和结构,使它们能够生成连贯且具有上下文关联性的文本。这些模型具有多功能性,可以通过微调适应各种自然语言处理任务。虽然擅长生成流畅的文本,但它们可能需要情境学习 (in-context learning)或额外训练才能遵循特定指令或有效执行复杂推理任务。对于 Qwen 模型,基础模型是指那些没有 \"-Instruct\" 标识符的模型,例如 Qwen2.5-7B 和 Qwen2.5-72B 。"
#: ../../Qwen/source/getting_started/concepts.md:97
#: 7f7321ea84f34e29beabf6122a77ec64
msgid "**Takeaway: Use base models for in-context learning, downstream fine-tuning, etc.**"
msgstr "**要点:使用基础模型进行情境学习、下游微调等。**"
#: ../../Qwen/source/getting_started/concepts.md:99
#: b1d8ca8221c0494796dda85ac2456389
msgid "Post-training & Instruction-tuned models"
msgstr "后训练 (Post-training) 和指令微调模型 (Instruction-tuned models)"
#: ../../Qwen/source/getting_started/concepts.md:101
#: 2f55c1d2c9234c44ab55bf90fcb1b10f
msgid "Instruction-tuned language models are specialized models designed to understand and execute specific instructions in conversational styles. These models are fine-tuned to interpret user commands accurately and can perform tasks such as summarization, translation, and question answering with improved accuracy and consistency. Unlike base models, which are trained on large corpora of text, instruction-tuned models undergo additional training using datasets that contain examples of instructions and their desired outcomes, often in multiple turns. This kind of training makes them ideal for applications requiring targeted functionalities while maintaining the ability to generate fluent and coherent text. For Qwen models, the instruction-tuned models are those with the \"-Instruct\" suffix, such as Qwen2.5-7B-Instruct and Qwen2.5-72B-Instruct. [^instruct-chat]"
msgstr "指令微调语言模型 (Instruction-tuned language models) 是专门设计用于理解并以对话风格执行特定指令的模型。这些模型经过微调,能准确地解释用户命令,并能以更高的准确性和一致性执行诸如摘要、翻译和问答等任务。与在大量文本语料库上训练的基础模型不同,指令调优模型会使用包含指令示例及其预期结果的数据集进行额外训练,通常涵盖多个回合。这种训练方式使它们非常适合需要特定功能的应用,同时保持生成流畅且连贯文本的能力。对于 Qwen 模型,指令调优模型是指带有 \"-Instruct\" 后缀的模型,例如 Qwen2.5-7B-Instruct 和 Qwen2.5-72B-Instruct 。 [^instruct-chat]"
#: ../../Qwen/source/getting_started/concepts.md:107
#: d5b5590ccf434715bd57d0746f196cfe
msgid "**Takeaway: Use instruction-tuned models for conducting tasks in conversations, downstream fine-tuning, etc.**"
msgstr "**要点:使用指令微调模型进行对话式的任务执行、下游微调等。**"
#: ../../Qwen/source/getting_started/concepts.md:112
#: 5dc4cca1e5104c67b1a3bcdd004e7a9d
msgid "Tokens & Tokenization"
msgstr "Tokens & Tokenization"
#: ../../Qwen/source/getting_started/concepts.md:114
#: 9e3a74bf95fd40e49fef921a0d0df6ff
msgid "Tokens represent the fundamental units that models process and generate. They can represent texts in human languages (regular tokens) or represent specific functionality like keywords in programming languages (control tokens [^special]). Typically, a tokenizer is used to split text into regular tokens, which can be words, subwords, or characters depending on the specific tokenization scheme employed, and furnish the token sequence with control tokens as needed. The vocabulary size, or the total number of unique tokens a model recognizes, significantly impacts its performance and versatility. Larger language models often use sophisticated tokenization methods to handle the vast diversity of human language while keeping the vocabulary size manageable. Qwen use a relatively large vocabulary of 151,646 tokens in total."
msgstr "token 代表模型处理和生成的基本单位。它们可以表示人类语言中的文本(常规 token),或者表示特定功能,如编程语言中的关键字(控制 token [^special])。通常,使用 tokenizer 将文本分割成常规 token ,这些 token 可以是单词、子词或字符,具体取决于所采用的特定 tokenization 方案,并按需为 token 序列添加控制 token 。词表大小,即模型识别的唯一 token 总数,对模型的性能和多功能性有重大影响。大型语言模型通常使用复杂的 tokenization 来处理人类语言的广阔多样性,同时保持词表大小可控。Qwen 词表相对较大,有 15 1646 个 token。"
#: ../../Qwen/source/getting_started/concepts.md:123
#: 9e1c049b23fc403ea61919a755ae865a
msgid "**Takeaway: Tokenization method and vocabulary size is important.**"
msgstr "**要点:tokenization 和词表大小很重要。**"
#: ../../Qwen/source/getting_started/concepts.md:125
#: 0a01476839134505b1e2e004f67c876b
msgid "Byte-level Byte Pair Encoding"
msgstr "Byte-level Byte Pair Encoding"
#: ../../Qwen/source/getting_started/concepts.md:127
#: e461340d6e834aaeb233649a70618165
msgid "Qwen adopts a subword tokenization method called Byte Pair Encoding (BPE), which attempts to learn the composition of tokens that can represent the text with the fewest tokens. For example, the string \" tokenization\" is decomposed as \" token\" and \"ization\" (note that the space is part of the token). Especially, the tokenization of Qwen ensures that there is no unknown words and all texts can be transformed to token sequences."
msgstr "Qwen采用了名为字节对编码(Byte Pair Encoding,简称BPE)的子词tokenization方法,这种方法试图学习能够用最少的 token 表示文本的 token 组合。例如,字符串\"tokenization\"被分解为\" token\"和\"ization\"(注意空格是 token 的一部分)。特别地,Qwen的 tokenization 确保了不存在未知词汇,并且所有文本都可以转换为 token 序列。"
#: ../../Qwen/source/getting_started/concepts.md:131
#: af40a128cbe44fb59a057f9477737197
msgid "There are 151,643 tokens as a result of BPE in the vocabulary of Qwen, which is a large vocabulary efficient for diverse languages. As a rule of thumb, 1 token is 3~4 characters for English texts and 1.5~1.8 characters for Chinese texts."
msgstr "Qwen词表中因BPE而产生的 token 数量为 15 1643 个,这是一个适用于多种语言的大词表。一般而言,对于英语文本,1个token大约是3~4个字符;而对于中文文本,则大约是1.5~1.8个汉字。"
#: ../../Qwen/source/getting_started/concepts.md:134
#: 3b92bf813f14474f842584fa9bf4fdee
msgid "**Takeaway: Qwen processes texts in subwords and there are no unknown words.**"
msgstr "**要点:Qwen 以子词形式处理文本,不存在未知词汇。**"
#: ../../Qwen/source/getting_started/concepts.md
#: b29e165e1810403dbcd90cfedd8c73a6
msgid "Learn more about tokenization in Qwen"
msgstr "了解更多"
#: ../../Qwen/source/getting_started/concepts.md:137
#: b7fa098dbce946c9847eb414f7d52b9e
msgid "Qwen uses byte-level BPE (BBPE) on UTF-8 encoded texts. It starts by treating each byte as a token and then iteratively merges the most frequent pairs of tokens occurring the texts into larger tokens until the desired vocabulary size is met."
msgstr "Qwen 使用基于字节的BPE (BBPE) 对UTF-8编码的文本进行处理。它开始时将每个字节视为一个 token ,然后迭代地将文本中最频繁出现的 token 对合并成更大的 token,直到达到所需的词表大小。"
#: ../../Qwen/source/getting_started/concepts.md:140
#: 504bb23b689949dd9bbee78f97d7e0a0
msgid "In byte-level BPE, minimum 256 tokens are needed to tokenize every piece of text and avoid the out of vocabulary (OOV) problem. In comparison, character-level BPE needs every Unicode character in its vocabulary to avoid OOV and the Unicode Standard contains 154,998 characters as of Unicode Version 16.0."
msgstr "在基于字节的BPE中,至少需要256个 token 来对每段文本进行 tokenization,并避免未登录词(out of vocabulary, OOV)问题。相比之下,基于字符的 BPE 需要其词表中包含所有 Unicode 字符以避免未登录词,而截至 Unicode 版本16.0,Unicode标准包含 15 4998 个字符。"
#: ../../Qwen/source/getting_started/concepts.md:143
#: cfed44d0c905486cb7e12838014249e1
msgid "One limitation to keep in mind for byte-level BPE is that the individual tokens in the vocabulary may not be seemingly semantically meaningful or even valid UTF-8 byte sequences, and in certain aspects, they should be viewed as a text compression scheme."
msgstr "基于字节的BPE的一个限制是,词表中的个别 token 可能看似没有语义意义,甚至不是有效的 UTF-8 字节序列,在某些方面,它们应该被视为一种文本压缩方案。"
#: ../../Qwen/source/getting_started/concepts.md:146
#: 4c6140ebdb0742e199793a7da566943e
msgid "Control Tokens & Chat Template"
msgstr "控制 Token 和 对话模板"
#: ../../Qwen/source/getting_started/concepts.md:148
#: 7fab9c7227b94996bbdd30a2dd6a11cc
msgid "Control tokens and chat templates both serve as mechanisms to guide the model's behavior and outputs."
msgstr "控制 token 和对话模板都作为指导模型行为和输出的机制。"
#: ../../Qwen/source/getting_started/concepts.md:150
#: 9d38b62cddc34442bffc173b6c5e15ea
msgid "Control tokens are special tokens inserted into the sequence that signifies meta information. For example, in pre-training, multiple documents may be packed into a single sequence. For Qwen, the control token \"<|endoftext|>\" is inserted after each document to signify that the document has ended and a new document will proceed."
msgstr "控制token是插入到序列中的特殊token,表示元信息。例如,在预训练中,多个文档可以被打包成一个单一的序列。对于Qwen,控制令牌 \"<|endoftext|>\" 在每个文档后插入,表示文档已经结束,新的文档将开始。"
#: ../../Qwen/source/getting_started/concepts.md:154
#: aed5af70b3de447b9b3c1312f040f103
msgid "Chat templates provide a structured format for conversational interactions, where predefined placeholders or prompts are used to elicit responses from the model that adhere to a desired dialogue flow or context. Different models may use different kinds of chat template to format the conversations. It is crucial to use the designated one to ensure the precise control over the LLM's generation process."
msgstr "对话模板为对话交互提供了结构化的格式,其中使用预定义的占位符或提示来从模型中引发遵循期望的对话流程或上下文的响应。不同的模型可能使用不同类型的对话模板来格式化对话。使用指定的模板对于确保对语言模型生成过程的精确控制至关重要。"
#: ../../Qwen/source/getting_started/concepts.md:158
#: 7acbb7b28f1746a8b779a004a7dc2d93
msgid "Qwen uses the following format (ChatML[^chatml]), making use of control tokens to format each turn in the conversations"
msgstr "Qwen使用以下格式(ChatML[^chatml]),利用控制 token 来格式化对话中的每一轮。"
#: ../../Qwen/source/getting_started/concepts.md:163
#: 33f3aee8869748fa9f7a51c7efa76338
msgid "The user input take the role of `user` and the model generation takes the role of `assistant`. Qwen also supports the meta message that instruct the model to perform specific actions or generate text with certain characteristics, such as altering tone, style, or content, which takes the role of `system` and the content defaults to \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\""
msgstr "用户输入扮演 `user` 的 role ,而模型生成则承担 `assistant` 的 role 。 Qwen 还支持元消息,该消息指导模型执行特定操作或生成具有特定特性的文本,例如改变语气、风格或内容,这将承担 `system` 的 role,且内容默认为 \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\" 。"
#: ../../Qwen/source/getting_started/concepts.md:166
#: 0129cbc394614f5f94047592df13c9b6
msgid "The following is a full example:"
msgstr "下面为一个完整示例"
#: ../../Qwen/source/getting_started/concepts.md:183
#: 59bab0422fa34a19ab2995e6ff15dc56
msgid "Starting from Qwen2.5, the Qwen model family including multimodal and specialized models will use a unified vocabulary, which contains control tokens from all subfamilies. There are 22 control tokens in the vocabulary of Qwen2.5, making the vocabulary size totaling 151,665:"
msgstr "从 Qwen2.5 开始,Qwen 模型家族,包括多模态和专项模型,将使用统一的词汇表,其中包含了所有子系列的控制 token 。Qwen2.5 的词汇表中有 22 个控制 token,使得词汇表的总规模达到 15 1665 。"
#: ../../Qwen/source/getting_started/concepts.md:185
#: 701bd6f896634b0aaf2920d883268a16
msgid "1 general: `<|endoftext|>`"
msgstr "通用 token 1个:`<|endoftext|>`"
#: ../../Qwen/source/getting_started/concepts.md:186
#: 7e78239f93a245dbb046d4ae2afe8a72
msgid "2 for chat: `<|im_start|>` and `<|im_end|>`"
msgstr "对话 token 2个:`<|im_start|>` 和 `<|im_end|>`"
#: ../../Qwen/source/getting_started/concepts.md:187
#: eb686086dfe44d53a5cdfc98e9bbaad8
msgid "2 for tool use: `<tool_call>` and `</tool_call>`"
msgstr "工具调用 token 2个: `<tool_call>` 和 `</tool_call>`"
#: ../../Qwen/source/getting_started/concepts.md:188
#: c8259cada9e94790a759a4b1f8edaf2d
msgid "11 for vision"
msgstr "视觉相关 token 11个"
#: ../../Qwen/source/getting_started/concepts.md:189
#: 9b67870139b144c8ae4451e3deb1c1c5
msgid "6 for coding"
msgstr "代码相关 token 6个"
#: ../../Qwen/source/getting_started/concepts.md:191
#: 32c9581187f640d2a37cca85390bf1de
msgid "**Takeaway: Qwen uses ChatML with control tokens for chat template.**"
msgstr "**要点: Qwen 使用带有控制 token 的 ChatML 作为对话模板。**"
#: ../../Qwen/source/getting_started/concepts.md:195
#: 74d8b323a0864a9c94a78f154a5c86c0
msgid "Length Limit"
msgstr "长度限制"
#: ../../Qwen/source/getting_started/concepts.md:197
#: 2833c71b35d94ff0b6825f86bc9be098
msgid "As Qwen models are causal language models, in theory there is only one length limit of the entire sequence. However, since there is often packing in training and each sequence may contain multiple individual pieces of texts. **How long the model can generate or complete ultimately depends on the use case and in that case how long each document (for pre-training) or each turn (for post-training) is in training.**"
msgstr "由于 Qwen 模型是因果语言模型,理论上整个序列只有一个长度限制。然而,由于在训练中通常存在打包现象,每个序列可能包含多个独立的文本片段。**模型能够生成或完成的长度最终取决于具体的应用场景,以及在这种情况下,预训练时每份文档或后训练时每轮对话的长度。**"
#: ../../Qwen/source/getting_started/concepts.md:201
#: 1d25c6232d924639b313a1a66d1990c9
msgid "For Qwen2.5, the packed sequence length in training is 32,768 tokens.[^yarn] The maximum document length in pre-training is this length. The maximum message length for user and assistant is different in post-training. In general, the assistant message could be up to 8192 tokens."
msgstr "对于Qwen2.5,在训练中的打包序列长度为 3 2768 个 token [^yarn]。预训练中的最大文档长度即为此长度。而后训练中,user和assistant的最大消息长度则有所不同。一般情况下,assistant消息长度可达 8192 个 token。"
#: ../../Qwen/source/getting_started/concepts.md:209
#: f39c2748eccb486794c941d23b23835c
msgid "**Takeaway: Qwen2.5 models can process texts of 32K or 128K tokens and up to 8K tokens can be assistant output.**"
msgstr "**要点:Qwen2 模型可以处理 32K 或 128K token 长的文本,其中 8K 长度可作为输出。**"
#: ../../Qwen/source/getting_started/concepts.md:109
#: 7195ff6a5d1a4e6881f272081c9885d7
msgid "Previously, they are known as the chat models and with the \"-Chat\" suffix. Starting from Qwen2, the name is changed to follow the common practice. For Qwen, \"-Instruct\" and \"-Chat\" should be regarded as synonymous."
msgstr "此前,它们被称为对话模型,并带有\"-Chat\"后缀。从Qwen2开始,名称变更为遵循通用做法。对于Qwen,\"-Instruct\"和\"-Chat\"应被视为同义词。"
#: ../../Qwen/source/getting_started/concepts.md:121
#: f50caec63c8948a894dbf8c718f0b2d8
msgid "Control tokens can be called special tokens. However, the meaning of special tokens need to be interpreted based on the contexts: special tokens may contain extra regular tokens."
msgstr "控制 token 也可以称为“特殊 token”。但是,特殊 token 的意义需要根据上下文进行解释:特殊 token 也可能包含额外的常规 token。"
#: ../../Qwen/source/getting_started/concepts.md:193
#: fc70e6f93b71452ca0d09aa0ff28dd54
msgid "For historical reference only, ChatML is first described by the OpenAI Python SDK. The last available version is [this](https://github.com/openai/openai-python/blob/v0.28.1/chatml.md). Please also be aware that that document lists use cases intended for OpenAI models. For Qwen2.5 models, please only use as in our guide."
msgstr "仅供历史参考,ChatML最初由OpenAI的Python SDK描述。可获取的最新版本是[这个](https//github.com/openai/openai-python/blob/v0.28.1/chatml.md)。请注意,该文档列出的应用案例是为OpenAI模型设计的。对于Qwen2.5模型,请仅按照我们的指南使用。"
#: ../../Qwen/source/getting_started/concepts.md:206
#: a08b83b36c2d4e8d8f3dbb020ecb37a2
msgid "The sequence length can be extended to 131,072 tokens for Qwen2.5-7B, Qwen2.5-14B, Qwen2.5-32B, and Qwen2.5-72B models with YaRN. Please refer to the model card on how to enable YaRN in vLLM."
msgstr "使用YaRN,Qwen2.5-7B、Qwen2.5-14B、Qwen2.5-32B和Qwen2-72B模型的序列长度可以扩展到13 1072个token。请参考模型卡片了解如何在 vLLM 中启用 YaRN。"
#~ msgid "There is the proprietary version hosted exclusively at [Alibaba Cloud \\[zh\\]](https://help.aliyun.com/zh/model-studio/developer-reference/tongyi-qianwen-llm/) and the open-weight version."
#~ msgstr "通义千问分为[闭源](https://help.aliyun.com/zh/model-studio/developer-reference/tongyi-qianwen-llm/)和开源两大版本。"
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:2
#: 6d4d3bb3020f4e4d8dba0ca5778cdcae
msgid "Performance of Quantized Models"
msgstr "量化模型效果评估"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:5
#: 3a541cd8cba74edf9b06b46f59eaaf38
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:7
#: 3a95fc299de141dea4fc729ef907ce17
msgid "This section reports the generation performance of quantized models (including GPTQ and AWQ) of the Qwen2 series. Specifically, we report:"
msgstr "本部分介绍Qwen2量化模型(包括GPTQ与AWQ量化方案)的效果评估,有以下数据集"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:11
#: 9386a3b95eb340568185da78224a1ccd
msgid "MMLU (Accuracy)"
msgstr "MMLU (准确率)"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:12
#: 3cd93b881c90488895c61298104bc7fb
msgid "C-Eval (Accuracy)"
msgstr "C-Eval (准确率)"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:13
#: 7ac4bb515b0a49699d4eb95fc433bb51
msgid "IFEval (Strict Prompt-Level Accuracy)"
msgstr "IFEval (提示词级的严格准确率,Strict Prompt-Level Accuracy)"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:15
#: 08e3f35820344c93877618815650b866
msgid "We use greedy decoding in evaluating all models."
msgstr "所有模型均使用贪心解码。"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:18
#: 9aec40221219455d8fc4e473e5acf09c
msgid "Quantization"
msgstr "量化模型"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:18
#: 93f274f4751f445d85f04937b25c7f7d
msgid "Average"
msgstr "平均"
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:18
#: 776612f5dd4a40d98976bdfe4896508c
msgid "MMLU"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:18
#: f6e8014116cf4179a934d601ee61d04d
msgid "C-Eval"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:18
#: 0c40e96c4a3b4cdeaaf1a95ff1aa8f98
msgid "IFEval"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: 773ccb0f10bd4cf690e819af51c40e76
msgid "Qwen2-72B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: 71e180f75e624b738d56ec2a1fad253c 7ebe73a2e96445c4bb733845c3190240
#: bd5a3b8861d646fa9e8d8bc51bb1b80c cc79a78b34f94c18b7bdaf1bfcc8824d
msgid "BF16"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:22
#: 08517ffc3e6e4ceb812c3d8710307266 2e879d3d1fef4c878b097550d745e7ae
msgid "81.3"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: f795aa42cf7d42ccb5a573a5f44be79f
msgid "82.3"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: 01c54f3da3454e178a07a9f88ed5302b
msgid "83.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:20
#: 7651df5ccaa14b11a3a89827a5265ae8
msgid "77.6"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:22
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:30
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:38
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:46
#: 04de04c9ff3640f096301e76fdd291de 301aa8e494ff4fe4aefcc8cfb7a4c065
#: d395be41cf144318a1faeccc6f6965c8 ec513d10a75d44b8bd134287a57b5cdd
msgid "GPTQ-Int8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:22
#: 411166db878d4d8f8515e9f5d78a651c
msgid "80.7"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:22
#: e63ce8a2f1cc4cec9b52521015e2aebe
msgid "83.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:22
#: e6be6c30e0d740d39c6c8807e2d4f5f8
msgid "77.5"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:24
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:32
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:40
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:48
#: 21720ff324814b2b865f37a40c3586b5 4644a49bcdfd457b84eb5b2771177d78
#: 560dcb4bfa6e45088faefdb504d629a5 7044a0d2dd6945138ea385287ab5bf33
msgid "GPTQ-Int4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:24
#: 1cb55cd40b3c484d8213c15375b2ad68
msgid "81.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:24
#: 32b889d9ef014f2ab6be6881e20d40ae
msgid "80.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:24
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:26
#: ba86de9eb27b40e0ba6a57580aed89c3 eed2e99c0edc426e81ec24e961fe971e
msgid "83.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:24
#: ee3a3132082048d5b79721fa84f6f816
msgid "78.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:26
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:34
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:42
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:50
#: 632f832fc1f249fa92764538b698550d 8c7ccf4f75f44b27bb1b5aac544836cb
#: b473937c2be94c3490483bb5a820e2fe bc1abd77dd27412992d21bda1831a2a8
msgid "AWQ"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:26
#: 2711a3f907224e51ba30818b2e730a30
msgid "80.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:26
#: ca9624c0258b425ba53f024b086c173a
msgid "80.5"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:26
#: 2f4b57d4394c4cb187407145ce8d5f1e
msgid "76.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: 48cc75ed7bf04778b327c7b03d418e37
msgid "Qwen2-7B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: 75182905b74a41099ff859fb86752e99
msgid "66.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: 80cda712e9dc482fac24952d3bb27b28
msgid "70.5"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: 0701d66bc3084aef8937e4b687705f37
msgid "77.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:28
#: 8efb5c133644420c808dfd78f8fcde2f
msgid "53.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:30
#: 2076e02516bd4ff1856bc12a8d6bd320
msgid "66.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:30
#: 588f4ad13845491d9589ea094265d532
msgid "69.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:30
#: 0c79963a231a402eb6db1671e851be38
msgid "76.7"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:30
#: 5d525163672f456289990489459466ae
msgid "52.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:32
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:34
#: 9283ca6491194b59a5edf57228f9b5af a4123c0691a442f6850ae25615c108af
msgid "64.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:32
#: 9e7ffb49aac34129894b0582c0d8aba1
msgid "67.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:32
#: 7c2fc310e5764b7fbf6034ffd3a5d26d
msgid "75.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:32
#: 33e6b6e590a64c08adccf0bb161c1046
msgid "49.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:34
#: b3cbe7665bdf4f4388f015fb6606540e
msgid "67.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:34
#: a47d3b52e80249f986c4339b9d3fff10
msgid "73.6"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:34
#: d76543cff2df434185fbe51712024679
msgid "51.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: cee2c965036d41c6a93ffbf9a9788e4b
msgid "Qwen2-1.5B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: 8c9d1cd8fb5a4d75b85d0edcb9ed69df
msgid "48.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: f5e05b0942a24e2b9cac753932ad51c4
msgid "52.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: c6f81ec529004598aa14c55228ff9538
msgid "63.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:36
#: 5b2b4092d04f4d02a56bd0df5807e2c5
msgid "29.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:38
#: 08d2bf82e83f4a889d622c72c1e1b3b2
msgid "48.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:38
#: 3d8ea738153f467ba55d50e6bf0f84c0
msgid "53.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:38
#: 8755d6c4c1e64cd38122f08a92bd90ca
msgid "62.5"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:38
#: 1c403dbb3692472a88706cb4b4a1f0f3
msgid "28.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:40
#: f3f43ea77edc4ff0969e2466e6fe13e1
msgid "45.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:40
#: 9d070c4b9f3e4fceb27b29ecdf90eb41
msgid "50.7"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:40
#: 24ff991704c440deb34b92512f89c371
msgid "57.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:40
#: b4645b7317a44cb795fc4190149dd0e0
msgid "27.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:42
#: eeee44d1d65647569999de94e72c00cb
msgid "46.5"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:42
#: 41630bee9142494c801083cd5d213dc0
msgid "51.6"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:42
#: 762395735fb34bccbc4d057968bbfbf1
msgid "58.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:42
#: f5915835bcb24051bebed452fc398728
msgid "29.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: 39108e2a66444ca780a720f115251308
msgid "Qwen2-0.5B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:50
#: 2795adace57c401cb8bacc00082dfd53 a59271d53e434d17a8a0a19529158f2c
msgid "34.4"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: c93982789e4e453eb5a02d64f02cb74f
msgid "37.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: 213dfd43b2254a2caec1d4b1d231ed55
msgid "45.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:44
#: 11de22e2a04a4c04b0b91d09d028b853
msgid "20.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:46
#: 84b6570bcc8d4c6598336d5bc9b9d36a
msgid "32.6"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:46
#: b79e88232d114f43a179dcc5b0477c97
msgid "35.6"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:46
#: 1166b675e1e64e18a82c3219f321e248
msgid "43.9"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:46
#: fdf340d39b074778b55d36f477f8dc0a
msgid "18.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:48
#: ed930e1b13dd4c5caf80b2a180a1bcc3
msgid "29.7"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:48
#: c3d5617389634f7e96c66b4f869379a9
msgid "33.0"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:48
#: 4573b471c48d4028ad6fb378e75f40aa
msgid "39.2"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:48
#: c867c42e916f493b9715b1adf656ddcb
msgid "16.8"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:50
#: 20d4c89c335648bb93f07ebfb8ce9fce
msgid "31.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:50
#: 25400aeaf79d49cb914ffa5ff26bfe03
msgid "42.1"
msgstr ""
#: ../../Qwen/source/getting_started/quantization_benchmark.rst:50
#: d15e246b65b0427d970b78deffd8c2bc
msgid "16.7"
msgstr ""
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/getting_started/quickstart.md:1
#: 595827c46f2e4884b69954cf22e0e957
msgid "Quickstart"
msgstr "快速开始"
#: ../../Qwen/source/getting_started/quickstart.md:3
#: 725288359306417a943352cef10f831c
msgid "This guide helps you quickly start using Qwen3. We provide examples of [Hugging Face Transformers](https://github.com/huggingface/transformers) as well as [ModelScope](https://github.com/modelscope/modelscope), and [vLLM](https://github.com/vllm-project/vllm) for deployment."
msgstr "本指南帮助您快速上手 Qwen3 的使用,并提供了如下示例: [Hugging Face Transformers](https://github.com/huggingface/transformers) 以及 [ModelScope](https://github.com/modelscope/modelscope) 和 [vLLM](https://github.com/vllm-project/vllm>) 在部署时的应用实例。"
#: ../../Qwen/source/getting_started/quickstart.md:6
#: 6bfc020002af4b4eaad8adf3902e30ac
msgid "You can find Qwen3 models in [the Qwen3 collection](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) at HuggingFace Hub and [the Qwen3 collection](https://www.modelscope.cn/collections/Qwen3-9743180bdc6b48) at ModelScope."
msgstr "你可以在 HuggingFace Hub 的 [Qwen3 collection](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) 或 ModelScope 的 [Qwen3 collection](https://www.modelscope.cn/collections/Qwen3-9743180bdc6b48) 中寻找 Qwen3 模型。"
#: ../../Qwen/source/getting_started/quickstart.md:8
#: 1dbf0833f8a5407b8d00056d029eb9d8
msgid "Transformers"
msgstr "Transformers"
#: ../../Qwen/source/getting_started/quickstart.md:10
#: cbe2f022b0b54729a1d3627cb19ad99f
msgid "To get a quick start with Qwen3, you can try the inference with `transformers` first. Make sure that you have installed `transformers>=4.51.0`. We advise you to use Python 3.10 or higher, and PyTorch 2.6 or higher."
msgstr "要快速上手 Qwen3 ,我们建议您首先尝试使用 `transformers` 进行推理。请确保已安装了 `transformers>=4.51.0` 版本。我们建议您使用 Python 3.10 或以上版本, PyTorch 2.6 或以上版本。"
#: ../../Qwen/source/getting_started/quickstart.md:14
#: bd305d4f44484f75bfe0c02a9eda68c4
msgid "The following is a very simple code snippet showing how to run Qwen3-8B:"
msgstr "以下是一个非常简单的代码片段示例,展示如何运行 Qwen3 模型:"
#: ../../Qwen/source/getting_started/quickstart.md:63
#: 0bb48ceb71854514be78497721308702
msgid "Qwen3 will think before respond, similar to QwQ models. This means the model will use its reasoning abilities to enhance the quality of generated responses. The model will first generate thinking content wrapped in a `<think>...</think>` block, followed by the final response."
msgstr "Qwen3 将在实际回复前思考,与 QwQ 模型类似。这意味着模型将运用其推理能力来提升生成回复的质量。模型会首先生成包含在 `<think>...</think>` 块中的思考内容,随后给出最终回复。"
#: ../../Qwen/source/getting_started/quickstart.md:67
#: d110ccfe1d834169992f03bcf932e250
msgid "Hard Switch: To strictly disable the model's thinking behavior, aligning its functionality with the previous Qwen2.5-Instruct models, you can set `enable_thinking=False` when formatting the text."
msgstr "硬开关:为了严格禁用模型的思考行为,使其功能与之前的Qwen2.5-Instruct模型保持一致,您可以在格式化文本时设置`enable_thinking=False`。"
#: ../../Qwen/source/getting_started/quickstart.md:77
#: 4bceeb7e0179470f88620507ade7915b
msgid "It can be particularly useful in scenarios where disabling thinking is essential for enhancing efficiency."
msgstr "在某些需要通过禁用思考来提升效率的场景中,这一功能尤其有用。"
#: ../../Qwen/source/getting_started/quickstart.md:79
#: 16b4b43b7a7b43a698118e17d778a6dd
msgid "Soft Switch: Qwen3 also understands the user's instruction on its thinking behaviour, in particular, the soft switch `/think` and `/no_think`. You can add them to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations."
msgstr "软开关:Qwen3 还能够理解用户对其思考行为的指令,特别是软开关 `/think` 和 `/no_think`。您可以将这些指令添加到用户 (user) 或系统 (system) 消息中,以在对话轮次之间灵活切换模型的思考模式。在多轮对话中,模型将遵循最近的指令。"
#: ../../Qwen/source/getting_started/quickstart.md:85
#: 518d0395430f4920973e6da2753c1507
msgid "For thinking mode, use Temperature=0.6, TopP=0.95, TopK=20, and MinP=0 (the default setting in `generation_config.json`). DO NOT use greedy decoding, as it can lead to performance degradation and endless repetitions. For more detailed guidance, please refer to the Best Practices section."
msgstr "对于思考模式,使用 Temperature=0.6,TopP=0.95,TopK=20,以及 MinP=0(`generation_config.json` 中的默认设置)。不要使用贪婪解码,因为它可能导致性能下降和无尽的重复。更多详细指导,请参阅最佳实践部分。"
#: ../../Qwen/source/getting_started/quickstart.md:89
#: 80bf598dfdf048a791d05c6a21ccd425
msgid "For non-thinking mode, we suggest using Temperature=0.7, TopP=0.8, TopK=20, and MinP=0."
msgstr "对于非思考模式,我们建议使用 Temperature=0.7,TopP=0.8,TopK=20,以及 MinP=0。"
#: ../../Qwen/source/getting_started/quickstart.md:93
#: 7a585706796a4db9a9f34ec1241135b5
msgid "ModelScope"
msgstr "魔搭 (ModelScope)"
#: ../../Qwen/source/getting_started/quickstart.md:95
#: fbf6acee0f534a3d9197221626ce79e4
msgid "To tackle with downloading issues, we advise you to try [ModelScope](https://github.com/modelscope/modelscope). Before starting, you need to install `modelscope` with `pip`."
msgstr "为了解决下载问题,我们建议您尝试从 [ModelScope](https://github.com/modelscope/modelscope) 进行下载。开始之前,需要使用 `pip` 安装 `modelscope` 。"
#: ../../Qwen/source/getting_started/quickstart.md:98
#: e29964895f744793a18058022ad578b8
msgid "`modelscope` adopts a programmatic interface similar (but not identical) to `transformers`. For basic usage, you can simply change the first line of code above to the following:"
msgstr "`modelscope` 采用了与 `transformers` 类似(但不完全一致)的编程接口。对于基础使用,仅需将上面代码第一行做如下修改:"
#: ../../Qwen/source/getting_started/quickstart.md:105
#: 2686cab2a6f54fe7ae813a0aeeb04d14
msgid "For more information, please refer to [the documentation of `modelscope`](https://www.modelscope.cn/docs)."
msgstr "欲获取更多信息,请参考 [`modelscope` 文档](https://www.modelscope.cn/docs)。"
#: ../../Qwen/source/getting_started/quickstart.md:107
#: ce23fee238f8458599cc4d7e16a2e509
msgid "vLLM"
msgstr ""
#: ../../Qwen/source/getting_started/quickstart.md:109
#: cf0e10035e954a328775205ff39e9687
msgid "To deploy Qwen3, we advise you to use vLLM. vLLM is a fast and easy-to-use framework for LLM inference and serving. In the following, we demonstrate how to build a OpenAI-API compatible API service with vLLM."
msgstr "要部署 Qwen3 ,我们建议您使用 vLLM 。 vLLM 是一个用于 LLM 推理和服务的快速且易于使用的框架。以下,我们将展示如何使用 vLLM 构建一个与 OpenAI 兼容的 API 服务。"
#: ../../Qwen/source/getting_started/quickstart.md:113
#: 925651cdb57d478884f151b52834ab3c
msgid "First, make sure you have installed `vllm>=0.8.5`."
msgstr "首先,确保你已经安装 `vLLM>=0.8.5` :"
#: ../../Qwen/source/getting_started/quickstart.md:115
#: 4cb0c9b830984fafa3f5ee2e74dea6dc
msgid "Run the following code to build up a vLLM service. Here we take Qwen3-8B as an example:"
msgstr "运行以下代码以构建 vLLM 服务。此处我们以 Qwen3-8B 为例:"
#: ../../Qwen/source/getting_started/quickstart.md:122
#: c7b58160d10d43a2bb6e63572dbeff46
msgid "Then, you can use the [create chat interface](https://platform.openai.com/docs/api-reference/chat/completions/create) to communicate with Qwen:"
msgstr "然后,可以使用 [\"create chat\" interface](https://platform.openai.com/docs/api-reference/chat/completions/create>) 来与 Qwen 进行交流:"
#: ../../Qwen/source/getting_started/quickstart.md
#: 8f4c1e3692a34137ad9fbf6d7a50969c c685b92ca0ea49c0b3925b24cd43317c
msgid "curl"
msgstr ""
#: ../../Qwen/source/getting_started/quickstart.md
#: 147be07b6f3141c08f8c707a9f06403c ffc3d81775264a00ad0d7bcb85ff6caf
msgid "Python"
msgstr ""
#: ../../Qwen/source/getting_started/quickstart.md:142
#: ../../Qwen/source/getting_started/quickstart.md:192
#: 9a1026d8cf10458b8a3e717e105e8d5e ed7621681c36472a90b4be9c1fe98355
msgid "You can use the API client with the `openai` Python SDK as shown below:"
msgstr "您可以按照下面所示的方式,使用 `openai` Python SDK中的客户端:"
#: ../../Qwen/source/getting_started/quickstart.md:169
#: a5ae1f193b044cb982e5ea4d98b30afb
msgid "While the soft switch is always available, the hard switch is also availabe in vLLM through the following configuration to the API call. To disable thinking, use"
msgstr "虽然软开关始终可用,但硬开关也可以通过以下 API 调用配置在 vLLM 中使用。要禁用思考,请使用"
#: ../../Qwen/source/getting_started/quickstart.md:221
#: a200dc6f700d40f89e22d7745a5f01f0
msgid "Next Step"
msgstr "下一步"
#: ../../Qwen/source/getting_started/quickstart.md:223
#: e22d4b679b36490fb4877ae01bfb515a
msgid "Now, you can have fun with Qwen3 models. Would love to know more about its usage? Feel free to check other documents in this documentation."
msgstr "现在,您可以尽情探索 Qwen3 模型的各种用途。若想了解更多,请随时查阅本文档中的其他内容。"
#~ msgid "Hugging Face Transformers & ModelScope"
#~ msgstr ""
#~ msgid "Install with `pip`:"
#~ msgstr "使用 `pip` 安装:"
#~ msgid "Install with `conda`:"
#~ msgstr "使用 `conda` 安装:"
#~ msgid "Install from source:"
#~ msgstr "从源代码安装:"
#~ msgid "As you can see, it's just standard usage for casual LMs in `transformers`!"
#~ msgstr "如您所见,与 `transformers` 的常规使用方式无二!"
#~ msgid "Streaming Generation"
#~ msgstr "流式生成"
#~ msgid "Streaming mode for model chat is simple with the help of `TextStreamer`. Below we show you an example of how to use it:"
#~ msgstr "借助 `TextStreamer` , 模型生成的流式模式变得非常简单。下面我们将展示一个如何使用它的示例:"
#~ msgid "It will print the text to the console or the terminal as being generated."
#~ msgstr "命令行或终端中将屏显生成的文本。"
#~ msgid "vLLM for Deployment"
#~ msgstr "使用vLLM部署"
#~ msgid "with `vllm>=0.5.3`, you can also use"
#~ msgstr "如 `vllm>=0.5.3` ,也可以如下启动:"
#~ msgid "For more information, please refer to [the documentation of `vllm`](https://docs.vllm.ai/en/stable/)."
#~ msgstr "欲获取更多信息,请参考 [`vllm` 文档](https://docs.vllm.ai/en/stable/)。"
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:2
#: 15e2970b0a874ba0a811c88dcc9afa56
msgid "Speed Benchmark"
msgstr "效率评估"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:5
#: b943913568f3486a8d5f25c46677c6cc
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:7
#: ea8ddfed427143a3a9931495ff77d2d2
msgid "This section reports the speed performance of bf16 models, quantized models (including GPTQ-Int4, GPTQ-Int8 and AWQ) of the Qwen2.5 series. Specifically, we report the inference speed (tokens/s) as well as memory footprint (GB) under the conditions of different context lengths."
msgstr "本部分介绍Qwen2.5系列模型(原始模型和量化模型)的效率测试结果,包括推理速度(tokens/s)与不同上下文长度时的显存占用(GB)。"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:12
#: 321220545e38471981b3b3ee9294ac31
msgid "The environment of the evaluation with huggingface transformers is:"
msgstr "测试HuggingFace ``transformers`` 时的环境配置:"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:14
#: ../../Qwen/source/getting_started/speed_benchmark.rst:25
#: 74436af12fad4f0e8d5b028f6f4d83ec 82d2070795c04fc1b3f2d576ae815ad5
msgid "NVIDIA A100 80GB"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:15
#: ../../Qwen/source/getting_started/speed_benchmark.rst:26
#: 265372c1791d432fb9433f2c301e0533 adb249c4691b48f29caacdb412aef0ee
msgid "CUDA 12.1"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:16
#: d478f901ae9d449a95f80306ec738ad5
msgid "Pytorch 2.3.1"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:17
#: 4e0f4dc2cf97448eafa173e4cabe9fff
msgid "Flash Attention 2.5.8"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:18
#: ../../Qwen/source/getting_started/speed_benchmark.rst:30
#: 21f8a404846140bd9ac89a0d8c10d256 6f31d384305e47a684e220fdc87e6438
msgid "Transformers 4.46.0"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:19
#: f06fc8c23d1045c0ba7b6700c88eb8ad
msgid "AutoGPTQ 0.7.1+cu121 (Compiled from source code)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:20
#: e3d2039804344284ac6a31d5533f9c89
msgid "AutoAWQ 0.2.6"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:23
#: 5a013085785944d9bfc35ed44f25708b
msgid "The environment of the evaluation with vLLM is:"
msgstr "测试vLLM时的环境配置:"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:27
#: 2c8aa18142514f73b6a4fc631a17355a
msgid "vLLM 0.6.3"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:28
#: e777e67e77ec4694a354e83f8dcfcd01
msgid "Pytorch 2.4.0"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:29
#: f1004474078144c8ba0c8f43ca492b55
msgid "Flash Attention 2.6.3"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:33
#: c723126f0a2047f78eb8679d81e37a38
msgid "Notes:"
msgstr "注意:"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:35
#: de02ff9670834fa185edb0d4b603c53e
msgid "We use the batch size of 1 and the least number of GPUs as possible for the evaluation."
msgstr "batch size 设置为1,使用 GPU 数量尽可能少"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:37
#: 1bb988262ab04123ab4ec57095bfb994
msgid "We test the speed and memory of generating 2048 tokens with the input lengths of 1, 6144, 14336, 30720, 63488, and 129024 tokens."
msgstr "我们测试生成2048 tokens时的速度与显存占用,输入长度分别为1、6144、14336、30720、63488、129024 tokens。(超过32K长度仅有 Qwen2-72B-Instuct 与 Qwen2-7B-Instuct 支持)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:40
#: 0e38607f735a4aee942ed6629c19aeb3
msgid "For vLLM, the memory usage is not reported because it pre-allocates all GPU memory. We use ``gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False`` by default."
msgstr "对于vLLM,由于GPU显存预分配,实际显存使用难以评估。默认情况下,统一设定为``gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False``。"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:46
#: 2740febb5a214cc8807b00b8597e4e57
msgid "0.5B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:88
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:167
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:246
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 0d3800fd9320472ead21e1b6ce766960 0daee88c4fcc40d1bf85d9ca7d2d1ed3
#: 2658cecd188b4aa089bf62746b8bc7c9 2778cc188db8410bbcf0da565d9dafa2
#: 31d16a9ce3c04e6e8080a94357eb8e40 4a342b7b7bea429e9f280897ae15f149
#: 556a5c2ccac84fdfbcdf05130669e0f0 695dbc29c02b42f589dfdc08985f0092
#: 7b44531701144fab88e6accd4037e5f7 8e7437142a1c49afb8421cdc0e533e5c
#: 9b082527047b459398c0a4825784cb59 a3fc513b2e8e43e69817f52b060766da
#: b548bbd636054abca043f3fe60129a5e b96a0a6d3a6047d58321cecf1adead4e
msgid "Model"
msgstr "模型"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:88
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:167
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:246
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 18467ba010774755a3aeb9acd845d0d0 1d0852cb78a648db8b7b64a2505145fe
#: 2e233b37910f4085b827380c3a5cc95a 37f34d499d904fc9b06b61b36328956a
#: 4077b79878b54ac187ad3e5906b44513 434fd471714f4f3bbff4ebce1ed02260
#: 58d16b17c4f4439db5a55f63d0550c6a 60a1b825536447c4bafd641a87db51f2
#: 7c5e1c1bc8144e4da090a64fe92919c6 a2a14e09c63249bb933cad3087eacef8
#: bd6c5dcfc9d4479385247f5caf4bd963 da2e631809d04389970e4172167401f2
#: ddf1a9637c084006bc418b13e2061c01 f00ee91c225344b0ae4c7a78128dd4e7
msgid "Input Length"
msgstr "输入长度"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:88
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:167
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:246
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 1657c1e521524c96ab3e321b914de410 16f663d9ba574eabacc50755122d4d51
#: 1894375562ee4ba693ca418c09c4ed1d 25bf8d0629804415bc1119d3d04af19d
#: 2924665a3f7a4e9bbdc99cf807489336 312c120349184b12bac02c340f8671b2
#: 6e4dfbe22352464bad7903057542e36c 75347400d0864b0e87f3f7ac34165e40
#: 9a26311405cc4e51a07afba1ff187c22 b3f824bbc86d4ec18e090e9e87b5f0b6
#: ca7a6f03136048f49962c7163ad52963 d0bac2de0ad4486a808091fa444de8d3
#: eb93a6b940234777b282f5b781fe76d4 ec72d2950327409dadfc2772b21bb663
msgid "Quantization"
msgstr "量化"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:88
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:167
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:246
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 0a9c10c1df394368a9bf7f16e2d133cc 20f68eddfd2d4b2e8bac11698949992f
#: 3bdff2d2f7084c5dbd81c9e51c4e8cb6 562c19a04e7c4cfdab7ec3f259d8e351
#: 5c2a0a292a3b45c08bd24b2002501e91 7b7452b5f7c94063b52f0d234e71fd33
#: 913b0e8e8dc14ee68e385edd7a9bd8fd 99d423566ee24d029e5645e04bb84ddd
#: a148f164b00c480fb8cf0c0589a03888 a8ad3a93dda04af3a9cadc2796a694b7
#: c780cefda234439b86c036f72eab0a72 d8579ec8e3fc40709fd436339b3bba6f
#: e08a842aebf247719fcadf9ccb4899a4 f1477a135092411c8108d3818d49000c
msgid "GPU Num"
msgstr "GPU数量"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:88
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:167
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:246
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 132301b4648a4a78994ffd0047502ffb 1ed10eae6dc24e4085404998cc4f924f
#: 23275c0026d049abaa05d76c9f9a0cdc 2b680afbd0dc49ac952913245410f952
#: 2e5293dcc25c47fb9df4f20580151f21 2fea2c25aa474023a6af451e932fd123
#: 4419eaba8b914d44a1edac7c9f8e18bf 5459abd810f0461da17f1a44f997ca22
#: 74190c0c15bc4d31a6a55864f01d2904 95814a65c98f42649fc0278c0e16a029
#: a54fbc300f1c4fa3b04429f4d36b4dd2 e0e571bc5f6a409c9524e74d20c1ba59
#: e3b3fb7f2078496c87be43456421c2e4 e7a4f3b9a2ee46a689456214c16ee1f0
msgid "Speed(tokens/s)"
msgstr "速度 (tokens/s)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: 0d09a9f5937c4df1b3833adbd25aa608 1a2560a4c8814cb28284389b859caf63
#: 69cc4d5ee89d449482cfd4ad9023324a 7d8bf237f85949119b1809edfb931cc5
#: abcf9c8943774551b67e33b9763abd22 dda237af9b8d4539803f71fab7823175
#: de48bc9a7ab344ac873dba2b0a50d20e
msgid "GPU Memory(GB)"
msgstr "显存占用 (GB)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:49
#: ../../Qwen/source/getting_started/speed_benchmark.rst:128
#: ../../Qwen/source/getting_started/speed_benchmark.rst:207
#: ../../Qwen/source/getting_started/speed_benchmark.rst:286
#: ../../Qwen/source/getting_started/speed_benchmark.rst:326
#: ../../Qwen/source/getting_started/speed_benchmark.rst:383
#: ../../Qwen/source/getting_started/speed_benchmark.rst:422
#: ../../Qwen/source/getting_started/speed_benchmark.rst:481
#: ../../Qwen/source/getting_started/speed_benchmark.rst:523
#: ../../Qwen/source/getting_started/speed_benchmark.rst:585
#: ../../Qwen/source/getting_started/speed_benchmark.rst:626
#: 1dce6ec0ef864e96b542524a06b05a4e 2691beef5b474c5f8da8504ab2b5274e
#: 3528cc8b5a304fcf98961542cc3062e8 64bad9a37cc74119947998193ef537a1
#: 7273dd27f9244b958575b922bab5278a 771fe7364b0c44428775c91cf84ac836
#: 7ecffcfa9ee74b6d9e85552f5a505e9c 97cd9cfb76cb41b793fceb15ecc9cd6d
#: a1e3724821b748528f8ae590d3f370cf ae21ba10c9284c1098768ef5fa36a459
#: cda4575682284b9f8194769030d695d1
msgid "Note"
msgstr "注意:"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:51
#: ../../Qwen/source/getting_started/speed_benchmark.rst:90
#: c71daeff1f804dca9c60f4831582b2c7 db7f5f91dd7f4e0ab83e20a9b598d6f5
msgid "Qwen2.5-0.5B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:51
#: ../../Qwen/source/getting_started/speed_benchmark.rst:53
#: ../../Qwen/source/getting_started/speed_benchmark.rst:55
#: ../../Qwen/source/getting_started/speed_benchmark.rst:57
#: ../../Qwen/source/getting_started/speed_benchmark.rst:59
#: ../../Qwen/source/getting_started/speed_benchmark.rst:61
#: ../../Qwen/source/getting_started/speed_benchmark.rst:63
#: ../../Qwen/source/getting_started/speed_benchmark.rst:65
#: ../../Qwen/source/getting_started/speed_benchmark.rst:67
#: ../../Qwen/source/getting_started/speed_benchmark.rst:69
#: ../../Qwen/source/getting_started/speed_benchmark.rst:71
#: ../../Qwen/source/getting_started/speed_benchmark.rst:73
#: ../../Qwen/source/getting_started/speed_benchmark.rst:75
#: ../../Qwen/source/getting_started/speed_benchmark.rst:77
#: ../../Qwen/source/getting_started/speed_benchmark.rst:79
#: ../../Qwen/source/getting_started/speed_benchmark.rst:81
#: ../../Qwen/source/getting_started/speed_benchmark.rst:90
#: ../../Qwen/source/getting_started/speed_benchmark.rst:92
#: ../../Qwen/source/getting_started/speed_benchmark.rst:94
#: ../../Qwen/source/getting_started/speed_benchmark.rst:96
#: ../../Qwen/source/getting_started/speed_benchmark.rst:98
#: ../../Qwen/source/getting_started/speed_benchmark.rst:100
#: ../../Qwen/source/getting_started/speed_benchmark.rst:102
#: ../../Qwen/source/getting_started/speed_benchmark.rst:104
#: ../../Qwen/source/getting_started/speed_benchmark.rst:106
#: ../../Qwen/source/getting_started/speed_benchmark.rst:108
#: ../../Qwen/source/getting_started/speed_benchmark.rst:110
#: ../../Qwen/source/getting_started/speed_benchmark.rst:112
#: ../../Qwen/source/getting_started/speed_benchmark.rst:114
#: ../../Qwen/source/getting_started/speed_benchmark.rst:116
#: ../../Qwen/source/getting_started/speed_benchmark.rst:118
#: ../../Qwen/source/getting_started/speed_benchmark.rst:120
#: ../../Qwen/source/getting_started/speed_benchmark.rst:130
#: ../../Qwen/source/getting_started/speed_benchmark.rst:132
#: ../../Qwen/source/getting_started/speed_benchmark.rst:134
#: ../../Qwen/source/getting_started/speed_benchmark.rst:136
#: ../../Qwen/source/getting_started/speed_benchmark.rst:138
#: ../../Qwen/source/getting_started/speed_benchmark.rst:140
#: ../../Qwen/source/getting_started/speed_benchmark.rst:142
#: ../../Qwen/source/getting_started/speed_benchmark.rst:144
#: ../../Qwen/source/getting_started/speed_benchmark.rst:146
#: ../../Qwen/source/getting_started/speed_benchmark.rst:148
#: ../../Qwen/source/getting_started/speed_benchmark.rst:150
#: ../../Qwen/source/getting_started/speed_benchmark.rst:152
#: ../../Qwen/source/getting_started/speed_benchmark.rst:154
#: ../../Qwen/source/getting_started/speed_benchmark.rst:156
#: ../../Qwen/source/getting_started/speed_benchmark.rst:158
#: ../../Qwen/source/getting_started/speed_benchmark.rst:160
#: ../../Qwen/source/getting_started/speed_benchmark.rst:169
#: ../../Qwen/source/getting_started/speed_benchmark.rst:171
#: ../../Qwen/source/getting_started/speed_benchmark.rst:173
#: ../../Qwen/source/getting_started/speed_benchmark.rst:175
#: ../../Qwen/source/getting_started/speed_benchmark.rst:177
#: ../../Qwen/source/getting_started/speed_benchmark.rst:179
#: ../../Qwen/source/getting_started/speed_benchmark.rst:181
#: ../../Qwen/source/getting_started/speed_benchmark.rst:183
#: ../../Qwen/source/getting_started/speed_benchmark.rst:185
#: ../../Qwen/source/getting_started/speed_benchmark.rst:187
#: ../../Qwen/source/getting_started/speed_benchmark.rst:189
#: ../../Qwen/source/getting_started/speed_benchmark.rst:191
#: ../../Qwen/source/getting_started/speed_benchmark.rst:193
#: ../../Qwen/source/getting_started/speed_benchmark.rst:195
#: ../../Qwen/source/getting_started/speed_benchmark.rst:197
#: ../../Qwen/source/getting_started/speed_benchmark.rst:199
#: ../../Qwen/source/getting_started/speed_benchmark.rst:209
#: ../../Qwen/source/getting_started/speed_benchmark.rst:211
#: ../../Qwen/source/getting_started/speed_benchmark.rst:213
#: ../../Qwen/source/getting_started/speed_benchmark.rst:215
#: ../../Qwen/source/getting_started/speed_benchmark.rst:217
#: ../../Qwen/source/getting_started/speed_benchmark.rst:219
#: ../../Qwen/source/getting_started/speed_benchmark.rst:221
#: ../../Qwen/source/getting_started/speed_benchmark.rst:223
#: ../../Qwen/source/getting_started/speed_benchmark.rst:225
#: ../../Qwen/source/getting_started/speed_benchmark.rst:227
#: ../../Qwen/source/getting_started/speed_benchmark.rst:229
#: ../../Qwen/source/getting_started/speed_benchmark.rst:231
#: ../../Qwen/source/getting_started/speed_benchmark.rst:233
#: ../../Qwen/source/getting_started/speed_benchmark.rst:235
#: ../../Qwen/source/getting_started/speed_benchmark.rst:237
#: ../../Qwen/source/getting_started/speed_benchmark.rst:239
#: ../../Qwen/source/getting_started/speed_benchmark.rst:248
#: ../../Qwen/source/getting_started/speed_benchmark.rst:250
#: ../../Qwen/source/getting_started/speed_benchmark.rst:252
#: ../../Qwen/source/getting_started/speed_benchmark.rst:254
#: ../../Qwen/source/getting_started/speed_benchmark.rst:256
#: ../../Qwen/source/getting_started/speed_benchmark.rst:258
#: ../../Qwen/source/getting_started/speed_benchmark.rst:260
#: ../../Qwen/source/getting_started/speed_benchmark.rst:262
#: ../../Qwen/source/getting_started/speed_benchmark.rst:264
#: ../../Qwen/source/getting_started/speed_benchmark.rst:266
#: ../../Qwen/source/getting_started/speed_benchmark.rst:268
#: ../../Qwen/source/getting_started/speed_benchmark.rst:270
#: ../../Qwen/source/getting_started/speed_benchmark.rst:272
#: ../../Qwen/source/getting_started/speed_benchmark.rst:274
#: ../../Qwen/source/getting_started/speed_benchmark.rst:276
#: ../../Qwen/source/getting_started/speed_benchmark.rst:278
#: ../../Qwen/source/getting_started/speed_benchmark.rst:288
#: ../../Qwen/source/getting_started/speed_benchmark.rst:290
#: ../../Qwen/source/getting_started/speed_benchmark.rst:292
#: ../../Qwen/source/getting_started/speed_benchmark.rst:294
#: ../../Qwen/source/getting_started/speed_benchmark.rst:296
#: ../../Qwen/source/getting_started/speed_benchmark.rst:298
#: ../../Qwen/source/getting_started/speed_benchmark.rst:300
#: ../../Qwen/source/getting_started/speed_benchmark.rst:302
#: ../../Qwen/source/getting_started/speed_benchmark.rst:304
#: ../../Qwen/source/getting_started/speed_benchmark.rst:306
#: ../../Qwen/source/getting_started/speed_benchmark.rst:308
#: ../../Qwen/source/getting_started/speed_benchmark.rst:310
#: ../../Qwen/source/getting_started/speed_benchmark.rst:312
#: ../../Qwen/source/getting_started/speed_benchmark.rst:314
#: ../../Qwen/source/getting_started/speed_benchmark.rst:316
#: ../../Qwen/source/getting_started/speed_benchmark.rst:318
#: ../../Qwen/source/getting_started/speed_benchmark.rst:328
#: ../../Qwen/source/getting_started/speed_benchmark.rst:330
#: ../../Qwen/source/getting_started/speed_benchmark.rst:332
#: ../../Qwen/source/getting_started/speed_benchmark.rst:334
#: ../../Qwen/source/getting_started/speed_benchmark.rst:336
#: ../../Qwen/source/getting_started/speed_benchmark.rst:338
#: ../../Qwen/source/getting_started/speed_benchmark.rst:340
#: ../../Qwen/source/getting_started/speed_benchmark.rst:342
#: ../../Qwen/source/getting_started/speed_benchmark.rst:344
#: ../../Qwen/source/getting_started/speed_benchmark.rst:346
#: ../../Qwen/source/getting_started/speed_benchmark.rst:348
#: ../../Qwen/source/getting_started/speed_benchmark.rst:350
#: ../../Qwen/source/getting_started/speed_benchmark.rst:352
#: ../../Qwen/source/getting_started/speed_benchmark.rst:354
#: ../../Qwen/source/getting_started/speed_benchmark.rst:356
#: ../../Qwen/source/getting_started/speed_benchmark.rst:358
#: ../../Qwen/source/getting_started/speed_benchmark.rst:360
#: ../../Qwen/source/getting_started/speed_benchmark.rst:362
#: ../../Qwen/source/getting_started/speed_benchmark.rst:364
#: ../../Qwen/source/getting_started/speed_benchmark.rst:366
#: ../../Qwen/source/getting_started/speed_benchmark.rst:368
#: ../../Qwen/source/getting_started/speed_benchmark.rst:370
#: ../../Qwen/source/getting_started/speed_benchmark.rst:372
#: ../../Qwen/source/getting_started/speed_benchmark.rst:374
#: ../../Qwen/source/getting_started/speed_benchmark.rst:385
#: ../../Qwen/source/getting_started/speed_benchmark.rst:387
#: ../../Qwen/source/getting_started/speed_benchmark.rst:389
#: ../../Qwen/source/getting_started/speed_benchmark.rst:391
#: ../../Qwen/source/getting_started/speed_benchmark.rst:393
#: ../../Qwen/source/getting_started/speed_benchmark.rst:395
#: ../../Qwen/source/getting_started/speed_benchmark.rst:397
#: ../../Qwen/source/getting_started/speed_benchmark.rst:399
#: ../../Qwen/source/getting_started/speed_benchmark.rst:401
#: ../../Qwen/source/getting_started/speed_benchmark.rst:403
#: ../../Qwen/source/getting_started/speed_benchmark.rst:405
#: ../../Qwen/source/getting_started/speed_benchmark.rst:407
#: ../../Qwen/source/getting_started/speed_benchmark.rst:409
#: ../../Qwen/source/getting_started/speed_benchmark.rst:411
#: ../../Qwen/source/getting_started/speed_benchmark.rst:413
#: ../../Qwen/source/getting_started/speed_benchmark.rst:415
#: ../../Qwen/source/getting_started/speed_benchmark.rst:424
#: ../../Qwen/source/getting_started/speed_benchmark.rst:426
#: ../../Qwen/source/getting_started/speed_benchmark.rst:428
#: ../../Qwen/source/getting_started/speed_benchmark.rst:430
#: ../../Qwen/source/getting_started/speed_benchmark.rst:432
#: ../../Qwen/source/getting_started/speed_benchmark.rst:434
#: ../../Qwen/source/getting_started/speed_benchmark.rst:436
#: ../../Qwen/source/getting_started/speed_benchmark.rst:438
#: ../../Qwen/source/getting_started/speed_benchmark.rst:440
#: ../../Qwen/source/getting_started/speed_benchmark.rst:442
#: ../../Qwen/source/getting_started/speed_benchmark.rst:444
#: ../../Qwen/source/getting_started/speed_benchmark.rst:446
#: ../../Qwen/source/getting_started/speed_benchmark.rst:448
#: ../../Qwen/source/getting_started/speed_benchmark.rst:450
#: ../../Qwen/source/getting_started/speed_benchmark.rst:452
#: ../../Qwen/source/getting_started/speed_benchmark.rst:454
#: ../../Qwen/source/getting_started/speed_benchmark.rst:456
#: ../../Qwen/source/getting_started/speed_benchmark.rst:458
#: ../../Qwen/source/getting_started/speed_benchmark.rst:460
#: ../../Qwen/source/getting_started/speed_benchmark.rst:462
#: ../../Qwen/source/getting_started/speed_benchmark.rst:464
#: ../../Qwen/source/getting_started/speed_benchmark.rst:466
#: ../../Qwen/source/getting_started/speed_benchmark.rst:468
#: ../../Qwen/source/getting_started/speed_benchmark.rst:470
#: ../../Qwen/source/getting_started/speed_benchmark.rst:483
#: ../../Qwen/source/getting_started/speed_benchmark.rst:485
#: ../../Qwen/source/getting_started/speed_benchmark.rst:487
#: ../../Qwen/source/getting_started/speed_benchmark.rst:489
#: ../../Qwen/source/getting_started/speed_benchmark.rst:491
#: ../../Qwen/source/getting_started/speed_benchmark.rst:493
#: ../../Qwen/source/getting_started/speed_benchmark.rst:495
#: ../../Qwen/source/getting_started/speed_benchmark.rst:497
#: ../../Qwen/source/getting_started/speed_benchmark.rst:499
#: ../../Qwen/source/getting_started/speed_benchmark.rst:501
#: ../../Qwen/source/getting_started/speed_benchmark.rst:503
#: ../../Qwen/source/getting_started/speed_benchmark.rst:505
#: ../../Qwen/source/getting_started/speed_benchmark.rst:507
#: ../../Qwen/source/getting_started/speed_benchmark.rst:509
#: ../../Qwen/source/getting_started/speed_benchmark.rst:511
#: ../../Qwen/source/getting_started/speed_benchmark.rst:513
#: ../../Qwen/source/getting_started/speed_benchmark.rst:525
#: ../../Qwen/source/getting_started/speed_benchmark.rst:527
#: ../../Qwen/source/getting_started/speed_benchmark.rst:529
#: ../../Qwen/source/getting_started/speed_benchmark.rst:531
#: ../../Qwen/source/getting_started/speed_benchmark.rst:533
#: ../../Qwen/source/getting_started/speed_benchmark.rst:535
#: ../../Qwen/source/getting_started/speed_benchmark.rst:537
#: ../../Qwen/source/getting_started/speed_benchmark.rst:539
#: ../../Qwen/source/getting_started/speed_benchmark.rst:541
#: ../../Qwen/source/getting_started/speed_benchmark.rst:543
#: ../../Qwen/source/getting_started/speed_benchmark.rst:545
#: ../../Qwen/source/getting_started/speed_benchmark.rst:547
#: ../../Qwen/source/getting_started/speed_benchmark.rst:551
#: ../../Qwen/source/getting_started/speed_benchmark.rst:553
#: ../../Qwen/source/getting_started/speed_benchmark.rst:555
#: ../../Qwen/source/getting_started/speed_benchmark.rst:559
#: ../../Qwen/source/getting_started/speed_benchmark.rst:561
#: ../../Qwen/source/getting_started/speed_benchmark.rst:563
#: ../../Qwen/source/getting_started/speed_benchmark.rst:567
#: ../../Qwen/source/getting_started/speed_benchmark.rst:569
#: ../../Qwen/source/getting_started/speed_benchmark.rst:571
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: ../../Qwen/source/getting_started/speed_benchmark.rst:591
#: ../../Qwen/source/getting_started/speed_benchmark.rst:593
#: ../../Qwen/source/getting_started/speed_benchmark.rst:599
#: ../../Qwen/source/getting_started/speed_benchmark.rst:601
#: ../../Qwen/source/getting_started/speed_benchmark.rst:607
#: ../../Qwen/source/getting_started/speed_benchmark.rst:609
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: ../../Qwen/source/getting_started/speed_benchmark.rst:634
#: ../../Qwen/source/getting_started/speed_benchmark.rst:644
#: 004050b870c44233902f4a931ba6b7ca 0087e64dbea04c55b34161fea6d96609
#: 0190e138080b4fb593089fe84be67f49 0399ee86047345b1acffeb01e5bf765d
#: 069f3cf9c545472c8eb97c91487a8b1d 0766ab6e940847488c5d140726dee08a
#: 0793c4db2ef74086bd006023aa3a0b11 08a9bec8b7104ecf9c915fa1ac32699e
#: 08f3f6b804764c4ab340a8a2bcd22405 09069c21c8104509a81351b3bf0bf79f
#: 095d6370b91d4003840094c2a73fdec1 0c5ac64b8e5f44a696e5b87f8d2f63b3
#: 0d1f4a4beb1a485794685d63452fcb83 0dbdbb6a1aef42eb818f6d5b069c39b3
#: 0f5c4d9fd646426681623dd784a14624 108f6e63c7034b1ea3bdaf04973a23ee
#: 1192c4ea981d4bd0b7975cb9e13eb790 11e19467a2a94ac180b991348fcaf680
#: 125aa1af6f2a446b853e56aa535e1ec4 146303d399174036b90f5579a73d223a
#: 1555d2ede84d464bbb4f4a7c7ed533a5 16211e628dbe49cb95c118f0ec0d16aa
#: 18b3cada791c42ffba8fab6e1ef24d04 1b4170ac681943348d70fb6a254d318f
#: 1bb2f628b8da44c495d60d96e6ffcd25 1c78a9eea00345aa91b222192c61b57f
#: 1d4652545a5941e789f1314ef8932a2e 1db10d28212d49b5ab99d2999bcd9c89
#: 1e86c36de75d48f2bf7fe17ccf141a8d 1f751e3b878c464cb616bba7c32efb3b
#: 1f9deac7fce24242a2b1fad24a7ad0fe 21b59eeabe4e41cc8933470471b18124
#: 22a38b84ca0446e6b586954179796d9c 22bae4394baf455ea14081d604ba3502
#: 239dcc4750b34cafb6bdb9db3d4db7ae 244ac6fa7d83498fb0a4b1f8c37243e1
#: 25ba29127e2d442e87a39161a83aaa67 265a73b6f2a04080ab8e4429a0135819
#: 26c1bee487744245942604a2102fdf36 272d51198eaa4e029c07a8f8264f79b4
#: 276dd61cc9ce4ed8819f1d4c4b276e25 2aebfb452340469b96f6cb0f4e4be102
#: 2bd4124ced3a491ab7e992093bfe733f 2c16e57e4e624ba5a3c689db835f21ed
#: 2c19ceacdfc14e80ac6f89a282317cbb 2f1293191a0147579e4a7a61d2f3ccbf
#: 31c609844402498bba0950caa3147513 326a335f9fd1408496ab99278cd82f79
#: 33ec2936cd324655b0df255576283d18 33fc668521af49ad91463d06c06a51d4
#: 34c3102a5cb64b10ae415cbe0a0c6c71 36271bdd7a824fa08f23bf2b5e6e9c35
#: 3668da3bbfda4645bfdb01d2616584dc 36cc902c8bb84c958613b7777ec01741
#: 37f5f1f0d3364b0a8e78088a651bae12 399d4ec6796f4de69b113db548917d0b
#: 3a7d0d89a3e641f99d5e4e01bbbd053a 3c1da93dabba4aeaa3f7f6919dc5a416
#: 3f6bb0dac1234df4824123b6980971df 4014ce97bfc847548fd838ac56805bb1
#: 40280d577d364bb5897b1cce94948a04 407a049c84e24991bbf4539f7f0e98f8
#: 4166e398923f4f7f9f2a527db209bc34 43484a20a1e24eba92bdc52e2c4b97be
#: 459842cae7384ae29bac98ce8af43465 468f21ba39d0464d9034f64a33bce568
#: 4781f9780bcc48a59b8fabc951ef8669 47f16127806340b1b5e378d2d2c815ce
#: 49647d7237b6469c91b988807d57ddaa 49de3f271c6441af932ce865eba21591
#: 49e258e9a7a24e42bd7fac56e9b97825 4b3cb38d23e04761818a5d52b0f115f8
#: 4b6a1eb1888a462697c96dda4dc07f01 4c3f03d181bd454f81b739dab9635887
#: 4f3e8749ec11409fbdb0adeca0a5b9e6 50036636f0b74b3bb9722f70dd16e660
#: 50dff419f5124e9fb754673bbacfd647 545edad7273242a8abda88153bb56ff5
#: 55829e68ece043dc8c44477b676dd8df 570cd5b5da634719aebe39d28dd52d25
#: 57391e9f0aa545d98a1efc6a97c35362 5a21a7d629054577b9348fd56ff60732
#: 5c442dfb0f444ee19183ef58a5e037f4 5d7f40999b6b46be8f37868a70bc4b05
#: 5e064d56238544d495717e79ebe3fbde 5f3960ee8dd3435f88553f581cb2e7fc
#: 6036b79650b04956afa4f4f24c2e0705 608d7f7675a047daae27408c22edbf6f
#: 6145ea1263c040b3ad96a7291d0a1da1 621d77aa80fc4ccc8fd9a77760d22939
#: 626c7ec14bd84b8db6dd86aec4faab9b 62a40b213f514aab999e9cb7633fecc8
#: 62a8fc78ee7144c1a2ca545fb657dd00 668236b83e224be3be19b6e1542834b3
#: 66e7065d8bff4cadba93d7e49e28f5b4 671880fd119e4cf58929436e77679523
#: 69eed85322114d9ea859383fc4831c75 6a5799c6df2d46139a6477d8d72a12a5
#: 6aabf57be3a842c986358fb67202c11c 6cd7c973a8974f2096ad6aa2a6132cab
#: 6d782c8ee93d4e1cbb774d25e5f756b4 70e42e8f4d644a9a83f289143306e97c
#: 712b9a5dfd664565b68f36d1b33b6fcd 71d9e9cd810e445fb0626d82a766cbdb
#: 739a24ebc4ab44cea8f4953f3fd9b45e 767f36b1085a49d1a8b98e4a1031d525
#: 7696d65acdc64f62a5158e7c48627529 7705e6acf11147739a33384048dc7600
#: 771e205929d8414898f7527d5c595c79 77485309a5bc416491c5f99cba091411
#: 785876e4d7c84b05a0e0f8e81a6e2b01 78cf7b36819345d58a4ccb0074f846c9
#: 78e784e57675458181b1f991a8a43653 79e4235309c84f8098b0f787b5d39c2b
#: 7a49aa261fc1409bbc2d656818ab0ce9 7bbe1d4792674f688756f650dd90531a
#: 7c754b2ba5d94bc8a5ea81ae146cf190 7ccabf54eb194ff9a1b9c9ec6f1a19ea
#: 7dbdb15bff4745f5ae78ea042ae5831a 7f5f4d11aaeb499e91fc1848330838d7
#: 813af099a49747daa0ea5fed33dd8c5f 84c49226f1c24fbc9dafbf0c3507f163
#: 852514144c594b43b69ec7fdba48d51d 85d73f336c3e41d6a8706576d392abc1
#: 863c357179ae46a493314006d710624e 87cb21eb258f4f9aa1495845aa7c0d6d
#: 8b54f7f45317454c963970d180c52a28 8de5f37385bf482887ce9ef9e766f73b
#: 8f023d6f49c34771bc8491e7c01e204f 8f11f3c03f2c43f2b0f29cd54558b311
#: 8f9cb9f8dc7a4f26994ad9f091d35081 90234aedb000409d856f4af8e70da1d9
#: 902ba5445dba4834a5283393790f74fb 90a1e25fcf94459fa8f43c23c2df6bd0
#: 910e6a083c3a465198b0d1a2b3254362 916014a242664103a85d36aa6ae59e04
#: 92a82e951f784ee7bdfac0bb969e0870 94abb2956e9c4299803eece1ac3db55d
#: 97bedf7333234cb7ae9bd9300f1145a4 97e6a617bdd345a2a84dc6908058e48d
#: 98c8ea14ec4342f6ad48a6d44b5c947c 9c32b5a291a842d8b9364e5ce4b009f0
#: 9c33b7bf4ea74503aed1f06c4be2c0b0 9d98c68ed8004626b88e434c3c3b6058
#: 9e57190fe54e45ee9bd1889e8934916a 9e9788a59be14895ad4a291cc5f18780
#: 9efad049e9874db0b70acdf19abaa9ea a1b4961480ac48bda4ef9aa6bdda2b53
#: a20a15510d6346558ac724878e74c0ab a211e50976994fd7a59c0c5cc018ce6e
#: a2bfad23e6c3410db342ccd707c56dd5 a38d38efd2af4fd9877ca7b674a213c1
#: a5cc981777cb474fbf3e1d5683e55a77 a7877201b1894fcd8befe2f68e9ba1f0
#: aa15f60707484d998b941223889bb9f1 abf8ea3949f547df9c0f5111e68126c4
#: acadfaf66d974c39a36694be6a1cdbaf acf71f87b4c3425c93607035b45f5cda
#: ad81e1ab1288459eb50028ff4a0fcbb2 ae97483a84f545ab82e4dc06050b9565
#: afe9f511dae942a5b56a2a905db5dcf4 b07e439111cb4ba1af2086e1630476d3
#: b120a81c778a44c69a50d2735cb982d0 b1b00028ed2047d9933d1676e77d0528
#: b713b8870d5f40179b8b426de714f7fa b788c9ec22fc464f951f895f4a492297
#: b853d64c89f2464db907043d7058c100 ba57f3e800bd4318baaeeea1b9b25437
#: bbcf688e47e54345bf6c9c9627a6dc08 bbd24563c0454119be9e045cab784fa7
#: bc4120e45b1b45a68022476f0cd2ef55 bc9596de311243d389fb89e169594fe8
#: c07fad76e9cc4235aa5959331f3971aa c22cfc6129104afb8ac4443ce2466384
#: c2a5d013f4c54105a2378913d0a0b158 c312db100211463988249a33d602fc54
#: c400676e810f4f29a954b4a4828098ca c4d6c1531dd142e897ccfd6b04dfd198
#: c50100eb05d74b86b693426e5b4a7484 c6d0b4d609d74d79b82eea37839516a2
#: c6de0b1b012a434893ff3c35a2a3de48 c70df173fc574fb48976690b442d0e69
#: ca1a5ded7d02454686e0f4abdbf46d4f caa5e2cfaaac4ccd8a9eb6b3d37081e7
#: cab1460f044e4df9a64e0807ed27de40 cb01e6cbbe7247ed8aa4bd0a1cf4fcf1
#: cb086f25908a4414b81f856e712b25b9 cbd21a73a807432087fb0cec7003b006
#: cdb8a85ceb644aaab49fbe41d1c4d90a d03876bf54a04fb6a5140c4ad3f8c15d
#: d1e5555794584a6e99200842c351dd35 d21c6dee16c848939844bbdce74a5354
#: d2a887ac625546dd901d9b945375fd41 d3eb4d2b08814f8fbe84faec2e802654
#: d40b413976ad4af5ae7f9757ae7e30ed d40dcbcdf8804d4eba69e30d7a1cf822
#: d4d5a70f30864e98a78172c9a0113ea3 d755fd82bfcc429391770b19e3e43b84
#: d775043e1bf74f4993b18c7de1b2c3e3 d7bc2a71d91d4348bc12ffc688a29ba4
#: d8f45e49296a435699d1f936d8b9442f db78cca7bbc34298808df0929f7a72a7
#: dbceb874cc684ebc829efb78ce703bc7 dbd465638386487187a9286c7d4ad6fc
#: de1b0e2fc3f34df6aa18ec7e964a9cf2 de50a5cdcdb143b3ae7d24b43eb12b03
#: deea3077046f47de9f2d4e3171aa03af e0c019034ac947ff8c977ef6be09a84f
#: e15573096e0441eaa186b9c6cba15d56 e1b147fcbea6419d91a5043a121aa72d
#: e4cae171971c40a58bfc7ec2a28b4ccf e4da0a9ef6e14a42a84d7241b21a8153
#: e53213c4447744ce85ddae8a692178b9 e55bd6ad665b4bc8bf76ff477fa396eb
#: e6b689b8169a4a1b83b71f68ab4922bc e7c50774cd834ac8b683b35ffed29949
#: e7ca63b493bd4bc79031d17f7d1fc107 e844389615964d0bad31ea881ed8b66a
#: eb11c8ee408343cc813cf0963011ccf3 ec494d915f1a401c9ed49ffcb490ba56
#: ecfec3a7f51d45449be9c025a500777a f0b240fa78604a359268f7eedcfa818d
#: f2958a2bd82b46f2949562177836aed6 f2af8e82a14e4907ac5542c495c0381a
#: f2fb9166d14a4053b8e8aa12a81a8c4e f32b3d2eaf4a441b880c12096644e578
#: f62c6d740ea447ab9d42724109a91815 f62cca727dca4267a606cd3d3365f80e
#: f7ee0419ce9e416fa310375b0132dbdc f87b949186324acb90d74005743eb4da
#: f8b4435fd6194b2282d4a35118ed8a0b f920421e722e48e3a97d985ccab4a04c
#: f9fbc3ad43a9496f802630395f05ebae fe81092c0305472f8ec4accc772c36f0
#: ffc6c14cdf8248e1b5dfeb0124dc9e5b
msgid "1"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:51
#: ../../Qwen/source/getting_started/speed_benchmark.rst:59
#: ../../Qwen/source/getting_started/speed_benchmark.rst:67
#: ../../Qwen/source/getting_started/speed_benchmark.rst:75
#: ../../Qwen/source/getting_started/speed_benchmark.rst:90
#: ../../Qwen/source/getting_started/speed_benchmark.rst:98
#: ../../Qwen/source/getting_started/speed_benchmark.rst:106
#: ../../Qwen/source/getting_started/speed_benchmark.rst:114
#: ../../Qwen/source/getting_started/speed_benchmark.rst:130
#: ../../Qwen/source/getting_started/speed_benchmark.rst:138
#: ../../Qwen/source/getting_started/speed_benchmark.rst:146
#: ../../Qwen/source/getting_started/speed_benchmark.rst:154
#: ../../Qwen/source/getting_started/speed_benchmark.rst:169
#: ../../Qwen/source/getting_started/speed_benchmark.rst:177
#: ../../Qwen/source/getting_started/speed_benchmark.rst:185
#: ../../Qwen/source/getting_started/speed_benchmark.rst:193
#: ../../Qwen/source/getting_started/speed_benchmark.rst:209
#: ../../Qwen/source/getting_started/speed_benchmark.rst:217
#: ../../Qwen/source/getting_started/speed_benchmark.rst:225
#: ../../Qwen/source/getting_started/speed_benchmark.rst:233
#: ../../Qwen/source/getting_started/speed_benchmark.rst:248
#: ../../Qwen/source/getting_started/speed_benchmark.rst:256
#: ../../Qwen/source/getting_started/speed_benchmark.rst:264
#: ../../Qwen/source/getting_started/speed_benchmark.rst:272
#: ../../Qwen/source/getting_started/speed_benchmark.rst:288
#: ../../Qwen/source/getting_started/speed_benchmark.rst:296
#: ../../Qwen/source/getting_started/speed_benchmark.rst:304
#: ../../Qwen/source/getting_started/speed_benchmark.rst:312
#: ../../Qwen/source/getting_started/speed_benchmark.rst:328
#: ../../Qwen/source/getting_started/speed_benchmark.rst:336
#: ../../Qwen/source/getting_started/speed_benchmark.rst:344
#: ../../Qwen/source/getting_started/speed_benchmark.rst:352
#: ../../Qwen/source/getting_started/speed_benchmark.rst:360
#: ../../Qwen/source/getting_started/speed_benchmark.rst:368
#: ../../Qwen/source/getting_started/speed_benchmark.rst:385
#: ../../Qwen/source/getting_started/speed_benchmark.rst:393
#: ../../Qwen/source/getting_started/speed_benchmark.rst:401
#: ../../Qwen/source/getting_started/speed_benchmark.rst:409
#: ../../Qwen/source/getting_started/speed_benchmark.rst:424
#: ../../Qwen/source/getting_started/speed_benchmark.rst:432
#: ../../Qwen/source/getting_started/speed_benchmark.rst:440
#: ../../Qwen/source/getting_started/speed_benchmark.rst:448
#: ../../Qwen/source/getting_started/speed_benchmark.rst:456
#: ../../Qwen/source/getting_started/speed_benchmark.rst:464
#: ../../Qwen/source/getting_started/speed_benchmark.rst:483
#: ../../Qwen/source/getting_started/speed_benchmark.rst:491
#: ../../Qwen/source/getting_started/speed_benchmark.rst:499
#: ../../Qwen/source/getting_started/speed_benchmark.rst:507
#: ../../Qwen/source/getting_started/speed_benchmark.rst:525
#: ../../Qwen/source/getting_started/speed_benchmark.rst:533
#: ../../Qwen/source/getting_started/speed_benchmark.rst:541
#: ../../Qwen/source/getting_started/speed_benchmark.rst:549
#: ../../Qwen/source/getting_started/speed_benchmark.rst:557
#: ../../Qwen/source/getting_started/speed_benchmark.rst:565
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: ../../Qwen/source/getting_started/speed_benchmark.rst:595
#: ../../Qwen/source/getting_started/speed_benchmark.rst:603
#: ../../Qwen/source/getting_started/speed_benchmark.rst:611
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: ../../Qwen/source/getting_started/speed_benchmark.rst:630
#: ../../Qwen/source/getting_started/speed_benchmark.rst:640
#: ../../Qwen/source/getting_started/speed_benchmark.rst:650
#: ../../Qwen/source/getting_started/speed_benchmark.rst:658
#: ../../Qwen/source/getting_started/speed_benchmark.rst:666
#: ../../Qwen/source/getting_started/speed_benchmark.rst:674
#: 0187a5bc6d5e4a8c921bcdd54a3d86ef 03b2c2084287404390755a31b5b6cd54
#: 0b67fb96a58e4231b8959ca1daca5902 0be21bc4a5b24a75a9ac8b3d0d89423f
#: 0fa1d6dda02a4261b9875d8ad6b1d4f0 11f4ebf7b6464d10915fd53fc0fc7283
#: 142cdfc204494fff9d76030b424cdd01 152d2a814a2c473cb061d4576c646837
#: 168fa1a1ea534041bff4a5c864f398da 218751b6743240e4a6bc231eb2783ba5
#: 276d6029aca2416eae04c25e24dcd7bb 278c55fc81584c5682aa615ef5872342
#: 2ddf2872603c436eb3f8f3d19f52a5ed 2e36dcbb2cc749c6b7fa58428d190f15
#: 2eec2fbeb78141ea8f86db67523c88d2 31116afdf9804ef188c0126ce36e6974
#: 32aa4aa420b24722a344b40253922934 3692cf66e46049398f571bbd6f7a979e
#: 3950ab866ce7427681baba920b24f697 39efeff04b0b4e488ca8a016919caaad
#: 3f293df1b7b447639e2b6094b877a2ee 43fc110e0ccc43a08cf682a73c63f589
#: 4bef714ab197418b914953e5c67f35de 5188383c08a44ec1970fb906cc279de5
#: 526cc34db8a042659adb64736c196a09 5d2671d77cb4494e895418cea5e53679
#: 5d611913c6de4f21a2bf1fce88a5e84e 639e8df2f3bc426eafab1e2063a22e28
#: 6fc9cd89d0a64a80a9e0066a256748ad 75659406147f4912a999b44044ef50ca
#: 77e7a0f551f3410a9e91bffe71872dc4 78c4172d61cb4084ab97224f9b0382ac
#: 7bae0b3e1c7b40bdb79df3fb6dbcf308 7f807186b8f347438366ba841377d7c6
#: 84d2124c2aa14116a2c9f4d2f6326ade 858659aafa054a30bcf40194f56c2191
#: 8b70ee6facd44ee69106e197d8878525 8d30050d007e4d458b435da6f6fd20ff
#: 9508742224314e3d8bf7af1a48c65bbc 9a96025c586f4e20b05ac7462ba49cfc
#: 9c7f5bade0194fcbb41e79f6aa2e7c0d 9fc633552a7340fdaf712e704cecd3b6
#: a0534503fce840a0bfdb8ccbcd09b721 a3d9c4953ec043e09eba2c321463a7dd
#: a60cf673ac974226a427b91637224dab aa918c2646674ff69f1c5b439502f44c
#: aad41b758405461b8196b0c674d4e926 af0de692dc8640f7bfdf99aacaf696c1
#: af8a464a4138489bad6f7f90d2843b53 b1b2207c8cfe431e9afbef68bd0a1935
#: b221750df1674db2bf98012e2524fa18 b93c23b44d044b07b8b574b17139b7f0
#: bbf9645f2fda41e7aaba6f40a344d6de bfbbcaff52ae4677965e0fbab9c1f8f9
#: c06984c0c5de490caac6475b2c1e16ee c1ed51d291a6449793a14a34d49275f1
#: c6e544a56be8478eba5afd694e8ddbd5 d01011fb8196450fa2be92bbb9bce21d
#: d66d0d4e6d114fa1be64d2fec0a4e034 daf01a908d364efeb1e8376cd55f84cf
#: f10b58f89f5d430587d95048325fc92c f1555bdb216c4ec388b369c86005fbee
#: f21875c9ab86487d9a6b82aaa45b9e6d f2465042fd1144e9b2d054230ce1d4c8
#: fb46c57902d445caa07ffac3603a4091
msgid "BF16"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:51
#: 7b5e3a36f6a043a284ec5055301e9bc8
msgid "47.40"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:51
#: 102c9db1961c4753a2676ef55b4cdd62
msgid "0.97"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:53
#: ../../Qwen/source/getting_started/speed_benchmark.rst:61
#: ../../Qwen/source/getting_started/speed_benchmark.rst:69
#: ../../Qwen/source/getting_started/speed_benchmark.rst:77
#: ../../Qwen/source/getting_started/speed_benchmark.rst:92
#: ../../Qwen/source/getting_started/speed_benchmark.rst:100
#: ../../Qwen/source/getting_started/speed_benchmark.rst:108
#: ../../Qwen/source/getting_started/speed_benchmark.rst:116
#: ../../Qwen/source/getting_started/speed_benchmark.rst:132
#: ../../Qwen/source/getting_started/speed_benchmark.rst:140
#: ../../Qwen/source/getting_started/speed_benchmark.rst:148
#: ../../Qwen/source/getting_started/speed_benchmark.rst:156
#: ../../Qwen/source/getting_started/speed_benchmark.rst:171
#: ../../Qwen/source/getting_started/speed_benchmark.rst:179
#: ../../Qwen/source/getting_started/speed_benchmark.rst:187
#: ../../Qwen/source/getting_started/speed_benchmark.rst:195
#: ../../Qwen/source/getting_started/speed_benchmark.rst:211
#: ../../Qwen/source/getting_started/speed_benchmark.rst:219
#: ../../Qwen/source/getting_started/speed_benchmark.rst:227
#: ../../Qwen/source/getting_started/speed_benchmark.rst:235
#: ../../Qwen/source/getting_started/speed_benchmark.rst:250
#: ../../Qwen/source/getting_started/speed_benchmark.rst:258
#: ../../Qwen/source/getting_started/speed_benchmark.rst:266
#: ../../Qwen/source/getting_started/speed_benchmark.rst:274
#: ../../Qwen/source/getting_started/speed_benchmark.rst:290
#: ../../Qwen/source/getting_started/speed_benchmark.rst:298
#: ../../Qwen/source/getting_started/speed_benchmark.rst:306
#: ../../Qwen/source/getting_started/speed_benchmark.rst:314
#: ../../Qwen/source/getting_started/speed_benchmark.rst:330
#: ../../Qwen/source/getting_started/speed_benchmark.rst:338
#: ../../Qwen/source/getting_started/speed_benchmark.rst:346
#: ../../Qwen/source/getting_started/speed_benchmark.rst:354
#: ../../Qwen/source/getting_started/speed_benchmark.rst:362
#: ../../Qwen/source/getting_started/speed_benchmark.rst:370
#: ../../Qwen/source/getting_started/speed_benchmark.rst:387
#: ../../Qwen/source/getting_started/speed_benchmark.rst:395
#: ../../Qwen/source/getting_started/speed_benchmark.rst:403
#: ../../Qwen/source/getting_started/speed_benchmark.rst:411
#: ../../Qwen/source/getting_started/speed_benchmark.rst:426
#: ../../Qwen/source/getting_started/speed_benchmark.rst:434
#: ../../Qwen/source/getting_started/speed_benchmark.rst:442
#: ../../Qwen/source/getting_started/speed_benchmark.rst:450
#: ../../Qwen/source/getting_started/speed_benchmark.rst:458
#: ../../Qwen/source/getting_started/speed_benchmark.rst:466
#: ../../Qwen/source/getting_started/speed_benchmark.rst:485
#: ../../Qwen/source/getting_started/speed_benchmark.rst:493
#: ../../Qwen/source/getting_started/speed_benchmark.rst:501
#: ../../Qwen/source/getting_started/speed_benchmark.rst:509
#: ../../Qwen/source/getting_started/speed_benchmark.rst:527
#: ../../Qwen/source/getting_started/speed_benchmark.rst:535
#: ../../Qwen/source/getting_started/speed_benchmark.rst:543
#: ../../Qwen/source/getting_started/speed_benchmark.rst:551
#: ../../Qwen/source/getting_started/speed_benchmark.rst:559
#: ../../Qwen/source/getting_started/speed_benchmark.rst:567
#: ../../Qwen/source/getting_started/speed_benchmark.rst:589
#: ../../Qwen/source/getting_started/speed_benchmark.rst:597
#: ../../Qwen/source/getting_started/speed_benchmark.rst:605
#: ../../Qwen/source/getting_started/speed_benchmark.rst:613
#: ../../Qwen/source/getting_started/speed_benchmark.rst:632
#: ../../Qwen/source/getting_started/speed_benchmark.rst:642
#: ../../Qwen/source/getting_started/speed_benchmark.rst:652
#: ../../Qwen/source/getting_started/speed_benchmark.rst:660
#: ../../Qwen/source/getting_started/speed_benchmark.rst:668
#: ../../Qwen/source/getting_started/speed_benchmark.rst:676
#: 05a42541d3f84bbe8907b79c47bce543 1021916df4e7427798c523206178ea7f
#: 178faa6d7a3342a38d67b46da4b63522 188efaea164f483b90eb54365004d664
#: 220316d41a1a46d4b0808a2667f708cd 238b44852d64437393daf048ea712831
#: 241b4c595305446aa668e6e006fe4e85 24aefaacddf24fed9e0beb0a1fe42cf0
#: 25588df142b741a1abd227117302b4ee 26ccf1fd6d514bcd8cab702b85ece8b1
#: 27ee842afd36468a92eade93f115e693 30346d361fd2463d9760116c95a3eefa
#: 31758a374f034777b78e094686b73231 390e0cd6adf24bb486a2b0a74ff38c92
#: 3adbecaaf65243abb8116084c6ec6595 3ff9d0ac031349f49929503c5c589203
#: 4090bcf01cf94745876397c133ea6fdf 42a10a939b6d4ab0a00fde66ff3ca680
#: 4f267f4ef0a74914844823f2fe4e816f 55a5ea1a951248b38303fd1d91fc6f45
#: 581370ace15f466f86fedab33a34a786 5c920896e5134184b1759f59ee91ff7e
#: 5ea4f0929c134f0797198abbe4fce56a 68f86e78520844e6b841f991dba260df
#: 6a138b9cf5794f26a16169ee5e91a1c7 6a56951c490649b8afb0b55046a1bc0c
#: 6def8a436f634c25b0508e3ce4bf5892 6e26e2f0c3784b328d26c4ac3b66203d
#: 6e303b01c6e949c58f66a212568ec7e9 73120c72875d479380e11a7d00672841
#: 75b1e27ff2eb42e590ae7c8886a332d2 7a6ed8da05694ecbb5fd3bbec2614c31
#: 7bd9324b2fa14079a55b85a44f820c1f 80469796054047b0af8e9a9e2e82b52d
#: 8aea75408cfb471c847cd130de556d94 954a47c396bb4a62b5366190c5f4bf79
#: 975ebd2751474e1b9440e2ce6b210dc7 98bc2dca66da4d79b2c1189d93fb28d2
#: 993ee44c1b984989b8274b4ec8585604 9975c8ebdc5943d7ae087521995fab2b
#: 9f65d55416db4bf091520145dfc736f2 a021c2a23edc44adaf9e27cad0d7a468
#: a1d647995e5b46f5b35c284fdbfe1cb3 a2f98f670ad34bdfa7c76d07ed606aa8
#: a3d433970f674f2a8017c5b50265d04f a3f9d42e41114a75a510ab4e017e70ee
#: addc7deabf1649f0b2c936dae5036113 af5bdeed13a0450c90b5ed38b56ab4f5
#: b1e820fe88cb4112bab8db8e7ee331ae bdc855477b2d4b43a93a8244b15c6d7d
#: c66c6c0aa5e14f158d876a454c9335c9 c9445b8212a344d9a92b28bbe30f92f3
#: ca5c0273837a433e847ff6d23d74a50c cbca22813c7b43d299ab8e5fab9845d4
#: cd2ff300c0be4cd491c75c3cef51cfbe cdcf9b8b6e2c4a38bbfa5e6b1ea4f807
#: ce24199ad7a2448ca6282423adbcffb6 d50c80e831a740c092f8b69785e493f8
#: e67081be3505499298503f4816e56656 e8af8a99134a402b80ff638d53978160
#: eb85a4ed2dd54dddb1ebe7e3f270a692 ecb7d1f18ad14b33931c939d234ea1f0
#: f065709afac441268254a04cd99eef78 f1265510b3b2497089fcfdcacf7b2ca8
msgid "GPTQ-Int8"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:53
#: 11423407180b44be82420def384b765c
msgid "35.17"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:53
#: 1196b144f4da4ab8a9b0fe18247efc47
msgid "0.64"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:53
#: ../../Qwen/source/getting_started/speed_benchmark.rst:61
#: ../../Qwen/source/getting_started/speed_benchmark.rst:69
#: ../../Qwen/source/getting_started/speed_benchmark.rst:77
#: ../../Qwen/source/getting_started/speed_benchmark.rst:132
#: ../../Qwen/source/getting_started/speed_benchmark.rst:140
#: ../../Qwen/source/getting_started/speed_benchmark.rst:148
#: ../../Qwen/source/getting_started/speed_benchmark.rst:156
#: ../../Qwen/source/getting_started/speed_benchmark.rst:211
#: ../../Qwen/source/getting_started/speed_benchmark.rst:219
#: ../../Qwen/source/getting_started/speed_benchmark.rst:227
#: ../../Qwen/source/getting_started/speed_benchmark.rst:235
#: ../../Qwen/source/getting_started/speed_benchmark.rst:290
#: ../../Qwen/source/getting_started/speed_benchmark.rst:298
#: ../../Qwen/source/getting_started/speed_benchmark.rst:306
#: ../../Qwen/source/getting_started/speed_benchmark.rst:314
#: ../../Qwen/source/getting_started/speed_benchmark.rst:387
#: ../../Qwen/source/getting_started/speed_benchmark.rst:395
#: ../../Qwen/source/getting_started/speed_benchmark.rst:403
#: ../../Qwen/source/getting_started/speed_benchmark.rst:411
#: ../../Qwen/source/getting_started/speed_benchmark.rst:485
#: ../../Qwen/source/getting_started/speed_benchmark.rst:493
#: ../../Qwen/source/getting_started/speed_benchmark.rst:501
#: ../../Qwen/source/getting_started/speed_benchmark.rst:509
#: ../../Qwen/source/getting_started/speed_benchmark.rst:589
#: ../../Qwen/source/getting_started/speed_benchmark.rst:597
#: ../../Qwen/source/getting_started/speed_benchmark.rst:605
#: ../../Qwen/source/getting_started/speed_benchmark.rst:613
#: 013d1b40c6ae40dc99e48dacbb8824ff 095d1882d6d24b0fb0dfc53f379d8ff5
#: 0bd6672b6511428a8eb758a438b6a67e 166ab5c82ec945009f8918afcb69b4c7
#: 1e03bb0feded479a85cc843d2552f738 28f60cf44044427caebf7a4fafdb3317
#: 3967d3eead5f4e0cb86cf7b16aa2068e 4e8af5ada1f54045ab70128edb035848
#: 53022700901043aea02410df922ff5f2 5c360a5acfc7481baed2b3d488a4d7d9
#: 5d80c9ec79b04217bfee4150a2cde57b 63750b304bc545f39aa7c5f91c36180f
#: 6604f5723a704317a898a5091d16a3b0 6aa77d8baea24a3dac1ea7f40d2a8545
#: 6cc7ee00fb6e40298e2e6c46724822ce 708d3f04e2e149c99f231b28acb7f232
#: 732df26eebef45b5abe97daaf152b560 87ded1a1e4434310a4a5731403a04613
#: 8c4cd3ebd6854859822c00816ee5f0b6 a4a413c20b0f4428bacec0409b34edca
#: be01ed05733e4fde8a4551409d05802d c4d69c7e2d6d4eefb6a089dff2ac1a1a
#: c72ad980b1da4e398f21ebc9d8884dbd c773fc80123945f7b9fa5e9587acbffb
#: d62ef19a73d64aa5aaf1ce8aeaecc38a e0268ada69784a11957b5f633a6e1ca6
#: e4dcd51cdeba4d01b2c818dc3b00f3ad ecf5befcd30b4302aad752ac6cd6d112
msgid "auto_gptq==0.6.0+cu1210"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:55
#: ../../Qwen/source/getting_started/speed_benchmark.rst:63
#: ../../Qwen/source/getting_started/speed_benchmark.rst:71
#: ../../Qwen/source/getting_started/speed_benchmark.rst:79
#: ../../Qwen/source/getting_started/speed_benchmark.rst:94
#: ../../Qwen/source/getting_started/speed_benchmark.rst:102
#: ../../Qwen/source/getting_started/speed_benchmark.rst:110
#: ../../Qwen/source/getting_started/speed_benchmark.rst:118
#: ../../Qwen/source/getting_started/speed_benchmark.rst:134
#: ../../Qwen/source/getting_started/speed_benchmark.rst:142
#: ../../Qwen/source/getting_started/speed_benchmark.rst:150
#: ../../Qwen/source/getting_started/speed_benchmark.rst:158
#: ../../Qwen/source/getting_started/speed_benchmark.rst:173
#: ../../Qwen/source/getting_started/speed_benchmark.rst:181
#: ../../Qwen/source/getting_started/speed_benchmark.rst:189
#: ../../Qwen/source/getting_started/speed_benchmark.rst:197
#: ../../Qwen/source/getting_started/speed_benchmark.rst:213
#: ../../Qwen/source/getting_started/speed_benchmark.rst:221
#: ../../Qwen/source/getting_started/speed_benchmark.rst:229
#: ../../Qwen/source/getting_started/speed_benchmark.rst:237
#: ../../Qwen/source/getting_started/speed_benchmark.rst:252
#: ../../Qwen/source/getting_started/speed_benchmark.rst:260
#: ../../Qwen/source/getting_started/speed_benchmark.rst:268
#: ../../Qwen/source/getting_started/speed_benchmark.rst:276
#: ../../Qwen/source/getting_started/speed_benchmark.rst:292
#: ../../Qwen/source/getting_started/speed_benchmark.rst:300
#: ../../Qwen/source/getting_started/speed_benchmark.rst:308
#: ../../Qwen/source/getting_started/speed_benchmark.rst:316
#: ../../Qwen/source/getting_started/speed_benchmark.rst:332
#: ../../Qwen/source/getting_started/speed_benchmark.rst:340
#: ../../Qwen/source/getting_started/speed_benchmark.rst:348
#: ../../Qwen/source/getting_started/speed_benchmark.rst:356
#: ../../Qwen/source/getting_started/speed_benchmark.rst:364
#: ../../Qwen/source/getting_started/speed_benchmark.rst:372
#: ../../Qwen/source/getting_started/speed_benchmark.rst:389
#: ../../Qwen/source/getting_started/speed_benchmark.rst:397
#: ../../Qwen/source/getting_started/speed_benchmark.rst:405
#: ../../Qwen/source/getting_started/speed_benchmark.rst:413
#: ../../Qwen/source/getting_started/speed_benchmark.rst:428
#: ../../Qwen/source/getting_started/speed_benchmark.rst:436
#: ../../Qwen/source/getting_started/speed_benchmark.rst:444
#: ../../Qwen/source/getting_started/speed_benchmark.rst:452
#: ../../Qwen/source/getting_started/speed_benchmark.rst:460
#: ../../Qwen/source/getting_started/speed_benchmark.rst:468
#: ../../Qwen/source/getting_started/speed_benchmark.rst:487
#: ../../Qwen/source/getting_started/speed_benchmark.rst:495
#: ../../Qwen/source/getting_started/speed_benchmark.rst:503
#: ../../Qwen/source/getting_started/speed_benchmark.rst:511
#: ../../Qwen/source/getting_started/speed_benchmark.rst:529
#: ../../Qwen/source/getting_started/speed_benchmark.rst:537
#: ../../Qwen/source/getting_started/speed_benchmark.rst:545
#: ../../Qwen/source/getting_started/speed_benchmark.rst:553
#: ../../Qwen/source/getting_started/speed_benchmark.rst:561
#: ../../Qwen/source/getting_started/speed_benchmark.rst:569
#: ../../Qwen/source/getting_started/speed_benchmark.rst:591
#: ../../Qwen/source/getting_started/speed_benchmark.rst:599
#: ../../Qwen/source/getting_started/speed_benchmark.rst:607
#: ../../Qwen/source/getting_started/speed_benchmark.rst:615
#: ../../Qwen/source/getting_started/speed_benchmark.rst:634
#: ../../Qwen/source/getting_started/speed_benchmark.rst:636
#: ../../Qwen/source/getting_started/speed_benchmark.rst:644
#: ../../Qwen/source/getting_started/speed_benchmark.rst:646
#: ../../Qwen/source/getting_started/speed_benchmark.rst:654
#: ../../Qwen/source/getting_started/speed_benchmark.rst:662
#: ../../Qwen/source/getting_started/speed_benchmark.rst:670
#: ../../Qwen/source/getting_started/speed_benchmark.rst:678
#: 04e9663c7b2b4e4ea435e4c9462b1790 05f89ea76a7846f88fa9496913a0ae86
#: 06835d06095549f3ba175adb92811d5b 0898b0a9910e4c29b8b4dc867c6bc1e9
#: 09135405b9ec49e3b84d0d4acf307245 0e82f54795634a54b7e8085dc3179203
#: 1d4d4906df9946a98e6791377c812b88 24de7c8b77254d0db35bf0572d07f3b7
#: 2ac992fb2f3648e7a9d99213337636b4 32b69f934b7a4bdab494714ef936c20c
#: 352b00561584443da20cb8db2693e2e4 3592145247744da9b4631c97e3457ed1
#: 38bfcd452ef5411198a156a77b5c9973 3f1883c0e82842a38ccb799b969d8ca5
#: 4333fcc876f54e1e92f1270471e6acfe 4ccc8a9919454ab19f4b765ec0a8c5e1
#: 4da28702c6284f58afa6f5a8de7ba695 4f94b1eb712f412bbdb335e0f77bb3c6
#: 50d883123f0f4fd7bb9ae076610a1d26 51f435f934ad47c0a9e621853a9fe36e
#: 520c1d30dafb4952822baeae0b522894 521d886efb0541769308674f79bdf1dd
#: 541fee9d1f0a4ea49fa9ca196dfcf88e 58a72a46fab943da9094a0c2a1651f0c
#: 5f1d9e513c1d4a9aa75acef36c4d08d3 5f71befd9b084bb3ba870d9f6be161b8
#: 6dfd2ef4ba5948a0b0eac6d9564e036e 75633e27bff04c3b9e5af9e372cbc717
#: 77a3660f3817454a9015642780181a00 79d62cc4d0a94f1ba85ce3224e2b0b9d
#: 7aadfa2355774b30890ac1239643b89b 7ac870470e284768ae4fc1ee573626cf
#: 7ad8a151669b4e628b0f5841e68504a5 7b6d7832a9634a928c3b122497438d97
#: 7e83253ea49c4fc0b32c0e9642393366 7ee6e1f43139430c9091d83c8d2fe9e7
#: 80711f2d88664ec0b54f3341bc6fdfd7 80a97c0c25c34d02b496a6216778cce0
#: 876f2c85465b43d2b4409c477367cdf0 8828c3b68a404c1aa57b10a11a273358
#: 8bd922e6938346509b52c125387d1530 8e249c1b08384fdcb341feb200456c9f
#: 96c0650187e54b808ba84817a250eb84 98ecc925675e4b1f9d5a781ac04473f7
#: 9c3d4141de45437ebc6ec711dfb6155e 9c96b56a84f449a8ad1588f627356d91
#: a868d4dd843f4b359540f2fe478ab8c9 abdcbade98114f2eb2f784214633d178
#: ac47ae4525a04b2088616b48222c1ca9 b29555ec0a604c7386e9625df6ba2d13
#: b4649b7715ac4b03bea02b54c9d9e285 b9ad8a254a1d4ae582b4988d50dea2a0
#: bbadc3dfc4c14d96b697303085065f83 be21b0c3fa5448959099882097a72060
#: be6fcff5e7e34fb09b55d478d2cca1b7 c6fcf05da252434c96a33cf09d68a858
#: d4d1da8f04fe426bb9bc1c3481627bc3 d535a39ef7924fe3a85e13b9cce2e979
#: dcbff717b5264110affec708bcdd7924 e51462e7a43849d185e3ce6f6000ba3f
#: e5c3470a7ba84d11b9b570ef5211241f e6e2e563280d437fbd496fd91f18def1
#: e79053df14df4cb79676af641d972961 ed529d012a274493a6361e86396a5675
#: f9091ceacfad4b6abb5273e269dc5c9d fea027dc6c0543a985643fe6770b9200
msgid "GPTQ-Int4"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:55
#: 629feb238755401a90f5a01b5549ed65
msgid "50.60"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:55
#: b661a2357b9f4d6292924a6a8fac51af
msgid "0.48"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:57
#: ../../Qwen/source/getting_started/speed_benchmark.rst:65
#: ../../Qwen/source/getting_started/speed_benchmark.rst:73
#: ../../Qwen/source/getting_started/speed_benchmark.rst:81
#: ../../Qwen/source/getting_started/speed_benchmark.rst:96
#: ../../Qwen/source/getting_started/speed_benchmark.rst:104
#: ../../Qwen/source/getting_started/speed_benchmark.rst:112
#: ../../Qwen/source/getting_started/speed_benchmark.rst:120
#: ../../Qwen/source/getting_started/speed_benchmark.rst:136
#: ../../Qwen/source/getting_started/speed_benchmark.rst:144
#: ../../Qwen/source/getting_started/speed_benchmark.rst:152
#: ../../Qwen/source/getting_started/speed_benchmark.rst:160
#: ../../Qwen/source/getting_started/speed_benchmark.rst:175
#: ../../Qwen/source/getting_started/speed_benchmark.rst:183
#: ../../Qwen/source/getting_started/speed_benchmark.rst:191
#: ../../Qwen/source/getting_started/speed_benchmark.rst:199
#: ../../Qwen/source/getting_started/speed_benchmark.rst:215
#: ../../Qwen/source/getting_started/speed_benchmark.rst:223
#: ../../Qwen/source/getting_started/speed_benchmark.rst:231
#: ../../Qwen/source/getting_started/speed_benchmark.rst:239
#: ../../Qwen/source/getting_started/speed_benchmark.rst:254
#: ../../Qwen/source/getting_started/speed_benchmark.rst:262
#: ../../Qwen/source/getting_started/speed_benchmark.rst:270
#: ../../Qwen/source/getting_started/speed_benchmark.rst:278
#: ../../Qwen/source/getting_started/speed_benchmark.rst:294
#: ../../Qwen/source/getting_started/speed_benchmark.rst:302
#: ../../Qwen/source/getting_started/speed_benchmark.rst:310
#: ../../Qwen/source/getting_started/speed_benchmark.rst:318
#: ../../Qwen/source/getting_started/speed_benchmark.rst:334
#: ../../Qwen/source/getting_started/speed_benchmark.rst:342
#: ../../Qwen/source/getting_started/speed_benchmark.rst:350
#: ../../Qwen/source/getting_started/speed_benchmark.rst:358
#: ../../Qwen/source/getting_started/speed_benchmark.rst:366
#: ../../Qwen/source/getting_started/speed_benchmark.rst:374
#: ../../Qwen/source/getting_started/speed_benchmark.rst:391
#: ../../Qwen/source/getting_started/speed_benchmark.rst:399
#: ../../Qwen/source/getting_started/speed_benchmark.rst:407
#: ../../Qwen/source/getting_started/speed_benchmark.rst:415
#: ../../Qwen/source/getting_started/speed_benchmark.rst:430
#: ../../Qwen/source/getting_started/speed_benchmark.rst:438
#: ../../Qwen/source/getting_started/speed_benchmark.rst:446
#: ../../Qwen/source/getting_started/speed_benchmark.rst:454
#: ../../Qwen/source/getting_started/speed_benchmark.rst:462
#: ../../Qwen/source/getting_started/speed_benchmark.rst:470
#: ../../Qwen/source/getting_started/speed_benchmark.rst:489
#: ../../Qwen/source/getting_started/speed_benchmark.rst:497
#: ../../Qwen/source/getting_started/speed_benchmark.rst:505
#: ../../Qwen/source/getting_started/speed_benchmark.rst:513
#: ../../Qwen/source/getting_started/speed_benchmark.rst:531
#: ../../Qwen/source/getting_started/speed_benchmark.rst:539
#: ../../Qwen/source/getting_started/speed_benchmark.rst:547
#: ../../Qwen/source/getting_started/speed_benchmark.rst:555
#: ../../Qwen/source/getting_started/speed_benchmark.rst:563
#: ../../Qwen/source/getting_started/speed_benchmark.rst:571
#: ../../Qwen/source/getting_started/speed_benchmark.rst:593
#: ../../Qwen/source/getting_started/speed_benchmark.rst:601
#: ../../Qwen/source/getting_started/speed_benchmark.rst:609
#: ../../Qwen/source/getting_started/speed_benchmark.rst:617
#: ../../Qwen/source/getting_started/speed_benchmark.rst:638
#: ../../Qwen/source/getting_started/speed_benchmark.rst:648
#: ../../Qwen/source/getting_started/speed_benchmark.rst:656
#: ../../Qwen/source/getting_started/speed_benchmark.rst:664
#: ../../Qwen/source/getting_started/speed_benchmark.rst:672
#: ../../Qwen/source/getting_started/speed_benchmark.rst:680
#: 00ae1e8f6e68408b91120e3f5fe12e26 0278b7327f3f49c29ee45c9669529640
#: 06a24cece162424391957836c3908dc3 0764598b1ee149dc88066f8083f3c2b8
#: 09aba84a110640769e2ab231a1751c55 0bc3aa9660074c72be7fac7bc48e9896
#: 13bb8df9dda54af5903b2c8f2a24f964 1a16e066fe1b445dae9f67d6b9c8b093
#: 2246463bbf8f4aff8755b11448211bac 23331268f0d74cb7a2d86410d1aae0d4
#: 25448c3d6b0b4061a1c893e36d675292 283b842a54264925bf4fbd46585ae96c
#: 2a50122e46d34b04a1fb2eb0a2b9589a 3726e44061a2410cb0ec1f629d44fe5d
#: 3fe8de4f4e29419b9641479658edd557 40c4374fcf4143999a4d33ae66ba7152
#: 443a018b8ca44104aa7c64efbd02790d 444ed3fde47a4844872d96ef8ad88913
#: 466d5f5de058466ea26e610b321a8208 50331b0cdaa94c99b39047d529cbef19
#: 55b3477fd81845c4bbeea92daabf5021 596916e951ab45e88a17b12788439c85
#: 5e4856b9ac3f40258cde5b6886069f3f 62a1042ff3564780809aadffb768ee26
#: 684b05dde1054dfaa497f7c440c1fa98 68e673d95d6a4f02b8a902eb91eda4c2
#: 695dd775d1f9465fa8b1af17821307ff 6ade2a445a074772a112b5b32088fc76
#: 705d2af811be49a3b47516274f43660a 755f394f28394b34af28a74d6cd9c290
#: 7646c4beaab849a19b6c763ad1d182ee 76ceb7a8d2e4401591be7ab6f388d92a
#: 76d2d27e65fe4c4faddec7b80a607848 7a2aaa7b5d294344826661c522293d38
#: 7aaca1cea47b42dcb5493436f3827763 7ed659a75a934d84b196534906bd5c41
#: 82e2b6a540a3431f89c89659bc70ef91 8b0238660306499b9bfba0c282aa7373
#: 8d4b2b7430b543cd8e55be896f0f3f38 8df02ac807c9410aa8a5026aca90e836
#: 96d82259bb054cc6add5dc2ddccfaa84 9850ed95902d4b53b34ec4c4f6887068
#: 9931f4682ff843b0ad3d9a93fe7c9cb6 9c3d1b30462c4f6c81375573756cf358
#: 9d8503ecd4cf4701a8948df67a4cb00e 9eae857b2a98484989a45ba5041c7ee6
#: a05e3ae470a342b1b55740466ce00982 a94ee937561a40cd8d5519fc5879095d
#: afd41c79d4da4daea6fbe3a47a2c9f31 b17f3e5cc47a49559fd1ee6bdba0a8c4
#: b289b859a08345f2b6615dfe9acd2618 b29109f960fe411f852f14d0cf0e3d96
#: b7618ed0c17e4f19ba74062f52fc683c b7687cc114fc4e30a49fa8a29a48a516
#: c3c94a19caff4a1ab7c35b8374c25792 c84272d15fa04c5ca00a0acf8128c0ac
#: cd3d6429ec534826809dbb32dedcc4bc d1118e19e6cf426f851eb2d01e072fd9
#: dd32eb658c614de0b24db49fe56c20ca deadb165183a410e8cebe1a08107dfd3
#: e189016ae67c40139d78c18895eeea03 e73e8540c2124dda9cd3dfe97193c607
#: ed6288c5910b48539c8e71afe03a4460 fc9f345e86404be1918b0cc814548a6d
msgid "AWQ"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:57
#: a0bc1ca319c84934b36fdbdbba00e465
msgid "37.09"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:57
#: 9b5e76fec9c24a9481ea34ff1bc45010
msgid "0.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:59
#: ../../Qwen/source/getting_started/speed_benchmark.rst:98
#: ../../Qwen/source/getting_started/speed_benchmark.rst:138
#: ../../Qwen/source/getting_started/speed_benchmark.rst:177
#: ../../Qwen/source/getting_started/speed_benchmark.rst:217
#: ../../Qwen/source/getting_started/speed_benchmark.rst:256
#: ../../Qwen/source/getting_started/speed_benchmark.rst:296
#: ../../Qwen/source/getting_started/speed_benchmark.rst:336
#: ../../Qwen/source/getting_started/speed_benchmark.rst:393
#: ../../Qwen/source/getting_started/speed_benchmark.rst:432
#: ../../Qwen/source/getting_started/speed_benchmark.rst:491
#: ../../Qwen/source/getting_started/speed_benchmark.rst:533
#: ../../Qwen/source/getting_started/speed_benchmark.rst:595
#: ../../Qwen/source/getting_started/speed_benchmark.rst:640
#: 009cdbacf29d4d8e8da4bac87eafa353 1337f79317654239a250c2ac20c50043
#: 198a10cab1134b0a9985680f07c0263c 3ed34d5274de4bdd8471c653245f298a
#: 46b45940b42b47ad9b44b4dddc99e653 7cc280d41e8d4fac847af04379dbcaf0
#: 82d0c36b743544f5ae8169dbce2f2f9e 8daafc001afb45e0b1bd72bc341b3a4a
#: 8eb73b66b02e47058940ac3c22ae5c17 9ed68035fda444b280cd081922448986
#: acbe015187b44f1f8685400ee9e33904 acfd3839ab144a7c9236023e68c1f372
#: f2dd730ce308495fa91f2e1ce0e18bfa ffed67b39b5a4d019c891dceb38cc511
msgid "6144"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:59
#: 7cf82f7c6106450286dfe1932280c66f
msgid "47.45"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:59
#: 49982cc89a2f4d8684d2559d86e462dd
msgid "1.23"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:61
#: fab3837a99594d58a869f5a024cc4740
msgid "36.47"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:61
#: a7f3f3b8edf247da827869b863ced852
msgid "0.90"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:63
#: e4b624bc551c4cbb88e5f86134baa92b
msgid "48.89"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:63
#: 73d8e4c6909245e19ac39990988690f7
msgid "0.73"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:65
#: 56a34310e1b349958b9af2f284b5090f
msgid "37.04"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:65
#: d732132274644028abf604cb8c277c54
msgid "0.72"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:67
#: ../../Qwen/source/getting_started/speed_benchmark.rst:106
#: ../../Qwen/source/getting_started/speed_benchmark.rst:146
#: ../../Qwen/source/getting_started/speed_benchmark.rst:185
#: ../../Qwen/source/getting_started/speed_benchmark.rst:225
#: ../../Qwen/source/getting_started/speed_benchmark.rst:264
#: ../../Qwen/source/getting_started/speed_benchmark.rst:304
#: ../../Qwen/source/getting_started/speed_benchmark.rst:344
#: ../../Qwen/source/getting_started/speed_benchmark.rst:401
#: ../../Qwen/source/getting_started/speed_benchmark.rst:440
#: ../../Qwen/source/getting_started/speed_benchmark.rst:499
#: ../../Qwen/source/getting_started/speed_benchmark.rst:541
#: ../../Qwen/source/getting_started/speed_benchmark.rst:603
#: ../../Qwen/source/getting_started/speed_benchmark.rst:650
#: 05dfb65f355645b2899f88fc85a589a8 07f2cf0a7f1e4c61be46b107d74bc7a8
#: 0830e2d4d07a4e3798b281c08cd558a6 33e08e87f1e34edfbb8b4ac678d29636
#: 47c9804609944facbf3078e896875a19 5b86e781168442afafbf44741d1e3cfb
#: 6875d8909e564c338648c3daab46f5c5 71f18016a417403a94fffb4b68104863
#: 955d295ff9e4460b80b1f78a67befdd8 b03403473d1b4db9be0cc264efe5c836
#: c910a578448f458abe76f56cf9bc141f d0202d134f4a4b4885d01fb6af0b1c66
#: d252bf874eb04c339a1ba000ec1414b0 ead0c46dbe3c4ca587064072ac574705
msgid "14336"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:67
#: 8c7a2a48e76a4f1a9d1e7df9ffde1f13
msgid "47.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:67
#: 9b6161192b5246179f2a955699063597
msgid "1.60"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:69
#: 4892cd9cbf134174b6ff77465855e327
msgid "35.44"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:69
#: 86dfdfb5386a4708aa407c655aa35cdf
msgid "1.26"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:71
#: 88bc181521f042568e62824d7691a28e
msgid "48.26"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:71
#: ../../Qwen/source/getting_started/speed_benchmark.rst:73
#: 1d2494b97b1c406d8a13174f66d3d7ea 2ada85ef0ac74b9db45a9ab4c22c7f9d
msgid "1.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:73
#: a3d015312df8486a958f673dd5540b3a
msgid "37.14"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:75
#: ../../Qwen/source/getting_started/speed_benchmark.rst:114
#: ../../Qwen/source/getting_started/speed_benchmark.rst:154
#: ../../Qwen/source/getting_started/speed_benchmark.rst:193
#: ../../Qwen/source/getting_started/speed_benchmark.rst:233
#: ../../Qwen/source/getting_started/speed_benchmark.rst:272
#: ../../Qwen/source/getting_started/speed_benchmark.rst:312
#: ../../Qwen/source/getting_started/speed_benchmark.rst:352
#: ../../Qwen/source/getting_started/speed_benchmark.rst:409
#: ../../Qwen/source/getting_started/speed_benchmark.rst:448
#: ../../Qwen/source/getting_started/speed_benchmark.rst:507
#: ../../Qwen/source/getting_started/speed_benchmark.rst:549
#: ../../Qwen/source/getting_started/speed_benchmark.rst:611
#: ../../Qwen/source/getting_started/speed_benchmark.rst:658
#: 0ea7051371974a64b62d5c646b1b39f6 2cb7e33db1874c2e8de26725eeb6d30e
#: 2dc955911e47467ebc6f8d8e8f00438b 41c9958bbe0c45e3a5294ebcc018e49e
#: 4769ee6f27bf4d7083865d994de83db7 634568d05ed2451b8cd4572e1e3506c7
#: 6d405bde32d1451083eb8d4f5e07873e 6dba0bf51ab6484b99e7cb69b8305cb8
#: 7e4e611ad61446d7b5053c3313c76abc 884977f8a1e647bd8251b6322823cc18
#: a351fe3592f340638d0a2869a4fa2c55 c0cf78c6ce1249cd945895aa1c28ea9b
#: d529809e409e4fa881727901cd523114 dc44327da67642d5b54a79a72fe80359
msgid "30720"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:75
#: bcb9b58426e44c98b18a89dfa3e52f8e
msgid "47.16"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:75
#: d4be552d77f74c72ab635263a9856014
msgid "2.34"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:77
#: 137e804e37154192bdadb0e260e56d7c
msgid "36.25"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:77
#: 382b06eb5b9c417ab734b5f8b6d38b7d
msgid "2.01"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:79
#: 5fc275b4a5b04d6d8d8f9f6ab89f0db4
msgid "49.22"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:79
#: 9134135b7454451ab3bc19a9726d9561
msgid "1.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:81
#: 9faaa71b541c42aaa6d70d2ddf7c18f9
msgid "36.90"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:81
#: 5c7c3192f04f49f09ba1e687128356c8
msgid "1.84"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:85
#: a3f6fcee9c5640babc4b5eabf5b88c25
msgid "0.5B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:90
#: 8ad6a7a9060646bc895949824e36841c
msgid "311.55"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:92
#: 953ee3b503da4795bb1fd49839943b8a
msgid "257.07"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:94
#: 8d465e0b7a4b4b55b2a776a7c6c65b16
msgid "260.93"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:96
#: f663227e81ce406d92a3fcd830212046
msgid "261.95"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:98
#: 83849f7959384eaba9dd51876ae36f87
msgid "304.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:100
#: 15ef7f8fd94447fd90b95bab150bdeea
msgid "254.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:102
#: 5937b73265a142cb8563aad53eaa3691
msgid "257.33"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:104
#: a1c8eb8b702140d38a20264edff6a4fd
msgid "259.80"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:106
#: 1d60b9a09997456ba14dc698ae4abebd
msgid "290.28"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:108
#: 308528a8430241f1bb3f107f627a0a0f
msgid "243.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:110
#: 888169c1fd1c405687c3cfca214c31fd
msgid "247.01"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:112
#: e40fb50ed73c49a2b3f82ebca1fff731
msgid "249.58"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:114
#: 733262faf4fb45cd87d519b4edae4976
msgid "264.51"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:116
#: 5270fd0a61ae462d835b6e703e646bd5
msgid "223.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:118
#: bb8aed8e18f540adb12258701658ea86
msgid "226.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:120
#: 5bea8e63075f444a99556dab7c2085d1
msgid "229.84"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:125
#: 4c1779fa371543268441473c511eea00
msgid "1.5B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:130
#: ../../Qwen/source/getting_started/speed_benchmark.rst:169
#: 2b47649655d3403c8a7bfa72ad59438f 77772cf114fd416f9bdf97c7f767c7d0
msgid "Qwen2.5-1.5B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:130
#: 2317a3c491824506bce0dc8f308ed64a
msgid "39.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:130
#: 7b74222442c541a89a196a2d04a1c995
msgid "2.95"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:132
#: af0210f522654f73a772664c99ad3c49
msgid "32.62"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:132
#: ebfb7961553b444a9104de2d2a43162d
msgid "1.82"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:134
#: 3f373b8291654d22a305e8ac0b5bac1b
msgid "43.33"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:134
#: 75dd937abe9d4daca3b4a89984a9b078
msgid "1.18"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:136
#: 75eae517faca488db106a0dd561dbf7d
msgid "31.70"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:136
#: 00604ef3dff44cbfa888a50178fa17c5
msgid "1.51"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:138
#: 6c4f1865aefe42809a6fa2411988204c
msgid "40.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:138
#: eda3010cbba844459135a33045effa7d
msgid "3.43"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:140
#: b79801dd0f794042b6eba4194ff49675
msgid "31.46"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:140
#: 5bf84b4f616041239731a65e072114e8
msgid "2.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:142
#: ecb5328d5a89408e8cad5d279266d181
msgid "43.96"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:142
#: 04cf902f0b0b4c9c92e8d6f201e8284a
msgid "1.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:144
#: 4df3df3f06614d62b77b869ce135a90e
msgid "32.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:144
#: 25d4bc8fd75f4778986da18c567968e6
msgid "1.63"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:146
#: b5986361e2bc442289618d57681d1934
msgid "40.43"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:146
#: b56c6512adca45a182e883b279807bfe
msgid "4.16"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:148
#: df39a39653e9479f84ede4ccea98cfc4
msgid "31.06"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:148
#: a119b069ab274d0e911516ce73273ee0
msgid "3.03"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:150
#: 01c289f40ab8446796a16d95d97f7a38
msgid "43.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:150
#: afe2f3a364394d078c4c52ae6cad5cdc
msgid "2.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:152
#: f889474d8d334a90ac4f6cad42d70e16
msgid "32.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:152
#: 977283c361914a6e91ec57451591f400
msgid "2.36"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:154
#: 1a0a4dfa1c7c4957970f6026eb50870b
msgid "38.59"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:154
#: 3b9acf6ca56f4c6395bccf4b1d8afee2
msgid "5.62"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:156
#: 4e0a926dcf39492db47679b90f9302a5
msgid "31.04"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:156
#: 9561feb00871463ebc9af8e3f845e335
msgid "4.49"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:158
#: 815fadc5cb074cdfa15a0bd9b949f6b3
msgid "35.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:158
#: 8339dd4bc73d4ebabfb9813ae6a49d67
msgid "3.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:160
#: ../../Qwen/source/getting_started/speed_benchmark.rst:401
#: 08811d687ae14e09bb336f5337cf00f5 279a452c591847d4a64758e9575b4d27
msgid "31.95"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:160
#: 788fede2692c4c68a150c04aee3af2cc
msgid "3.82"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:164
#: d6835e02c1564c0292dc6c3055603b49
msgid "1.5B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:169
#: f3dd9fb7e85940a994f0724992eca2aa
msgid "183.33"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:171
#: 4d36e27e41144a29acd66b7a2eb8264e
msgid "201.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:173
#: 566569eae862413eaae62378ec60642f
msgid "217.03"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:175
#: a0cdffa7e2834b8bbc66a0c5e750155c
msgid "213.74"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:177
#: 17fe460794ef429baaff0fc7d7385d33
msgid "176.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:179
#: c28483d5aa174366813d5139a7fae967
msgid "192.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:181
#: 78ee7e3f9f784ae696d736ef998eea2f
msgid "206.63"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:183
#: 307f7bf98ba445289ca5214054f226f0
msgid "203.64"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:185
#: 517cc68aa2104f54afd4065e0bcbd2dc
msgid "168.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:187
#: 705b401bfbb94547bba99d7497377493
msgid "183.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:189
#: e55b3f9c71bf4a408c0edb40142f1a70
msgid "195.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:191
#: b18ed4384ac0400093621b744a79d07a
msgid "192.64"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:193
#: 288f88b10f164d488934b44e52d6f7f7
msgid "152.04"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:195
#: fef494f0a7864f20a20ecc28d38ed49d
msgid "162.82"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:197
#: cf34ed991cf34bfd9dbc65998268fa0d
msgid "173.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:199
#: 7b4e77ed996945e5b11210aa94f1164e
msgid "170.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:204
#: 29612d8d31cc46f09b29727561c51155
msgid "3B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:209
#: ../../Qwen/source/getting_started/speed_benchmark.rst:248
#: 5daa7c1e15b44259b8c76ca8e835c9cb f9988b1c6e94450ab0a9643847292d46
msgid "Qwen2.5-3B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:209
#: 0807d2037b47455297ab3687fcb3c33a
msgid "30.80"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:209
#: cc3af58a90aa4bd5bfa750b3acbb6735
msgid "5.95"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:211
#: 49a46da1fe0a4ccb9970a235cb439876
msgid "25.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:211
#: 8b4a4859a0c1406399000f00bc6db354
msgid "3.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:213
#: 52096558c50b49958c30aeac722bbf42
msgid "35.21"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:213
#: 1cc8a35645d14e88aaed0d02e8a85236
msgid "2.06"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:215
#: 957a98992c524bdd84a056b7bef8ddfd
msgid "25.29"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:215
#: 689ecba4fab64dba81093564589bb594
msgid "2.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:217
#: b2cefb3935a84b2397035912535b597e
msgid "32.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:217
#: 51964695c1304e858960a1e2c6da46e3
msgid "6.59"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:219
#: 9d3e132d27764a5b8c696c976ca3f781
msgid "24.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:219
#: d31b90bd313b44a2bb03ecc5339df340
msgid "3.98"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:221
#: bbac59966e2e4a3b8d03d32578d29234
msgid "34.47"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:221
#: 07c1e6d2a08f4f909bc19cb9641b62c3
msgid "2.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:223
#: 21e61804c449444d833d4de19839348b
msgid "24.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:223
#: b892f6ef7d8d46bf98e48e62006eacbc
msgid "2.62"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:225
#: 31adfd13385f4c6195c7f346debdfce2
msgid "31.72"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:225
#: 97102e8713254b7bbb4cc62df3dc13ef
msgid "7.47"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:227
#: 5422d3054c0048ac81914ac7becb9b42
msgid "24.70"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:227
#: 23ef1837888b405195fbc90dc8bcb571
msgid "4.89"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:229
#: cce3841d106e4fe0a359b33edec99bf2
msgid "34.36"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:229
#: c6a0812d0b0b4f8f9ddb656f52e3731c
msgid "3.58"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:231
#: 6c889655c3cc43a4b167132e64a2f6f9
msgid "25.19"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:231
#: 31a4bb37f23b4e4d9437f8dc779b6472
msgid "3.54"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:233
#: 96af50cc30974e10a27cf14a2da4811d
msgid "25.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:233
#: 719843655d5c442cb10b7b5aa4bd2a97
msgid "9.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:235
#: 70547ac36f3f48238456a7ed58f78688
msgid "21.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:235
#: 42910a5aabf540b4bcea07b02a851032
msgid "6.72"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:237
#: 978ca61e6d0a4998b64c3154dd66b1d4
msgid "23.60"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:237
#: f73642b4e2394fdc9668c9db25f52df9
msgid "5.41"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:239
#: 04165880d1684173805853bf7259bcc6
msgid "24.56"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:239
#: 34569f818f834c5c8ff784b6ffd99c93
msgid "5.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:243
#: 220814be8142425e84766a0321be4d5a
msgid "3B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:248
#: f2d9ccca39114b63a5286a6611de8125
msgid "127.61"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:250
#: 2a077e8cd8d94d8785264d4cbf9a68dd
msgid "150.02"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:252
#: 0400ca4767594ecfb4defd558692c824
msgid "168.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:254
#: 9d3b059ac69b4a5591ce3ee520a96029
msgid "165.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:256
#: 3285226c40d84875a54f59c295b551fa
msgid "123.15"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:258
#: 8f9f85faeaa2427b809dd31490586b5a
msgid "143.09"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:260
#: e096b5debc964ee0878c772decc9422b
msgid "159.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:262
#: 5486a0cba5304a6099702797d0ae549c
msgid "156.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:264
#: 66691e93c10c4678946767c7f309da04
msgid "117.35"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:266
#: 0671808a30884988a2ecbd069766e7b6
msgid "135.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:268
#: bb9c6c1190bb40d0bbff3191e0c4498b
msgid "149.35"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:270
#: ef98f5b0376148008b73c5aecc2fa63b
msgid "147.75"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:272
#: 53231a6c550840d8abd9bcd6938a51b9
msgid "105.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:274
#: 1f6d3b325eb24c659f1d9c3620c8628b
msgid "118.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:276
#: 548feaba1e574b098b90cdbd84ebc399
msgid "129.28"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:278
#: 9199d71ff5174e8d85d70a23ae7f3ec4
msgid "127.19"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:283
#: 56e99eb6d72c42689aaf59d789826803
msgid "7B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:288
#: ../../Qwen/source/getting_started/speed_benchmark.rst:328
#: c59531af032446c18bc0e83534193b9f df1166c61309496aba079d93893e2c73
msgid "Qwen2.5-7B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:288
#: 24c881a74ac0458cb5550fc0900bba79
msgid "40.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:288
#: a7cdbcfaf60b45ea8b6cd5b52ac0d746
msgid "14.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:290
#: 4a9bf7b3f9e74ceabfc41137dd140a7a
msgid "31.55"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:290
#: 38206be6768b4b9880a2182c3c6958f2
msgid "8.42"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:292
#: 8cd20063c79e415fa8db65f73f64f0d9
msgid "43.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:292
#: b37f8a8bcd6540469630d71d413e1c74
msgid "5.52"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:294
#: 8a451541cbc24efeb6cf91ac421d42fc
msgid "32.03"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:294
#: 05f7cc11f8ef46a08908b3371001e6d9
msgid "5.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:296
#: 864b5a9dc6844aa28aea1a1da8635051
msgid "38.76"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:296
#: 7c6500b87f8b4a22b889c6ffce0c0e87
msgid "15.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:298
#: 64f2c38e7f8c4166ba938e9bb4466d7d
msgid "31.26"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:298
#: d897f533338b4c62bb0d70069a758d32
msgid "9.43"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:300
#: f33b03aa4b044164b3d2d75c04d8006d
msgid "38.27"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:300
#: 329c1e0c94ea434987fbe24d75cd6a2e
msgid "6.52"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:302
#: 7677046faff74313ba2c46c62cf21263
msgid "32.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:302
#: ../../Qwen/source/getting_started/speed_benchmark.rst:595
#: ../../Qwen/source/getting_started/speed_benchmark.rst:597
#: 6284f4a2d24243a28a7303edfa551656 684608e8456a46eba2a061ffdfc03bdc
#: 7575637305cc49e5bbd65b8c64f102cb
msgid "6.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:304
#: 20c6ee40a980497da9a42c6903bdc5ca
msgid "29.78"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:304
#: 217fcd110e054ed09b81f583efa7e226
msgid "16.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:306
#: c275314a31984fcba2f269f2ed665919
msgid "26.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:306
#: d48e57d65be94896a7a354e5a3adcff0
msgid "10.96"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:308
#: b3849871d8be4810a4efd55f90d72758
msgid "28.70"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:308
#: 3b30c8cf22174fbea359eaafd3d80554
msgid "8.05"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:310
#: 6a26d4e6932c47ff9488adbb09892983
msgid "30.23"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:310
#: aff7f4b31ebd41dc998571d2485f7c66
msgid "7.92"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:312
#: 52197e28b9be4c4aa292556a598a6842
msgid "18.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:312
#: ac2bc04160e54ec392561dc3fa1b62a6
msgid "19.97"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:314
#: af03dc28d6ed4191b76122d847700d65
msgid "17.59"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:314
#: a74328e0664e45439e47cc1edd31ac5b
msgid "14.01"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:316
#: 62f32fcb14ed4047a9f6c05cde7f3139
msgid "18.45"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:316
#: bf83072778ef44aeb4ff280d3e8e724a
msgid "11.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:318
#: 8596ffd1e39b4879bd3fd934d22974ce
msgid "19.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:318
#: 19eabac83a364622ad98eef88909d151
msgid "10.98"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:323
#: 03be9057a5634dceb5abd12098d1a94d
msgid "7B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:328
#: 8b6c550accd74ab7b0376fe5d4f7b28f
msgid "84.28"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:330
#: 893e8c4f6ad8422395a14084bda0dcb9
msgid "122.01"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:332
#: 0ff410dada7c4eb09377e4e643821669
msgid "154.05"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:334
#: 871ced1c25884d8fb340ce3e420bb681
msgid "148.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:336
#: 8b91ba7dac544e1e865406047454e505
msgid "80.70"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:338
#: b36155ad7a1f4f46886b69202391a107
msgid "112.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:340
#: e83968912eee4144a2a619fdaf2b2e23
msgid "141.98"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:342
#: b1e09afc9e444c13a87e05e51533ed03
msgid "137.64"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:344
#: e805d3fd9b7b4df88306d9d8d031fa32
msgid "77.69"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:346
#: 5288955cf8a749aab0f29781c0a01eed
msgid "105.25"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:348
#: 09f607e74caf4469b406af624b1954b0
msgid "129.35"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:350
#: 1bdd8e6d9c7d412d9d9c262634ba663c
msgid "124.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:352
#: e3842b73312347fb98c58b26e3e4503f
msgid "70.33"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:354
#: de12b0eb3ea14e33ba09e1efaba35c15
msgid "90.71"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:356
#: d869b4da1b9d490ab63ab7b63d311811
msgid "108.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:358
#: e725f7181b764adc97d2dbf3aa04a97b
msgid "104.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:360
#: ../../Qwen/source/getting_started/speed_benchmark.rst:456
#: ../../Qwen/source/getting_started/speed_benchmark.rst:557
#: ../../Qwen/source/getting_started/speed_benchmark.rst:666
#: 50ce6144945049c08b89fae7f1096bc1 538e4f4bfd6647d0961b53aaeb93cbeb
#: 6831225e1c2c4493bbc0b94f3284f2ae ae2971e3d03642c8b87d9996a792874e
msgid "63488"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:360
#: e0ccc29135df40a8bc52e8eaa7047d0f
msgid "50.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:360
#: ../../Qwen/source/getting_started/speed_benchmark.rst:362
#: ../../Qwen/source/getting_started/speed_benchmark.rst:364
#: ../../Qwen/source/getting_started/speed_benchmark.rst:366
#: ../../Qwen/source/getting_started/speed_benchmark.rst:456
#: ../../Qwen/source/getting_started/speed_benchmark.rst:458
#: ../../Qwen/source/getting_started/speed_benchmark.rst:460
#: ../../Qwen/source/getting_started/speed_benchmark.rst:462
#: ../../Qwen/source/getting_started/speed_benchmark.rst:557
#: ../../Qwen/source/getting_started/speed_benchmark.rst:559
#: ../../Qwen/source/getting_started/speed_benchmark.rst:561
#: ../../Qwen/source/getting_started/speed_benchmark.rst:563
#: 00c26ad47e7e483f812f1a7686ec946c 115475fff86d489f9da9a050d580ac35
#: 39e209cc54ef4f249a9e10b4d2b59e9a 471c2edbd90f48599720e4835f4ceccb
#: 5aeecd3e741e4f7eadc58ec109bcc517 6378f5dfc0c748049349bfe281ede172
#: 67b476001be94ed1939d90892d4fa792 8512c45d834c44b3bc900ea9b03da35e
#: 87765daec63740a2928bee81a093ab4e a8599ff0fa864ff89a5536828a23bbd6
#: b6e42477ae4e489bb80dd0da67e257d4 f3b86ac794b9432182cca62e582e218e
msgid "setting-64k"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:362
#: b778e2090cfb4eb0b4fea135fbe7f018
msgid "60.52"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:364
#: e8932a3264da487a9d21ef0d46f46ca8
msgid "67.97"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:366
#: 4b0a406a6de24339ac3d150668f236f9
msgid "66.42"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:368
#: ../../Qwen/source/getting_started/speed_benchmark.rst:464
#: ../../Qwen/source/getting_started/speed_benchmark.rst:565
#: ../../Qwen/source/getting_started/speed_benchmark.rst:674
#: 4b0bcf340b13449d925405d1b6146c0d 4ed666fe3b7b4f2cbeb3724360e7f873
#: a4c73d02cdce4822af91add6cbfdc9c1 d84244305719472ea8ea3aa5b1f96026
msgid "129024"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:368
#: 88281dfdb16f49f196441f8fd83a777a
msgid "28.94"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:368
#: ../../Qwen/source/getting_started/speed_benchmark.rst:370
#: ../../Qwen/source/getting_started/speed_benchmark.rst:372
#: ../../Qwen/source/getting_started/speed_benchmark.rst:374
#: ../../Qwen/source/getting_started/speed_benchmark.rst:464
#: ../../Qwen/source/getting_started/speed_benchmark.rst:466
#: ../../Qwen/source/getting_started/speed_benchmark.rst:468
#: ../../Qwen/source/getting_started/speed_benchmark.rst:470
#: ../../Qwen/source/getting_started/speed_benchmark.rst:565
#: ../../Qwen/source/getting_started/speed_benchmark.rst:567
#: ../../Qwen/source/getting_started/speed_benchmark.rst:569
#: ../../Qwen/source/getting_started/speed_benchmark.rst:571
#: 0b5ace22aa334ae293e187c274d24af3 23c9d9e6b64547238214fa6855a7f3d4
#: 287f483703d345d7a197ca9664e0d209 6dc7a99a3fd6403da605b1b4fe3ee0f2
#: 8a87d97b5f254259ab06b8cde9cb9d31 9af99966e7a14e89a7af2aa01537a0b2
#: ade5ee0f233f4532902eb97817d9faaf b24e425c9d094dfc800f8f9be1b686c5
#: d01c429ad2414cfda68d399ae4b1ef3c d279d55974c34202830512a83958f0a7
#: e829a4c032df4e5e9fb1f15828f19920 e8558dccdc7c4071b9d655f86001bb52
msgid "vllm==0.6.2, new sample config"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:370
#: 3049540f3214425987489dbbc60f25e5
msgid "25.97"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:372
#: 7999f5b1856e44f791d1b050f0db8332
msgid "26.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:374
#: ee69d83c04864081b5a1b15cb8db3b04
msgid "26.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:377
#: ../../Qwen/source/getting_started/speed_benchmark.rst:473
#: ../../Qwen/source/getting_started/speed_benchmark.rst:577
#: 241f00f91e934d029960f03c27bd3739 c4780aea46374bdbab1a7ad7460f9691
#: e26e5864d5c84d888cc9a4ee09f2209e
msgid "[Setting-64k]=(gpu_memory_utilization=0.9 max_model_len=65536 enforce_eager=False)"
msgstr "[默认设定]=(gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:378
#: ../../Qwen/source/getting_started/speed_benchmark.rst:474
#: ../../Qwen/source/getting_started/speed_benchmark.rst:578
#: 0dd68ddb0a5645c184f2f9a378c1843b 8f9d36ee815e4c65b6ee9c4bdd6a701d
#: df4ab6ac46aa4631a3b06c2d2409c0a5
msgid "[new sample config]: for vLLM, set the following sampling parameters: SamplingParams(temperature=0.7,top_p=0.8,top_k=20,repetition_penalty=1,presence_penalty=0,frequency_penalty=0,max_tokens=out_length)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:380
#: 92b7bbe75de343ca9bf5dfe34e1eb018
msgid "14B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:385
#: ../../Qwen/source/getting_started/speed_benchmark.rst:424
#: 33d36e38b5fa4949b8de55c2bbaf132f 69f3d88ca3cb435f9ad699700e9d8b10
msgid "Qwen2.5-14B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:385
#: 338933e55828474387d3671533e441eb
msgid "24.74"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:385
#: b5c2b2011e1740829a9ab848c9d1bc67
msgid "28.08"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:387
#: 4bec6d1379e247829df14b913f05108e
msgid "18.84"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:387
#: ef756d9d57cc43558d26a2e2cdcf5951
msgid "16.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:389
#: cb18696826a140f4b856bf7e45092436
msgid "25.89"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:389
#: 9dfab6d9504c4c0da73b38d301aa9bff
msgid "9.94"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:391
#: 66c1a4c3ee854d578fbffd38590a6173
msgid "19.23"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:391
#: ce33adff743948feb696c33af18a3766
msgid "9.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:393
#: 34c1a8fb645b4b9a9fd2d8ce5a67feab
msgid "20.51"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:393
#: bb5678c644b541b6a0f5fae43d7c0526
msgid "29.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:395
#: 87bb23f9e2ac49819434e5ab076922ae
msgid "17.80"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:395
#: f0962fa7bf5741209577003ddb4cf5f1
msgid "17.61"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:397
#: ab83231e92aa46f5ba1e3f62648318b8
msgid "20.06"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:397
#: eb1cdd4e04d64039948029eaf8b4a00b
msgid "11.36"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:399
#: 3dcefb1d53274627bca7e625eee85869
msgid "19.21"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:399
#: 8a1e9eaddca74d09b64f935a94595c67
msgid "11.22"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:401
#: 2f6eb0bc7f424e6a9f0ad4460a7a4efa
msgid "13.92"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:403
#: 1c1d115e36f44119a97a89ac206e52a0
msgid "12.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:403
#: e3593e304bbe45bd8418842fd851580b
msgid "19.98"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:405
#: ba374bdd369642199b56cfb2a9d0784e
msgid "13.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:405
#: ../../Qwen/source/getting_started/speed_benchmark.rst:497
#: 0cd9daa65abb42728d9dcdce24e351af 5bbb032dcaff408fb538e1491f165423
msgid "13.81"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:407
#: dd59a5da243240f98a196c8a8ad75954
msgid "14.17"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:407
#: 701485096f6f4c1bac865dfc343f9aaa
msgid "13.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:409
#: e93d854562b343ada25beed98c702c4f
msgid "8.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:409
#: b43df19558bc4526b4e1c7eef86950e1
msgid "36.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:411
#: a28d25884edd4b25a2c6609a0831b244
msgid "7.77"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:411
#: 1cf634f3eead4b63a3b18221ca1cc276
msgid "24.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:413
#: 04012ce321284d4b92301661b3d7f54d
msgid "8.14"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:413
#: 691467f010c140289335343b4e317885
msgid "18.71"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:415
#: 1bcbae57b17f4171901b2cfa61ca31ba
msgid "8.31"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:415
#: 1a1f2770dcf34fb08318658c872f2cc7
msgid "18.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:419
#: 694c73bc709841779cd45459fb251729
msgid "14B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:424
#: ../../Qwen/source/getting_started/speed_benchmark.rst:636
#: 190675eb767d4436a465e37c10dbfbce 64048cd8fa354dff9ec1c8098a9c5d6b
msgid "46.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:426
#: c309b01f907e4a4889aaab7c03d77c7a
msgid "70.40"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:428
#: 34e73fd890db4b92a10bb2787ba967ea
msgid "98.02"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:430
#: b11faf7833904f608b8f25e96e33a7e8
msgid "92.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:432
#: 45bb4137b3c544ec929b1d1b5eed109a
msgid "43.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:434
#: 6f1d8d18f22e4df38ea4ae324bf69111
msgid "64.33"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:436
#: ec572c9bc33d419e8e236fc839861a42
msgid "86.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:438
#: c3a1bb544325403b9176ed8e196f9aae
msgid "83.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:440
#: 9a0d2d7a5ec040819269a01d9b720eee
msgid "41.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:442
#: 0d8b29f114624260b87d1db16c8ba5bf
msgid "59.21"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:444
#: 283793d00d9d4461a627c46ad889ff1e
msgid "76.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:446
#: d415f7ac9bdd4d86b97d814b4928e53e
msgid "74.03"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:448
#: 7d901c10446e4eb8a2cad830947e3474
msgid "37.18"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:450
#: 974d590a66ff47bfb0b790dfbfe8fe8c
msgid "49.23"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:452
#: 529758ddb0d14b4e8fe69d88034c0829
msgid "60.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:454
#: 3e4501a402574108a70cf1cb72d7623c
msgid "59.01"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:456
#: a9d4b6a6e1f544c28c871147fee7e46f
msgid "26.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:458
#: 9f5ea50357aa474c8142f12b8f3b870a
msgid "32.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:460
#: ed09f6b07ff64e35bdb359f1d3b66370
msgid "37.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:462
#: 8b91ca3412bb473abbc0ab2eef544d38
msgid "36.71"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:464
#: 0ad36a5039164677b1fb1f1c1b83004a
msgid "14.53"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:466
#: 4b0e26b3f1df41a8be6ef38bfd3e5604
msgid "15.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:468
#: adcacd971a064a03a19874f639fd2804
msgid "15.13"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:470
#: 08f646ac94cb48ca80d86d35bf3aab59
msgid "15.25"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:478
#: 6187de60c7674655aa4aeb929a040681
msgid "32B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:483
#: ../../Qwen/source/getting_started/speed_benchmark.rst:525
#: 87e9496da06b46eeb9eb003821e17fc1 df6f5ff00cc3491a8d44f70f76a0ae22
msgid "Qwen2.5-32B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:483
#: 1187e6c07e0846c4ba91019dec6fd881
msgid "17.54"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:483
#: 0b57fc4aed17424d8b2468ce5f116895
msgid "61.58"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:485
#: 8a7ba27ab37547f6a6bfbdbb0b62fa2d
msgid "14.52"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:485
#: 274d30ddbed14e839a7942ce65bb25f5
msgid "33.56"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:487
#: 73b5e1fe4ec0451280181b26c43b0f61
msgid "19.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:487
#: a5ca5659f3424b86be625ade3a7a60be
msgid "18.94"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:489
#: 00ab7a5ec2484fb9b0ce1ca7e11fec0a
msgid "14.60"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:489
#: 50a6bf9efa544f0a8efcc4d0adc892b5
msgid "18.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:491
#: a6f66b46d37447fd9c1ded781ebc56bb
msgid "12.49"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:491
#: 7d4f95aea3f849b0a329135057b8097f
msgid "63.72"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:493
#: 7d6c5d94c54e489aa17ed42c7a907f4a
msgid "11.61"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:493
#: dd2b7801beb94b4baea41b036dcfc09d
msgid "35.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:495
#: 80acdcef63444172b32bb11548e0df1c
msgid "13.42"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:495
#: b8ba8f9c6830492695568619711000a9
msgid "21.09"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:497
#: 29ed0a90af354532a68f101dd869a740
msgid "20.81"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:499
#: c8e9d0507b4f4f5bb1db5dc19d905a6a
msgid "8.95"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:499
#: de277b24e3a543118cd4924d7369cf48
msgid "67.31"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:501
#: e50b322f12284aeb934a9ece4851ea57
msgid "8.53"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:501
#: 8f70b6466fbc40b4998021d901b00712
msgid "39.28"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:503
#: ea984b7052b744b88ac7126331ca259a
msgid "9.48"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:503
#: 4628bc748bc049baaf3ba35256e91f23
msgid "24.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:505
#: 8ad0734da7ad45e4998aeee24bfb6cbf
msgid "9.71"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:505
#: ad0f34cbeb904db9858c4d5c91ca2468
msgid "24.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:507
#: 20f4237587b14cb88a8da4ebab7587e3
msgid "5.59"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:507
#: 38d6938125aa4a2da58da0423628e399
msgid "74.47"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:509
#: 5c7c9721136d4be1b730b73a440b2ee2
msgid "5.42"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:509
#: 2a228f8a1096453cad463f101d2da912
msgid "46.45"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:511
#: f277ef5442384d39952e77b11802de9a
msgid "5.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:511
#: cfbda016e2694bb2917c2500bc8086fb
msgid "31.84"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:513
#: d660478e5a494a1089f8ca771fa6f6cf
msgid "5.85"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:513
#: 15394742cc55409bab9ebca52602b265
msgid "31.56"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:520
#: c3a65729136e4e3aba49f12c6162955e
msgid "32B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:525
#: dc5d038782af4181a9473a2606134b14
msgid "22.13"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:525
#: ../../Qwen/source/getting_started/speed_benchmark.rst:533
#: ../../Qwen/source/getting_started/speed_benchmark.rst:541
#: 068b2f72584d47b39f1b5081a3ec41d0 39003cc5472c4ee281132b0bbae2b0fc
#: f8bde774b59644778436b7ed1b1220f5
msgid "setting1"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:527
#: 544f5cbf0b91431e96492201661352a1
msgid "37.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:529
#: 9f7fa40d209644e5ba834e4548f92cb9
msgid "55.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:531
#: 88cf119d087c48b389bd01b4b1e488c0
msgid "51.92"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:533
#: 6463b2e7cb3e4254b7b34c767e2951d0
msgid "21.05"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:535
#: dcb89762e0764f77a074e18b56cb040f
msgid "34.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:537
#: bb33a31e88cb44fcb75486b2093626fa
msgid "49.96"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:539
#: a6966da4828342ff9f9498eb53cf3050
msgid "46.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:541
#: 3767dac4fecf44e684b4c85e623ab467
msgid "19.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:543
#: 93963d97949640d38f56ff2bb94d5e65
msgid "31.89"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:545
#: 4d5070af41e14faf86dd896ea915b64a
msgid "44.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:547
#: c57efbb51fd84f5f81d36f6a7d30f411
msgid "41.83"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:549
#: ../../Qwen/source/getting_started/speed_benchmark.rst:557
#: ../../Qwen/source/getting_started/speed_benchmark.rst:565
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: ../../Qwen/source/getting_started/speed_benchmark.rst:589
#: ../../Qwen/source/getting_started/speed_benchmark.rst:595
#: ../../Qwen/source/getting_started/speed_benchmark.rst:597
#: ../../Qwen/source/getting_started/speed_benchmark.rst:605
#: ../../Qwen/source/getting_started/speed_benchmark.rst:613
#: ../../Qwen/source/getting_started/speed_benchmark.rst:615
#: ../../Qwen/source/getting_started/speed_benchmark.rst:617
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: ../../Qwen/source/getting_started/speed_benchmark.rst:632
#: ../../Qwen/source/getting_started/speed_benchmark.rst:636
#: ../../Qwen/source/getting_started/speed_benchmark.rst:638
#: ../../Qwen/source/getting_started/speed_benchmark.rst:642
#: ../../Qwen/source/getting_started/speed_benchmark.rst:646
#: ../../Qwen/source/getting_started/speed_benchmark.rst:648
#: ../../Qwen/source/getting_started/speed_benchmark.rst:652
#: ../../Qwen/source/getting_started/speed_benchmark.rst:654
#: ../../Qwen/source/getting_started/speed_benchmark.rst:656
#: ../../Qwen/source/getting_started/speed_benchmark.rst:660
#: ../../Qwen/source/getting_started/speed_benchmark.rst:662
#: ../../Qwen/source/getting_started/speed_benchmark.rst:664
#: ../../Qwen/source/getting_started/speed_benchmark.rst:668
#: ../../Qwen/source/getting_started/speed_benchmark.rst:670
#: ../../Qwen/source/getting_started/speed_benchmark.rst:672
#: ../../Qwen/source/getting_started/speed_benchmark.rst:678
#: ../../Qwen/source/getting_started/speed_benchmark.rst:680
#: 04a842bad8354ed2b67dbef03040f428 168304fd8ba94efbaf4d7d2141133392
#: 198178cd5a7747528f3bbd10ffce8c9f 1fd3e4fd78574a6897f656fc7cf1c21c
#: 21b19938194946bdb1e3c08f9a9e65d7 252accfef1f043fa94db8f7416fb82e4
#: 29e616ccd970481182ba77bb48e2cf88 34a7967c3941474e8995bf3d72af9b97
#: 3fa2c7d0eec845a39e77223d196cc55c 664d31a65c3942a5af01983a23e8c627
#: 69036f8a038c4f40a5637979d42d7607 731e5467166a4342af42cd0ac51d84cc
#: 747f5d35f48542b39a49ce0a24a0d687 75c3543f06234cf58511661bb60a0c01
#: 7d5749971c8b45dbb6b281780869a053 8416e10a86144cd394e459eba35fd8f5
#: 883a579d68a3465090857db825c1a91a 889e6b5c831b4804a18884b2b1203db7
#: 965366c2247a42469424a9a3d5c44988 99eb73e79fc448caaa84fd2f17964b19
#: 9e77d837d40448788b772fe0270d27d7 a03f7a47720b48b580c5abad8c8e826d
#: a12d87a0fd20484c8292bcc9168f5512 ad18e58b25b34e349f03fb0b8330f325
#: b738b0e096e64a66b11e24be110ee277 d48c3ca204334fbc94b149f143dc7034
#: e8147654e27547cda3404b254cab3a9b eefe53745e72455a86572ad53b69b056
#: f4bf1185016f4e7696e2c0592397a669
msgid "2"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:549
#: 820041e86bfb41efa8dc85841617881a
msgid "31.82"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:551
#: 5e4ef5182e0f417d9d2c9405ae51110f
msgid "26.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:553
#: 435f106050674ed982e291a6a96dd31f
msgid "35.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:555
#: 59165f45acc04f5ba9db908e1f707c5d
msgid "33.75"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:557
#: fe4921cfe9ba447bba5428bb6d68477c
msgid "24.45"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:559
#: 2ceca97bf1424b1b9f5dfc08eb74ed11
msgid "18.60"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:561
#: 30bf0c9cf06146a1a470e4c5a75e9c28
msgid "22.72"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:563
#: e45e23f08f97407781920b4ef5d2b5a6
msgid "21.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:565
#: 2d31bf4124644faeb2d0265c86b76b98
msgid "14.31"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:567
#: 4bad5bc53fa24071ae79ff3a66e2b4e8
msgid "9.77"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:569
#: 59fc78b4a54e4b30b83590c0cf55d2c2
msgid "10.39"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:571
#: acde06dfa1974cd5bba88b1f91efb950
msgid "10.34"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:574
#: 942b8b91b6b045278ab18ef225e4e716
msgid "For context length 129024, the model needs to be predicted with the following config: \"model_max_length\"=131072"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:575
#: ../../Qwen/source/getting_started/speed_benchmark.rst:683
#: 61b86605ec9948e1919424a27af302d6 b72c77ebaf7549edaeec155dc4303e17
msgid "[Default Setting]=(gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False)"
msgstr "[默认设定]=(gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:576
#: f9a7440af9174bd9ad6b7cbb9a446e4c
msgid "[Setting 1]=(gpu_memory_utilization=1.0 max_model_len=32768 enforce_eager=True)"
msgstr "[设定 3]=(gpu_memory_utilization=1.0 max_model_len=8192 enforce_eager=True)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:582
#: 46bf81ba73f147dcabb6849cf3201052
msgid "72B (Transformer)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: 91768f34ee6b4eeb94cd5da42c26c0bd e4dd97c84c0c403390c0c69610b73e0d
msgid "Qwen2.5-72B-Instruct"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: 5c47cabeaf474ed588d4c38e4b028097
msgid "8.73"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:587
#: 569f321b558d44a58839acd828ae96bd
msgid "136.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:589
#: f0413098de2f44b0a7234d37fa59b654
msgid "8.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:589
#: 683101e8658a4688bdc08a0558859f7b
msgid "72.61"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:591
#: f7036d248182477e92c7a7abbe395646
msgid "11.07"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:591
#: 61f64731ce2f47d4b4bc2cb0a21778d4
msgid "39.91"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:593
#: c36abd184cf74037b12e523090cce321
msgid "11.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:593
#: 5317e202c5334b8c93483684f289fc66
msgid "39.44"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:595
#: 0b5dcb5053134e9c862e3b7d8c032fdc
msgid "140.00"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:597
#: 0c9a76f57fd044ceaab57ffa3883fac6
msgid "77.81"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:599
#: 1875133832db435cabd15c0bc45dee3e
msgid "7.56"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:599
#: ../../Qwen/source/getting_started/speed_benchmark.rst:646
#: 3d2bc391914042148de39eefb840b94a 8c64b1e0e32b48aaa1a7da4968576ca5
msgid "42.50"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:601
#: 2cdd652eb82b4b959de116c4ef1594f9
msgid "8.17"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:601
#: 1145f18b41564ff9a3701ee8f5b91ae5
msgid "42.13"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:603
#: ../../Qwen/source/getting_started/speed_benchmark.rst:611
#: 10f2b403b1ec4c52b2040310779aa010 4b4fd3701363487bba52acab22070c3e
msgid "3"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:603
#: 15752ef061a546a4830e0c0fa3746623
msgid "4.25"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:603
#: be9753117c304f6b875beb241499acea
msgid "149.14"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:605
#: 177bfb7606df4fa9b2c4d76a596d094e
msgid "4.66"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:605
#: 8971c615f1c843669406e93620fb83c8
msgid "82.55"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:607
#: ec651a2587994931a71cd584c6b60106
msgid "5.27"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:607
#: c8b14125b2d44cceae756b97937bc26e
msgid "46.86"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:609
#: cb2116264ea54f1f9c8c504468545115
msgid "5.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:609
#: 198f6b99f1614dc7b14682cfefccdb80
msgid "46.38"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:611
#: ../../Qwen/source/getting_started/speed_benchmark.rst:613
#: 0a2ef7146f834160b307581169403a30 763d64c6856e4e04bf25657020aaefc5
msgid "2.94"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:611
#: 994398398e3f45e39df3773604ec3a43
msgid "164.79"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:613
#: 3129aea7c75d41729a42ea7e36a0ab12
msgid "94.75"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:615
#: 75afd12832eb494192acc82a5dedd6b6
msgid "3.14"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:615
#: f1f6cf8f615342819b23c30082622676
msgid "62.57"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:617
#: e465aac3db0e4e71a3845e20cb791677
msgid "3.23"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:617
#: 8e595b1b68184a97ae29e53d41b5be09
msgid "61.64"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:623
#: 9d4bcba347274fea92f019a978acfc75
msgid "72B (vLLM)"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: 6c68033a6d6c4555b638646e8b30755f
msgid "18.19"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:628
#: 16ac2284befb476aaf6f61e6757ce46a
msgid "Setting 1"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:630
#: ../../Qwen/source/getting_started/speed_benchmark.rst:640
#: ../../Qwen/source/getting_started/speed_benchmark.rst:650
#: ../../Qwen/source/getting_started/speed_benchmark.rst:658
#: ../../Qwen/source/getting_started/speed_benchmark.rst:666
#: ../../Qwen/source/getting_started/speed_benchmark.rst:674
#: ../../Qwen/source/getting_started/speed_benchmark.rst:676
#: 00f47e95a8644e729a215fbab8f79533 1c65888e78d44caa964b2b2cc9aaddbc
#: 3313fc64770e48708415a978295295ae 8a023809a0ae461e817412886a66c234
#: 97ca85cc013c4eef83f7c1f12dd899d6 ac102e30612c46528f80a3346c093b68
#: fbca157eadbc4436ab24c66bc43cdbd1
msgid "4"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:630
#: 3394960b4ab8479986a994ff3cb09a53
msgid "31.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:630
#: ../../Qwen/source/getting_started/speed_benchmark.rst:632
#: ../../Qwen/source/getting_started/speed_benchmark.rst:634
#: ../../Qwen/source/getting_started/speed_benchmark.rst:638
#: ../../Qwen/source/getting_started/speed_benchmark.rst:640
#: ../../Qwen/source/getting_started/speed_benchmark.rst:642
#: ../../Qwen/source/getting_started/speed_benchmark.rst:644
#: ../../Qwen/source/getting_started/speed_benchmark.rst:648
#: ../../Qwen/source/getting_started/speed_benchmark.rst:650
#: ../../Qwen/source/getting_started/speed_benchmark.rst:652
#: ../../Qwen/source/getting_started/speed_benchmark.rst:654
#: ../../Qwen/source/getting_started/speed_benchmark.rst:656
#: ../../Qwen/source/getting_started/speed_benchmark.rst:658
#: ../../Qwen/source/getting_started/speed_benchmark.rst:660
#: ../../Qwen/source/getting_started/speed_benchmark.rst:662
#: ../../Qwen/source/getting_started/speed_benchmark.rst:664
#: 15e47b765995446390cc6d9a8f4181df 1b107cbd953a4334809a06174b468034
#: 2b4282fdc8904d8794efcf0626b35030 32619cf0332c4e21a62b076dcf6b2a6c
#: 429fb1a0ecc441b093ad7896513abe06 477d6a3bd1794bf5a1b8d7eaab6a0be4
#: 64212f42ec2940c0b83de99b44dd9eac 88d6a99ce03c423b95320c97a022e0f3
#: 8d7678a0072f4d058f4335853a443149 8e6b9d2013504734935372ed8690a4ec
#: acaa974c2ebe4d2d9345413fc0f75f7a acd9a5e7fc9a4422a9941148ba47b9cc
#: ae8e497e6ea24b579399fcb0a0c3859f c7532543a7284856bf187256b3bfd632
#: da3744a0015149a29872277c2e34e03d f885d3562a864b51920c38162f6cf57a
msgid "Default"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:632
#: 2a3f347427f24ef6b95fe193fc477c0b
msgid "31.40"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:634
#: e9e96b0a5a714b91afd15a31f559540d
msgid "16.47"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:636
#: 03142321ea94413f8679595fcf312065
msgid "Setting 2"
msgstr "[设定2]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:638
#: 5d670ffb973a4d2d8fa07b5a1db7abfb
msgid "44.30"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:640
#: a6072ac996434f329fa02e5924a24d16
msgid "29.90"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:642
#: fb43ba1f4171450a9d4a80b5b1d2197c
msgid "29.37"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:644
#: 354826fb7e994793b018b30cae6273d5
msgid "13.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:646
#: d008144e052447cda1a5458108d14911
msgid "Setting 3"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:648
#: 8a59bfabdb0249d28dc0927721fc8f2d
msgid "40.67"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:650
#: a1b9350ae6124c219cf53a4033cfc076
msgid "30.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:652
#: c01c56e86a774ad6bb98efa013eb5608
msgid "27.20"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:654
#: f2e1e743b27140ac97279ae7dac85c51
msgid "38.10"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:656
#: 968a0db4bc144188b542754060c62e6b
msgid "36.63"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:658
#: e91d009de1024c58817534bbef9bc008
msgid "27.53"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:660
#: dce88b5c6a4c4908a977f3fb6b5cebfe
msgid "23.32"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:662
#: 0dcb709babd9477f80f9c479da123f0e
msgid "30.98"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:664
#: c61e42c38b784a24a9c4e2d2625b9882
msgid "30.02"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:666
#: d2f8a32da9a448f0a0c43828263f7879
msgid "20.74"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:666
#: ../../Qwen/source/getting_started/speed_benchmark.rst:668
#: ../../Qwen/source/getting_started/speed_benchmark.rst:670
#: ../../Qwen/source/getting_started/speed_benchmark.rst:672
#: 3b5a1c59742843b6baab195dbbc9a210 62b65d0e63fa4eb48925a3b02e55ded5
#: ef9e6861ff4c4a3a9dcf130ea7b7c66d f74ef4dcd1c94cfd860ca19748730cef
msgid "Setting 4"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:668
#: 964f97e07ced4f18912e5375d4f32b46
msgid "16.27"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:670
#: 92ce2a8f3f794cc8a3fff6aea1a5c240
msgid "19.84"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:672
#: fc2de23e6c2f45e0a6e1f8f9fd69865f
msgid "19.32"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:674
#: 0b6fb2697d274581a22bc874082dc0b7
msgid "12.68"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:674
#: ../../Qwen/source/getting_started/speed_benchmark.rst:676
#: ../../Qwen/source/getting_started/speed_benchmark.rst:678
#: ../../Qwen/source/getting_started/speed_benchmark.rst:680
#: 15ed72087f634bf282c0fa88d276172a 3b515a0fb51f43aca07377c753098537
#: 992fa4d8087e46a2a687787aa811e66e 9d280c5a856b48e1bc81b6703a64d6e9
msgid "Setting 5"
msgstr "[设定3]"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:676
#: 58eea16300fb49e08c5f281f20a17746
msgid "14.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:678
#: 36fdacaae5cd45ecb1b719fb196ecc9a
msgid "10.11"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:680
#: 722cf7e2520545d5b08e722364b68971
msgid "9.88"
msgstr ""
#: ../../Qwen/source/getting_started/speed_benchmark.rst:684
#: 0f9a1c93969748f2a48c18a69d5681f8
msgid "[Setting 1]=(gpu_memory_utilization=0.98 max_model_len=4096 enforce_eager=True)"
msgstr "[设定 1]=(gpu_memory_utilization=0.98 max_model_len=4096 enforce_eager=True)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:685
#: 6d5bc59b7e70456f9c2ab2e85599dd1d
msgid "[Setting 2]=(gpu_memory_utilization=1.0 max_model_len=4096 enforce_eager=True)"
msgstr "[设定 2]=(gpu_memory_utilization=1.0 max_model_len=4096 enforce_eager=True)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:686
#: 9f881e4c985043dc85e6d8af993fdc38
msgid "[Setting 3]=(gpu_memory_utilization=1.0 max_model_len=8192 enforce_eager=True)"
msgstr "[设定 3]=(gpu_memory_utilization=1.0 max_model_len=8192 enforce_eager=True)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:687
#: 91e2784f053349cd89718c9da941ddb6
msgid "[Setting 4]=(gpu_memory_utilization=0.9 max_model_len=65536 enforce_eager=False)"
msgstr "[默认设定]=(gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False)"
#: ../../Qwen/source/getting_started/speed_benchmark.rst:688
#: 17c8714ac88d4b7e97bf99a3fcf8f8fb
msgid "[Setting 5]=(gpu_memory_utilization=0.9 max_model_len=131072 enforce_eager=False)"
msgstr "[默认设定]=(gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False)"
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/index.rst:34
msgid "Getting Started"
msgstr "快速开始"
#: ../../Qwen/source/index.rst:44
msgid "Inference"
msgstr "推理"
#: ../../Qwen/source/index.rst:51
msgid "Run Locally"
msgstr "本地运行"
#: ../../Qwen/source/index.rst:60
msgid "Deployment"
msgstr "部署"
#: ../../Qwen/source/index.rst:71
msgid "Quantization"
msgstr "量化"
#: ../../Qwen/source/index.rst:80
msgid "Training"
msgstr "训练"
#: ../../Qwen/source/index.rst:87
msgid "Framework"
msgstr "框架"
#: ../../Qwen/source/index.rst:2 6e52d3a497924f828d4c6b9dd59370d5
msgid "Welcome to Qwen!"
msgstr "欢迎来到Qwen"
#: ../../Qwen/source/index.rst:4 235805a6d4a34184821c0f4f81020ef1
msgid "Qwen3"
msgstr ""
#: ../../Qwen/source/index.rst:11 b8a3aa3f31594232959a08d89e9dc7db
msgid "Qwen is the large language model and large multimodal model series of the Qwen Team, Alibaba Group. Both language models and multimodal models are pretrained on large-scale multilingual and multimodal data and post-trained on quality data for aligning to human preferences. Qwen is capable of natural language understanding, text generation, vision understanding, audio understanding, tool use, role play, playing as AI agent, etc."
msgstr "Qwen是阿里巴巴集团Qwen团队研发的大语言模型和大型多模态模型系列。无论是语言模型还是多模态模型,均在大规模多语言和多模态数据上进行预训练,并通过高质量数据进行后期微调以贴近人类偏好。Qwen具备自然语言理解、文本生成、视觉理解、音频理解、工具使用、角色扮演、作为AI Agent进行互动等多种能力。"
#: ../../Qwen/source/index.rst:14 8735c67355064a97b2793b721a701b21
msgid "The latest version, Qwen3, has the following features:"
msgstr "最新版本Qwen3有以下特点:"
#: ../../Qwen/source/index.rst:16 1956d75084244379aad9503fcc572f00
msgid "**Dense and Mixture-of-Experts (MoE) models**, available in 0.6B, 1.7B, 4B, 8B, 14B, 32B and 30B-A3B, 235B-A22B."
msgstr "**全尺寸稠密与混合专家模型**:0.6B, 1.7B, 4B, 8B, 14B, 32B and 30B-A3B, 235B-A22B"
#: ../../Qwen/source/index.rst:17 1fdf12161cd14663b67b2c08f9219ddb
msgid "**Seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose chat) **within a single model**, ensuring optimal performance across various scenarios."
msgstr "支持在**思考模式**(用于复杂逻辑推理、数学和编码)和 **非思考模式** (用于高效通用对话)之间**无缝切换**,确保在各种场景下的最佳性能。"
#: ../../Qwen/source/index.rst:18 189ff2a03ad249ef88202c34e9f8aa86
msgid "**Significantly enhancement in reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning."
msgstr "**显著增强的推理能力**,在数学、代码生成和常识逻辑推理方面超越了之前的 QwQ(在思考模式下)和 Qwen2.5 指令模型(在非思考模式下)。"
#: ../../Qwen/source/index.rst:19 64ebcda0381148cb8edf8d92b49469ea
msgid "**Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience."
msgstr "**卓越的人类偏好对齐**,在创意写作、角色扮演、多轮对话和指令跟随方面表现出色,提供更自然、更吸引人和更具沉浸感的对话体验。"
#: ../../Qwen/source/index.rst:20 ec0ebb91f1ed491f8672aefef6307d85
msgid "**Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks."
msgstr "**擅长智能体能力**,可以在思考和非思考模式下精确集成外部工具,在复杂的基于代理的任务中在开源模型中表现领先。"
#: ../../Qwen/source/index.rst:21 526b161edf284e1b913aabc7e7fcc77c
msgid "**Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction following** and **translation**."
msgstr "**支持 100 多种语言和方言**,具有强大的多语言理解、推理、指令跟随和生成能力。"
#: ../../Qwen/source/index.rst:23 79ed3f0e7da043bb8b53f510ed244814
msgid "For more information, please visit our:"
msgstr "想了解更多信息,欢迎访问:"
#: ../../Qwen/source/index.rst:25 b2e579ae57de4d2985ab1c350fdf2458
msgid "`Blog <https://qwenlm.github.io/>`__"
msgstr "`博客 <https://qwenlm.github.io/>`__"
#: ../../Qwen/source/index.rst:26 406389fe90064e879bd28665a021ee7e
msgid "`GitHub <https://github.com/QwenLM>`__"
msgstr "`GitHub <https://github.com/QwenLM>`__"
#: ../../Qwen/source/index.rst:27 714c64df6aed4e608571de0155199fef
msgid "`Hugging Face <https://huggingface.co/Qwen>`__"
msgstr "`Hugging Face <https://huggingface.co/Qwen>`__"
#: ../../Qwen/source/index.rst:28 214e12e0b1c04b268582b2c46d22334d
msgid "`ModelScope <https://modelscope.cn/organization/qwen>`__"
msgstr "`ModelScope <https://modelscope.cn/organization/qwen>`__"
#: ../../Qwen/source/index.rst:29 9c64e461dc3a440ab92d94887fe3d2d8
msgid "`Qwen3 Collection <https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f>`__"
msgstr ""
#: ../../Qwen/source/index.rst:31 c6056edc8a3a4a12bd3a75eeb210f7a2
msgid "Join our community by joining our `Discord <https://discord.gg/yPEP2vHTu4>`__ and `WeChat <https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png>`__ group. We are looking forward to seeing you there!"
msgstr "加入社区,加入 `Discord <https://discord.gg/yPEP2vHTu4>`__ 和 `微信群 <https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png>`__ 。很期待见到你们!"
#~ msgid "Web UI"
#~ msgstr "Web UI"
#~ msgid "Benchmark"
#~ msgstr "评测"
#~ msgid "Qwen2.5"
#~ msgstr ""
#~ msgid "Dense, easy-to-use, decoder-only language models, available in **0.5B**, **1.5B**, **3B**, **7B**, **14B**, **32B**, and **72B** sizes, and base and instruct variants."
#~ msgstr "易于使用的仅解码器稠密语言模型,提供 **0.5B** 、**1.5B** 、**3B** 、**7B** 、**14B** 、**32B** 和 **72B** 共7种参数规模的模型,并且有基模型和指令微调模型两种变体(其中“ B ”表示“十亿”, 72B 即为 720 亿)"
#~ msgid "Pretrained on our latest large-scale dataset, encompassing up to **18T** tokens."
#~ msgstr "利用我们最新的数据集进行预训练,包含多达 18T tokens (其中“ T ”表示“万亿”, 18T 即为 18 万亿)"
#~ msgid "Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON."
#~ msgstr "在遵循指令、生成长文本(超过 8K tokens )、理解结构化数据(例如,表格)以及生成结构化输出特别是 JSON 方面有了显著改进"
#~ msgid "More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots."
#~ msgstr "更加适应多样化的系统提示,增强了角色扮演的实现和聊天机器人的背景设置。"
#~ msgid "Context length support up to **128K** tokens and can generate up to **8K** tokens."
#~ msgstr "支持最多达 **128K** tokens 的上下文长度,并能生成多达 **8K** tokens 的文本。"
#~ msgid "Multilingual support for over **29** languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more."
#~ msgstr "支持超过 **29** 种语言,包括中文、英文、法文、西班牙文、葡萄牙文、德文、意大利文、俄文、日文、韩文、越南文、泰文、阿拉伯文等。"
#~ msgid "`Qwen2.5 Collection <https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e>`__"
#~ msgstr ""
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/inference/transformers.md:1
#: 0614c94c5d284106b6157f7b89fa087f
msgid "Transformers"
msgstr ""
#: ../../Qwen/source/inference/transformers.md:3
#: d3760c125a4049b9848d4c98d60104f8
msgid "Transformers is a library of pretrained natural language processing for inference and training. Developers can use Transformers to train models on their data, build inference applications, and generate texts with large language models."
msgstr "Transformers 是一个用于推理和训练的预训练自然语言处理库。开发者可以使用 Transformers 在自己的数据上训练模型、构建推理应用,并通过大型语言模型生成文本。"
#: ../../Qwen/source/inference/transformers.md:6
#: 795ca76ad33c4cf89341343edc36115f
msgid "Environment Setup"
msgstr "环境配置"
#: ../../Qwen/source/inference/transformers.md:8
#: 1331055ed63c4702b1c5a300dab74b09
msgid "`transformers>=4.51.0`"
msgstr ""
#: ../../Qwen/source/inference/transformers.md:9
#: 5167e357b63a4abc814e861f0f87adf0
msgid "`torch>=2.6` is recommended"
msgstr "推荐使用 `torch>=2.6`"
#: ../../Qwen/source/inference/transformers.md:10
#: 0be0ab63e365422c8d48cc1143704782
msgid "GPU is recommended"
msgstr "推荐使用 GPU"
#: ../../Qwen/source/inference/transformers.md:13
#: 0567a5b164704d0eb72c23e3c0525131
msgid "Basic Usage"
msgstr "基本用法"
#: ../../Qwen/source/inference/transformers.md:15
#: 823777aaf3b149c9bc2dbe19fb70cf06
msgid "You can use the `pipeline()` interface or the `generate()` interface to generate texts with Qwen3 in transformers."
msgstr "您可以使用 `pipeline()` 接口或 `generate()` 接口在 transformers 中通过 Qwen3 生成文本。"
#: ../../Qwen/source/inference/transformers.md:17
#: 0a59ebb37456410c82118211759d17d9
msgid "In general, the pipeline interface requires less boilerplate code, which is shown here. The following shows a basic example using pipeline for mult-iturn conversations:"
msgstr "通常,pipeline 接口需要的样板代码更少,如下所示。以下展示了一个使用 pipeline 进行多轮对话的基本示例:"
#: ../../Qwen/source/inference/transformers.md:44
#: 970a2d7d3090478580bf41085235ec13
msgid "There are some important parameters creating the pipeline:"
msgstr "创建 pipeline 时有一些重要的参数:"
#: ../../Qwen/source/inference/transformers.md:45
#: 7a6748475636475c83a23e0f2649d145
msgid "**Model**: `model_name_or_path` could be a model ID like `Qwen/Qwen3-8B` or a local path."
msgstr "**模型**:`model_name_or_path` 可以是像 `Qwen/Qwen3-8B` 这样的模型 ID,也可以是本地路径。"
#: ../../Qwen/source/inference/transformers.md:47
#: 5836c86d2c9e46f1a29d8a827bae7266
msgid "To download model files to a local directory, you could use"
msgstr "要将模型文件下载到本地目录,可以使用"
#: ../../Qwen/source/inference/transformers.md:51
#: ee52b7ed97df4d3d8eee53879369a8a0
msgid "You can also download model files using ModelScope if you are in mainland China"
msgstr "如果您在中国大陆,还可以使用 ModelScope 下载模型文件"
#: ../../Qwen/source/inference/transformers.md:55
#: 0fe0e5f3d5d044f584d89008a0f89b0e
msgid "**Device Placement**: `device_map=\"auto\"` will load the model parameters to multiple devices automatically, if available. It relies on the `accelerate` pacakge. If you would like to use a single device, you can pass `device` instead of device_map. `device=-1` or `device=\"cpu\"` indicates using CPU, `device=\"cuda\"` indicates using the current GPU, and `device=\"cuda:1\"` or `device=1` indicates using the second GPU. Do not use `device_map` and `device` at the same time!"
msgstr "**设备分配**:如果可用,`device_map=\"auto\"` 将自动将模型参数加载到多个设备上。它依赖于 `accelerate` 包。如果您想使用单个设备,可以传递 `device` 而不是 `device_map`。`device=-1` 或 `device=\"cpu\"` 表示使用 CPU,`device=\"cuda\"` 表示使用当前 GPU,`device=\"cuda:1\"` 或 `device=1` 表示使用第二个 GPU。不要同时使用 `device_map` 和 `device`!"
#: ../../Qwen/source/inference/transformers.md:60
#: b304ed147f944a47b89bb27bc4b6142f
msgid "**Compute Precision**: `torch_dtype=\"auto\"` will determine automatically the data type to use based on the original precision of the checkpoint and the precision your device supports. For modern devices, the precision determined will be `bfloat16`."
msgstr "**计算精度**:`torch_dtype=\"auto\"` 将根据检查点的原始精度和设备支持的精度自动确定要使用的数据类型。对于现代设备,确定的精度将是 `bfloat16`。"
#: ../../Qwen/source/inference/transformers.md:63
#: 13295d55b1dd493887c9f0aab35ebfcd
msgid "If you don't pass `torch_dtype=\"auto\"`, the default data type is `float32`, which will take double the memory and be slower in computation."
msgstr "如果您不传递 `torch_dtype=\"auto\"`,默认数据类型为 `float32`,这将占用两倍的内存并且计算速度较慢。"
#: ../../Qwen/source/inference/transformers.md:66
#: 280ff00b4ccd4e59be8d798cd7d4377d
msgid "Calls to the text generation pipleine will use the generation configuration from the model file, e.g., `generation_config.json`. Those configuration could be overridden by passing arguments directly to the call. The default is equivalent to"
msgstr "调用文本生成 pipeline 时,将使用模型文件中的生成配置,例如 `generation_config.json`。这些配置可以通过直接向调用传递参数来覆盖。默认配置等效于"
#: ../../Qwen/source/inference/transformers.md:73
#: f3cd39b62e8848dd8493086915577cd2
msgid "For the best practices in configuring generation parameters, please see the model card."
msgstr "有关配置生成参数的最佳实践,请参阅模型卡片。"
#: ../../Qwen/source/inference/transformers.md:75
#: c69916ddab134eadbb509748b73bb515
msgid "Thinking & Non-Thinking Mode"
msgstr "思考与非思考模式"
#: ../../Qwen/source/inference/transformers.md:77
#: e23485edb6654b588965a22a20332dce
msgid "By default, Qwen3 model will think before response. It is also true for the `pipeline()` interface. To switch between thinking and non-thinking mode, two methods can be used"
msgstr "默认情况下,Qwen3 模型会在回复前进行思考,`pipeline()` 接口也是如此。要切换思考与非思考模式,可以使用以下两种方法:"
#: ../../Qwen/source/inference/transformers.md:80
#: 3597031da80f43f5883b499eb42e153f
msgid "Append a final assistant message, containing only `<think>\\n\\n</think>\\n\\n`. This method is stateless, meaning it will only work for that single turn. It will also strictly prevented the model from generating thinking content. For example,"
msgstr "追加一条仅包含 `<think>\\n\\n</think>\\n\\n` 的最终助手 (assistant) 消息。此方法是无状态的,意味着它仅对当前轮对话生效,并且会严格阻止模型生成思考内容。例如:"
#: ../../Qwen/source/inference/transformers.md:97
#: 5c4b107221d14749a0c61541068be791
msgid "Add to the user (or the system) message, `/no_think` to disable thinking and `/think` to enable thinking. This method is stateful, meaning the model will follow the most recent instruction in multi-turn conversations. You can also use instructions in natural language."
msgstr "在用户 (user) 或系统 (system) 消息中添加 `/no_think` 以禁用思考、添加 `/think` 以启用思考。此方法是有状态的,意味着在多轮对话中,模型将遵循最近的指令。您还可以使用自然语言指令。"
#: ../../Qwen/source/inference/transformers.md:113
#: c93fb1cf4f5849128d0fe52f86a74aca
msgid "Parsing Thinking Content"
msgstr "解析思考内容"
#: ../../Qwen/source/inference/transformers.md:115
#: 32af8e0e86e646d1900ddb45b6b576e9
msgid "If you would like a more structured assistant message format, you can use the following function to extract the thinking content into a field named `reasoning_content` which is similar to the format used by vLLM, SGLang, etc."
msgstr "如果您希望获得更结构化的助手消息格式,可以使用以下函数将思考内容提取到名为 `reasoning_content` 的字段中,该字段的格式类似于 vLLM、SGLang 等使用的格式。"
#: ../../Qwen/source/inference/transformers.md:131
#: ae7a948b483c4e3d973301698cfc82e6
msgid "Parsing Tool Calls"
msgstr "解析工具调用"
#: ../../Qwen/source/inference/transformers.md:133
#: 9798bad1fc1d49e28c8576a67ea59044
msgid "For tool calling with Transformers, please refer to [our guide on Function Calling](../framework/function_call.md#hugging-face-transformers)."
msgstr "有关使用 Transformers 进行工具调用的信息,请参阅[函数调用指南](../framework/function_call.md#hugging-face-transformers)。"
#: ../../Qwen/source/inference/transformers.md:135
#: 555f509828dc4e5ab201530f04168b44
msgid "Serving Quantized models"
msgstr "使用量化模型"
#: ../../Qwen/source/inference/transformers.md:137
#: bb57ca974bda4158b1c3efe90b72d557
msgid "Qwen3 comes with two types of pre-quantized models, FP8 and AWQ. The command serving those models are the same as the original models except for the name change:"
msgstr "Qwen3 提供了两种类型的预量化模型:FP8 和 AWQ。使用这些模型的命令与原始模型相同,只是名称有所更改:"
#: ../../Qwen/source/inference/transformers.md:155
#: 091c7fd44b804573be565b59f4498a5d
msgid "FP8 computation is supported on NVIDIA GPUs with compute capability > 8.9, that is, Ada Lovelace, Hopper, and later GPUs."
msgstr "FP8 计算在计算能力 > 8.9 的 NVIDIA GPU 上受支持,即 Ada Lovelace、Hopper 及更新的 GPU。"
#: ../../Qwen/source/inference/transformers.md:157
#: 468b4de54b0c436086877aeb58894345
msgid "For better performance, make sure `triton` and a CUDA compiler compatible with the CUDA version of `torch` in your environment are installed."
msgstr "为了获得更好的性能,请确保安装了 `triton` 和与环境中 `torch` 的 CUDA 版本兼容的 CUDA 编译器。"
#: ../../Qwen/source/inference/transformers.md:161
#: 4c26b8d556764c28813e27713fa14962
msgid "As of 4.51.0, there are issues with Tranformers when running those checkpoints **across GPUs**. The following method could be used to work around those issues:"
msgstr "在 4.51.0 版本中,在**跨 GPU**的情况下运行 FP8 存在一些与 Transformers 相关的问题。可以使用以下方法来解决这些问题:"
#: ../../Qwen/source/inference/transformers.md:163
#: 67ef3f3b2a6c461aaa5a40d4867e8f7b
msgid "Set the environmnt variable `CUDA_LAUNCH_BLOCKING=1` before running the script; or"
msgstr "在运行脚本之前设置环境变量 `CUDA_LAUNCH_BLOCKING=1`;或者"
#: ../../Qwen/source/inference/transformers.md:164
#: 25d447166ecb4e069a7cdc3f4c2844d8
msgid "Uncomment [this line](https://github.com/huggingface/transformers/blob/0720e206c6ba28887e4d60ef60a6a089f6c1cc76/src/transformers/integrations/finegrained_fp8.py#L340) in your local installation of `transformers`."
msgstr "取消注释您本地安装的 `transformers` 中的[这一行](https://github.com/huggingface/transformers/blob/0720e206c6ba28887e4d60ef60a6a089f6c1cc76/src/transformers/integrations/finegrained_fp8.py#L340)。"
#: ../../Qwen/source/inference/transformers.md:169
#: 4405b99bd1a941e2b7243a860e65ed1f
msgid "Enabling Long Context"
msgstr "启用长上下文"
#: ../../Qwen/source/inference/transformers.md:171
#: 5b50bba7f4eb48f6a0e3f408ddc476ea
msgid "The maximum context length in pre-training for Qwen3 models is 32,768 tokens. It can be extended to 131,072 tokens with RoPE scaling techniques. We have validated the performance with YaRN."
msgstr "Qwen3 模型在预训练中的最大上下文长度为 32,768 个 token。通过 RoPE 缩放技术,它可以扩展到 131,072 个 token。我们已使用 YaRN 验证了性能。"
#: ../../Qwen/source/inference/transformers.md:175
#: 32af7906e1ae44f282e22f3eb656bc47
msgid "Transformers supports YaRN, which can be enabled either by modifying the model files or overriding the default arguments when loading the model."
msgstr "Transformers 支持 YaRN,可以通过修改模型文件或在加载模型时覆盖默认参数来启用。"
#: ../../Qwen/source/inference/transformers.md:177
#: 4d80b834eb444b65885583b6a0db6a93
msgid "Modifying the model files: In the config.json file, add the rope_scaling fields:"
msgstr "修改模型文件:在 config.json 文件中,添加 rope_scaling 字段:"
#: ../../Qwen/source/inference/transformers.md:188
#: 432eb2b9ed054ea3b5a6597ffc207b53
msgid "Overriding the default arguments:"
msgstr "覆盖默认参数:"
#: ../../Qwen/source/inference/transformers.md:210
#: 9d7651802c2344fa987f709a7ac87c08
msgid "Transformers implements static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.** We advise adding the `rope_scaling` configuration only when processing long contexts is required. It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 65,536 tokens, it would be better to set `factor` as 2.0."
msgstr "Transformers 实现了静态 YaRN,这意味着无论输入长度如何,缩放因子保持不变,**这可能会对较短文本的性能产生影响。** 我们建议仅在需要处理长上下文时添加 `rope_scaling` 配置。还建议根据需要修改 `factor`。例如,如果您的应用程序的典型上下文长度为 65,536 个 token,则最好将 `factor` 设置为 2.0。"
#: ../../Qwen/source/inference/transformers.md:216
#: dcc2913c11534349a3aa28988055044c
msgid "Streaming Generation"
msgstr "流式输出"
#: ../../Qwen/source/inference/transformers.md:218
#: 23238ec6181e498fa754f6d3ea363f52
msgid "With the help of `TextStreamer`, you can modify your chatting with Qwen3 to streaming mode. It will print the response as being generated to the console or the terminal."
msgstr "借助 `TextStreamer` ,您可以将与 Qwen3 的对话切换到流式传输模式。下面是一个关于如何使用它的示例:"
#: ../../Qwen/source/inference/transformers.md:238
#: 4a6ba39c851b4296907f624a38766570
msgid "Besides using `TextStreamer`, we can also use `TextIteratorStreamer` which stores print-ready text in a queue, to be used by a downstream application as an iterator:"
msgstr "除了使用 `TextStreamer` 之外,我们还可以使用 `TextIteratorStreamer` ,它将可打印的文本存储在一个队列中,以便下游应用程序作为迭代器来使用:"
#: ../../Qwen/source/inference/transformers.md:267
#: 4c6797668ec84bf4866c6e3be2350e82
msgid "Batch Generation"
msgstr "批处理"
#: ../../Qwen/source/inference/transformers.md:270
#: 132f318ada71477192c48cefc7eeb229
msgid "Batching is not automatically a win for performance."
msgstr "批处理不总能提速。"
#: ../../Qwen/source/inference/transformers.md:296
#: 3371e43683aa4c6284e08193a40110c4
msgid "FAQ"
msgstr "常见问题解答"
#: ../../Qwen/source/inference/transformers.md:298
#: 5b68ed1d7a1e48ef9b0943de038e7ebc
msgid "You may find distributed inference with Transformers is not as fast as you would imagine. Transformers with `device_map=\"auto\"` does not apply tensor parallelism and it only uses one GPU at a time. For Transformers with tensor parallelism, please refer to [its documentation](https://huggingface.co/docs/transformers/v4.51.3/en/perf_infer_gpu_multi)."
msgstr "您可能会发现使用 Transformers 进行分布式推理的速度不如预期。Transformers 使用 `device_map=\"auto\"` 时并未应用张量并行 (Tensor Parallelism),且一次仅使用一个 GPU。如需支持张量并行的 Transformers,请参阅[其文档](https://huggingface.co/docs/transformers/v4.51.3/en/perf_infer_gpu_multi)。"
#~ msgid "The most significant but also the simplest usage of Qwen2.5 is to chat with it using the `transformers` library. In this document, we show how to chat with `Qwen2.5-7B-Instruct`, in either streaming mode or not."
#~ msgstr "使用 Qwen2.5 最简单的方法就是利用 `transformers` 库与之对话。在本文档中,我们将展示如何在流式模式或非流式模式下与 Qwen2.5-7B-Instruct 进行对话。"
#~ msgid "Select the interface you would like to use:"
#~ msgstr "选择编程接口"
#~ msgid "Manual"
#~ msgstr "手动"
#~ msgid "Using `AutoTokenizer` and `AutoModelForCausalLM`."
#~ msgstr "使用 `AutoTokenzier` 和 `AutoModelForCausalLM`。"
#~ msgid "Pipeline"
#~ msgstr "流水线"
#~ msgid "Using `pipeline`."
#~ msgstr "使用 `pipeline`。"
#~ msgid "You can just write several lines of code with `transformers` to chat with Qwen2.5-Instruct. Essentially, we build the tokenizer and the model with `from_pretrained` method, and we use `generate` method to perform chatting with the help of chat template provided by the tokenizer. Below is an example of how to chat with Qwen2.5-7B-Instruct:"
#~ msgstr "你只需借助 `transformers` 库编写几行代码,就能与 Qwen2.5-Instruct 进行对话。实质上,我们通过 `from_pretrained` 方法构建 tokenizer 和模型,然后利用 `generate` 方法,在 tokenizer 提供的对话模板 (Chat Template) 的辅助下进行对话。以下是一个如何与 Qwen2.5-7B-Instruct 进行对话的示例:"
#~ msgid "To continue the chat, simply append the response to the messages with the role assistant and repeat the procedure. The following shows and example:"
#~ msgstr "如要继续对话,只需将回复内容以 assistant 为 role 加入 messages ,然后重复以上流程即可。下面为示例:"
#~ msgid "Note that the previous method in the original Qwen repo `chat()` is now replaced by `generate()`. The `apply_chat_template()` function is used to convert the messages into a format that the model can understand. The `add_generation_prompt` argument is used to add a generation prompt, which refers to `<|im_start|>assistant\\n` to the input. Notably, we apply ChatML template for chat models following our previous practice. The `max_new_tokens` argument is used to set the maximum length of the response. The `tokenizer.batch_decode()` function is used to decode the response. In terms of the input, the above `messages` is an example to show how to format your dialog history and system prompt. By default, if you do not specify system prompt, we directly use `You are Qwen, created by Alibaba Cloud. You are a helpful assistant.`."
#~ msgstr "请注意,原 Qwen 仓库中的旧方法 `chat()` 现在已被 `generate()` 方法替代。这里使用了 `apply_chat_template()` 函数将消息转换为模型能够理解的格式。其中的 `add_generation_prompt` 参数用于在输入中添加生成提示,该提示指向 `<|im_start|>assistant\\n` 。尤其需要注意的是,我们遵循先前实践,对 chat 模型应用 ChatML 模板。而 `max_new_tokens` 参数则用于设置响应的最大长度。此外,通过 `tokenizer.batch_decode()` 函数对响应进行解码。关于输入部分,上述的 `messages` 是一个示例,展示了如何格式化对话历史记录和系统提示。默认情况下,如果您没有指定系统提示,我们将直接使用 `You are Qwen, created by Alibaba Cloud. You are a helpful assistant.` 作为系统提示。"
#~ msgid "`transformers` provides a functionality called \"pipeline\" that encapsulates the many operations in common tasks. You can chat with the model in just 4 lines of code:"
#~ msgstr "`transformers` 同时提供了“流水线” (\"pipeline\") 功能,封装了常用任务的处理流程,仅用4行代码即可开启对话:"
#~ msgid "To continue the chat, simply append the response to the messages with the role assistant and repeat the procedure. The following shows and example:"
#~ msgstr "如要继续对话,只需将回复内容以 assistant 为 role 加入 messages ,然后重复以上流程即可。下面为示例:"
#~ msgid "Batching"
#~ msgstr "批处理"
#~ msgid "All common `transformers` methods support batched input and output. For basic usage, the following is an example:"
#~ msgstr "`transformers` 常用方法均支持批处理。以下为基本用法的示例:"
#~ msgid "With pipeline, it is simpler:"
#~ msgstr "使用流水线功能,实现批处理代码更简单:"
#~ msgid "Using Flash Attention 2 to Accelerate Generation"
#~ msgstr "使用 Flash Attention 2 加速生成"
#~ msgid "With the latest `transformers` and `torch`, Flash Attention 2 will be applied by default if applicable.[^fa2] You do not need to request the use of Flash Attention 2 in `transformers` or install the `flash_attn` package. The following is intended for users that cannot use the latest versions for various reasons."
#~ msgstr "如果您使用最新版本的 `transformers` 和 `torch` , Flash Attention 2 将在适用时自动应用。[^fa2] 无需指定使用 `transformers` 中的 Flash Attention 2 或安装 `falsh_attn` 包。下面的说明是为无法使用最新版的用户补充的。"
#~ msgid "If you would like to apply Flash Attention 2, you need to install an appropriate version of `flash_attn`. You can find pre-built wheels at [its GitHub repository](https://github.com/Dao-AILab/flash-attention/releases), and you should make sure the Python version, the torch version, and the CUDA version of torch are a match. Otherwise, you need to install from source. Please follow the guides at [its GitHub README](https://github.com/Dao-AILab/flash-attention)."
#~ msgstr "如果你希望使用 Flash Attention 2 , 你需要安装 `flash_attn` 。 你可以在其 [GitHub 存储库](https://github.com/Dao-AILab/flash-attention/releases) 找到预编译好的版本。注意选择与 Python 、 torch 和 torch 中 CUDA 版本对应的预编译版本。如无对应,你需要从源代码安装编译,请参考其 [GitHub README](https://github.com/Dao-AILab/flash-attention) 。"
#~ msgid "After a successful installation, you can load the model as shown below:"
#~ msgstr "成功安装 Flash Attention 2 后,你可以用下面这种方式读取模型:"
#~ msgid "Troubleshooting"
#~ msgstr "问题排查"
#~ msgid "Loading models takes a lot of memory"
#~ msgstr "模型加载使用大量显存"
#~ msgid "Normally, memory usage after loading the model can be roughly taken as twice the parameter count. For example, a 7B model will take 14GB memory to load. It is because for large language models, the compute dtype is often 16-bit floating point number. Of course, you will need more memory in inference to store the activations."
#~ msgstr "一般而言,模型加载所需显存可以按参数量乘二计算,例如,7B 模型需要 14GB 显存加载,其原因在于,对于大语言模型,计算所用数据类型为16位浮点数。当然,推理运行时还需要更多显存以记录激活状态。"
#~ msgid "For `transformers`, `torch_dtype=\"auto\"` is recommended and the model will be loaded in `bfloat16` automatically. Otherwise, the model will be loaded in `float32` and it will need double memory. You can also pass `torch.bfloat16` or `torch.float16` as `torch_dtype` explicitly."
#~ msgstr "对于 `transformers` ,推荐加载时使用 `torch_dtype=\"auto\"` ,这样模型将以 `bfloat16` 数据类型加载。否则,默认会以 `float32` 数据类型加载,所需显存将翻倍。也可以显式传入 `torch.bfloat16` 或 `torch.float16` 作为 `torch_dtype` 。"
#~ msgid "Multi-GPU inference is slow"
#~ msgstr "多卡推理缓慢"
#~ msgid "`transformers` relies on `accelerate` for multi-GPU inference and the implementation is a kind of naive model parallelism: different GPUs computes different layers of the model. It is enabled by the use of `device_map=\"auto\"` or a customized `device_map` for multiple GPUs."
#~ msgstr "`transformers` 依赖 `accelerate` 支持多卡推理,其实现为一种简单的模型并行策略:不同的卡计算模型的不同层,分配策略由 `device_map=\"auto\"` 或自定义的 `device_map` 指定。"
#~ msgid "However, this kind of implementation is not efficient as for a single request, only one GPU computes at the same time and the other GPUs just wait. To use all the GPUs, you need to arrange multiple sequences as on a pipeline, making sure each GPU has some work to do. However, that will require concurrency management and load balancing, which is out of the scope of `transformers`. Even if all things are implemented, you can make use of concurrency to improve the total throughput but the latency for each request is not great."
#~ msgstr "然而,这种实现方式并不高效,因为对于单一请求而言,同时只有单个 GPU 在进行计算而其他 GPU 则处于等待状态。为了充分利用所有的 GPU ,你需要像流水线一样安排多个处理序列,确保每个 GPU 都有一定的工作负载。但是,这将需要进行并发管理和负载均衡,这些超出了 `transformers` 库的范畴。即便实现了所有这些功能,整体吞吐量可以通过提高并发提高,但每个请求的延迟并不会很理想。"
#~ msgid "For Multi-GPU inference, we recommend using specialized inference framework, such as vLLM and TGI, which support tensor parallelism."
#~ msgstr "对于多卡推理,建议使用专门的推理框架,如 vLLM 和 TGI,这些框架支持张量并行。"
#~ msgid "`RuntimeError: CUDA error: device-side assert triggered`, `Assertion -sizes[i] <= index && index < sizes[i] && \"index out of bounds\" failed.`"
#~ msgstr "`RuntimeError: CUDA error: device-side assert triggered`, `Assertion -sizes[i] <= index && index < sizes[i] && \"index out of bounds\" failed.`"
#~ msgid "If it works with single GPU but not multiple GPUs, especially if there are PCI-E switches in your system, it could be related to drivers."
#~ msgstr "如果在单个 GPU上 工作正常,但在多个 GPU 上无法工作,特别是如果你的系统中有 PCI-E switch,这可能与驱动程序有关。"
#~ msgid "Try upgrading the GPU driver."
#~ msgstr "尝试升级显卡驱动"
#~ msgid "For data center GPUs (e.g., A800, H800, and L40s), please use the data center GPU drivers and upgrade to the latest subrelease, e.g., 535.104.05 to 535.183.01. You can check the release note at <https://docs.nvidia.com/datacenter/tesla/index.html>, where the issues fixed and known issues are presented."
#~ msgstr "对于数据中心 GPU (例如, A800 、 H800 和 L40 等),请使用数据中心 GPU 驱动程序并升级到最新子版本,例如从 535.104.05 升级至 535.183.01 。您可以在以下网址查看发布说明:<https://docs.nvidia.com/datacenter/tesla/index.html>,其中列出了已修复的问题和已知问题。"
#~ msgid "For consumer GPUs (e.g., RTX 3090 and RTX 4090), their GPU drivers are released more frequently and focus more on gaming optimization. There are online reports that 545.29.02 breaks `vllm` and `torch` but 545.29.06 works. Their release notes are also less helpful in identifying the real issues. However, in general, the advice is still upgrading the GPU driver."
#~ msgstr "对于消费级 GPU (例如, RTX 3090 和 RTX 4090 ),它们的 GPU 驱动程序发布的频率更高,并且更侧重于游戏优化。网上有报告称 545.29.02 版本破坏了 `vllm` 和 `torch` 的运行,但 545.29.06 版本可以正常工作。它们的发布说明在识别实际问题方面帮助较小。然而,总体而言,建议仍然是升级 GPU 驱动程序。"
#~ msgid "Try disabling P2P for process hang, but it has negative effect on speed."
#~ msgstr "尝试禁用 P2P 以解决进程挂起的问题,但这会对速度产生负面影响。"
#~ msgid "Next Step"
#~ msgstr "下一步"
#~ msgid "Now you can chat with Qwen2.5 in either streaming mode or not. Continue to read the documentation and try to figure out more advanced usages of model inference!"
#~ msgstr "现在,你可以选择流式模式或非流式模式与 Qwen2.5 进行对话。继续阅读文档,并尝试探索模型推理的更多高级用法!"
#~ msgid "The attention module for a model in `transformers` typically has three variants: `sdpa`, `flash_attention_2`, and `eager`. The first two are wrappers around related functions in the `torch` and the `flash_attn` packages. It defaults to `sdpa` if available."
#~ msgstr "`transformers` 中模型一般实现3种注意力模块: `sdpa` 、 `flash_attention_2` 和 `eager` 。前两种分别封装了 `torch` 和 `flash_attn` 中的相关实现。`transformers` 默认使用 `sdpa` 版本的注意力模块。"
#~ msgid "In addition, `torch` has integrated three implementations for `sdpa`: `FLASH_ATTENTION` (indicating Flash Attention 2 since version 2.2), `EFFICIENT_ATTENTION` (Memory Efficient Attention), and `MATH`. It attempts to automatically select the most optimal implementation based on the inputs. You don't need to install extra packages to use them."
#~ msgstr "同时, `torch` 包括3种 `sdpa` 实现: `FLASH_ATTENTION` (自 2.2 版本为 Flash Attention 2)、 `EFFICIENT_ATTENTION` (Memory Efficient Attention) 和 `MATH` 。 `torch` 根据输入自动选择最优的实现,你无需额外安装其它包或进行配置。"
#~ msgid "Hence, if applicable, by default, `transformers` uses `sdpa` and `torch` selects `FLASH_ATTENTION`."
#~ msgstr "因此,在默认情况下,如果适用, `transformers` 使用 `sdpa` 而 `torch` 会选择 `FLASH_ATTENTION` 。"
#~ msgid "If you wish to explicitly select the implementations in `torch`, refer to [this tutorial](https://pytorch.org/tutorials/intermediate/scaled_dot_product_attention_tutorial.html)."
#~ msgstr "如果你希望显式控制 `torch` 使用的 `sdpa` 实现,请参考 [本教程](https://pytorch.org/tutorials/intermediate/scaled_dot_product_attention_tutorial.html)。 "
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/quantization/awq.md:1 363514c3e24c4d2aa54832e85acf34ef
msgid "AWQ"
msgstr "AWQ"
#: ../../Qwen/source/quantization/awq.md:4 36b5c0de1013499f9f1e41edf8fa28ca
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/quantization/awq.md:7 9d6a80a82b044628bc9c911785ac9160
msgid "For quantized models, one of our recommendations is the usage of [AWQ](https://arxiv.org/abs/2306.00978) with [AutoAWQ](https://github.com/casper-hansen/AutoAWQ)."
msgstr "对于量化模型,我们推荐使用 [AWQ](https://arxiv.org/abs/2306.00978) 结合 [AutoAWQ](https://github.com/casper-hansen/AutoAWQ) "
#: ../../Qwen/source/quantization/awq.md:9 139542ed4b414cfb834b3fd81ea88d51
msgid "**AWQ** refers to Activation-aware Weight Quantization, a hardware-friendly approach for LLM low-bit weight-only quantization."
msgstr "**AWQ**即激活值感知的权重量化(Activation-aware Weight Quantization),是一种针对LLM的低比特权重量化的硬件友好方法。"
#: ../../Qwen/source/quantization/awq.md:11 9a2959bb9f984e36a299bc40abca9402
msgid "**AutoAWQ** is an easy-to-use Python library for 4-bit quantized models. AutoAWQ speeds up models by 3x and reduces memory requirements by 3x compared to FP16. AutoAWQ implements the Activation-aware Weight Quantization (AWQ) algorithm for quantizing LLMs."
msgstr "**AutoAWQ**是一个易于使用的工具包,用于4比特量化模型。相较于FP16,AutoAWQ能够将模型的运行速度提升3倍,并将内存需求降低至原来的三分之一。AutoAWQ实现了AWQ算法,可用于LLM的量化处理。"
#: ../../Qwen/source/quantization/awq.md:15 4f9fcd93d1f44b48869224c0f4e8b76a
msgid "In this document, we show you how to use the quantized model with Hugging Face `transformers` and also how to quantize your own model."
msgstr "在本文档中,我们将向您展示如何在Hugging Face `transformers`框架下使用量化模型,以及如何对您自己的模型进行量化"
#: ../../Qwen/source/quantization/awq.md:17 870ebc162f3749b48fe454df85aaaf4b
msgid "Usage of AWQ Models with Hugging Face transformers"
msgstr "在Hugging Face transformers中使用AWQ量化模型"
#: ../../Qwen/source/quantization/awq.md:19 cc7bd785c7ac45a4980fbda683699e43
msgid "Now, `transformers` has officially supported AutoAWQ, which means that you can directly use the quantized model with `transformers`. The following is a very simple code snippet showing how to run `Qwen2.5-7B-Instruct-AWQ` with the quantized model:"
msgstr "现在,`transformers`已经正式支持AutoAWQ,这意味着您可以直接在`transformers`中使用AWQ量化模型。以下是一个非常简单的代码片段,展示如何运行量化模型 `Qwen2.5-7B-Instruct-AWQ` :"
#: ../../Qwen/source/quantization/awq.md:56 47826d51abf54ad8a89ef9b91127a700
msgid "Usage of AWQ Models with vLLM"
msgstr "在vLLM中使用AWQ量化模型"
#: ../../Qwen/source/quantization/awq.md:58 b7235ae8f8344dd4a3d2029bbe7a40fc
msgid "vLLM has supported AWQ, which means that you can directly use our provided AWQ models or those quantized with `AutoAWQ` with vLLM. We recommend using the latest version of vLLM (`vllm>=0.6.1`) which brings performance improvements to AWQ models; otherwise, the performance might not be well-optimized."
msgstr "vLLM已支持AWQ,您可以直接使用我们提供的AWQ量化模型或使用`AutoAWQ`量化的模型。我们建议使用最新版的vLLM (`vllm>=0.6.1`),新版为AWQ量化模型提升了效率提;不然推理效率可能并为被良好优化(即效率可能较非量化模型低)。"
#: ../../Qwen/source/quantization/awq.md:61 940ce8fdb5da442b99af2bc1739911c6
msgid "Actually, the usage is the same with the basic usage of vLLM. We provide a simple example of how to launch OpenAI-API compatible API with vLLM and `Qwen2.5-7B-Instruct-AWQ`:"
msgstr "实际上,使用AWQ模型与vLLM的基本用法相同。我们提供了一个简单的示例,展示了如何通过vLLM启动与OpenAI API兼容的接口,并使用 `Qwen2.5-7B-Instruct-AWQ` 模型:"
#: ../../Qwen/source/quantization/awq.md:64 2d249915352049a6a8d5a06e1f4682ee
msgid "Run the following in a shell to start an OpenAI-compatible API service:"
msgstr "在终端中运行以下命令以开启OpenAI兼容API:"
#: ../../Qwen/source/quantization/awq.md:70 be7bfbb81698429cbfcbcd24d062fc08
msgid "Then, you can call the API as"
msgstr "随后,您可以这样调用API:"
#: ../../Qwen/source/quantization/awq.md:86 0dff7d5c7b044548a82e0ba68a043d80
msgid "or you can use the API client with the `openai` Python package as shown below:"
msgstr "或者你可以按照下面所示的方式,使用 `openai` Python包中的API客户端:"
#: ../../Qwen/source/quantization/awq.md:115 65f4d60502ee486382e9bda9a5a826bb
msgid "Quantize Your Own Model with AutoAWQ"
msgstr "使用AutoAWQ量化你的模型"
#: ../../Qwen/source/quantization/awq.md:117 c7c42af91c1a419194d65200bcfa2f26
msgid "If you want to quantize your own model to AWQ quantized models, we advise you to use AutoAWQ."
msgstr "如果您希望将自己的模型量化为AWQ量化模型,我们建议您使用AutoAWQ。"
#: ../../Qwen/source/quantization/awq.md:123 232e94883d044030b2193392788b9314
msgid "Suppose you have finetuned a model based on `Qwen2.5-7B`, which is named `Qwen2.5-7B-finetuned`, with your own dataset, e.g., Alpaca. To build your own AWQ quantized model, you need to use the training data for calibration. Below, we provide a simple demonstration for you to run:"
msgstr "假设你已经基于 `Qwen2.5-7B` 模型进行了微调,并将其命名为 `Qwen2.5-7B-finetuned` ,且使用的是你自己的数据集,比如Alpaca。若要构建你自己的AWQ量化模型,你需要使用训练数据进行校准。以下,我们将为你提供一个简单的演示示例以便运行:"
#: ../../Qwen/source/quantization/awq.md:141 5162195f32ee4ecba229aa137da1aba4
msgid "Then you need to prepare your data for calibration. What you need to do is just put samples into a list, each of which is a text. As we directly use our finetuning data for calibration, we first format it with ChatML template. For example,"
msgstr "接下来,您需要准备数据以进行校准。您需要做的就是将样本放入一个列表中,其中每个样本都是一段文本。由于我们直接使用微调数据来进行校准,所以我们首先使用ChatML模板对其进行格式化。例如:"
#: ../../Qwen/source/quantization/awq.md:153 0d4736e90e0242a8be15533de3aab6ff
msgid "where each `msg` is a typical chat message as shown below:"
msgstr "其中每个 `msg` 是一个典型的聊天消息,如下所示:"
#: ../../Qwen/source/quantization/awq.md:163 79d86630600945ac85dbe13d07987016
msgid "Then just run the calibration process by one line of code:"
msgstr "然后只需通过一行代码运行校准过程:"
#: ../../Qwen/source/quantization/awq.md:169 1ae219a50508465b98e3b3398e631681
msgid "Finally, save the quantized model:"
msgstr "最后,保存量化模型:"
#: ../../Qwen/source/quantization/awq.md:176 58316c1a4172418aba9f37925963e17f
msgid "Then you can obtain your own AWQ quantized model for deployment. Enjoy!"
msgstr "然后你就可以得到一个可以用于部署的AWQ量化模型。玩得开心!"
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/quantization/gptq.md:1 c90397f810fb44a0abba8dd02f998f1c
msgid "GPTQ"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:4 b79afc46b0f9474fb0c83751625aefc5
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/quantization/gptq.md:7 898494af2a944193880f27e2f90db4f4
msgid "[GPTQ](https://arxiv.org/abs/2210.17323) is a quantization method for GPT-like LLMs, which uses one-shot weight quantization based on approximate second-order information. In this document, we show you how to use the quantized model with Hugging Face `transformers` and also how to quantize your own model with [AutoGPTQ](https://github.com/AutoGPTQ/AutoGPTQ)."
msgstr "[GPTQ](https://arxiv.org/abs/2210.17323)是一种针对类GPT大型语言模型的量化方法,它基于近似二阶信息进行一次性权重量化。在本文档中,我们将向您展示如何使用 `transformers` 库加载并应用量化后的模型,同时也会指导您如何通过[AutoGPTQ](https://github.com/AutoGPTQ/AutoGPTQ)来对您自己的模型进行量化处理。"
#: ../../Qwen/source/quantization/gptq.md:10 11b82020735d4828a4182cefbf98aeb1
msgid "Usage of GPTQ Models with Hugging Face transformers"
msgstr "在Hugging Face transformers中使用GPTQ模型"
#: ../../Qwen/source/quantization/gptq.md:14 2e9481d850954772949dd33897e0b06b
msgid "To use the official Qwen2.5 GPTQ models with `transformers`, please ensure that `optimum>=1.20.0` and compatible versions of `transformers` and `auto_gptq` are installed."
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:16 fe6662a312184d40b07d957f4c0888cc
msgid "You can do that by"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:22 9f0ad8e2a26145cf8bd9d60305566771
msgid "Now, `transformers` has officially supported AutoGPTQ, which means that you can directly use the quantized model with `transformers`. For each size of Qwen2.5, we provide both Int4 and Int8 GPTQ quantized models. The following is a very simple code snippet showing how to run `Qwen2.5-7B-Instruct-GPTQ-Int4`:"
msgstr "现在,`transformers` 正式支持了AutoGPTQ,这意味着您能够直接在`transformers`中使用量化后的模型。以下是一个非常简单的代码片段示例,展示如何运行 `Qwen2.5-7B-Instruct-GPTQ-Int4` (请注意,对于每种大小的Qwen2.5模型,我们都提供了Int4和Int8两种量化版本):"
#: ../../Qwen/source/quantization/gptq.md:60 855686b8990f403bba151d8498947f23
msgid "Usage of GPTQ Models with vLLM"
msgstr "在vLLM中使用GPTQ模型"
#: ../../Qwen/source/quantization/gptq.md:62 ad572c30a0904598b3cbeba7c38a607a
msgid "vLLM has supported GPTQ, which means that you can directly use our provided GPTQ models or those trained with `AutoGPTQ` with vLLM. If possible, it will automatically use the GPTQ Marlin kernel, which is more efficient."
msgstr "vLLM已支持GPTQ,您可以直接使用我们提供的GPTQ量化模型或使用`AutoGPTQ`量化的模型。我们建议使用最新版的vLLM。如有可能,其会自动使用效率更好的GPTQ Marlin实现。"
#: ../../Qwen/source/quantization/gptq.md:65 09050876d2c04aee9b619d28d4f5589c
msgid "Actually, the usage is the same with the basic usage of vLLM. We provide a simple example of how to launch OpenAI-API compatible API with vLLM and `Qwen2.5-7B-Instruct-GPTQ-Int4`:"
msgstr "实际上,使用GPTQ模型与vLLM的基本用法相同。我们提供了一个简单的示例,展示了如何通过vLLM启动与OpenAI API兼容的接口,并使用 `Qwen2.5-7B-Instruct-GPTQ-Int4` 模型:"
#: ../../Qwen/source/quantization/gptq.md:68 a31dd879cc444b5da8d16fb1705585a6
msgid "Run the following in a shell to start an OpenAI-compatible API service:"
msgstr "在终端中运行以下命令以开启OpenAI兼容API:"
#: ../../Qwen/source/quantization/gptq.md:74 9dfb41e03089473792928b05b1225de4
msgid "Then, you can call the API as"
msgstr "随后,您可以这样调用API:"
#: ../../Qwen/source/quantization/gptq.md:90 6b440bebe0d84118bb63ed9a7c169ab5
msgid "or you can use the API client with the `openai` Python package as shown below:"
msgstr "或者你可以按照下面所示的方式,使用 `openai` Python包中的API客户端:"
#: ../../Qwen/source/quantization/gptq.md:119 7ffaa1ca8b4740b98dc3f804348da523
msgid "Quantize Your Own Model with AutoGPTQ"
msgstr "使用AutoGPTQ量化你的模型"
#: ../../Qwen/source/quantization/gptq.md:121 40bd0b11507c4f06be5a5918d0dc3bdb
msgid "If you want to quantize your own model to GPTQ quantized models, we advise you to use AutoGPTQ. It is suggested installing the latest version of the package by installing from source code:"
msgstr "如果你想将自定义模型量化为GPTQ量化模型,我们建议你使用AutoGPTQ工具。推荐通过安装源代码的方式获取并安装最新版本的该软件包。"
#: ../../Qwen/source/quantization/gptq.md:130 d6ebb03d51bf4e0686ae17ce3f0a34db
msgid "Suppose you have finetuned a model based on `Qwen2.5-7B`, which is named `Qwen2.5-7B-finetuned`, with your own dataset, e.g., Alpaca. To build your own GPTQ quantized model, you need to use the training data for calibration. Below, we provide a simple demonstration for you to run:"
msgstr "假设你已经基于 `Qwen2.5-7B` 模型进行了微调,并将该微调后的模型命名为 `Qwen2.5-7B-finetuned` ,且使用的是自己的数据集,比如Alpaca。要构建你自己的GPTQ量化模型,你需要使用训练数据进行校准。以下是一个简单的演示示例,供你参考运行:"
#: ../../Qwen/source/quantization/gptq.md:161 9c1b27cc38764332891a8a13175663fc
msgid "However, if you would like to load the model on multiple GPUs, you need to use `max_memory` instead of `device_map`. Here is an example:"
msgstr "但是,如果你想使用多GPU来读取模型,你需要使用 `max_memory` 而不是 `device_map`。下面是一段示例代码:"
#: ../../Qwen/source/quantization/gptq.md:172 c2a9a50734854c19acf3e623597aee80
msgid "Then you need to prepare your data for calibration. What you need to do is just put samples into a list, each of which is a text. As we directly use our finetuning data for calibration, we first format it with ChatML template. For example,"
msgstr "接下来,你需要准备数据进行校准。你需要做的是将样本放入一个列表中,其中每个样本都是一段文本。由于我们直接使用微调数据进行校准,所以我们首先使用ChatML模板对它进行格式化处理。例如:"
#: ../../Qwen/source/quantization/gptq.md:188 7621f73d34d04dd791d2eda03edb0d06
msgid "where each `msg` is a typical chat message as shown below:"
msgstr "其中每个 `msg` 是一个典型的聊天消息,如下所示:"
#: ../../Qwen/source/quantization/gptq.md:198 293efa14ece74a0aa9cbf32ef21e6bcd
msgid "Then just run the calibration process by one line of code:"
msgstr "然后只需通过一行代码运行校准过程:"
#: ../../Qwen/source/quantization/gptq.md:209 919d7a77cc4a4ef084ee8e2240ff1797
msgid "Finally, save the quantized model:"
msgstr "最后,保存量化模型:"
#: ../../Qwen/source/quantization/gptq.md:216 b353bdf12d6148fdb0a77662f795ae7e
msgid "It is unfortunate that the `save_quantized` method does not support sharding. For sharding, you need to load the model and use `save_pretrained` from transformers to save and shard the model. Except for this, everything is so simple. Enjoy!"
msgstr "很遗憾, `save_quantized` 方法不支持模型分片。若要实现模型分片,您需要先加载模型,然后使用来自 `transformers` 库的 `save_pretrained` 方法来保存并分片模型。除此之外,一切操作都非常简单。祝您使用愉快!"
#: ../../Qwen/source/quantization/gptq.md:222 caea6f76804e40daa394ae2e2d52a6ce
msgid "Known Issues"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:224 07df69bd48d4445887b5c1fa09f2f0fb
msgid "Qwen2.5-72B-Instruct-GPTQ-Int4 cannot stop generation properly"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:226
#: ../../Qwen/source/quantization/gptq.md:235 a4f1c7b0cb5d49f2929ba5d1246e885d
#: d2dbf88d06974152943e6ec405419390
msgid "Model"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:226 cb9c0be91ecc46c3b6ecfa97a0a37dd7
msgid "Qwen2.5-72B-Instruct-GPTQ-Int4"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:227
#: ../../Qwen/source/quantization/gptq.md:236 c1fe04754a0642fa82ed425d6abaa487
#: f3ff85cbbc47459fb36b5ad0e38b4a1b
msgid "Framework"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:227 8a5a4fe9d7634cb1ac65025565c3593a
msgid "vLLM, AutoGPTQ (including Hugging Face transformers)"
msgstr "vLLM、AutoGPTQ(包括 Hugging Face transformers)"
#: ../../Qwen/source/quantization/gptq.md:228
#: ../../Qwen/source/quantization/gptq.md:237 320d56294cc4490f8b30ac523388bc44
#: c04326d003f949a7b2b63c6c6cb20ac3
msgid "Description"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:228 22f80d0679dc426dbbfb21b90b993a27
msgid "Generation cannot stop properly. Continual generation after where it should stop, then repeated texts, either single character, a phrase, or paragraphs, are generated."
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:229
#: ../../Qwen/source/quantization/gptq.md:238 255a7a8ac98b4d2da51f79f207be5901
#: 673d23bf488840a2a32a18cd657f334f
msgid "Workaround"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:229 c2171874ed804ffb826ac686128d7bff
msgid "The following workaround could be considered"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:230 a59d6759991640609371bf7afd81e0b8
msgid "Using the original model in 16-bit floating point"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:231 97134ed43ee3414199928d755c24544e
msgid "Using the AWQ variants or llama.cpp-based models for reduced chances of abnormal generation"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:233 7c30819dea6c4cfb8eee98d0dd217bf9
msgid "Qwen2.5-32B-Instruct-GPTQ-Int4 broken with vLLM on multiple GPUs"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:235 a4a641abd99a47049c1fd172e9cfa2be
msgid "Qwen2.5-32B-Instruct-GPTQ-Int4"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:236 70216327dda349cabf03412f5fbe3114
msgid "vLLM"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:237 8edf21882ff24358b736c73477cfba9d
msgid "Deployment on multiple GPUs and only garbled text like `!!!!!!!!!!!!!!!!!!` could be generated."
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:238 10d9d8b3d8e74afea5ccd79bc698fb7c
msgid "Each of the following workaround could be considered"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:239 33d1632f26f9423c847d06af7a5d107d
msgid "Using the AWQ or GPTQ-Int8 variants"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:240 b27f1f32637349d09b8c74a2041a4d9b
msgid "Using a single GPU"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:241 fc27883584a04682b9e28b2ccf51dc0e
msgid "Using Hugging Face `transformers` if latency and throughput are not major concerns"
msgstr ""
#: ../../Qwen/source/quantization/gptq.md:244 5664e5bd63c845d49e8cfa75e789dfa3
msgid "Troubleshooting"
msgstr "问题排查"
#: ../../Qwen/source/quantization/gptq.md 06f2358881134920ab43f4256ad6300e
msgid "With `transformers` and `auto_gptq`, the logs suggest `CUDA extension not installed.` and the inference is slow."
msgstr "在使用 `transformers` 和 `auto_gptq` 时,日志提示 `CUDA extension not installed.` 并且推理速度缓慢。"
#: ../../Qwen/source/quantization/gptq.md:248 2d57d681b2d74c27b60523fa86676b6f
msgid "`auto_gptq` fails to find a fused CUDA kernel compatible with your environment and falls back to a plain implementation. Follow its [installation guide](https://github.com/AutoGPTQ/AutoGPTQ/blob/main/docs/INSTALLATION.md) to install a pre-built wheel or try installing `auto_gptq` from source."
msgstr "`auto_gptq` 未能找到与您的环境兼容的融合CUDA算子,因此退回到基础实现。请遵循其 [安装指南](https://github.com/AutoGPTQ/AutoGPTQ/blob/main/docs/INSTALLATION.md) 来安装预构建的 wheel 或尝试从源代码安装 `auto_gptq` 。"
#: ../../Qwen/source/quantization/gptq.md 95b57d1a962c4dc7aa02a69a403e2376
msgid "Self-quantized Qwen2.5-72B-Instruct-GPTQ with `vllm`, `ValueError: ... must be divisible by ...` is raised. The intermediate size of the self-quantized model is different from the official Qwen2.5-72B-Instruct-GPTQ models."
msgstr "`vllm` 使用自行量化的 Qwen2.5-72B-Instruct-GPTQ 时,会引发 `ValueError: ... must be divisible by ...` 错误。自量化的模型的 intermediate size 与官方的 Qwen2.5-72B-Instruct-GPTQ 模型不同。"
#: ../../Qwen/source/quantization/gptq.md:255 ecd9b51a549045949ff18fdb6226ddc8
#, python-brace-format
msgid "After quantization the size of the quantized weights are divided by the group size, which is typically 128. The intermediate size for the FFN blocks in Qwen2.5-72B is 29568. Unfortunately, {math}`29568 \\div 128 = 231`. Since the number of attention heads and the dimensions of the weights must be divisible by the tensor parallel size, it means you can only run the quantized model with `tensor_parallel_size=1`, i.e., one GPU card."
msgstr "量化后,量化权重的大小将被 group size(通常为128)整除。Qwen2-72B 中FFN块的中间大小为29568。不幸的是, {math}`29568 \\div 128 = 231` 。由于注意力头的数量和权重的维度必须能够被张量并行大小整除,这意味着你只能使用 `tensor_parallel_size=1` ,即一张 GPU 卡,来运行量化的模型。"
#: ../../Qwen/source/quantization/gptq.md:260 8b1c5e3934654679a2d85e3287cf9309
#, python-brace-format
msgid "A workaround is to make the intermediate size divisible by {math}`128 \\times 8 = 1024`. To achieve that, the weights should be padded with zeros. While it is mathematically equivalent before and after zero-padding the weights, the results may be slightly different in reality."
msgstr "一个解决方案是使中间大小能够被 {math}`128 \\times 8 = 1024` 整除。为了达到这一目的,应该使用零值对权重进行填充。虽然在数学上,在对权重进行零填充前后是等价的,但在现实中结果可能会略有不同。"
#: ../../Qwen/source/quantization/gptq.md:264 ae904f7ab91340c4a6831aef4de643ba
msgid "Try the following:"
msgstr "尝试以下方法:"
#: ../../Qwen/source/quantization/gptq.md:297 4cf8c516a2324e618d25333c84be9e6b
msgid "This will save the padded checkpoint to the specified directory. Then, copy other files from the original checkpoint to the new directory and modify the `intermediate_size` in `config.json` to `29696`. Finally, you can quantize the saved model checkpoint."
msgstr "这将会把填充后的检查点保存到指定的目录。然后,你需要从原始检查点复制其他文件到新目录,并将 `config.json` 中的 `intermediate_size` 修改为 `29696` 。最后,你可以量化保存的模型检查点。"
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/quantization/llama.cpp.md:1
#: 2cde165afca34e508b163ca4d513c50c
msgid "llama.cpp"
msgstr ""
#: ../../Qwen/source/quantization/llama.cpp.md:3
#: c6369d5467e449719f2b30253bfdcb99
msgid "Quantization is a major topic for local inference of LLMs, as it reduces the memory footprint. Undoubtably, llama.cpp natively supports LLM quantization and of course, with flexibility as always."
msgstr "量化(Quantization)是本地运行大规模语言模型的主要议题,因为它能减少内存占用。毫无疑问,llama.cpp原生支持大规模语言模型的量化,并且一如既往地保持了灵活性。"
#: ../../Qwen/source/quantization/llama.cpp.md:6
#: 9aeef2584cd445c9a95c7dd463a0afbe
msgid "At high-level, all quantization supported by llama.cpp is weight quantization: Model parameters are quantized into lower bits, and in inference, they are dequantized and used in computation."
msgstr "在高层次上,llama.cpp所支持的所有量化都是权重量化(weight quantization):模型参数被量化为低位(bit)数,在推理过程中,它们会被反量化(dequantize)并用于计算。"
#: ../../Qwen/source/quantization/llama.cpp.md:9
#: dc633f028b95470489c4c9bca97a6002
msgid "In addition, you can mix different quantization data types in a single quantized model, e.g., you can quantize the embedding weights using a quantization data type and other weights using a different one. With an adequate mixture of quantization types, much lower quantization error can be attained with just a slight increase of bit-per-weight. The example program `llama-quantize` supports many quantization presets, such as Q4_K_M and Q8_0."
msgstr "此外,你可以在单一的量化模型中混合使用不同的量化数据类型,例如,你可以使用一种量化数据类型量化嵌入权重(embedding),而使用另一种量化其他权重。通过适当的量化类型组合,只需略微增加bpw (bit-per-weight, 位权比),就能达到更低的量化误差。示例程序`llama-quantize`支持许多量化预设,如Q4_K_M和Q8_0。"
#: ../../Qwen/source/quantization/llama.cpp.md:13
#: 4235cf2df8164eed928d00f57f6807c7
msgid "If you find the quantization errors still more than expected, you can bring your own scales, e.g., as computed by AWQ, or use calibration data to compute an importance matrix using `llama-imatrix`, which can then be used during quantization to enhance the quality of the quantized models."
msgstr "如果你发现量化误差仍然超出预期,你可以引入自己的量化尺度,例如由AWQ计算的,或者使用校准数据用`llama-imatrix`来计算一个“重要性矩阵”(importance matrix),然后在量化过程中使用以提高量化模型的质量。"
#: ../../Qwen/source/quantization/llama.cpp.md:15
#: 515fe42fe0f34fce883ae3fa60853f0c
#, python-brace-format
msgid "In this document, we demonstrate the common way to quantize your model and evaluate the performance of the quantized model. We will assume you have the example programs from llama.cpp at your hand. If you don't, check our guide [here](../run_locally/llama.cpp.html#getting-the-program){.external}."
msgstr "在本文档中,我们将展示量化和评估量化模型性能的常见方法。我们会假设你手头有llama.cpp的示例程序。如果没有,请查看我们的[指南](../run_locally/llama.cpp.html#getting-the-program){.external}。"
#: ../../Qwen/source/quantization/llama.cpp.md:19
#: 835ffea72afe4c4baf28d764a4a947f4
msgid "Getting the GGUF"
msgstr "获取GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:21
#: 93463a200a5543b5b5798795b8edd0b4
msgid "Now, suppose you would like to quantize `Qwen3-8B`. You need to first make a GGUF file as shown below:"
msgstr "现在,假设你想量化`Qwen3-8B-Instruct`。你需要首先创建一个GGUF文件,如下所示:"
#: ../../Qwen/source/quantization/llama.cpp.md:27
#: 59b1507af3dd430b9de8b1d51ebcfc53
msgid "Sometimes, it may be better to use fp32 as the start point for quantization. In that case, use"
msgstr "有时,可能最好将fp32作为量化的起点。在这种情况下,使用"
#: ../../Qwen/source/quantization/llama.cpp.md:33
#: d54b89e59e214e1baeba025ecd971e30
msgid "Quantizing the GGUF without Calibration"
msgstr "无校准量化GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:35
#: a6d57166997a4a1bad8a28eb4cc5593c
msgid "For the simplest way, you can directly quantize the model to lower-bits based on your requirements. An example of quantizing the model to 8 bits is shown below:"
msgstr "最简单的方法是,你可以根据需求直接将模型量化到低位数。下面是一个将模型量化到8 bit的例子:"
#: ../../Qwen/source/quantization/llama.cpp.md:41
#: 8094b5237744430aa9594b344901dc4b
msgid "`Q8_0` is a code for a quantization preset. You can find all the presets in [the source code of `llama-quantize`](https://github.com/ggml-org/llama.cpp/blob/master/examples/quantize/quantize.cpp). Look for the variable `QUANT_OPTIONS`. Common ones used for 7B models include `Q8_0`, `Q5_0`, and `Q4_K_M`. The letter case doesn't matter, so `q8_0` or `q4_K_m` are perfectly fine."
msgstr "`Q8_0`是一个量化预设的代号。你可以在[`llama-quantize`的源代码](https://github.com/ggml-org/llama.cpp/blob/master/examples/quantize/quantize.cpp)中找到所有预设。寻找变量`QUANT_OPTIONS`。对于7B模型常用的包括`Q8_0`、`Q5_0`和`Q4_K_M`。字母大小写不重要,所以`q8_0`或`q4_K_m`都是可以接受的。"
#: ../../Qwen/source/quantization/llama.cpp.md:47
#: 388bcb0c32464e6c8ec2940b0b564658
msgid "Now you can use the GGUF file of the quantized model with applications based on llama.cpp. Very simple indeed."
msgstr "现在,你可以使用基于llama.cpp的应用程序中的量化模型的GGUF文件。确实很简单。"
#: ../../Qwen/source/quantization/llama.cpp.md:50
#: a32f72ed508e477a98b67172c20c3d9d
msgid "However, the accuracy of the quantized model could be lower than expected occasionally, especially for lower-bit quantization. The program may even prevent you from doing that."
msgstr "然而,量化模型的准确性偶尔可能低于预期,特别是对于低位数量化。程序甚至可能阻止你这样做。"
#: ../../Qwen/source/quantization/llama.cpp.md:53
#: 5cbe569e12714a1eb8b5d125f3b85ab6
msgid "There are several ways to improve quality of quantized models. A common way is to use a calibration dataset in the target domain to identify the weights that really matter and quantize the model in a way that those weights have lower quantization errors, as introduced in the next two methods."
msgstr "有几种方法可以提高量化模型的质量。一种常见的方法是在目标域中使用校准数据集来识别真正重要的权重,并以这些权重具有较低量化误差的方式量化模型,如下两种方法中将介绍。"
#: ../../Qwen/source/quantization/llama.cpp.md:57
#: b6667f74d47e4e9f8ab0ba6fc8de0299
msgid "Quantizing the GGUF with AWQ Scale"
msgstr "使用AWQ尺度量化GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:60
#: b08348105b2f4b1d831ace8255b8208e
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/quantization/llama.cpp.md:63
#: e9191853f77c4c3bbf5b2516cd463229
msgid "To improve the quality of your quantized models, one possible solution is to apply the AWQ scale, following [this script](https://github.com/casper-hansen/AutoAWQ/blob/main/docs/examples.md#gguf-export). First, when you run `model.quantize()` with `autoawq`, remember to add `export_compatible=True` as shown below:"
msgstr "为了提高量化模型的质量,一种可能的解决方案是应用AWQ尺度,遵循[这个脚本](https://github.com/casper-hansen/AutoAWQ/blob/main/docs/examples.md#gguf-export)。首先,当你使用`autoawq`运行`model.quantize()`时,记得添加`export_compatible=True`,如下所示:"
#: ../../Qwen/source/quantization/llama.cpp.md:76
#: a9b8a7e942f343c68ab9bb5c51b75beb
msgid "The above code will not actually quantize the weights. Instead, it adjusts weights based on a dataset so that they are \"easier\" to quantize.[^AWQ]"
msgstr "上述代码实际上不会量化权重。相反,它会根据数据集调整权重,使它们“更容易”量化。[^AWQ]"
#: ../../Qwen/source/quantization/llama.cpp.md:79
#: 2853451d91ba48e99096e4ff2c6aa28d
msgid "Then, when you run `convert-hf-to-gguf.py`, remember to replace the model path with the path to the new model:"
msgstr "然后,当你运行`convert-hf-to-gguf.py`时,记得将模型路径替换为新模型的路径:"
#: ../../Qwen/source/quantization/llama.cpp.md:84
#: 68cab73a58b14a8a8a38b82f209f6060
msgid "Finally, you can quantize the model as in the last example:"
msgstr "最后,你可以像最后一个例子那样量化模型:"
#: ../../Qwen/source/quantization/llama.cpp.md:89
#: c92ab12879be4e1c98ef49dcdb66e3e0
msgid "In this way, it should be possible to achieve similar quality with lower bit-per-weight."
msgstr "这样,应该有可能以更低的bpw实现相似的质量。"
#: ../../Qwen/source/quantization/llama.cpp.md:95
#: 95d0914f02b44bacb160815e8f6400c3
msgid "Quantizing the GGUF with Importance Matrix"
msgstr "使用重要性矩阵量化GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:97
#: 35543f118a84404ca6e5c52e3c51b8f7
msgid "Another possible solution is to use the \"important matrix\"[^imatrix], following [this](https://github.com/ggml-org/llama.cpp/tree/master/examples/imatrix)."
msgstr "另一个可能的解决方案是使用\"重要矩阵\"[^imatrix],参照[这里](https://github.com/ggml-org/llama.cpp/tree/master/examples/imatrix)。"
#: ../../Qwen/source/quantization/llama.cpp.md:99
#: 6f0eba75740b41bc8d277809f72bd839
msgid "First, you need to compute the importance matrix data of the weights of a model (`-m`) using a calibration dataset (`-f`):"
msgstr "首先,你需要使用校准数据集(`-f`)计算模型权重的重要性矩阵数据(`-m`):"
#: ../../Qwen/source/quantization/llama.cpp.md:104
#: 3a9550cf6a04480788fa31a011c5094f
msgid "The text is cut in chunks of length `--chunk` for computation. Preferably, the text should be representative of the target domain. The final results will be saved in a file named `qwen3-8b-imatrix.dat` (`-o`), which can then be used:"
msgstr "文本被切割成长度为`--chunk`的块进行计算。最好,文本应代表目标领域。最终结果将保存在名为`qwen3-8b-imatrix.dat`(`-o`)的文件中,然后可以使用:"
#: ../../Qwen/source/quantization/llama.cpp.md:112
#: 2f3dfa34285948ff8780c66b41a49fb0
msgid "For lower-bit quantization mixtures for 1-bit or 2-bit, if you do not provide `--imatrix`, a helpful warning will be printed by `llama-quantize`."
msgstr "对于1 bit或2 bit的低位数量化混合,如果你不提供`--imatrix`,`llama-quantize`将打印出有用的警告。"
#: ../../Qwen/source/quantization/llama.cpp.md:116
#: 742734079457418f82128bb7ab0851bc
msgid "Perplexity Evaluation"
msgstr "困惑度(Perplexity)评估"
#: ../../Qwen/source/quantization/llama.cpp.md:118
#: d5e074195d2e4136aa741cd5facebc91
msgid "`llama.cpp` provides an example program for us to calculate the perplexity, which evaluate how unlikely the given text is to the model. It should be mostly used for comparisons: the lower the perplexity, the better the model remembers the given text."
msgstr "`llama.cpp`为我们提供了一个示例程序来计算困惑度,这评估了给定文本对模型而言的“不可能”程度。它主要用于比较:困惑度越低,模型对给定文本的记忆越好。"
#: ../../Qwen/source/quantization/llama.cpp.md:121
#: c7b7d6516be0401284f7d1a5fc21f683
msgid "To do this, you need to prepare a dataset, say \"wiki test\"[^wiki]. You can download the dataset with:"
msgstr "要做到这一点,你需要准备一个数据集,比如\"wiki测试集\"[^wiki]。你可以使用以下命令下载数据集:"
#: ../../Qwen/source/quantization/llama.cpp.md:128
#: 9af07ffbc0214d0e905079b8a791c376
msgid "Then you can run the test with the following command:"
msgstr "然后你可以使用以下命令运行测试:"
#: ../../Qwen/source/quantization/llama.cpp.md:132
#: b74f0297d6904028ba7c715af1501541
msgid "Wait for some time and you will get the perplexity of the model. There are some numbers of different kinds of quantization mixture [here](https://github.com/ggml-org/llama.cpp/blob/master/examples/perplexity/README.md). It might be helpful to look at the difference and grab a sense of how that kind of quantization might perform."
msgstr "稍等一段时间,你将得到模型的困惑度。[这里](https://github.com/ggml-org/llama.cpp/blob/master/examples/perplexity/README.md)提供了不同类型的量化模型的数值。观察差异可能有助于理解不同量化方式的潜在表现。"
#: ../../Qwen/source/quantization/llama.cpp.md:139
#: 2f906e721b414c9094a0282c011722c4
msgid "Finally"
msgstr "结束语"
#: ../../Qwen/source/quantization/llama.cpp.md:141
#: b1a3811d3c3941cc8b4693c1e5c168ca
msgid "In this guide, we demonstrate how to conduct quantization and evaluate the perplexity with llama.cpp. For more information, please visit the [llama.cpp GitHub repo](https://github.com/ggml-org/llama.cpp)."
msgstr "在本指南中,我们展示了如何使用llama.cpp进行量化和评估困惑度。更多信息,请访问[llama.cpp GitHub仓库](https://github.com/ggml-org/llama.cpp)。"
#: ../../Qwen/source/quantization/llama.cpp.md:144
#: bfeea9c5ef7540f88336e17f37c5cac1
msgid "We usually quantize the fp16 model to 4, 5, 6, and 8-bit models with different quantization mixtures, but sometimes a particular mixture just does not work, so we don't provide those in our HuggingFace Hub. However, others in the community may have success, so if you haven't found what you need in our repos, look around."
msgstr "我们通常将fp16模型量化为4、5、6和8位模型,采用不同的量化混合,但有时特定的混合就是不起作用,所以我们不在HuggingFace Hub中提供这些。但是,社区中的其他人可能会成功,因此,如果你在我们的仓库中没有找到所需的内容,请四处看看。"
#: ../../Qwen/source/quantization/llama.cpp.md:147
#: 54a2e17cd64e4e36bab3fff70e940d62
msgid "Enjoy your freshly quantized models!"
msgstr "享受你新鲜量化的模型吧!"
#: ../../Qwen/source/quantization/llama.cpp.md:91
#: e51017220a414251bd0e5b184dd4db0a
msgid "If you are interested in what this means, refer to [the AWQ paper](https://arxiv.org/abs/2306.00978). Basically, important weights (called salient weights in the paper) are identified based on activations across data examples. The weights are scaled accordingly such that the salient weights are protected even after quantization."
msgstr "如果你对这意味着什么感兴趣,请参阅[AWQ论文](https://arxiv.org/abs/2306.00978)。基本上,根据数据实例上的激活,识别出重要的权重(在论文中称为显著权重)。相应地缩放权重,以便即使在量化后也能保护显著权重。"
#: ../../Qwen/source/quantization/llama.cpp.md:114
#: 9eeaab6f255647b39afab212008b14ca
msgid "Here, the importance matrix keeps record of how weights affect the output: the weight should be important is a slight change in its value causes huge difference in the results, akin to the [GPTQ](https://arxiv.org/abs/2210.17323) algorithm."
msgstr "在这里,重要性矩阵记录了权重如何影响输出:如果权重的微小变化导致结果的巨大差异,则该权重应该是重要的,类似于[GPTQ](https://arxiv.org/abs/2210.17323)算法。"
#: ../../Qwen/source/quantization/llama.cpp.md:136
#: e5325dfbcf50472a8766eb96eda47e52
msgid "It is not a good evaluation dataset for instruct models though, but it is very common and easily accessible. You probably want to use a dataset similar to your target domain."
msgstr "虽然它不是指导模型的良好评估数据集,但它非常常见且易于访问。你可能希望使用与目标领域相似的数据集。"
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/run_locally/llama.cpp.md:1
#: 28ada5eeaa3d4def898b944359ccbf0d
msgid "llama.cpp"
msgstr "llama.cpp"
#: ../../Qwen/source/run_locally/llama.cpp.md f9c61545fb3f4320abd6b320e034919d
msgid "llama.cpp as a C++ library"
msgstr "llama.cpp作为C++库"
#: ../../Qwen/source/run_locally/llama.cpp.md:6
#: 2553b3fd749f438e8e48f32c81375f46
msgid "Before starting, let's first discuss what is llama.cpp and what you should expect, and why we say \"use\" llama.cpp, with \"use\" in quotes. llama.cpp is essentially a different ecosystem with a different design philosophy that targets light-weight footprint, minimal external dependency, multi-platform, and extensive, flexible hardware support:"
msgstr "开始之前,让我们先谈谈什么是llama.cpp,您应该期待什么,以及为什么我们说带引号“使用”llama.cpp。本质上,llama.cpp是一个不同的生态系统,具有不同的设计理念,旨在实现轻量级、最小外部依赖、多平台以及广泛灵活的硬件支持:"
#: ../../Qwen/source/run_locally/llama.cpp.md:8
#: a7ab025ba0af4667b2f7ebba93318d26
msgid "Plain C/C++ implementation without external dependencies"
msgstr "纯粹的C/C++实现,没有外部依赖"
#: ../../Qwen/source/run_locally/llama.cpp.md:9
#: 52a68f3b3acc43b4b251f1a72e82801f
msgid "Support a wide variety of hardware:"
msgstr "支持广泛的硬件:"
#: ../../Qwen/source/run_locally/llama.cpp.md:10
#: 16e1b62ece664855bd2be4aaeca1a805
msgid "AVX, AVX2 and AVX512 support for x86_64 CPU"
msgstr "x86_64 CPU的AVX、AVX2和AVX512支持"
#: ../../Qwen/source/run_locally/llama.cpp.md:11
#: e763087ad96543fbb80ca4baec4bfe97
msgid "Apple Silicon via Metal and Accelerate (CPU and GPU)"
msgstr "通过Metal和Accelerate支持Apple Silicon(CPU和GPU)"
#: ../../Qwen/source/run_locally/llama.cpp.md:12
#: 9f801a019f1549fcb3f322aa9264cf08
msgid "NVIDIA GPU (via CUDA), AMD GPU (via hipBLAS), Intel GPU (via SYCL), Ascend NPU (via CANN), and Moore Threads GPU (via MUSA)"
msgstr "NVIDIA GPU(通过CUDA)、AMD GPU(通过hipBLAS)、Intel GPU(通过SYCL)、昇腾NPU(通过CANN)和摩尔线程GPU(通过MUSA)"
#: ../../Qwen/source/run_locally/llama.cpp.md:13
#: 715936ea95364b9eb63c0b267603f841
msgid "Vulkan backend for GPU"
msgstr "GPU的Vulkan后端"
#: ../../Qwen/source/run_locally/llama.cpp.md:14
#: 1c46625a23ec4152a410586b838af2e8
msgid "Various quantization scheme for faster inference and reduced memory footprint"
msgstr "多种量化方案以加快推理速度并减少内存占用"
#: ../../Qwen/source/run_locally/llama.cpp.md:15
#: 41e3e66bf6fd4d65bac569ef9525dd08
msgid "CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity"
msgstr "CPU+GPU混合推理,以加速超过总VRAM容量的模型"
#: ../../Qwen/source/run_locally/llama.cpp.md:17
#: 7e220bdec17242edb034da6ef03c0217
msgid "It's like the Python frameworks `torch`+`transformers` or `torch`+`vllm` but in C++. However, this difference is crucial:"
msgstr "它就像 Python 框架 `torch`+`transformers` 或 `torch`+`vllm` 的组合,但用的是 C++。然而,这一差异至关重要:"
#: ../../Qwen/source/run_locally/llama.cpp.md:19
#: 449ff521ddf14376b7f577779ffd0d3a
msgid "Python is an interpreted language: The code you write is executed line-by-line on-the-fly by an interpreter. You can run the example code snippet or script with an interpreter or a natively interactive interpreter shell. In addition, Python is learner friendly, and even if you don't know much before, you can tweak the source code here and there."
msgstr "Python 是一种解释型语言:编写的代码会被解释器逐行实时执行。你可以使用解释器或原生交互式解释器终端来运行示例代码片段或脚本。此外,Python 对学习者非常友好,即使你之前了解不多,也可能修改源代码。"
#: ../../Qwen/source/run_locally/llama.cpp.md:23
#: dd8b4f55db6846ed872013f58001ddec
msgid "C++ is a compiled language: The source code you write needs to be compiled beforehand, and it is translated to machine code and an executable program by a compiler. The overhead from the language side is minimal. You do have source code for example programs showcasing how to use the library. But it is not very easy to modify the source code if you are not verse in C++ or C."
msgstr "C++ 是一种编译型语言:你编写的源代码需要预先编译,由编译器将其转换为机器码和可执行程序,来自语言层面的开销微乎其微。llama.cpp也提供了示例程序的源代码,展示了如何使用该库。但是,如果你不精通 C++ 或 C 语言,修改源代码并不容易。"
#: ../../Qwen/source/run_locally/llama.cpp.md:29
#: 8d2bc05e1031475f9d97d5dddc1a31c7
msgid "To use llama.cpp means that you use the llama.cpp library in your own program, like writing the source code of [Ollama](https://ollama.com/), [LM Studio](https://lmstudio.ai/), [GPT4ALL](https://www.nomic.ai/gpt4all), [llamafile](https://llamafile.ai/) etc. But that's not what this guide is intended or could do. Instead, here we introduce how to use the `llama-cli` example program, in the hope that you know that llama.cpp does support Qwen2.5 models and how the ecosystem of llama.cpp generally works."
msgstr "真正使用 llama.cpp 意味着在自己的程序中使用 llama.cpp 库,就像编写 [Ollama](https://ollama.com/)、[LM Studio](https://lmstudio.ai/)、[GPT4ALL](https://www.nomic.ai/gpt4all)、[llamafile](https://llamafile.ai/) 等的源代码。但这并不是本指南的目的或所能做的。相反,这里我们介绍如何使用 `llama-cli` 示例程序,希望你能了解到 llama.cpp 支持 Qwen2.5 模型以及 llama.cpp 生态系统的一般工作原理。"
#: ../../Qwen/source/run_locally/llama.cpp.md:34
#: 364cf24aaa7d42039524893406872768
msgid "In this guide, we will show how to \"use\" [llama.cpp](https://github.com/ggml-org/llama.cpp) to run models on your local machine, in particular, the `llama-cli` and the `llama-server` example program, which comes with the library."
msgstr "在这份指南中,我们将讨论如何“使用” [llama.cpp](https://github.com/ggml-org/llama.cpp) 在您的本地机器上运行模型,特别是随库提供的 `llama-cli` 和 `llama-server` 示例程序。"
#: ../../Qwen/source/run_locally/llama.cpp.md:36
#: 7f501873d71c4f42af9911097ea84c68
msgid "The main steps are:"
msgstr "主要步骤如下:"
#: ../../Qwen/source/run_locally/llama.cpp.md:37
#: e870e4ce3a0b491e8ddd8324982c1fad
msgid "Get the programs"
msgstr "获取程序"
#: ../../Qwen/source/run_locally/llama.cpp.md:38
#: a9c5836beb7e42c79d8c38a73ca91226
msgid "Get the Qwen3 models in GGUF[^GGUF] format"
msgstr "获取 GGUF[^GGUF] 格式的 Qwen3 模型"
#: ../../Qwen/source/run_locally/llama.cpp.md:39
#: 7f4f97a838d7464790ca067bc8a9f381
msgid "Run the program with the model"
msgstr "使用模型运行程序"
#: ../../Qwen/source/run_locally/llama.cpp.md:42
#: e098c21b360a4111a27650f26eade1de
msgid "llama.cpp supports Qwen3 and Qwen3MoE from version `b5092`."
msgstr "llama.cpp 自版本 `b5092` 支持 Qwen3 和 Qwen3MoE 。"
#: ../../Qwen/source/run_locally/llama.cpp.md:45
#: 9b743c5ec03f425ebe92e37becc4d6cc
msgid "Getting the Program"
msgstr "获取程序"
#: ../../Qwen/source/run_locally/llama.cpp.md:47
#: 8d2cfc01b1bd49be8f445bc479bed875
msgid "You can get the programs in various ways. For optimal efficiency, we recommend compiling the programs locally, so you get the CPU optimizations for free. However, if you don't have C++ compilers locally, you can also install using package managers or downloading pre-built binaries. They could be less efficient but for non-production example use, they are fine."
msgstr "你可以通过多种方式获得 llama.cpp 中的程序。为了达到最佳效率,我们建议你本地编译程序,这样可以零成本享受CPU优化。但是,如果你的本地环境没有C++编译器,也可以使用包管理器安装或者下载预编译的二进制文件。虽然它们可能效率较低,但对于非生产用途的例子来说,它们已经足够好用了。"
#: ../../Qwen/source/run_locally/llama.cpp.md 137946d0ceaf41d48340f243b473d553
msgid "Compile Locally"
msgstr "本地编译"
#: ../../Qwen/source/run_locally/llama.cpp.md:56
#: b12ed52d2bac494b81cce504f4feabf7
msgid "Here, we show the basic command to compile `llama-cli` locally on **macOS** or **Linux**. For Windows or GPU users, please refer to [the guide from llama.cpp](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md)."
msgstr "这里,我们将展示在 **macOS** 或 **Linux** 上本地编译 `llama-cli` 的基本命令。对于 Windows 用户或 GPU 用户,请参考[llama.cpp的指南](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md)。"
#: ../../Qwen/source/run_locally/llama.cpp.md c60a1a9b078f466d8dad6883b58f4cc0
msgid "Installing Build Tools"
msgstr "安装构建工具"
#: ../../Qwen/source/run_locally/llama.cpp.md:63
#: d647649fd7bb486183318d413ed4ce53
msgid "To build locally, a C++ compiler and a build system tool are required. To see if they have been installed already, type `cc --version` or `cmake --version` in a terminal window."
msgstr "要进行本地构建,你需要一个C++编译器和一个构建系统工具。在终端窗口中输入`cc --version`或`cmake --version`,看看这些工具是否已经安装好了。"
#: ../../Qwen/source/run_locally/llama.cpp.md:65
#: 7f031447e44a4c6cb5b376267fb8860a
msgid "If installed, the build configuration of the tool will be printed to the terminal, and you are good to go!"
msgstr "如果已安装,工具的构建配置信息将被打印到终端,那么你就可以开始了!"
#: ../../Qwen/source/run_locally/llama.cpp.md:66
#: 58e9dc44f8df4a25a647b68d54c934f4
msgid "If errors are raised, you need to first install the related tools:"
msgstr "如果出现错误,说明你需要先安装相关工具:"
#: ../../Qwen/source/run_locally/llama.cpp.md:67
#: 9c92dddd2e3d44c78c448767851a75b2
msgid "On macOS, install with the command `xcode-select --install`"
msgstr "在macOS上,使用命令`xcode-select --install`来安装。"
#: ../../Qwen/source/run_locally/llama.cpp.md:68
#: 9e5163c275244ec4b1ac423afd7a1446
msgid "On Ubuntu, install with the command `sudo apt install build-essential`. For other Linux distributions, the command may vary; the essential packages needed for this guide are `gcc` and `cmake`."
msgstr "在Ubuntu上,使用命令`sudo apt install build-essential`来安装。对于其他Linux发行版,命令可能会有所不同;本指南所需的基本包是`gcc`和`cmake`。"
#: ../../Qwen/source/run_locally/llama.cpp.md 7974674c08ea450caef3ef9c4b775775
msgid "Compiling the Program"
msgstr "编译程序"
#: ../../Qwen/source/run_locally/llama.cpp.md:75
#: e996a1af17624e8a92d5a024f35384cb
msgid "For the first step, clone the repo and enter the directory:"
msgstr "第一步是克隆仓库并进入该目录:"
#: ../../Qwen/source/run_locally/llama.cpp.md:81
#: 26e2257b7d264ff098b9e9aac386e8bf
msgid "Then, build llama.cpp using CMake:"
msgstr "随后,使用 CMake 执行 llama.cpp 构建:"
#: ../../Qwen/source/run_locally/llama.cpp.md:87
#: f3dd6fd3218844cf87e77bd8d131bd9a
msgid "The first command will check the local environment and determine which backends and features should be included. The second command will actually build the programs."
msgstr "第一条命令将检查本地环境并确定需要包含的推理后端与特性。第二条命令将实际构建程序文件。"
#: ../../Qwen/source/run_locally/llama.cpp.md:90
#: 824a5208c02447bf9cad4998020a8053
msgid "To shorten the time, you can also enable parallel compiling based on the CPU cores you have, for example:"
msgstr "为了缩短时间,你还可以根据你的CPU核心数开启并行编译,例如:"
#: ../../Qwen/source/run_locally/llama.cpp.md:94
#: dc2d7a5d5aa440eb8b08e80ebbd09c4f
msgid "This will build the programs with 8 parallel compiling jobs."
msgstr "这将以8个并行编译任务来构建程序。"
#: ../../Qwen/source/run_locally/llama.cpp.md:96
#: 457edcce2b624750827dd017430617a2
msgid "The built programs will be in `./build/bin/`."
msgstr "结果将存于 `./build/bin/` 。"
#: ../../Qwen/source/run_locally/llama.cpp.md a80deae3ccbb4a41b5458bfe34ff2c41
msgid "Package Managers"
msgstr "软件包管理器"
#: ../../Qwen/source/run_locally/llama.cpp.md:101
#: d2f9b00de571423ab0b6134ab9e71495
msgid "For **macOS** and **Linux** users, `llama-cli` and `llama-server` can be installed with package managers including Homebrew, Nix, and Flox."
msgstr "对于**macOS**和**Linux**用户,`llama-cli` 和 `llama-server` 可以通过包括 Homebrew、Nix 和 Flox 在内的软件包管理器进行安装。"
#: ../../Qwen/source/run_locally/llama.cpp.md:103
#: fe4ac98dae374260982a1cd23772eedf
msgid "Here, we show how to install `llama-cli` and `llama-server` with Homebrew. For other package managers, please check the instructions [here](https://github.com/ggml-org/llama.cpp/blob/master/docs/install.md)."
msgstr "在这里,我们展示如何使用 Homebrew 安装 `llama-cli` 和 `llama-server` 。对于其他软件包管理器的安装,请查阅[这里的指南](https://github.com/ggml-org/llama.cpp/blob/master/docs/install.md)。"
#: ../../Qwen/source/run_locally/llama.cpp.md:106
#: 403b1540c5774684a2558eddf7855455
msgid "Installing with Homebrew is very simple:"
msgstr "使用 Homebrew 安装非常简单:"
#: ../../Qwen/source/run_locally/llama.cpp.md:108
#: e98fd923879c448da69e164a67468303
msgid "Ensure that Homebrew is available on your operating system. If you don't have Homebrew, you can install it as in [its website](https://brew.sh/)."
msgstr "请确保您的操作系统上已安装有 Homebrew。如果没有,您可以按照[官网](https://brew.sh/)上的指导进行安装。"
#: ../../Qwen/source/run_locally/llama.cpp.md:111
#: 8442a14a50884a9eac53d35673b8bb95
msgid "Second, you can install the pre-built binaries, `llama-cli` and `llama-server` included, with a single command:"
msgstr "其次,您只需一条命令即可安装预先编译好的二进制文件,其中包括 `llama-cli` 和 `llama-server` :"
#: ../../Qwen/source/run_locally/llama.cpp.md:116
#: 6dacdef061db483eb52089e13e0d9a83
msgid "Note that the installed binaries might not be built with the optimal compile options for your hardware, which can lead to poor performance. They also don't support GPU on Linux systems."
msgstr "请注意,安装的二进制文件可能并未针对您的硬件优化编译选项,这可能导致性能不佳。此外,在 Linux 系统上它们也不支持 GPU。"
#: ../../Qwen/source/run_locally/llama.cpp.md 54ca313d26be41de889551e4644f5aa9
msgid "Binary Release"
msgstr "二进制文件"
#: ../../Qwen/source/run_locally/llama.cpp.md:122
#: 8442042815744172bc4b946609019536
msgid "You can also download pre-built binaries from [GitHub Releases](https://github.com/ggml-org/llama.cpp/releases). Please note that those pre-built binaries files are architecture-, backend-, and os-specific. If you are not sure what those mean, you probably don't want to use them and running with incompatible versions will most likely fail or lead to poor performance."
msgstr "您还可以从[GitHub Release](https://github.com/ggml-org/llama.cpp/releases)下载预构建的二进制文件。请注意,这些预构建的二进制文件是特定于架构、后端和操作系统的。如果您不确定这些意味着什么,可能您并不想使用它们。使用不兼容的版本很可能导致运行失败或性能不佳。"
#: ../../Qwen/source/run_locally/llama.cpp.md:126
#: 72ca9d49995445f29bc82323f4085df0
msgid "The file name is like `llama-<version>-bin-<os>-<feature>-<arch>.zip`."
msgstr "文件名类似于`llama-<version>-bin-<os>-<feature>-<arch>.zip`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:128
#: f23e3e64be5d492984da99da55b7f9ff
msgid "There are three simple parts:"
msgstr "分为三个简单部分:"
#: ../../Qwen/source/run_locally/llama.cpp.md:129
#: 7b7cdecdb4a94bc8aa29bdb490c55051
msgid "`<version>`: the version of llama.cpp. The latest is preferred, but as llama.cpp is updated and released frequently, the latest may contain bugs. If the latest version does not work, try the previous release until it works."
msgstr "`<version>`:llama.cpp的版本。建议使用最新版本,但鉴于llama.cpp频繁更新和发布,最新版本可能包含bug。如果最新版本无法正常工作,请尝试之前的版本直到找到能正常工作的为止。"
#: ../../Qwen/source/run_locally/llama.cpp.md:130
#: 3125b20e47824342a931b14f1ece0db4
msgid "`<os>`: the operating system. `win` for Windows; `macos` for macOS; `linux` for Linux."
msgstr "`<os>`:操作系统。`win`代表Windows;`macos`代表macOS;`linux`代表Linux。"
#: ../../Qwen/source/run_locally/llama.cpp.md:131
#: 75cd1de21b164b54afe1b1593ba034ca
msgid "`<arch>`: the system architecture. `x64` for `x86_64`, e.g., most Intel and AMD systems, including Intel Mac; `arm64` for `arm64`, e.g., Apple Silicon or Snapdragon-based systems."
msgstr "`<arch>`:系统架构。`x64`对应`x86_64`,例如大多数Intel和AMD系统,包括Intel Mac;`arm64`对应`arm64`,例如Apple Silicon或基于Snapdragon的系统。"
#: ../../Qwen/source/run_locally/llama.cpp.md:133
#: 2a54ebbd822e4d28a3351bff53bb0d6d
msgid "The `<feature>` part is somewhat complicated for Windows:"
msgstr "`<feature>`部分对于Windows来说有些复杂:"
#: ../../Qwen/source/run_locally/llama.cpp.md:134
#: 18421e8375054a0aa729075a1a512e47
msgid "Running on CPU"
msgstr "在CPU上运行"
#: ../../Qwen/source/run_locally/llama.cpp.md:135
#: 600e9f1569ea4d96a1eedb0e07a1ef57
msgid "x86_64 CPUs: We suggest try the `avx2` one first."
msgstr "x86_64 CPU:我们建议首先尝试`avx2`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:136
#: 34d9757e8e9b43cd84aed0adff92b0f0
msgid "`noavx`: No hardware acceleration at all."
msgstr "`noavx`:完全无AVX硬件加速。"
#: ../../Qwen/source/run_locally/llama.cpp.md:137
#: d7bb327d152d4db0a98d91b9e17bedca
msgid "`avx2`, `avx`, `avx512`: SIMD-based acceleration. Most modern desktop CPUs should support avx2, and some CPUs support `avx512`."
msgstr "`avx2`,`avx`,`avx512`:基于SIMD的加速。大多数现代桌面CPU应该支持AVX2,部分CPU支持AVX512。"
#: ../../Qwen/source/run_locally/llama.cpp.md:138
#: c755b0207d034649a7e694715d0125a5
msgid "`openblas`: Relying on OpenBLAS for acceleration for prompt processing but not generation."
msgstr "`openblas`:依赖OpenBLAS加速提示词(prompt)处理,但不涉及生成过程。"
#: ../../Qwen/source/run_locally/llama.cpp.md:139
#: f4e7484610ce41bfb7f1cb3cd45f47ce
msgid "arm64 CPUs: We suggest try the `llvm` one first."
msgstr "arm64 CPU:我们建议首先尝试`llvm`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:140
#: d92755ab934c4486a99912e8b3447f75
#, fuzzy
msgid "[`llvm` and `msvc`](https://github.com/ggml-org/llama.cpp/pull/7191) are different compilers"
msgstr "[`llvm`和`msvc`](https://github.com/ggerganov/llama.cpp/pull/7191)是不同的编译器"
#: ../../Qwen/source/run_locally/llama.cpp.md:141
#: 145fbf58d2d64f53a47d30eea1d6996a
msgid "Running on GPU: We suggest try the `cu<cuda_verison>` one for NVIDIA GPUs, `kompute` for AMD GPUs, and `sycl` for Intel GPUs first. Ensure that you have related drivers installed."
msgstr "在GPU上运行:我们建议NVIDIA GPU先尝试`cu<cuda_verison>`,AMD GPU先尝试`kompute`,Intel GPU先尝试`sycl`。请确保已安装相关驱动程序。"
#: ../../Qwen/source/run_locally/llama.cpp.md:142
#: d7d306d907ba4e0198cd5780462e45e1
msgid "[`vulcan`](https://github.com/ggml-org/llama.cpp/pull/2059): support certain NVIDIA and AMD GPUs"
msgstr "[`vulcan`](https://github.com/ggml-org/llama.cpp/pull/2059):支持某些NVIDIA和AMD GPU"
#: ../../Qwen/source/run_locally/llama.cpp.md:143
#: 2b96eed7f16a4e7db1b772c6b393533f
msgid "[`kompute`](https://github.com/ggml-org/llama.cpp/pull/4456): support certain NVIDIA and AMD GPUs"
msgstr "[`kompute`](https://github.com/ggml-org/llama.cpp/pull/4456):支持某些NVIDIA和AMD GPU"
#: ../../Qwen/source/run_locally/llama.cpp.md:144
#: 15f5608b06594bb8a48bb02bad7f6f73
msgid "[`sycl`](https://github.com/ggml-org/llama.cpp/discussions/5138): Intel GPUs, oneAPI runtime is included"
msgstr "[`sycl`](https://github.com/ggml-org/llama.cpp/discussions/5138):Intel GPU,包含oneAPI运行时"
#: ../../Qwen/source/run_locally/llama.cpp.md:145
#: e0a1a84d55c3401aa6d865d91578591c
msgid "`cu<cuda_verison>`: NVIDIA GPUs, CUDA runtime is not included. You can download the `cudart-llama-bin-win-cu<cuda_version>-x64.zip` and unzip it to the same directory if you don't have the corresponding CUDA toolkit installed."
msgstr "`cu<cuda_verison>`:NVIDIA GPU,未包含CUDA运行时。如果您没有安装相应的CUDA工具包,可以下载`cudart-llama-bin-win-cu<cuda_version>-x64.zip`并将其解压到同一目录中。"
#: ../../Qwen/source/run_locally/llama.cpp.md:147
#: caffe142d6d34cd9977f32172a543d0f
msgid "You don't have much choice for macOS or Linux."
msgstr "对于macOS或Linux,您的选择不多。"
#: ../../Qwen/source/run_locally/llama.cpp.md:148
#: b6638fc41ce1410eba66aca507ec3784
msgid "Linux: only one prebuilt binary, `llama-<version>-bin-linux-x64.zip`, supporting CPU."
msgstr "Linux:仅有一个预构建的二进制文件`llama-<version>-bin-linux-x64.zip`,支持CPU。"
#: ../../Qwen/source/run_locally/llama.cpp.md:149
#: 43205e30e3e24d91a302ca9c72a8467c
msgid "macOS: `llama-<version>-bin-macos-x64.zip` for Intel Mac with no GPU support; `llama-<version>-bin-macos-arm64.zip` for Apple Silicon with GPU support."
msgstr "macOS:对于Intel Mac,使用`llama-<version>-bin-macos-x64.zip`(不支持GPU);对于Apple Silicon,使用`llama-<version>-bin-macos-arm64.zip`(支持GPU)。"
#: ../../Qwen/source/run_locally/llama.cpp.md:151
#: ee63fcc0d70744b885256719742d397e
msgid "After downloading the `.zip` file, unzip them into a directory and open a terminal at that directory."
msgstr "下载`.zip`文件后,将其解压到一个目录中,并在该目录下打开终端。"
#: ../../Qwen/source/run_locally/llama.cpp.md:156
#: fb60a22681e6451b9cbf8b0d581f75b5
msgid "Getting the GGUF"
msgstr "获取 GGUF"
#: ../../Qwen/source/run_locally/llama.cpp.md:158
#: 21b68c30d5c349d3b19ba3aa68be1ee0
msgid "GGUF[^GGUF] is a file format for storing information needed to run a model, including but not limited to model weights, model hyperparameters, default generation configuration, and tokenizer."
msgstr "GGUF[^GGUF] 是一种文件格式,用于存储运行模型所需的信息,包括但不限于模型权重、模型超参数、默认生成配置和tokenzier。"
#: ../../Qwen/source/run_locally/llama.cpp.md:160
#: 2f6f264d1f7544c1942dfa083cdc79d2
msgid "You can use the official Qwen GGUFs from our HuggingFace Hub or prepare your own GGUF file."
msgstr "您可以使用我们 HuggingFace Hub 上的官方 Qwen GGUF 文件,或者自己准备 GGUF 文件。"
#: ../../Qwen/source/run_locally/llama.cpp.md:162
#: 5fa4bf13f78942128dce441ff6342dd5
msgid "Using the Official Qwen3 GGUFs"
msgstr "使用官方 Qwen3 GGUF"
#: ../../Qwen/source/run_locally/llama.cpp.md:164
#: 305b8e040240411cb25fa3d659b3c9ce
msgid "We provide a series of GGUF models in our HuggingFace organization, and to search for what you need you can search the repo names with `-GGUF`."
msgstr "在我们的 HuggingFace 组织中,我们提供了一系列 GGUF 模型。要查找您需要的模型,可以在仓库名称中搜索 `-GGUF`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:166
#: 8e5a49035e5143628b55f2c1b8d5bc73
msgid "Download the GGUF model that you want with `huggingface-cli` (you need to install it first with `pip install huggingface_hub`):"
msgstr "使用 `huggingface-cli` 下载您想要的 GGUF 模型(首先需要通过 `pip install huggingface_hub` 进行安装):"
#: ../../Qwen/source/run_locally/llama.cpp.md:171
#: bf2d06f6b43140f3af26203bd18a5b4d
msgid "For example:"
msgstr "比如:"
#: ../../Qwen/source/run_locally/llama.cpp.md:176
#: cd157baad53d4c8889b0f7aafa7f887b
msgid "This will download the Qwen3-8B model in GGUF format quantized with the scheme Q4_K_M."
msgstr "这将下载采用 Q4_K_M 方案量化的 GGUF 格式的 Qwen3-8B model 模型。"
#: ../../Qwen/source/run_locally/llama.cpp.md:178
#: bd5c9e3ec9994ecba81c83e1f9077427
msgid "Preparing Your Own GGUF"
msgstr "准备您自己的 GGUF"
#: ../../Qwen/source/run_locally/llama.cpp.md:180
#: 6e7a23375e09420fb100575c510ad291
msgid "Model files from HuggingFace Hub can be converted to GGUF, using the `convert-hf-to-gguf.py` Python script. It does require you to have a working Python environment with at least `transformers` installed."
msgstr "可以使用 `convert-hf-to-gguf.py` Python 脚本将来自 HuggingFace Hub 的模型文件转换为 GGUF。这确实需要您拥有一个工作中的 Python 环境,并至少安装了 `transformers`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:183
#: 9977b9a396c0417db6056d5c435e150d
msgid "Obtain the source file if you haven't already:"
msgstr "如果尚未获取,请先获取源文件:"
#: ../../Qwen/source/run_locally/llama.cpp.md:189
#: 8170538bc1124019a5224fe42d298427
msgid "Suppose you would like to use Qwen3-8B you can make a GGUF file for the fp16 model as shown below:"
msgstr "假设您想使用 Qwen3-8B,可以按照以下方式为 fp16 模型制作 GGUF 文件:"
#: ../../Qwen/source/run_locally/llama.cpp.md:193
#: 08b6b540b87d4ff09613f08ed615c1df
msgid "The first argument to the script refers to the path to the HF model directory or the HF model name, and the second argument refers to the path of your output GGUF file. Remember to create the output directory before you run the command."
msgstr "脚本的第一个参数指的是 HF 模型目录或 HF 模型名称的路径,第二个参数指的是输出 GGUF 文件的路径。在运行命令前,请记得创建输出目录。"
#: ../../Qwen/source/run_locally/llama.cpp.md:196
#: 939cf0b39ee641cb8a27f9d11996b191
msgid "The fp16 model could be a bit heavy for running locally, and you can quantize the model as needed. We introduce the method of creating and quantizing GGUF files in [this guide](../quantization/llama.cpp). You can refer to that document for more information."
msgstr "fp16 模型对于本地运行可能有些重,您可以根据需要对模型进行量化。我们在 [这份指南](../quantization/llama.cpp) 中介绍了创建和量化 GGUF 文件的方法。您可以参考该文档获取更多信息。"
#: ../../Qwen/source/run_locally/llama.cpp.md:201
#: 33098e99c7a04ec98db9e802a6af49a0
msgid "Run Qwen with llama.cpp"
msgstr "使用 llama.cpp 运行 Qwen"
#: ../../Qwen/source/run_locally/llama.cpp.md:204
#: 754f5addb1cc406b8af2695a3dece765
msgid "Regarding switching between thinking and non-thinking modes, while the soft switch is always available, the hard switch implemented in the chat template is not exposed in llama.cpp. The quick workaround is to pass a custom chat template equivalennt to always `enable_thinking=False` via `--chat-template-file`."
msgstr "关于在思考模式和非思考模式之间切换,虽然软开关始终可用,但在聊天模板中实现的硬开关并未在 llama.cpp 中暴露。快速的解决方法是通过 `--chat-template-file` 传递一个等效于始终设置 `enable_thinking=False` 的自定义聊天模板。"
#: ../../Qwen/source/run_locally/llama.cpp.md:210
#: 6a12aa67f5f348e9948e8f789e3abecb
msgid "llama-cli"
msgstr ""
#: ../../Qwen/source/run_locally/llama.cpp.md:212
#: d08abfb6cd4a432db6963e07c560f5bd
msgid "[llama-cli](https://github.com/ggml-org/llama.cpp/tree/master/examples/main) is a console program which can be used to chat with LLMs. Simple run the following command where you place the llama.cpp programs:"
msgstr "[llama-cli](https://github.com/ggml-org/llama.cpp/tree/master/examples/main) 是一个可用于与大型语言模型聊天的控制台程序。只需在放置 llama.cpp 程序的位置运行以下命令:"
#: ../../Qwen/source/run_locally/llama.cpp.md:218
#: a746a1b907d74e4882acf7ca17b5a02c
msgid "Here are some explanations to the above command:"
msgstr "以下是对上述命令的一些解释:"
#: ../../Qwen/source/run_locally/llama.cpp.md:219
#: 7af666e145aa4d48b44609d8d1835b71
msgid "**Model**: llama-cli supports using model files from local path, remote url, or HuggingFace hub."
msgstr "**模型**:llama-cli 支持从本地路径、远程 URL 或 HuggingFace Hub 使用模型文件。"
#: ../../Qwen/source/run_locally/llama.cpp.md:220
#: 63c7c39cdf24490c91eda18659c94fb3
msgid "`-hf Qwen/Qwen3-8B-GGUF:Q8_0` in the above indicates we are using the model file from HuggingFace hub"
msgstr "上面的 `-hf Qwen/Qwen3-8B-GGUF:Q8_0` 表示我们使用的是来自 HuggingFace Hub 的模型文件。"
#: ../../Qwen/source/run_locally/llama.cpp.md:221
#: 6e774ad7835c412c8280f5b22adccac5
msgid "To use a local path, pass `-m qwen3-8b-q8_0.gguf` instead"
msgstr "要使用本地路径,传递 `-m qwen3-8b-q8_0.gguf` 即可。"
#: ../../Qwen/source/run_locally/llama.cpp.md:222
#: a3991a20f31044e2ae60c1a46e819ffc
msgid "To use a remote url, pass `-mu https://hf.co/Qwen/Qwen3-8B-GGUF/resolve/main/qwen3-8b-Q8_0.gguf?download=true` instead"
msgstr "要使用远程 URL,传递 `-mu https://hf.co/Qwen/Qwen3-8B-GGUF/resolve/main/qwen3-8b-Q8_0.gguf?download=true` 即可。"
#: ../../Qwen/source/run_locally/llama.cpp.md:224
#: 53f71a56c76e40789bf5ba7e0c9eb2e6
msgid "**Speed Optimization**:"
msgstr "**速度优化**:"
#: ../../Qwen/source/run_locally/llama.cpp.md:225
#: 079f880149d64ac694b6bebd306564bd
msgid "CPU: llama-cli by default will use CPU and you can change `-t` to specify how many threads you would like it to use, e.g., `-t 8` means using 8 threads."
msgstr "CPU:llama-cli 默认会使用 CPU,您可以通过更改 `-t` 来指定希望使用的线程数,例如 `-t 8` 表示使用 8 个线程。"
#: ../../Qwen/source/run_locally/llama.cpp.md:226
#: 6e10dabde41c404f93bb1148057fee75
msgid "GPU: If the programs are bulit with GPU support, you can use `-ngl`, which allows offloading some layers to the GPU for computation. If there are multiple GPUs, it will offload to all the GPUs. You can use `-dev` to control the devices used and `-sm` to control which kinds of parallelism is used. For example, `-ngl 99 -dev cuda0,cuda1 -sm row` means offload all layers to GPU 0 and GPU1 using the split mode row. Adding `-fa` may also speed up the generation."
msgstr "GPU:如果程序包含 GPU 支持,您可以使用 `-ngl`,它允许将一些层卸载到 GPU 进行计算。如果有多个 GPU,它会卸载到所有 GPU 上。您可以使用 `-dev` 控制使用的设备,并使用 `-sm` 控制使用的并行类型。例如,`-ngl 99 -dev cuda0,cuda1 -sm row` 表示使用 row 切分将所有层卸载到 GPU 0 和 GPU 1。添加 `-fa` 也可能加速生成。"
#: ../../Qwen/source/run_locally/llama.cpp.md:232
#: ab04fbce840f4307a1eac8d31602c09a
msgid "**Sampling Parameters**: llama.cpp supports [a variety of sampling methods](https://github.com/ggml-org/llama.cpp/tree/master/examples/main#generation-flags) and has default configuration for many of them. It is recommended to adjust those parameters according to the actual case and the recommended parameters from Qwen3 modelcard could be used as a reference. If you encounter repetition and endless generation, it is recommended to pass in addition `--presence-penalty` up to `2.0`."
msgstr "**采样参数**:llama.cpp 支持[多种采样方法](https://github.com/ggml-org/llama.cpp/tree/master/examples/main#generation-flags),并对其中许多方法有默认配置。建议根据实际情况调整这些参数,Qwen3 模型卡片中推荐的参数可作为参考。如果您遇到重复和无尽生成的情况,建议额外传递 `--presence-penalty`,最大值为 `2.0`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:236
#: 877d32d7b0314d08b7503762b3a6ae5b
msgid "**Context Management**: llama.cpp adopts the \"rotating\" context management by default. The `-c` controls the maximum context length (default 4096, 0 means loaded from model), and `-n` controls the maximum generation length each time (default -1 means infinite until ending, -2 means until context full). When the context is full but the generation doesn't end, the first `--keep` tokens (default 0, -1 means all) from the initial prompt is kept, and the first half of the rest is discarded. Then, the model continues to generate based on the new context tokens. You can set `--no-context-shift` to prevent this rotating behaviour and the generation will stop once `-c` is reached."
msgstr "**上下文管理**:llama.cpp 默认采用“轮换”上下文管理方式。`-c` 控制最大上下文长度(默认值 4096,0 表示从模型加载),`-n` 控制每次生成的最大长度(默认值 -1 表示无限生成直到结束,-2 表示直到上下文满)。当上下文已满但生成未结束时,初始提示中的前 `--keep` 个 token(默认值 0,-1 表示全部)会被保留,其余部分的前半部分会被丢弃。然后,模型基于新的上下文 token 继续生成。您可以设置 `--no-context-shift` 来防止这种轮换行为,一旦达到 `-c`,生成就会停止。"
#: ../../Qwen/source/run_locally/llama.cpp.md:242
#: 045740d5ab9647db91ba072b92f20b24
msgid "llama.cpp supports YaRN, which can be enabled by `-c 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768`."
msgstr "llama.cpp 支持 YaRN,可以通过 `-c 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768` 启用。"
#: ../../Qwen/source/run_locally/llama.cpp.md:243
#: 7ea67acb74ce4152b7924d3bb4d8a9b1
msgid "**Chat**: `--jinja` indicates using the chat template embedded in the GGUF which is prefered and `--color` indicates coloring the texts so that user input and model output can be better differentiated. If there is a chat template, like in Qwen3 models, llama-cli will enter chat mode automatically. To stop generation or exit press \"Ctrl+C\". You can use `-sys` to add a system prompt."
msgstr "**聊天**:`--jinja` 表示使用嵌入在 GGUF 中的聊天模板(推荐),`--color` 表示对文本进行着色,以便更好地区分用户输入和模型输出。如果有聊天模板(如 Qwen3 模型中),llama-cli 将自动进入聊天模式。要停止生成或退出,请按 \"Ctrl+C\"。您可以使用 `-sys` 添加系统提示。"
#: ../../Qwen/source/run_locally/llama.cpp.md:249
#: 88cb9458091b412d88a892764fa67bdb
msgid "llama-server"
msgstr ""
#: ../../Qwen/source/run_locally/llama.cpp.md:251
#: 2873c333020b4fd591988547419c0234
msgid "[llama-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/server) is a simple HTTP server, including a set of LLM REST APIs and a simple web front end to interact with LLMs using llama.cpp."
msgstr "[llama-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/server) 是一个简单的 HTTP 服务器,包含一组 LLM REST API 和一个简单的 Web 前端,用于通过 llama.cpp 与大型语言模型交互。"
#: ../../Qwen/source/run_locally/llama.cpp.md:253
#: c8c19cde1d2a4d9894b7438e00dbb7b5
msgid "The core command is similar to that of llama-cli. In addition, it supports thinking content parsing and tool call parsing."
msgstr "其核心命令与 llama-cli 类似。此外,它还支持思考内容解析和工具调用解析。"
#: ../../Qwen/source/run_locally/llama.cpp.md:260
#: 24590adb336a48a49938a50f3071714c
msgid "By default the server will listen at `http://localhost:8080` which can be changed by passing `--host` and `--port`. The web front end can be assess from a browser at `http://localhost:8080/`. The OpenAI compatible API is at `http://localhost:8080/v1/`."
msgstr "默认情况下,服务器将在 `http://localhost:8080` 监听,可以通过传递 `--host` 和 `--port` 更改。Web 前端可以通过浏览器访问 `http://localhost:8080/`。兼容 OpenAI 的 API 位于 `http://localhost:8080/v1/`。"
#: ../../Qwen/source/run_locally/llama.cpp.md:265
#: d84b5c974b724852900b5c2ed8cb8bd8
msgid "What's More"
msgstr "还有更多"
#: ../../Qwen/source/run_locally/llama.cpp.md:267
#: d6b97b2133f24613bc91ea09bd1bdda1
msgid "If you still find it difficult to use llama.cpp, don't worry, just check out other llama.cpp-based applications. For example, Qwen3 has already been officially part of Ollama and LM Studio, which are platforms for your to search and run local LLMs."
msgstr "如果你仍然觉得使用`llama-cli`有困难,别担心,可以尝试其他基于llama.cpp的应用程序。例如,Qwen3已经成为Ollama和LM Studio的官方组成部分,它们是用于搜索和运行本地LLM的平台。"
#: ../../Qwen/source/run_locally/llama.cpp.md:270
#: 1b9178ea95a64a6badee5291e042b1f8
msgid "Have fun!"
msgstr "玩得开心!"
#: ../../Qwen/source/run_locally/llama.cpp.md:3
#: 1b3eec2d725f4dbfa06f9f0d88e053dd
msgid "GPT-Generated Unified Format"
msgstr ""
#~ msgid "Previously, Qwen2 models generate nonsense like `GGGG...` with `llama.cpp` on GPUs. The workaround is to enable flash attention (`-fa`), which uses a different implementation, and offload the whole model to the GPU (`-ngl 80`) due to broken partial GPU offloading with flash attention."
#~ msgstr "曾有一段时间,在 GPU 上用 `llama.cpp` 运行 Qwen2 模型会生成类似 `GGGG...` 的胡言乱语。一个权宜之计是开启 flash attention (`-fa`) 并将全模型加载到 GPU 上 (`-ngl 80`) 。前者使用不同的算法实现,后者避免触发 flash attention 在模型一部分 GPU 加载时的异常。"
#~ msgid "Both should be no longer necessary after `b3370`, but it is still recommended enabling both for maximum efficiency."
#~ msgstr "自版本 `b3370` 起,以上方案已非必需。但考虑最佳效率,仍建议使用两项参数。"
#~ msgid "![llama-cli conversation start](../assets/imgs/llama-cli-cnv-start.png)"
#~ msgstr ""
#~ msgid "llama-cli conversation start"
#~ msgstr "llama-cli 对话开始"
#~ msgid "![llama-cli conversation chat](../assets/imgs/llama-cli-cnv-chat.png)"
#~ msgstr ""
#~ msgid "llama-cli conversation chat"
#~ msgstr "llama-cli 对话聊天"
#~ msgid "![llama-cli interactive first](../assets/imgs/llama-cli-if.png)"
#~ msgstr ""
#~ msgid "llama-cli interactive first"
#~ msgstr "llama-cli 互动模式用户优先"
#~ msgid "![llama-cli interactive](../assets/imgs/llama-cli-i.png)"
#~ msgstr ""
#~ msgid "llama-cli interactive"
#~ msgstr "llama-cli 互动模式"
#~ msgid "The main output is as follows: ![llama-cli](../assets/imgs/llama-cli.png)"
#~ msgstr "主要输出如下所示: ![llama-cli](../assets/imgs/llama-cli.png)"
#~ msgid "llama-cli"
#~ msgstr ""
#~ msgid "![llama-cli mid](../assets/imgs/llama-cli-mid.png)"
#~ msgstr ""
#~ msgid "llama-cli mid"
#~ msgstr "llama-cli 中间"
#~ msgid "Get the `llama-cli` program"
#~ msgstr "获取 `llama-cli` 程序"
#~ msgid "Remember that `llama-cli` is an example program, not a full-blown application. Sometimes it just does not work in the way you would like. This guide could also get quite technical sometimes. If you would like a smooth experience, check out the application mentioned above, which are much easier to \"use\"."
#~ msgstr "请记住,`llama-cli` 只是一个示例程序,并非完整应用。有时候它可能无法完全按照您的期望运行。本指南有时会涉及一些技术细节。如果您希望获得流畅的体验,请尝试上述提到的应用,它们使用起来会更加便捷。"
#~ msgid "Then use `make`:"
#~ msgstr "然后运行 `make` 命令:"
#~ msgid "The command will only compile the parts needed for `llama-cli`. On macOS, it will enable Metal and Accelerate by default, so you can run with GPUs. On Linux, you won't get GPU support by default, but SIMD-optimization is enabled if available."
#~ msgstr "该命令只会编译`llama-cli`所需的部件。在macOS上,默认情况下会启用Metal和Accelerate,因此你可以使用GPU运行。在Linux上,默认情况下你无法获得GPU支持,但如果可用,会启用CPU SIMD优化。"
#~ msgid "There are other [example programs](https://github.com/ggerganov/llama.cpp/tree/master/examples) in llama.cpp. You can build them at once with simply (it may take some time):"
#~ msgstr "在llama.cpp中还有其他的[示例程序](https://github.com/ggerganov/llama.cpp/tree/master/examples),你可以一次构建它们(可能需要一些时间):"
#~ msgid "or you can also compile only the one you need, for example:"
#~ msgstr "你也可以只编译你需要的,例如:"
#~ msgid "Running the Model"
#~ msgstr "运行模型"
#~ msgid "Due to random sampling and source code updates, the generated content with the same command as given in this section may be different from what is shown in the examples."
#~ msgstr "由于随机采样和源代码更新,使用本节中给出的相同命令生成的内容可能与示例中显示的不同。"
#~ msgid "`llama-cli` provide multiple \"mode\" to \"interact\" with the model. Here, we demonstrate three ways to run the model, with increasing difficulty."
#~ msgstr "`llama-cli` 提供多种“模式”来与模型进行“交互”。在这里,我们展示三种运行模型的方法,使用难度逐渐增加。"
#~ msgid "Conversation Mode"
#~ msgstr "对话模式"
#~ msgid "For users, to achieve chatbot-like experience, it is recommended to commence in the conversation mode"
#~ msgstr "对于普通用户来说,为了获得类似聊天机器人的体验,建议从对话模式开始。"
#~ msgid "The program will first print metadata to the screen until you see the following:"
#~ msgstr "程序首先会在屏幕上打印元数据,直到你看到以下内容:"
#~ msgid "Now, the model is waiting for your input, and you can chat with the model:"
#~ msgstr "现在,模型正在等待你的输入,你可以与模型进行对话:"
#~ msgid "That's something, isn't it? You can stop the model generation anytime by Ctrl+C or Command+. However, if the model generation is ended and the control is returned to you, pressing the combination will exit the program."
#~ msgstr "这很有趣,对吧?你可以随时通过 Ctrl+C 或 Command+. 来停止模型生成。但是,如果模型生成结束并且控制权返回给你,按下组合键将会退出程序。"
#~ msgid "So what does the command we used actually do? Let's explain a little:"
#~ msgstr "那么,我们使用的命令实际上做了什么呢?让我们来解释一下:"
#~ msgid "-m or --model"
#~ msgstr "-m 或 --model"
#~ msgid "Model path, obviously."
#~ msgstr "显然,这是模型路径。"
#~ msgid "-co or --color"
#~ msgstr "-co 或 --color"
#~ msgid "Colorize output to distinguish prompt and user input from generations. Prompt text is dark yellow; user text is green; generated text is white; error text is red."
#~ msgstr "为输出着色以区分提示词、用户输入和生成的文本。提示文本为深黄色;用户文本为绿色;生成的文本为白色;错误文本为红色。"
#~ msgid "-cnv or --conversation"
#~ msgstr "-cnv 或 --conversation"
#~ msgid "Run in conversation mode. The program will apply the chat template accordingly."
#~ msgstr "在对话模式下运行。程序将相应地应用聊天模板。"
#~ msgid "-p or --prompt"
#~ msgstr "-p 或 --prompt"
#~ msgid "In conversation mode, it acts as the system message."
#~ msgstr "在对话模式下,它作为系统提示。"
#~ msgid "-fa or --flash-attn"
#~ msgstr "-fa 或 --flash-attn"
#~ msgid "Enable Flash Attention if the program is compiled with GPU support."
#~ msgstr "如果程序编译时支持 GPU,则启用Flash Attention注意力实现。"
#~ msgid "-ngl or --n-gpu-layers"
#~ msgstr "-ngl 或 --n-gpu-layers"
#~ msgid "Layers to the GPU for computation if the program is compiled with GPU support."
#~ msgstr "如果程序编译时支持 GPU,则将这么多层分配给 GPU 进行计算。"
#~ msgid "-n or --predict"
#~ msgstr "-n 或 --predict"
#~ msgid "Number of tokens to predict."
#~ msgstr "要预测的token数量。"
#~ msgid "You can also explore other options by"
#~ msgstr "你也可以通过以下方式探索其他选项:"
#~ msgid "Interactive Mode"
#~ msgstr "互动模式"
#~ msgid "The conversation mode hides the inner workings of LLMs. With interactive mode, you are made aware how LLMs work in the way to completion or continuation. The workflow is like"
#~ msgstr "对话模式隐藏了大型语言模型(LLMs)的内部机制。在互动模式下,你可以直观地了解LLMs如何完成或继续生成文本。工作流程如下"
#~ msgid "Give the model an initial prompt, and the model generates a completion."
#~ msgstr "给模型一个初始提示,模型会生成续写文本。"
#~ msgid "Interrupt the model generation any time or wait until the model generates a reverse prompt or an eos token."
#~ msgstr "随时中断模型生成,或者等到模型生成反向提示(reverse prompt)或结束token(eos token)。"
#~ msgid "Append new texts (with optional prefix and suffix), and then let the model continues the generation."
#~ msgstr "添加新文本(可选前缀和后缀),然后让模型继续生成。"
#~ msgid "Repeat Step 2. and Step 3."
#~ msgstr "重复步骤2和步骤3。"
#~ msgid "This workflow requires a different set of options, since you have to mind the chat template yourselves. To proper run the Qwen2.5 models, try the following:"
#~ msgstr "此工作流程需要一组不同的选项,因为你必须自己管理聊天模板。为了正确运行Qwen2.5模型,请尝试以下操作:"
#~ msgid "We use some new options here:"
#~ msgstr "我们在这里使用了一些新的选项:"
#~ msgid "-sp or --special"
#~ msgstr "-sp 或 --special"
#~ msgid "Show the special tokens."
#~ msgstr "显示特殊token。"
#~ msgid "-i or --interactive"
#~ msgstr "-i 或 --interactive"
#~ msgid "Enter interactive mode. You can interrupt model generation and append new texts."
#~ msgstr "进入互动模式。你可以中断模型生成并添加新文本。"
#~ msgid "-if or --interactive-first"
#~ msgstr "-if 或 --interactive-first"
#~ msgid "Immediately wait for user input. Otherwise, the model will run at once and generate based on the prompt."
#~ msgstr "立即等待用户输入。否则,模型将立即运行并根据提示生成文本。"
#~ msgid "In interactive mode, it is the contexts based on which the model predicts the continuation."
#~ msgstr "在互动模式下,这是模型续写用的上文。"
#~ msgid "--in-prefix"
#~ msgstr ""
#~ msgid "String to prefix user inputs with."
#~ msgstr "用户输入附加的前缀字符串。"
#~ msgid "--in-suffix"
#~ msgstr ""
#~ msgid "String to suffix after user inputs with."
#~ msgstr "用户输入附加的后缀字符串。"
#~ msgid "The result is like this:"
#~ msgstr "结果如下:"
#~ msgid "We use `prompt`, `in-prefix`, and `in-suffix` together to implement the chat template (ChatML-like) used by Qwen2.5 with a system message. So the experience is very similar to the conversation mode: you just need to type in the things you want to ask the model and don't need to worry about the chat template once the program starts. Note that, there should not be a new line after user input according to the template, so remember to end your input with `/`."
#~ msgstr "我们将 `prompt`、`in-prefix` 和 `in-suffix` 结合起来实现Qwen2.5使用的包含系统消息的聊天模板(类似ChatML)。这样的,体验与对话模式非常相似:你只需输入想要询问模型的内容,在程序启动后无需担心聊天模板。请注意,根据模板,用户输入后不应有换行符,所以请以 `/` 结束输入。"
#~ msgid "Advanced Usage"
#~ msgstr "高级用法"
#~ msgid "Interactive mode can achieve a lot more flexible workflows, under the condition that the chat template is maintained properly throughout. The following is an example:"
#~ msgstr "互动模式可以实现更灵活的工作流程,前提是整个过程中正确维护聊天模板。以下是一个示例:"
#~ msgid "In the above example, I set `--reverse-prompt` to `\"LLM\"` so that the generation is interrupted whenever the model generates `\"LLM\"`[^rp]. The in prefix and in suffix are also set to empty so that I can add content exactly I want. After every generation of `\"LLM\"`, I added the part `\"...not what you think...\"` which are not likely to be generated by the model. Yet the model can continue generation just as fluent, although the logic is broken the second time around. I think it's fun to play around."
#~ msgstr "在上面的例子中,我将 `--reverse-prompt` 设置为 `\"LLM\"`,以便每当模型生成 `\"LLM\"` 时中断生成过程[^rp]。前缀和后缀也被设置为空,这样我可以精确地添加想要的内容。每次生成 `\"LLM\"` 后,我添加了 `\"...not what you think...\"` 的部分,这部分不太可能由模型生成。然而,模型仍能继续流畅生成,尽管第二次逻辑被破坏。这很有趣,值得探索。"
#~ msgid "Non-interactive Mode"
#~ msgstr "非交互模式"
#~ msgid "You can also use `llama-cli` for text completion by using just the prompt. However, it also means you have to format the input properly and only one turn can be generated."
#~ msgstr "你还可以仅使用提示词,通过`llama-cli`完成文本续写。但这也意味着你需要正确格式化输入,并且只能生成一次回应。"
#~ msgid "The following is an example:"
#~ msgstr "以下是一个示例:"
#~ msgid "The main output is as follows:"
#~ msgstr "主要步骤如下:"
#~ msgid "In fact, you can start completion anywhere you want, even in the middle of an assistant message:"
#~ msgstr "实际上,你可以从任何你想要的地方开始续写,即使是在assistant消息的中间:"
#~ msgid "Now you can use `llama-cli` in three very different ways! Try talk to Qwen2.5 and share your experience with the community!"
#~ msgstr "现在你可以用三种截然不同的方式使用`llama-cli`了!试试和Qwen2.5对话,然后与社区分享你的体验吧!"
#~ msgid "There are some gotchas in using `--reverse-prompt` as it matches tokens instead of strings. Since the same string can be tokenized differently in different contexts in BPE tokenization, some reverse prompts are never matched even though the string does exist in generation."
#~ msgstr "`--reverse-prompt`在匹配时针对的是token而非字符串,因此使用时有一些需要注意的地方。由于BPE tokenizer在不同上下文中对相同字符串的tokenization结果可能不同,所以某些反向提示符即使在生成的文本中存在,也可能永远无法匹配成功。"
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/run_locally/mlx-lm.md:1 3b84b663657743c58c83a421e1302ac4
msgid "MLX-LM"
msgstr ""
#: ../../Qwen/source/run_locally/mlx-lm.md:4 49dc1a423dc3489abb616eeb839d53c6
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/run_locally/mlx-lm.md:7 e85ad62d5b504e0abcf1060c1fd7f2da
msgid "[mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms) helps you run LLMs locally on Apple Silicon. It is available at MacOS. It has already supported Qwen models and this time, we have also provided checkpoints that you can directly use with it."
msgstr "[mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms)能让你在Apple Silicon上运行大型语言模型,适用于MacOS。mlx-lm已支持Qwen模型,此次我们提供直接可用的模型文件。"
#: ../../Qwen/source/run_locally/mlx-lm.md:11 44c83220973e4571a64f9d7079493d2f
msgid "Prerequisites"
msgstr "准备工作"
#: ../../Qwen/source/run_locally/mlx-lm.md:13 058158dccfaa435186d7833a913db643
msgid "The easiest way to get started is to install the `mlx-lm` package:"
msgstr "首先需要安装`mlx-lm`包:"
#: ../../Qwen/source/run_locally/mlx-lm.md:15 3ae5d0d79fb64cb29c9f13c634fb5d67
msgid "with `pip`:"
msgstr "使用`pip`:"
#: ../../Qwen/source/run_locally/mlx-lm.md:21 2d6dd86369c6430392ed6c5e2891397d
msgid "with `conda`:"
msgstr "使用`conda`:"
#: ../../Qwen/source/run_locally/mlx-lm.md:27 c11938d4f28344d1b7c59374fd51b458
#, fuzzy
msgid "Running with Qwen MLX Files"
msgstr "使用Qwen MLX模型文件"
#: ../../Qwen/source/run_locally/mlx-lm.md:29 89cf4fcf7a5c4a63a21e5a8e0caf9496
msgid "We provide model checkpoints with `mlx-lm` in our Hugging Face organization, and to search for what you need you can search the repo names with `-MLX`."
msgstr "我们已在Hugging Face提供了适用于`mlx-lm`的模型文件,请搜索带`-MLX`的存储库。"
#: ../../Qwen/source/run_locally/mlx-lm.md:31 2b0d57b86ebc4d6da87a23f430def570
msgid "Here provides a code snippet with `apply_chat_template` to show you how to load the tokenizer and model and how to generate contents."
msgstr "这里我们展示了一个代码样例,其中使用了`apply_chat_template`来应用对话模板。"
#: ../../Qwen/source/run_locally/mlx-lm.md:52 ee8f71515ca34ebcb9c28b1f2e55b688
msgid "Make Your MLX files"
msgstr "自行制作MLX格式模型"
#: ../../Qwen/source/run_locally/mlx-lm.md:54 fc9029906db54f1593081b7e9aacbaf8
msgid "You can make mlx files with just one command:"
msgstr "仅用一条命令即可制作mlx格式模型"
#: ../../Qwen/source/run_locally/mlx-lm.md:60 d82e773a63ff4a2db2af161af7aeb1b7
msgid "where"
msgstr "参数含义分别是"
#: ../../Qwen/source/run_locally/mlx-lm.md:62 bcb1f835e45e4b54938c34d390846cf2
msgid "`--hf-path`: the model name on Hugging Face Hub or the local path"
msgstr "`--hf-path`: Hugging Face Hub上的模型名或本地路径"
#: ../../Qwen/source/run_locally/mlx-lm.md:63 c69929c8ea734b78bcb95872044c6a2a
msgid "`--mlx-path`: the path for output files"
msgstr "`--mlx-path`: 输出模型文件的存储路径"
#: ../../Qwen/source/run_locally/mlx-lm.md:64 d44a2b228f074e378094497d5bbd543f
msgid "`-q`: enable quantization"
msgstr "`-q`: 启用量化"
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/run_locally/ollama.md:1 b115922199224f47bb532b26bf224620
msgid "Ollama"
msgstr "Ollama"
#: ../../Qwen/source/run_locally/ollama.md:4 d0b6051e7f0744dc894cdaa685a03e14
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/run_locally/ollama.md:7 8e9346ee9e40484ca07e522a28c10a0f
msgid "[Ollama](https://ollama.com/) helps you run LLMs locally with only a few commands. It is available at MacOS, Linux, and Windows. Now, Qwen2.5 is officially on Ollama, and you can run it with one command:"
msgstr "[Ollama](https://ollama.com/)帮助您通过少量命令即可在本地运行LLM。它适用于MacOS、Linux和Windows操作系统。现在,Qwen2.5正式上线Ollama,您只需一条命令即可运行它:"
#: ../../Qwen/source/run_locally/ollama.md:15 d320e9f92d7c4e91a6dc80e999b28823
msgid "Next, we introduce more detailed usages of Ollama for running Qwen2.5 models."
msgstr "接着,我们介绍在Ollama使用Qwen2.5模型的更多用法"
#: ../../Qwen/source/run_locally/ollama.md:17 51b225372f6c4b94af27930e3aaefd7e
msgid "Quickstart"
msgstr "快速开始"
#: ../../Qwen/source/run_locally/ollama.md:19 12459a52b35b401085439f7bb53f911d
msgid "Visit the official website [Ollama](https://ollama.com/) and click download to install Ollama on your device. You can also search models on the website, where you can find the Qwen2.5 models. Except for the default one, you can choose to run Qwen2.5-Instruct models of different sizes by:"
msgstr "访问官方网站[Ollama](https://ollama.com/),点击`Download`以在您的设备上安装Ollama。您还可以在网站上搜索模型,在这里您可以找到Qwen2.5系列模型。除了默认模型之外,您可以通过以下方式选择运行不同大小的Qwen2.5-Instruct模型:"
#: ../../Qwen/source/run_locally/ollama.md:23 0367670c6d70484b9be2532d1c29e185
msgid "`ollama run qwen2.5:0.5b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:24 ab112db6e61547109ce8ea0c4c012471
msgid "`ollama run qwen2.5:1.5b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:25 183f671bddee433faf2a4a329dcb55eb
msgid "`ollama run qwen2.5:3b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:26 36a8777ecbeb48769efec7ece108735d
msgid "`ollama run qwen2.5:7b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:27 f4651bc149b94406b85096f4783c5e53
msgid "`ollama run qwen2.5:14b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:28 6c9850d60512459988a6b73a6b1e01fc
msgid "`ollama run qwen2.5:32b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:29 a96e5ef6669c485399d85ef441f6306c
msgid "`ollama run qwen2.5:72b`"
msgstr ""
#: ../../Qwen/source/run_locally/ollama.md:32 3e61ed7de75245559bbca559ded82630
msgid "`ollama` does not host base models. Even though the tag may not have the instruct suffix, they are all instruct models."
msgstr "`ollama`并不托管基模型。即便模型标签不带instruct后缀,实际也是instruct模型。"
#: ../../Qwen/source/run_locally/ollama.md:36 85b7620bbb1e4d38833b5a231e9f12c2
msgid "Run Ollama with Your GGUF Files"
msgstr "用Ollama运行你自己的GGUF文件"
#: ../../Qwen/source/run_locally/ollama.md:38 29df4213bffb4bc7a970d7ce86ad275f
msgid "Sometimes you don't want to pull models and you just want to use Ollama with your own GGUF files. Suppose you have a GGUF file of Qwen2.5, `qwen2.5-7b-instruct-q5_0.gguf`. For the first step, you need to create a file called `Modelfile`. The content of the file is shown below:"
msgstr "有时您可能不想拉取模型,而是希望直接使用自己的GGUF文件来配合Ollama。假设您有一个名为`qwen2.5-7b-instruct-q5_0.gguf`的Qwen2.5的GGUF文件。在第一步中,您需要创建一个名为`Modelfile`的文件。该文件的内容如下所示:"
#: ../../Qwen/source/run_locally/ollama.md:101 19c9382f6bfc4aeda355f5745988fadb
msgid "Then create the ollama model by running:"
msgstr "然后通过运行下列命令来创建一个ollama模型"
#: ../../Qwen/source/run_locally/ollama.md:107 4de4ae26fa644c83848aa97598160a02
msgid "Once it is finished, you can run your ollama model by:"
msgstr "完成后,你即可运行你的ollama模型:"
#: ../../Qwen/source/run_locally/ollama.md:113 ce86e8ed78164b369eec2548b6260b49
msgid "Tool Use"
msgstr "工具调用"
#: ../../Qwen/source/run_locally/ollama.md:115 09644168e091461abc60e06d579f80b0
msgid "Tool use is now support Ollama and you should be able to run Qwen2.5 models with it. For more details, see our [function calling guide](../framework/function_call)."
msgstr "Ollama现已支持工具调用,Qwen2.5也已适配。更多详情,请参阅我们的[函数调用指南](../framework/function_call)"
# Copyright (C) 2024, Qwen Team, Alibaba Group.
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/training/llama_factory.rst:2
#: 7a9018d9e7ee41858ac5c59723365a63
msgid "LLaMA-Factory"
msgstr ""
#: ../../Qwen/source/training/llama_factory.rst:5
#: 6e90d8f392914d029783ed85b510063f
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/training/llama_factory.rst:7
#: e82fbe9827774824a4259372afda3240
msgid "Here we provide a script for supervised finetuning Qwen2.5 with `LLaMA-Factory <https://github.com/hiyouga/LLaMA-Factory>`__. This script for supervised finetuning (SFT) has the following features:"
msgstr "我们将介绍如何使用 `LLaMA-Factory <https://github.com/hiyouga/LLaMA-Factory>`__ 微调 Qwen2.5 模型。本脚本包含如下特点:"
#: ../../Qwen/source/training/llama_factory.rst:11
#: 7d37d7835f514ce68b9e4e3054919d3c
msgid "Support single-GPU and multi-GPU training;"
msgstr "支持单卡和多卡分布式训练"
#: ../../Qwen/source/training/llama_factory.rst:13
#: 232bdd05e26846989ae770a8da52ccc3
msgid "Support full-parameter tuning, LoRA, Q-LoRA, Dora."
msgstr "支持全参数微调、LoRA、Q-LoRA 和 DoRA 。"
#: ../../Qwen/source/training/llama_factory.rst:15
#: 0cf21b5d01024a0999a290c5fa0f4e9e
msgid "In the following, we introduce more details about the usage of the script."
msgstr "下文将介绍更多关于脚本的用法。"
#: ../../Qwen/source/training/llama_factory.rst:19
#: aa67b2029a4449a8838c80545256d4c0
msgid "Installation"
msgstr "安装"
#: ../../Qwen/source/training/llama_factory.rst:21
#: 2b4a2d1e20c342948e987ce6abad71d0
msgid "Before you start, make sure you have installed the following packages:"
msgstr "开始之前,确保你已经安装了以下代码库:"
#: ../../Qwen/source/training/llama_factory.rst:23
#: 488203d62c3143f09325bbe587ef3f7a
msgid "Follow the instructions of `LLaMA-Factory <https://github.com/hiyouga/LLaMA-Factory>`__, and build the environment."
msgstr "根据 `LLaMA-Factory <https://github.com/hiyouga/LLaMA-Factory>`__ 官方指引构建好你的环境"
#: ../../Qwen/source/training/llama_factory.rst:26
#: 98fc755a8555428fbcf01b547bcc270f
msgid "Install these packages (Optional):"
msgstr "安装下列代码库(可选):"
#: ../../Qwen/source/training/llama_factory.rst:33
#: b4d6aa9134de4f35800126a4b71e7a72
msgid "If you want to use `FlashAttention-2 <https://github.com/Dao-AILab/flash-attention>`__, make sure your CUDA is 11.6 and above."
msgstr "如你使用 `FlashAttention-2 <https://github.com/Dao-AILab/flash-attention>`__ ,请确保你的CUDA版本在11.6以上。"
#: ../../Qwen/source/training/llama_factory.rst:38
#: 5859e0c6dbd24040b05778b6fdea052e
msgid "Data Preparation"
msgstr "准备数据"
#: ../../Qwen/source/training/llama_factory.rst:40
#: 6ffed6a5040d48238e5d10b3f984a73a
msgid "LLaMA-Factory provides several training datasets in ``data`` folder, you can use it directly. If you are using a custom dataset, please prepare your dataset as follows."
msgstr "LLaMA-Factory 在 ``data`` 文件夹中提供了多个训练数据集,您可以直接使用它们。如果您打算使用自定义数据集,请按照以下方式准备您的数据集。"
#: ../../Qwen/source/training/llama_factory.rst:44
#: b6d0fcedc5fc40e291255c528bc988fb
msgid "Organize your data in a **json** file and put your data in ``data`` folder. LLaMA-Factory supports dataset in ``alpaca`` or ``sharegpt`` format."
msgstr "请将您的数据以 ``json`` 格式进行组织,并将数据放入 data 文件夹中。LLaMA-Factory 支持以 ``alpaca`` 或 ``sharegpt`` 格式的数据集。"
#: ../../Qwen/source/training/llama_factory.rst:48
#: 2f73d4edea9044a8bd42c9e4e25e992c
msgid "The dataset in ``alpaca`` format should follow the below format:"
msgstr "``alpaca`` 格式的数据集应遵循以下格式:"
#: ../../Qwen/source/training/llama_factory.rst:65
#: 0669bfde81294b459125997c0a6e8257
msgid "The dataset in ``sharegpt`` format should follow the below format:"
msgstr "``sharegpt`` 格式的数据集应遵循以下格式:"
#: ../../Qwen/source/training/llama_factory.rst:86
#: f1749224279f40bb8b6a3adf517af147
msgid "Provide your dataset definition in ``data/dataset_info.json`` in the following format ."
msgstr "在 ``data/dataset_info.json`` 文件中提供您的数据集定义,并采用以下格式:"
#: ../../Qwen/source/training/llama_factory.rst:89
#: a7f285a82bbd495ab10b76fb5a2be6fb
msgid "For ``alpaca`` format dataset, the columns in ``dataset_info.json`` should be:"
msgstr "对于 ``alpaca`` 格式的数据集,其 ``dataset_info.json`` 文件中的列应为:"
#: ../../Qwen/source/training/llama_factory.rst:105
#: f90f0ba55b93436c9a096afa85489698
msgid "For ``sharegpt`` format dataset, the columns in ``dataset_info.json`` should be:"
msgstr "对于 ``sharegpt`` 格式的数据集,``dataset_info.json`` 文件中的列应该包括:"
#: ../../Qwen/source/training/llama_factory.rst:127
#: f91215519e61450c9c4c245beb4d26d6
msgid "Training"
msgstr "训练"
#: ../../Qwen/source/training/llama_factory.rst:129
#: 1624352503e24ceb927d2dba808df7ae
msgid "Execute the following training command:"
msgstr "执行下列命令:"
#: ../../Qwen/source/training/llama_factory.rst:169
#: 24676444d9cd42069f3bd760d3c5b0cd
msgid "and enjoy the training process. To make changes to your training, you can modify the arguments in the training command to adjust the hyperparameters. One argument to note is ``cutoff_len``, which is the maximum length of the training data. Control this parameter to avoid OOM error."
msgstr "并享受训练过程。若要调整您的训练,您可以通过修改训练命令中的参数来调整超参数。其中一个需要注意的参数是 ``cutoff_len`` ,它代表训练数据的最大长度。通过控制这个参数,可以避免出现OOM(内存溢出)错误。"
#: ../../Qwen/source/training/llama_factory.rst:176
#: bd1b02c65c5e4216bde43f8b1dd60ca6
msgid "Merge LoRA"
msgstr "合并LoRA"
#: ../../Qwen/source/training/llama_factory.rst:178
#: b581a862018f412db480ec68be1512fa
msgid "If you train your model with LoRA, you probably need to merge adapter parameters to the main branch. Run the following command to perform the merging of LoRA adapters."
msgstr "如果你使用 LoRA 训练模型,可能需要将adapter参数合并到主分支中。请运行以下命令以执行 LoRA adapter 的合并操作。"
#: ../../Qwen/source/training/llama_factory.rst:194
#: fee1bd0eca4b41e1bf3efa0d42ee401e
msgid "Conclusion"
msgstr "结语"
#: ../../Qwen/source/training/llama_factory.rst:196
#: cfb87dc99dbe4e85a304593333c2241d
msgid "The above content is the simplest way to use LLaMA-Factory to train Qwen. Feel free to dive into the details by checking the official repo!"
msgstr "上述内容是使用LLaMA-Factory训练Qwen的最简单方法。 欢迎通过查看官方仓库深入了解详细信息!"
# Copyright (C) 2024, Qwen Team
# This file is distributed under the same license as the Qwen package.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 22:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/training/ms_swift.rst:2 498aef3956544f9b97c7a47c66997ea2
msgid "SWIFT"
msgstr ""
#: ../../source/training/ms_swift.rst:5 92e3eed8ef984c1e897d6958ddbf552a
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../source/training/ms_swift.rst:7 0a0330c78d8549d1be55501fdc352a15
msgid "ModelScope SWIFT (ms-swift) is the official large model and multimodal model training and deployment framework provided by the ModelScope community."
msgstr "ModelScope SWIFT (ms-swift) 是 ModelScope 社区提供的官方大型模型和多模态模型训练与部署框架。"
#: ../../source/training/ms_swift.rst:9 6e980c27ae67400ca12c11fedf439af8
msgid "GitHub repository: `ms-swift <https://github.com/modelscope/ms-swift>`__"
msgstr "GitHub 地址:`ms-swift <https://github.com/modelscope/ms-swift>`__"
#: ../../source/training/ms_swift.rst:12 8b5caa854cf641c489310f747ed205ba
msgid "Supervised Fine-Tuning (SFT)"
msgstr "有监督微调 (SFT)"
#: ../../source/training/ms_swift.rst:14 43999693226b4bc1ba78d7f8addaa01d
msgid "The SFT script in ms-swift has the following features:"
msgstr "ms-swift 中的 SFT 训练具有以下特性:"
#: ../../source/training/ms_swift.rst:16 98001e42cd2f4f1e874a8aab9b32eca4
msgid "Flexible training options: single-GPU and multi-GPU support"
msgstr "支持单卡和多卡分布式训练"
#: ../../source/training/ms_swift.rst:17 4d0fa62cd0364e08b684beca46245de0
msgid "Efficient tuning methods: full-parameter, LoRA, Q-LoRA, and Dora"
msgstr "支持全参数微调、LoRA、Q-LoRA 和 DoRA"
#: ../../source/training/ms_swift.rst:18 fa0834b06bd9417193ae6966e1628433
msgid "Broad model compatibility: supports various LLM and MLLM architectures"
msgstr "模型兼容性:支持各种 LLM 和 MLLM 架构"
#: ../../source/training/ms_swift.rst:20 0098b181ea6b4724bcec06a864c7972b
msgid "For detailed model compatibility, see: `Supported Models <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html>`__"
msgstr "支持的模型参考:`支持的模型 <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html>`__"
#: ../../source/training/ms_swift.rst:23 ../../source/training/ms_swift.rst:193
#: 491064083ea6496489d30443d86e0124 c40c7e8842a94cf7b8abb7006010b343
msgid "Environment Setup"
msgstr "环境配置"
#: ../../source/training/ms_swift.rst:25 ../../source/training/ms_swift.rst:195
#: 118f5e679951486cbbe6123781294450 b303a90f4bbc4158a01727472b14e68f
msgid "Follow the instructions of `ms-swift <https://github.com/modelscope/ms-swift>`__, and build the environment."
msgstr "根据 `ms-swift <https://github.com/modelscope/ms-swift>`__ 的说明搭建环境。"
#: ../../source/training/ms_swift.rst:27 f53885ef077d4ac283603fd297b2b2be
msgid "Optional packages for advanced features::"
msgstr "可选包::"
#: ../../source/training/ms_swift.rst:33 ../../source/training/ms_swift.rst:205
#: 4ec49f5d7a1b4a9a9851cafafe9c7623 da2d1da0c3414e03845aae57397a6fdc
msgid "Data Preparation"
msgstr "数据准备"
#: ../../source/training/ms_swift.rst:35 e8de47b33f6c4196961896fc35c8d038
msgid "ms-swift supports multiple dataset formats:"
msgstr "ms-swift 支持多种数据集格式:"
#: ../../source/training/ms_swift.rst:64 5de2bd424af14c52b7d73385caca6fba
msgid "For complete dataset formatting guidelines, see: `Custom Dataset Documentation <https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html>`__"
msgstr "有关完整的数据集格式,请参阅:`自定义数据集文档 <https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html>`__"
#: ../../source/training/ms_swift.rst:66 26962c82458e4dfbab4192a3d56139dd
msgid "Pre-built datasets are available at: `Supported Datasets <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
msgstr "内置数据集可以查看:`支持的数据集 <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
#: ../../source/training/ms_swift.rst:69 35818d0633704fd3a70482e77a32f892
msgid "Training Examples"
msgstr "训练示例"
#: ../../source/training/ms_swift.rst:72 b019313ad9e541839a91228db979ed13
msgid "Single-GPU Training"
msgstr "单卡训练"
#: ../../source/training/ms_swift.rst:74 ac147523e4044971b880dc123bcaf21e
msgid "**LLM Example (Qwen2.5-7B-Instruct):**"
msgstr "**LLM 示例 (Qwen2.5-7B-Instruct):**"
#: ../../source/training/ms_swift.rst:104 c3a00f6cd3314912b94ceb13f71539df
msgid "**MLLM Example (Qwen2.5-VL-7B-Instruct):**"
msgstr "**MLLM 示例 (Qwen2.5-VL-7B-Instruct):**"
#: ../../source/training/ms_swift.rst:130
#: ../../source/training/ms_swift.rst:327 172d134c37604cb6a9b4ff639adbccae
#: 4f3070b9cb95428d95cde6915ba38023
msgid "Multi-GPU Training"
msgstr "多卡训练"
#: ../../source/training/ms_swift.rst:132
#: ../../source/training/ms_swift.rst:329 49ae85a99b9940c292862841f1216a7b
#: 5aca8726082640948ba58c6cd13bc602
msgid "**LLM Example with DeepSpeed:**"
msgstr "**LLM 示例:**"
#: ../../source/training/ms_swift.rst:156
#: ../../source/training/ms_swift.rst:374 428e4834ff554058b29f682c9a61d4ea
#: e730333134d54c35bd2c3c01d9009c23
msgid "**MLLM Example with DeepSpeed:**"
msgstr "**MLLM 示例:**"
#: ../../source/training/ms_swift.rst:180 a287c14468ed4625ad50eb75770e4481
msgid "Reinforcement Learning (RL)"
msgstr "强化学习 (RL)"
#: ../../source/training/ms_swift.rst:182 9c4c01d75c264b61965178092f5d4cd2
msgid "The RL script in ms-swift has the following features:"
msgstr "ms-swift 中的 RL 训练具有以下特性:"
#: ../../source/training/ms_swift.rst:184 22471f86636445e19f1e2cf921f15fac
msgid "Support single-GPU and multi-GPU training"
msgstr "支持单卡和多卡分布式训练"
#: ../../source/training/ms_swift.rst:185 069b9f84ec464a8aa7229bdc17c23648
msgid "Support full-parameter tuning, LoRA, Q-LoRA, and Dora"
msgstr "支持全参数微调、LoRA、Q-LoRA 和 DoRA"
#: ../../source/training/ms_swift.rst:186 dc6e8583b524406a818cf3491381bc1b
msgid "Supports multiple RL algorithms including GRPO, DAPO, PPO, DPO, KTO, ORPO, CPO, and SimPO"
msgstr "支持多种 RL 算法,包括 GRPO、DAPO、PPO、DPO、KTO、ORPO、CPO 和 SimPO"
#: ../../source/training/ms_swift.rst:187 5a9368d446294733b009dac7065caa95
msgid "Supports both large language models (LLM) and multimodal models (MLLM)"
msgstr "支持大型语言模型 (LLM) 和多模态模型 (MLLM)"
#: ../../source/training/ms_swift.rst:189 6747ebd5490a42fa9bbe39c71d42de64
msgid "For detailed support information, please refer to: `Supported Features <https://swift.readthedocs.io/en/latest/Instruction/Pre-training-and-Fine-tuning.html#pre-training-and-fine-tuning>`__"
msgstr "有关详细支持信息,请参考:`训练支持 <https://swift.readthedocs.io/en/latest/Instruction/Pre-training-and-Fine-tuning.html#pre-training-and-fine-tuning>`__"
#: ../../source/training/ms_swift.rst:196 5c4ebc67dec24fdba825e855d656428f
msgid "Install these packages (Optional)::"
msgstr "安装这些软件包 (可选)::"
#: ../../source/training/ms_swift.rst:207 b4dcc1d366304d0abbfa994d76646547
msgid "ms-swift has built-in preprocessing logic for several datasets, which can be directly used for training via the ``--dataset`` parameter. For supported datasets, please refer to: `Supported Datasets <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
msgstr "ms-swift 内置了多个数据集的预处理逻辑,可以通过 ``--dataset`` 参数直接用于训练。有关支持的数据集,请参见:`支持的数据集 <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
#: ../../source/training/ms_swift.rst:209 24c5befb3f5545a88b84cfa678b2752f
msgid "You can also use local custom datasets by providing the local dataset path to the ``--dataset`` parameter."
msgstr "您还可以通过将本地数据集路径提供给 ``--dataset`` 参数来使用本地自定义数据集。"
#: ../../source/training/ms_swift.rst:211 fb274880c62c4cad8243cbbed73f2adc
msgid "Example Dataset Formats:"
msgstr "示例数据集格式:"
#: ../../source/training/ms_swift.rst:224 31d062ff29e14c53ad92ca6d5ab81c23
msgid "Notes on Dataset Requirements"
msgstr "数据集要求说明"
#: ../../source/training/ms_swift.rst:226 f6e1dbba72e9450984df10e9da1f4f4c
msgid "Reward Function Calculation: Depending on the reward function being used, additional columns may be required in the dataset. For example:"
msgstr "奖励函数计算:根据使用的奖励函数,数据集中可能需要额外的列。例如:"
#: ../../source/training/ms_swift.rst:228 5b6d41c5b1734f1a87358fb5416c5888
msgid "When using the built-in accuracy/cosine reward, the dataset must include a ``solution`` column to compute accuracy. The other columns in the dataset will also be passed to the `kwargs` of the reward function."
msgstr "在使用内置的accuracy/cosine奖励时,数据集必须包含一个 solution 列以计算准确性。数据集中的其他列也将传递到奖励函数的 kwargs 中。"
#: ../../source/training/ms_swift.rst:231 63ee17a7a6b041a88daee17b96928659
msgid "Customizing the Reward Function: To tailor the reward function to your specific needs, you can refer to the following resource: `external reward plugin <https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin>`__"
msgstr "自定义奖励函数:为了满足您的具体需求,可以参考以下资源:`外部奖励插件 <https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin>`__"
#: ../../source/training/ms_swift.rst:235 5b5aa025adbe492788108bfed1ce09a3
msgid "GRPO Training Examples"
msgstr "GRPO 训练示例"
#: ../../source/training/ms_swift.rst:238 836b7e6ebcd04288968db7886440374b
msgid "Single-GPU Configuration"
msgstr "单卡配置"
#: ../../source/training/ms_swift.rst:240 e9c20b0809904804b2ed1a87986f6af2
msgid "**LLM (Qwen2.5-7B):**"
msgstr ""
#: ../../source/training/ms_swift.rst:282 2b6027439b4a423a9f4984a2bb184109
msgid "**MLLM (Qwen2.5-VL-7B-Instruct):**"
msgstr ""
#: ../../source/training/ms_swift.rst:422 a0267c6288d44958bde43844c55b692d
msgid "Model Export"
msgstr "模型导出"
#: ../../source/training/ms_swift.rst:424 0ad518fadd134498ad3a393397c6d2ae
msgid "**Merge LoRA Adapters:**"
msgstr "**合并 LoRA:**"
#: ../../source/training/ms_swift.rst:432 1a4a2ca034c44df9b573fdf23eed352e
msgid "**Push to ModelScope Hub:**"
msgstr "**推送到 ModelScope Hub:**"
#~ msgid "ms-swift"
#~ msgstr ""
#~ msgid "Introduction to ms-swift SFT"
#~ msgstr "ms-swift SFT 介绍"
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
furo
myst-parser==4.0.0
sphinx<8,>4.5.0
sphinx-copybutton
sphinx-design>=0.6.0
html {
font-size: 16px;
}
h1 {
font-size: 1.75rem;
line-height: 2.5rem;
}
h2 {
font-size: 1.5rem;
line-height: 2rem;
}
h3 {
font-size: 1.25rem;
line-height: 1.75rem;
}
h4 {
font-size: 1.125rem;
line-height: 1.5rem;
}
h5 {
font-size: 1rem;
}
h6 {
font-size: 0.75rem;
}
h1,
h2,
h3,
h4,
h5,
h6 {
margin-top: 1.875rem;
margin-bottom: 1rem;
}
p strong {
font-weight: 500;
}
p:target {
background-color: var(--color-highlight-on-target);
}
details.sd-dropdown summary.sd-summary-title {
flex-direction: row-reverse;
font-weight: 500;
padding-left: 0;
}
details.sd-dropdown summary.sd-summary-title code.literal {
font-weight: bolder;
filter: brightness(95%);
}
details.sd-dropdown summary.sd-summary-title span.sd-summary-state-marker {
padding-left: 0.5em;
padding-right: 0.5em
}
details.sd-dropdown div.sd-summary-content {
padding-left: 2.5em;
}
pre.terminal {
font-size: 12px !important;
line-height: 16px;
background-color: black;
color: white;
padding: .5em;
text-wrap: wrap;
word-break: break-all;
}
pre.terminal span.system {
color: greenyellow
}
pre.terminal span.user {
color: yellowgreen
}
// @ts-check
// Extra JS capability for selected tabs to be synced
// The selection is stored in local storage so that it persists across page loads.
/**
* @type {Record<string, HTMLElement[]>}
*/
let sd_id_to_elements = {};
const storageKeyPrefix = "sphinx-design-tab-id-";
/**
* Create a key for a tab element.
* @param {HTMLElement} el - The tab element.
* @returns {[string, string, string] | null} - The key.
*
*/
function create_key(el) {
let syncId = el.getAttribute("data-sync-id");
let syncGroup = el.getAttribute("data-sync-group");
if (!syncId || !syncGroup) return null;
return [syncGroup, syncId, syncGroup + "--" + syncId];
}
/**
* Initialize the tab selection.
*
*/
function ready() {
// Find all tabs with sync data
/** @type {string[]} */
let groups = [];
document.querySelectorAll(".sd-tab-label").forEach((label) => {
if (label instanceof HTMLElement) {
let data = create_key(label);
if (data) {
let [group, id, key] = data;
// add click event listener
// @ts-ignore
label.onclick = onSDLabelClick;
// store map of key to elements
if (!sd_id_to_elements[key]) {
sd_id_to_elements[key] = [];
}
sd_id_to_elements[key].push(label);
if (groups.indexOf(group) === -1) {
groups.push(group);
// Check if a specific tab has been selected via URL parameter
const tabParam = new URLSearchParams(window.location.search).get(
group
);
if (tabParam) {
console.log(
"sphinx-design: Selecting tab id for group '" +
group +
"' from URL parameter: " +
tabParam
);
window.sessionStorage.setItem(storageKeyPrefix + group, tabParam);
}
}
        // Check if a specific tab has been selected previously
let previousId = window.sessionStorage.getItem(
storageKeyPrefix + group
);
if (previousId === id) {
// console.log(
// "sphinx-design: Selecting tab from session storage: " + id
// );
// @ts-ignore
label.previousElementSibling.checked = true;
}
}
}
});
}
/**
* Activate other tabs with the same sync id.
*
* @this {HTMLElement} - The element that was clicked.
*/
function onSDLabelClick() {
let data = create_key(this);
if (!data) return;
const top = this.parentElement?.offsetTop || 0;
let [group, id, key] = data;
for (const label of sd_id_to_elements[key]) {
if (label === this) continue;
// @ts-ignore
label.previousElementSibling.checked = true;
}
const diff = (this.parentElement?.offsetTop || 0) - top;
if (diff !== 0) {
window.scrollBy({ left: 0, top: diff, behavior: "instant" });
}
window.sessionStorage.setItem(storageKeyPrefix + group, id);
}
document.addEventListener("DOMContentLoaded", ready, false);