# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/framework/qwen_agent.rst:2
#: aaed24d3edd64e6ab1f20188f3d5ba24
msgid "Qwen-Agent"
msgstr "Qwen-Agent"
#: ../../Qwen/source/framework/qwen_agent.rst:5
#: 1cbbb8d342f243c58e0d66a3e44daac8
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/framework/qwen_agent.rst:7
#: 3e1dbee121bc4a6c91a26618e27c0d86
msgid "`Qwen-Agent <https://github.com/QwenLM/Qwen-Agent>`__ is a framework for developing LLM applications based on the instruction following, tool usage, planning, and memory capabilities of Qwen. It also comes with example applications such as Browser Assistant, Code Interpreter, and Custom Assistant."
msgid "Qwen-Agent provides atomic components such as LLMs and prompts, as well as high-level components such as Agents. The example below uses the Assistant component as an illustration, demonstrating how to add custom tools and quickly develop an agent that uses tools."
msgid "The framework also provides more atomic components for developers to combine. For additional showcases, please refer to `examples <https://github.com/QwenLM/Qwen-Agent/tree/main/examples>`__."
msgid "Qwen (Chinese: 通义千问; pinyin: _Tongyi Qianwen_) is the large language model and large multimodal model series of the Qwen Team, Alibaba Group. Qwen is capable of natural language understanding, text generation, vision understanding, audio understanding, tool use, role play, playing as AI agent, etc. Both language models and multimodal models are pre-trained on large-scale multilingual and multimodal data and post-trained on quality data for aligning to human preferences."
msgid "You can learn more about them at Alibaba Cloud Model Studio ([China Site](https://help.aliyun.com/zh/model-studio/getting-started/models#9f8890ce29g5u) \\[zh\\], [International Site](https://www.alibabacloud.com/en/product/modelstudio))."
msgid "Causal language models, also known as autoregressive language models or decoder-only language models, are a type of machine learning model designed to predict the next token in a sequence based on the preceding tokens. In other words, they generate text one token at a time, using the previously generated tokens as context. The \"causal\" aspect refers to the fact that the model only considers the past context (the already generated tokens) when predicting the next token, not any future tokens."
msgstr "因果语言模型 (causal Language Models),也被称为自回归语言模型 (autoregressive language models) 或仅解码器语言模型 (decoder-only language models) ,是一种机器学习模型,旨在根据序列中的前导 token 预测下一个 token 。换句话说,它使用之前生成的 token 作为上下文,一次生成一个 token 的文本。\"因果\"方面指的是模型在预测下一个 token 时只考虑过去的上下文(即已生成的 token ),而不考虑任何未来的 token 。"
msgid "Causal language models are widely used for various natural language processing tasks involving text completion and generation. They have been particularly successful in generating coherent and contextually relevant text, making them a cornerstone of modern natural language understanding and generation systems."
msgid "Sequence-to-sequence models use both an encoder to capture the entire input sequence and a decoder to generate an output sequence. They are widely used for tasks like machine translation, text summarization, etc."
msgid "Bidirectional models can access both past and future context in a sequence during training. They cannot generate sequential outputs in real-time due to the need for future context. They are widely used as embedding models and subsequently used for text classification."
msgid "Causal language models operate unidirectionally in a strictly forward direction, predicting each subsequent word based only on the previous words in the sequence. This unidirectional nature ensures that the model's predictions do not rely on future context, making them suitable for tasks like text completion and generation."
msgid "Base language models are foundational models trained on extensive corpora of text to predict the next word in a sequence. Their main goal is to capture the statistical patterns and structures of language, enabling them to generate coherent and contextually relevant text. These models are versatile and can be adapted to various natural language processing tasks through fine-tuning. While adept at producing fluent text, they may require in-context learning or additional training to follow specific instructions or perform complex reasoning tasks effectively. For Qwen models, the base models are those without \"-Instruct\" indicators, such as Qwen2.5-7B and Qwen2.5-72B."
msgid "Instruction-tuned language models are specialized models designed to understand and execute specific instructions in conversational styles. These models are fine-tuned to interpret user commands accurately and can perform tasks such as summarization, translation, and question answering with improved accuracy and consistency. Unlike base models, which are trained on large corpora of text, instruction-tuned models undergo additional training using datasets that contain examples of instructions and their desired outcomes, often in multiple turns. This kind of training makes them ideal for applications requiring targeted functionalities while maintaining the ability to generate fluent and coherent text. For Qwen models, the instruction-tuned models are those with the \"-Instruct\" suffix, such as Qwen2.5-7B-Instruct and Qwen2.5-72B-Instruct. [^instruct-chat]"
msgid "Tokens represent the fundamental units that models process and generate. They can represent texts in human languages (regular tokens) or represent specific functionality like keywords in programming languages (control tokens [^special]). Typically, a tokenizer is used to split text into regular tokens, which can be words, subwords, or characters depending on the specific tokenization scheme employed, and furnish the token sequence with control tokens as needed. The vocabulary size, or the total number of unique tokens a model recognizes, significantly impacts its performance and versatility. Larger language models often use sophisticated tokenization methods to handle the vast diversity of human language while keeping the vocabulary size manageable. Qwen use a relatively large vocabulary of 151,646 tokens in total."
msgid "Qwen adopts a subword tokenization method called Byte Pair Encoding (BPE), which attempts to learn the composition of tokens that can represent the text with the fewest tokens. For example, the string \" tokenization\" is decomposed as \" token\" and \"ization\" (note that the space is part of the token). Especially, the tokenization of Qwen ensures that there is no unknown words and all texts can be transformed to token sequences."
msgid "There are 151,643 tokens as a result of BPE in the vocabulary of Qwen, which is a large vocabulary efficient for diverse languages. As a rule of thumb, 1 token is 3~4 characters for English texts and 1.5~1.8 characters for Chinese texts."
msgid "Qwen uses byte-level BPE (BBPE) on UTF-8 encoded texts. It starts by treating each byte as a token and then iteratively merges the most frequent pairs of tokens occurring the texts into larger tokens until the desired vocabulary size is met."
msgid "In byte-level BPE, minimum 256 tokens are needed to tokenize every piece of text and avoid the out of vocabulary (OOV) problem. In comparison, character-level BPE needs every Unicode character in its vocabulary to avoid OOV and the Unicode Standard contains 154,998 characters as of Unicode Version 16.0."
msgid "One limitation to keep in mind for byte-level BPE is that the individual tokens in the vocabulary may not be seemingly semantically meaningful or even valid UTF-8 byte sequences, and in certain aspects, they should be viewed as a text compression scheme."
msgid "Control tokens are special tokens inserted into the sequence that signifies meta information. For example, in pre-training, multiple documents may be packed into a single sequence. For Qwen, the control token \"<|endoftext|>\" is inserted after each document to signify that the document has ended and a new document will proceed."
msgid "Chat templates provide a structured format for conversational interactions, where predefined placeholders or prompts are used to elicit responses from the model that adhere to a desired dialogue flow or context. Different models may use different kinds of chat template to format the conversations. It is crucial to use the designated one to ensure the precise control over the LLM's generation process."
msgid "The user input take the role of `user` and the model generation takes the role of `assistant`. Qwen also supports the meta message that instruct the model to perform specific actions or generate text with certain characteristics, such as altering tone, style, or content, which takes the role of `system` and the content defaults to \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\""
msgstr "用户输入扮演 `user` 的 role ,而模型生成则承担 `assistant` 的 role 。 Qwen 还支持元消息,该消息指导模型执行特定操作或生成具有特定特性的文本,例如改变语气、风格或内容,这将承担 `system` 的 role,且内容默认为 \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\" 。"
msgid "Starting from Qwen2.5, the Qwen model family including multimodal and specialized models will use a unified vocabulary, which contains control tokens from all subfamilies. There are 22 control tokens in the vocabulary of Qwen2.5, making the vocabulary size totaling 151,665:"
msgid "As Qwen models are causal language models, in theory there is only one length limit of the entire sequence. However, since there is often packing in training and each sequence may contain multiple individual pieces of texts. **How long the model can generate or complete ultimately depends on the use case and in that case how long each document (for pre-training) or each turn (for post-training) is in training.**"
msgid "For Qwen2.5, the packed sequence length in training is 32,768 tokens.[^yarn] The maximum document length in pre-training is this length. The maximum message length for user and assistant is different in post-training. In general, the assistant message could be up to 8192 tokens."
msgid "Previously, they are known as the chat models and with the \"-Chat\" suffix. Starting from Qwen2, the name is changed to follow the common practice. For Qwen, \"-Instruct\" and \"-Chat\" should be regarded as synonymous."
msgid "Control tokens can be called special tokens. However, the meaning of special tokens need to be interpreted based on the contexts: special tokens may contain extra regular tokens."
msgid "For historical reference only, ChatML is first described by the OpenAI Python SDK. The last available version is [this](https://github.com/openai/openai-python/blob/v0.28.1/chatml.md). Please also be aware that that document lists use cases intended for OpenAI models. For Qwen2.5 models, please only use as in our guide."
msgid "The sequence length can be extended to 131,072 tokens for Qwen2.5-7B, Qwen2.5-14B, Qwen2.5-32B, and Qwen2.5-72B models with YaRN. Please refer to the model card on how to enable YaRN in vLLM."
#~ msgid "There is the proprietary version hosted exclusively at [Alibaba Cloud \\[zh\\]](https://help.aliyun.com/zh/model-studio/developer-reference/tongyi-qianwen-llm/) and the open-weight version."
msgid "This guide helps you quickly start using Qwen3. We provide examples of [Hugging Face Transformers](https://github.com/huggingface/transformers) as well as [ModelScope](https://github.com/modelscope/modelscope), and [vLLM](https://github.com/vllm-project/vllm) for deployment."
msgid "You can find Qwen3 models in [the Qwen3 collection](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) at HuggingFace Hub and [the Qwen3 collection](https://www.modelscope.cn/collections/Qwen3-9743180bdc6b48) at ModelScope."
msgid "To get a quick start with Qwen3, you can try the inference with `transformers` first. Make sure that you have installed `transformers>=4.51.0`. We advise you to use Python 3.10 or higher, and PyTorch 2.6 or higher."
msgid "Qwen3 will think before respond, similar to QwQ models. This means the model will use its reasoning abilities to enhance the quality of generated responses. The model will first generate thinking content wrapped in a `<think>...</think>` block, followed by the final response."
msgid "Hard Switch: To strictly disable the model's thinking behavior, aligning its functionality with the previous Qwen2.5-Instruct models, you can set `enable_thinking=False` when formatting the text."
msgid "Soft Switch: Qwen3 also understands the user's instruction on its thinking behaviour, in particular, the soft switch `/think` and `/no_think`. You can add them to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations."
msgid "For thinking mode, use Temperature=0.6, TopP=0.95, TopK=20, and MinP=0 (the default setting in `generation_config.json`). DO NOT use greedy decoding, as it can lead to performance degradation and endless repetitions. For more detailed guidance, please refer to the Best Practices section."
msgid "To tackle with downloading issues, we advise you to try [ModelScope](https://github.com/modelscope/modelscope). Before starting, you need to install `modelscope` with `pip`."
msgid "`modelscope` adopts a programmatic interface similar (but not identical) to `transformers`. For basic usage, you can simply change the first line of code above to the following:"
msgid "To deploy Qwen3, we advise you to use vLLM. vLLM is a fast and easy-to-use framework for LLM inference and serving. In the following, we demonstrate how to build a OpenAI-API compatible API service with vLLM."
msgid "Then, you can use the [create chat interface](https://platform.openai.com/docs/api-reference/chat/completions/create) to communicate with Qwen:"
msgid "While the soft switch is always available, the hard switch is also availabe in vLLM through the following configuration to the API call. To disable thinking, use"
msgstr "虽然软开关始终可用,但硬开关也可以通过以下 API 调用配置在 vLLM 中使用。要禁用思考,请使用"
msgid "This section reports the speed performance of bf16 models, quantized models (including GPTQ-Int4, GPTQ-Int8 and AWQ) of the Qwen2.5 series. Specifically, we report the inference speed (tokens/s) as well as memory footprint (GB) under the conditions of different context lengths."
msgid "For vLLM, the memory usage is not reported because it pre-allocates all GPU memory. We use ``gpu_memory_utilization=0.9 max_model_len=32768 enforce_eager=False`` by default."
msgid "[new sample config]: for vLLM, set the following sampling parameters: SamplingParams(temperature=0.7,top_p=0.8,top_k=20,repetition_penalty=1,presence_penalty=0,frequency_penalty=0,max_tokens=out_length)"
msgid "Qwen is the large language model and large multimodal model series of the Qwen Team, Alibaba Group. Both language models and multimodal models are pretrained on large-scale multilingual and multimodal data and post-trained on quality data for aligning to human preferences. Qwen is capable of natural language understanding, text generation, vision understanding, audio understanding, tool use, role play, playing as AI agent, etc."
msgid "**Seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose chat) **within a single model**, ensuring optimal performance across various scenarios."
msgid "**Significantly enhancement in reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning."
msgid "**Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience."
msgid "**Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks."
msgid "Join our community by joining our `Discord <https://discord.gg/yPEP2vHTu4>`__ and `WeChat <https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png>`__ group. We are looking forward to seeing you there!"
#~ msgid "Dense, easy-to-use, decoder-only language models, available in **0.5B**, **1.5B**, **3B**, **7B**, **14B**, **32B**, and **72B** sizes, and base and instruct variants."
#~ msgid "Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON."
#~ msgid "Multilingual support for over **29** languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more."
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/inference/transformers.md:1
#: 0614c94c5d284106b6157f7b89fa087f
msgid "Transformers"
msgstr ""
#: ../../Qwen/source/inference/transformers.md:3
#: d3760c125a4049b9848d4c98d60104f8
msgid "Transformers is a library of pretrained natural language processing for inference and training. Developers can use Transformers to train models on their data, build inference applications, and generate texts with large language models."
msgid "In general, the pipeline interface requires less boilerplate code, which is shown here. The following shows a basic example using pipeline for mult-iturn conversations:"
msgid "To download model files to a local directory, you could use"
msgstr "要将模型文件下载到本地目录,可以使用"
#: ../../Qwen/source/inference/transformers.md:51
#: ee52b7ed97df4d3d8eee53879369a8a0
msgid "You can also download model files using ModelScope if you are in mainland China"
msgstr "如果您在中国大陆,还可以使用 ModelScope 下载模型文件"
#: ../../Qwen/source/inference/transformers.md:55
#: 0fe0e5f3d5d044f584d89008a0f89b0e
msgid "**Device Placement**: `device_map=\"auto\"` will load the model parameters to multiple devices automatically, if available. It relies on the `accelerate` pacakge. If you would like to use a single device, you can pass `device` instead of device_map. `device=-1` or `device=\"cpu\"` indicates using CPU, `device=\"cuda\"` indicates using the current GPU, and `device=\"cuda:1\"` or `device=1` indicates using the second GPU. Do not use `device_map` and `device` at the same time!"
msgid "**Compute Precision**: `torch_dtype=\"auto\"` will determine automatically the data type to use based on the original precision of the checkpoint and the precision your device supports. For modern devices, the precision determined will be `bfloat16`."
msgid "Calls to the text generation pipleine will use the generation configuration from the model file, e.g., `generation_config.json`. Those configuration could be overridden by passing arguments directly to the call. The default is equivalent to"
msgid "For the best practices in configuring generation parameters, please see the model card."
msgstr "有关配置生成参数的最佳实践,请参阅模型卡片。"
#: ../../Qwen/source/inference/transformers.md:75
#: c69916ddab134eadbb509748b73bb515
msgid "Thinking & Non-Thinking Mode"
msgstr "思考与非思考模式"
#: ../../Qwen/source/inference/transformers.md:77
#: e23485edb6654b588965a22a20332dce
msgid "By default, Qwen3 model will think before response. It is also true for the `pipeline()` interface. To switch between thinking and non-thinking mode, two methods can be used"
msgid "Append a final assistant message, containing only `<think>\\n\\n</think>\\n\\n`. This method is stateless, meaning it will only work for that single turn. It will also strictly prevented the model from generating thinking content. For example,"
msgid "Add to the user (or the system) message, `/no_think` to disable thinking and `/think` to enable thinking. This method is stateful, meaning the model will follow the most recent instruction in multi-turn conversations. You can also use instructions in natural language."
msgid "If you would like a more structured assistant message format, you can use the following function to extract the thinking content into a field named `reasoning_content` which is similar to the format used by vLLM, SGLang, etc."
msgid "Qwen3 comes with two types of pre-quantized models, FP8 and AWQ. The command serving those models are the same as the original models except for the name change:"
msgid "As of 4.51.0, there are issues with Tranformers when running those checkpoints **across GPUs**. The following method could be used to work around those issues:"
msgid "Uncomment [this line](https://github.com/huggingface/transformers/blob/0720e206c6ba28887e4d60ef60a6a089f6c1cc76/src/transformers/integrations/finegrained_fp8.py#L340) in your local installation of `transformers`."
msgid "The maximum context length in pre-training for Qwen3 models is 32,768 tokens. It can be extended to 131,072 tokens with RoPE scaling techniques. We have validated the performance with YaRN."
msgid "Transformers supports YaRN, which can be enabled either by modifying the model files or overriding the default arguments when loading the model."
msgid "Transformers implements static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.** We advise adding the `rope_scaling` configuration only when processing long contexts is required. It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 65,536 tokens, it would be better to set `factor` as 2.0."
msgid "With the help of `TextStreamer`, you can modify your chatting with Qwen3 to streaming mode. It will print the response as being generated to the console or the terminal."
msgid "Besides using `TextStreamer`, we can also use `TextIteratorStreamer` which stores print-ready text in a queue, to be used by a downstream application as an iterator:"
msgid "You may find distributed inference with Transformers is not as fast as you would imagine. Transformers with `device_map=\"auto\"` does not apply tensor parallelism and it only uses one GPU at a time. For Transformers with tensor parallelism, please refer to [its documentation](https://huggingface.co/docs/transformers/v4.51.3/en/perf_infer_gpu_multi)."
#~ msgid "The most significant but also the simplest usage of Qwen2.5 is to chat with it using the `transformers` library. In this document, we show how to chat with `Qwen2.5-7B-Instruct`, in either streaming mode or not."
#~ msgid "You can just write several lines of code with `transformers` to chat with Qwen2.5-Instruct. Essentially, we build the tokenizer and the model with `from_pretrained` method, and we use `generate` method to perform chatting with the help of chat template provided by the tokenizer. Below is an example of how to chat with Qwen2.5-7B-Instruct:"
#~ msgid "To continue the chat, simply append the response to the messages with the role assistant and repeat the procedure. The following shows and example:"
#~ msgstr "如要继续对话,只需将回复内容以 assistant 为 role 加入 messages ,然后重复以上流程即可。下面为示例:"
#~ msgid "Note that the previous method in the original Qwen repo `chat()` is now replaced by `generate()`. The `apply_chat_template()` function is used to convert the messages into a format that the model can understand. The `add_generation_prompt` argument is used to add a generation prompt, which refers to `<|im_start|>assistant\\n` to the input. Notably, we apply ChatML template for chat models following our previous practice. The `max_new_tokens` argument is used to set the maximum length of the response. The `tokenizer.batch_decode()` function is used to decode the response. In terms of the input, the above `messages` is an example to show how to format your dialog history and system prompt. By default, if you do not specify system prompt, we directly use `You are Qwen, created by Alibaba Cloud. You are a helpful assistant.`."
#~ msgstr "请注意,原 Qwen 仓库中的旧方法 `chat()` 现在已被 `generate()` 方法替代。这里使用了 `apply_chat_template()` 函数将消息转换为模型能够理解的格式。其中的 `add_generation_prompt` 参数用于在输入中添加生成提示,该提示指向 `<|im_start|>assistant\\n` 。尤其需要注意的是,我们遵循先前实践,对 chat 模型应用 ChatML 模板。而 `max_new_tokens` 参数则用于设置响应的最大长度。此外,通过 `tokenizer.batch_decode()` 函数对响应进行解码。关于输入部分,上述的 `messages` 是一个示例,展示了如何格式化对话历史记录和系统提示。默认情况下,如果您没有指定系统提示,我们将直接使用 `You are Qwen, created by Alibaba Cloud. You are a helpful assistant.` 作为系统提示。"
#~ msgid "`transformers` provides a functionality called \"pipeline\" that encapsulates the many operations in common tasks. You can chat with the model in just 4 lines of code:"
#~ msgid "To continue the chat, simply append the response to the messages with the role assistant and repeat the procedure. The following shows and example:"
#~ msgstr "如要继续对话,只需将回复内容以 assistant 为 role 加入 messages ,然后重复以上流程即可。下面为示例:"
#~ msgid "Batching"
#~ msgstr "批处理"
#~ msgid "All common `transformers` methods support batched input and output. For basic usage, the following is an example:"
#~ msgstr "`transformers` 常用方法均支持批处理。以下为基本用法的示例:"
#~ msgid "With pipeline, it is simpler:"
#~ msgstr "使用流水线功能,实现批处理代码更简单:"
#~ msgid "Using Flash Attention 2 to Accelerate Generation"
#~ msgstr "使用 Flash Attention 2 加速生成"
#~ msgid "With the latest `transformers` and `torch`, Flash Attention 2 will be applied by default if applicable.[^fa2] You do not need to request the use of Flash Attention 2 in `transformers` or install the `flash_attn` package. The following is intended for users that cannot use the latest versions for various reasons."
#~ msgid "If you would like to apply Flash Attention 2, you need to install an appropriate version of `flash_attn`. You can find pre-built wheels at [its GitHub repository](https://github.com/Dao-AILab/flash-attention/releases), and you should make sure the Python version, the torch version, and the CUDA version of torch are a match. Otherwise, you need to install from source. Please follow the guides at [its GitHub README](https://github.com/Dao-AILab/flash-attention)."
#~ msgid "Normally, memory usage after loading the model can be roughly taken as twice the parameter count. For example, a 7B model will take 14GB memory to load. It is because for large language models, the compute dtype is often 16-bit floating point number. Of course, you will need more memory in inference to store the activations."
#~ msgid "For `transformers`, `torch_dtype=\"auto\"` is recommended and the model will be loaded in `bfloat16` automatically. Otherwise, the model will be loaded in `float32` and it will need double memory. You can also pass `torch.bfloat16` or `torch.float16` as `torch_dtype` explicitly."
#~ msgid "`transformers` relies on `accelerate` for multi-GPU inference and the implementation is a kind of naive model parallelism: different GPUs computes different layers of the model. It is enabled by the use of `device_map=\"auto\"` or a customized `device_map` for multiple GPUs."
#~ msgid "However, this kind of implementation is not efficient as for a single request, only one GPU computes at the same time and the other GPUs just wait. To use all the GPUs, you need to arrange multiple sequences as on a pipeline, making sure each GPU has some work to do. However, that will require concurrency management and load balancing, which is out of the scope of `transformers`. Even if all things are implemented, you can make use of concurrency to improve the total throughput but the latency for each request is not great."
#~ msgid "`RuntimeError: CUDA error: device-side assert triggered`, `Assertion -sizes[i] <= index && index < sizes[i] && \"index out of bounds\" failed.`"
#~ msgstr "`RuntimeError: CUDA error: device-side assert triggered`, `Assertion -sizes[i] <= index && index < sizes[i] && \"index out of bounds\" failed.`"
#~ msgid "If it works with single GPU but not multiple GPUs, especially if there are PCI-E switches in your system, it could be related to drivers."
#~ msgid "For data center GPUs (e.g., A800, H800, and L40s), please use the data center GPU drivers and upgrade to the latest subrelease, e.g., 535.104.05 to 535.183.01. You can check the release note at <https://docs.nvidia.com/datacenter/tesla/index.html>, where the issues fixed and known issues are presented."
#~ msgid "For consumer GPUs (e.g., RTX 3090 and RTX 4090), their GPU drivers are released more frequently and focus more on gaming optimization. There are online reports that 545.29.02 breaks `vllm` and `torch` but 545.29.06 works. Their release notes are also less helpful in identifying the real issues. However, in general, the advice is still upgrading the GPU driver."
#~ msgid "Try disabling P2P for process hang, but it has negative effect on speed."
#~ msgstr "尝试禁用 P2P 以解决进程挂起的问题,但这会对速度产生负面影响。"
#~ msgid "Next Step"
#~ msgstr "下一步"
#~ msgid "Now you can chat with Qwen2.5 in either streaming mode or not. Continue to read the documentation and try to figure out more advanced usages of model inference!"
#~ msgid "The attention module for a model in `transformers` typically has three variants: `sdpa`, `flash_attention_2`, and `eager`. The first two are wrappers around related functions in the `torch` and the `flash_attn` packages. It defaults to `sdpa` if available."
#~ msgid "In addition, `torch` has integrated three implementations for `sdpa`: `FLASH_ATTENTION` (indicating Flash Attention 2 since version 2.2), `EFFICIENT_ATTENTION` (Memory Efficient Attention), and `MATH`. It attempts to automatically select the most optimal implementation based on the inputs. You don't need to install extra packages to use them."
#~ msgid "If you wish to explicitly select the implementations in `torch`, refer to [this tutorial](https://pytorch.org/tutorials/intermediate/scaled_dot_product_attention_tutorial.html)."
msgid "For quantized models, one of our recommendations is the usage of [AWQ](https://arxiv.org/abs/2306.00978) with [AutoAWQ](https://github.com/casper-hansen/AutoAWQ)."
msgid "**AutoAWQ** is an easy-to-use Python library for 4-bit quantized models. AutoAWQ speeds up models by 3x and reduces memory requirements by 3x compared to FP16. AutoAWQ implements the Activation-aware Weight Quantization (AWQ) algorithm for quantizing LLMs."
msgid "Now, `transformers` has officially supported AutoAWQ, which means that you can directly use the quantized model with `transformers`. The following is a very simple code snippet showing how to run `Qwen2.5-7B-Instruct-AWQ` with the quantized model:"
msgid "vLLM has supported AWQ, which means that you can directly use our provided AWQ models or those quantized with `AutoAWQ` with vLLM. We recommend using the latest version of vLLM (`vllm>=0.6.1`) which brings performance improvements to AWQ models; otherwise, the performance might not be well-optimized."
msgid "Actually, the usage is the same with the basic usage of vLLM. We provide a simple example of how to launch OpenAI-API compatible API with vLLM and `Qwen2.5-7B-Instruct-AWQ`:"
msgid "Suppose you have finetuned a model based on `Qwen2.5-7B`, which is named `Qwen2.5-7B-finetuned`, with your own dataset, e.g., Alpaca. To build your own AWQ quantized model, you need to use the training data for calibration. Below, we provide a simple demonstration for you to run:"
msgid "Then you need to prepare your data for calibration. What you need to do is just put samples into a list, each of which is a text. As we directly use our finetuning data for calibration, we first format it with ChatML template. For example,"
msgid "[GPTQ](https://arxiv.org/abs/2210.17323) is a quantization method for GPT-like LLMs, which uses one-shot weight quantization based on approximate second-order information. In this document, we show you how to use the quantized model with Hugging Face `transformers` and also how to quantize your own model with [AutoGPTQ](https://github.com/AutoGPTQ/AutoGPTQ)."
msgid "To use the official Qwen2.5 GPTQ models with `transformers`, please ensure that `optimum>=1.20.0` and compatible versions of `transformers` and `auto_gptq` are installed."
msgid "Now, `transformers` has officially supported AutoGPTQ, which means that you can directly use the quantized model with `transformers`. For each size of Qwen2.5, we provide both Int4 and Int8 GPTQ quantized models. The following is a very simple code snippet showing how to run `Qwen2.5-7B-Instruct-GPTQ-Int4`:"
msgid "vLLM has supported GPTQ, which means that you can directly use our provided GPTQ models or those trained with `AutoGPTQ` with vLLM. If possible, it will automatically use the GPTQ Marlin kernel, which is more efficient."
msgid "Actually, the usage is the same with the basic usage of vLLM. We provide a simple example of how to launch OpenAI-API compatible API with vLLM and `Qwen2.5-7B-Instruct-GPTQ-Int4`:"
msgid "If you want to quantize your own model to GPTQ quantized models, we advise you to use AutoGPTQ. It is suggested installing the latest version of the package by installing from source code:"
msgid "Suppose you have finetuned a model based on `Qwen2.5-7B`, which is named `Qwen2.5-7B-finetuned`, with your own dataset, e.g., Alpaca. To build your own GPTQ quantized model, you need to use the training data for calibration. Below, we provide a simple demonstration for you to run:"
msgid "Then you need to prepare your data for calibration. What you need to do is just put samples into a list, each of which is a text. As we directly use our finetuning data for calibration, we first format it with ChatML template. For example,"
msgid "It is unfortunate that the `save_quantized` method does not support sharding. For sharding, you need to load the model and use `save_pretrained` from transformers to save and shard the model. Except for this, everything is so simple. Enjoy!"
msgid "Generation cannot stop properly. Continual generation after where it should stop, then repeated texts, either single character, a phrase, or paragraphs, are generated."
msgid "`auto_gptq` fails to find a fused CUDA kernel compatible with your environment and falls back to a plain implementation. Follow its [installation guide](https://github.com/AutoGPTQ/AutoGPTQ/blob/main/docs/INSTALLATION.md) to install a pre-built wheel or try installing `auto_gptq` from source."
msgid "Self-quantized Qwen2.5-72B-Instruct-GPTQ with `vllm`, `ValueError: ... must be divisible by ...` is raised. The intermediate size of the self-quantized model is different from the official Qwen2.5-72B-Instruct-GPTQ models."
msgstr "`vllm` 使用自行量化的 Qwen2.5-72B-Instruct-GPTQ 时,会引发 `ValueError: ... must be divisible by ...` 错误。自量化的模型的 intermediate size 与官方的 Qwen2.5-72B-Instruct-GPTQ 模型不同。"
msgid "After quantization the size of the quantized weights are divided by the group size, which is typically 128. The intermediate size for the FFN blocks in Qwen2.5-72B is 29568. Unfortunately, {math}`29568 \\div 128 = 231`. Since the number of attention heads and the dimensions of the weights must be divisible by the tensor parallel size, it means you can only run the quantized model with `tensor_parallel_size=1`, i.e., one GPU card."
msgid "A workaround is to make the intermediate size divisible by {math}`128 \\times 8 = 1024`. To achieve that, the weights should be padded with zeros. While it is mathematically equivalent before and after zero-padding the weights, the results may be slightly different in reality."
msgid "This will save the padded checkpoint to the specified directory. Then, copy other files from the original checkpoint to the new directory and modify the `intermediate_size` in `config.json` to `29696`. Finally, you can quantize the saved model checkpoint."
# This file is distributed under the same license as the Qwen package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/quantization/llama.cpp.md:1
#: 2cde165afca34e508b163ca4d513c50c
msgid "llama.cpp"
msgstr ""
#: ../../Qwen/source/quantization/llama.cpp.md:3
#: c6369d5467e449719f2b30253bfdcb99
msgid "Quantization is a major topic for local inference of LLMs, as it reduces the memory footprint. Undoubtably, llama.cpp natively supports LLM quantization and of course, with flexibility as always."
msgid "At high-level, all quantization supported by llama.cpp is weight quantization: Model parameters are quantized into lower bits, and in inference, they are dequantized and used in computation."
msgid "In addition, you can mix different quantization data types in a single quantized model, e.g., you can quantize the embedding weights using a quantization data type and other weights using a different one. With an adequate mixture of quantization types, much lower quantization error can be attained with just a slight increase of bit-per-weight. The example program `llama-quantize` supports many quantization presets, such as Q4_K_M and Q8_0."
msgid "If you find the quantization errors still more than expected, you can bring your own scales, e.g., as computed by AWQ, or use calibration data to compute an importance matrix using `llama-imatrix`, which can then be used during quantization to enhance the quality of the quantized models."
msgid "In this document, we demonstrate the common way to quantize your model and evaluate the performance of the quantized model. We will assume you have the example programs from llama.cpp at your hand. If you don't, check our guide [here](../run_locally/llama.cpp.html#getting-the-program){.external}."
msgid "Sometimes, it may be better to use fp32 as the start point for quantization. In that case, use"
msgstr "有时,可能最好将fp32作为量化的起点。在这种情况下,使用"
#: ../../Qwen/source/quantization/llama.cpp.md:33
#: d54b89e59e214e1baeba025ecd971e30
msgid "Quantizing the GGUF without Calibration"
msgstr "无校准量化GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:35
#: a6d57166997a4a1bad8a28eb4cc5593c
msgid "For the simplest way, you can directly quantize the model to lower-bits based on your requirements. An example of quantizing the model to 8 bits is shown below:"
msgid "`Q8_0` is a code for a quantization preset. You can find all the presets in [the source code of `llama-quantize`](https://github.com/ggml-org/llama.cpp/blob/master/examples/quantize/quantize.cpp). Look for the variable `QUANT_OPTIONS`. Common ones used for 7B models include `Q8_0`, `Q5_0`, and `Q4_K_M`. The letter case doesn't matter, so `q8_0` or `q4_K_m` are perfectly fine."
msgid "However, the accuracy of the quantized model could be lower than expected occasionally, especially for lower-bit quantization. The program may even prevent you from doing that."
msgid "There are several ways to improve quality of quantized models. A common way is to use a calibration dataset in the target domain to identify the weights that really matter and quantize the model in a way that those weights have lower quantization errors, as introduced in the next two methods."
msgid "To improve the quality of your quantized models, one possible solution is to apply the AWQ scale, following [this script](https://github.com/casper-hansen/AutoAWQ/blob/main/docs/examples.md#gguf-export). First, when you run `model.quantize()` with `autoawq`, remember to add `export_compatible=True` as shown below:"
msgid "The above code will not actually quantize the weights. Instead, it adjusts weights based on a dataset so that they are \"easier\" to quantize.[^AWQ]"
msgid "Finally, you can quantize the model as in the last example:"
msgstr "最后,你可以像最后一个例子那样量化模型:"
#: ../../Qwen/source/quantization/llama.cpp.md:89
#: c92ab12879be4e1c98ef49dcdb66e3e0
msgid "In this way, it should be possible to achieve similar quality with lower bit-per-weight."
msgstr "这样,应该有可能以更低的bpw实现相似的质量。"
#: ../../Qwen/source/quantization/llama.cpp.md:95
#: 95d0914f02b44bacb160815e8f6400c3
msgid "Quantizing the GGUF with Importance Matrix"
msgstr "使用重要性矩阵量化GGUF"
#: ../../Qwen/source/quantization/llama.cpp.md:97
#: 35543f118a84404ca6e5c52e3c51b8f7
msgid "Another possible solution is to use the \"important matrix\"[^imatrix], following [this](https://github.com/ggml-org/llama.cpp/tree/master/examples/imatrix)."
msgid "The text is cut in chunks of length `--chunk` for computation. Preferably, the text should be representative of the target domain. The final results will be saved in a file named `qwen3-8b-imatrix.dat` (`-o`), which can then be used:"
msgid "For lower-bit quantization mixtures for 1-bit or 2-bit, if you do not provide `--imatrix`, a helpful warning will be printed by `llama-quantize`."
msgid "`llama.cpp` provides an example program for us to calculate the perplexity, which evaluate how unlikely the given text is to the model. It should be mostly used for comparisons: the lower the perplexity, the better the model remembers the given text."
msgid "Wait for some time and you will get the perplexity of the model. There are some numbers of different kinds of quantization mixture [here](https://github.com/ggml-org/llama.cpp/blob/master/examples/perplexity/README.md). It might be helpful to look at the difference and grab a sense of how that kind of quantization might perform."
msgid "In this guide, we demonstrate how to conduct quantization and evaluate the perplexity with llama.cpp. For more information, please visit the [llama.cpp GitHub repo](https://github.com/ggml-org/llama.cpp)."
msgid "We usually quantize the fp16 model to 4, 5, 6, and 8-bit models with different quantization mixtures, but sometimes a particular mixture just does not work, so we don't provide those in our HuggingFace Hub. However, others in the community may have success, so if you haven't found what you need in our repos, look around."
msgid "If you are interested in what this means, refer to [the AWQ paper](https://arxiv.org/abs/2306.00978). Basically, important weights (called salient weights in the paper) are identified based on activations across data examples. The weights are scaled accordingly such that the salient weights are protected even after quantization."
msgid "Here, the importance matrix keeps record of how weights affect the output: the weight should be important is a slight change in its value causes huge difference in the results, akin to the [GPTQ](https://arxiv.org/abs/2210.17323) algorithm."
msgid "It is not a good evaluation dataset for instruct models though, but it is very common and easily accessible. You probably want to use a dataset similar to your target domain."
msgid "Before starting, let's first discuss what is llama.cpp and what you should expect, and why we say \"use\" llama.cpp, with \"use\" in quotes. llama.cpp is essentially a different ecosystem with a different design philosophy that targets light-weight footprint, minimal external dependency, multi-platform, and extensive, flexible hardware support:"
msgid "Python is an interpreted language: The code you write is executed line-by-line on-the-fly by an interpreter. You can run the example code snippet or script with an interpreter or a natively interactive interpreter shell. In addition, Python is learner friendly, and even if you don't know much before, you can tweak the source code here and there."
msgid "C++ is a compiled language: The source code you write needs to be compiled beforehand, and it is translated to machine code and an executable program by a compiler. The overhead from the language side is minimal. You do have source code for example programs showcasing how to use the library. But it is not very easy to modify the source code if you are not verse in C++ or C."
msgstr "C++ 是一种编译型语言:你编写的源代码需要预先编译,由编译器将其转换为机器码和可执行程序,来自语言层面的开销微乎其微。llama.cpp也提供了示例程序的源代码,展示了如何使用该库。但是,如果你不精通 C++ 或 C 语言,修改源代码并不容易。"
#: ../../Qwen/source/run_locally/llama.cpp.md:29
#: 8d2bc05e1031475f9d97d5dddc1a31c7
msgid "To use llama.cpp means that you use the llama.cpp library in your own program, like writing the source code of [Ollama](https://ollama.com/), [LM Studio](https://lmstudio.ai/), [GPT4ALL](https://www.nomic.ai/gpt4all), [llamafile](https://llamafile.ai/) etc. But that's not what this guide is intended or could do. Instead, here we introduce how to use the `llama-cli` example program, in the hope that you know that llama.cpp does support Qwen2.5 models and how the ecosystem of llama.cpp generally works."
msgid "In this guide, we will show how to \"use\" [llama.cpp](https://github.com/ggml-org/llama.cpp) to run models on your local machine, in particular, the `llama-cli` and the `llama-server` example program, which comes with the library."
msgid "You can get the programs in various ways. For optimal efficiency, we recommend compiling the programs locally, so you get the CPU optimizations for free. However, if you don't have C++ compilers locally, you can also install using package managers or downloading pre-built binaries. They could be less efficient but for non-production example use, they are fine."
msgid "Here, we show the basic command to compile `llama-cli` locally on **macOS** or **Linux**. For Windows or GPU users, please refer to [the guide from llama.cpp](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md)."
msgid "To build locally, a C++ compiler and a build system tool are required. To see if they have been installed already, type `cc --version` or `cmake --version` in a terminal window."
msgid "If installed, the build configuration of the tool will be printed to the terminal, and you are good to go!"
msgstr "如果已安装,工具的构建配置信息将被打印到终端,那么你就可以开始了!"
#: ../../Qwen/source/run_locally/llama.cpp.md:66
#: 58e9dc44f8df4a25a647b68d54c934f4
msgid "If errors are raised, you need to first install the related tools:"
msgstr "如果出现错误,说明你需要先安装相关工具:"
#: ../../Qwen/source/run_locally/llama.cpp.md:67
#: 9c92dddd2e3d44c78c448767851a75b2
msgid "On macOS, install with the command `xcode-select --install`"
msgstr "在macOS上,使用命令`xcode-select --install`来安装。"
#: ../../Qwen/source/run_locally/llama.cpp.md:68
#: 9e5163c275244ec4b1ac423afd7a1446
msgid "On Ubuntu, install with the command `sudo apt install build-essential`. For other Linux distributions, the command may vary; the essential packages needed for this guide are `gcc` and `cmake`."
msgid "For the first step, clone the repo and enter the directory:"
msgstr "第一步是克隆仓库并进入该目录:"
#: ../../Qwen/source/run_locally/llama.cpp.md:81
#: 26e2257b7d264ff098b9e9aac386e8bf
msgid "Then, build llama.cpp using CMake:"
msgstr "随后,使用 CMake 执行 llama.cpp 构建:"
#: ../../Qwen/source/run_locally/llama.cpp.md:87
#: f3dd6fd3218844cf87e77bd8d131bd9a
msgid "The first command will check the local environment and determine which backends and features should be included. The second command will actually build the programs."
msgid "Here, we show how to install `llama-cli` and `llama-server` with Homebrew. For other package managers, please check the instructions [here](https://github.com/ggml-org/llama.cpp/blob/master/docs/install.md)."
msgid "Ensure that Homebrew is available on your operating system. If you don't have Homebrew, you can install it as in [its website](https://brew.sh/)."
msgid "Note that the installed binaries might not be built with the optimal compile options for your hardware, which can lead to poor performance. They also don't support GPU on Linux systems."
msgstr "请注意,安装的二进制文件可能并未针对您的硬件优化编译选项,这可能导致性能不佳。此外,在 Linux 系统上它们也不支持 GPU。"
msgid "You can also download pre-built binaries from [GitHub Releases](https://github.com/ggml-org/llama.cpp/releases). Please note that those pre-built binaries files are architecture-, backend-, and os-specific. If you are not sure what those mean, you probably don't want to use them and running with incompatible versions will most likely fail or lead to poor performance."
msgid "`<version>`: the version of llama.cpp. The latest is preferred, but as llama.cpp is updated and released frequently, the latest may contain bugs. If the latest version does not work, try the previous release until it works."
msgid "`<arch>`: the system architecture. `x64` for `x86_64`, e.g., most Intel and AMD systems, including Intel Mac; `arm64` for `arm64`, e.g., Apple Silicon or Snapdragon-based systems."
msgid "Running on GPU: We suggest try the `cu<cuda_verison>` one for NVIDIA GPUs, `kompute` for AMD GPUs, and `sycl` for Intel GPUs first. Ensure that you have related drivers installed."
msgid "`cu<cuda_verison>`: NVIDIA GPUs, CUDA runtime is not included. You can download the `cudart-llama-bin-win-cu<cuda_version>-x64.zip` and unzip it to the same directory if you don't have the corresponding CUDA toolkit installed."
msgid "macOS: `llama-<version>-bin-macos-x64.zip` for Intel Mac with no GPU support; `llama-<version>-bin-macos-arm64.zip` for Apple Silicon with GPU support."
msgid "After downloading the `.zip` file, unzip them into a directory and open a terminal at that directory."
msgstr "下载`.zip`文件后,将其解压到一个目录中,并在该目录下打开终端。"
#: ../../Qwen/source/run_locally/llama.cpp.md:156
#: fb60a22681e6451b9cbf8b0d581f75b5
msgid "Getting the GGUF"
msgstr "获取 GGUF"
#: ../../Qwen/source/run_locally/llama.cpp.md:158
#: 21b68c30d5c349d3b19ba3aa68be1ee0
msgid "GGUF[^GGUF] is a file format for storing information needed to run a model, including but not limited to model weights, model hyperparameters, default generation configuration, and tokenizer."
msgid "We provide a series of GGUF models in our HuggingFace organization, and to search for what you need you can search the repo names with `-GGUF`."
msgid "This will download the Qwen3-8B model in GGUF format quantized with the scheme Q4_K_M."
msgstr "这将下载采用 Q4_K_M 方案量化的 GGUF 格式的 Qwen3-8B model 模型。"
#: ../../Qwen/source/run_locally/llama.cpp.md:178
#: bd5c9e3ec9994ecba81c83e1f9077427
msgid "Preparing Your Own GGUF"
msgstr "准备您自己的 GGUF"
#: ../../Qwen/source/run_locally/llama.cpp.md:180
#: 6e7a23375e09420fb100575c510ad291
msgid "Model files from HuggingFace Hub can be converted to GGUF, using the `convert-hf-to-gguf.py` Python script. It does require you to have a working Python environment with at least `transformers` installed."
msgid "The first argument to the script refers to the path to the HF model directory or the HF model name, and the second argument refers to the path of your output GGUF file. Remember to create the output directory before you run the command."
msgid "The fp16 model could be a bit heavy for running locally, and you can quantize the model as needed. We introduce the method of creating and quantizing GGUF files in [this guide](../quantization/llama.cpp). You can refer to that document for more information."
msgid "Regarding switching between thinking and non-thinking modes, while the soft switch is always available, the hard switch implemented in the chat template is not exposed in llama.cpp. The quick workaround is to pass a custom chat template equivalennt to always `enable_thinking=False` via `--chat-template-file`."
msgid "[llama-cli](https://github.com/ggml-org/llama.cpp/tree/master/examples/main) is a console program which can be used to chat with LLMs. Simple run the following command where you place the llama.cpp programs:"
msgid "CPU: llama-cli by default will use CPU and you can change `-t` to specify how many threads you would like it to use, e.g., `-t 8` means using 8 threads."
msgid "GPU: If the programs are bulit with GPU support, you can use `-ngl`, which allows offloading some layers to the GPU for computation. If there are multiple GPUs, it will offload to all the GPUs. You can use `-dev` to control the devices used and `-sm` to control which kinds of parallelism is used. For example, `-ngl 99 -dev cuda0,cuda1 -sm row` means offload all layers to GPU 0 and GPU1 using the split mode row. Adding `-fa` may also speed up the generation."
msgid "**Sampling Parameters**: llama.cpp supports [a variety of sampling methods](https://github.com/ggml-org/llama.cpp/tree/master/examples/main#generation-flags) and has default configuration for many of them. It is recommended to adjust those parameters according to the actual case and the recommended parameters from Qwen3 modelcard could be used as a reference. If you encounter repetition and endless generation, it is recommended to pass in addition `--presence-penalty` up to `2.0`."
msgid "**Context Management**: llama.cpp adopts the \"rotating\" context management by default. The `-c` controls the maximum context length (default 4096, 0 means loaded from model), and `-n` controls the maximum generation length each time (default -1 means infinite until ending, -2 means until context full). When the context is full but the generation doesn't end, the first `--keep` tokens (default 0, -1 means all) from the initial prompt is kept, and the first half of the rest is discarded. Then, the model continues to generate based on the new context tokens. You can set `--no-context-shift` to prevent this rotating behaviour and the generation will stop once `-c` is reached."
msgid "**Chat**: `--jinja` indicates using the chat template embedded in the GGUF which is prefered and `--color` indicates coloring the texts so that user input and model output can be better differentiated. If there is a chat template, like in Qwen3 models, llama-cli will enter chat mode automatically. To stop generation or exit press \"Ctrl+C\". You can use `-sys` to add a system prompt."
msgid "[llama-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/server) is a simple HTTP server, including a set of LLM REST APIs and a simple web front end to interact with LLMs using llama.cpp."
msgstr "[llama-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/server) 是一个简单的 HTTP 服务器,包含一组 LLM REST API 和一个简单的 Web 前端,用于通过 llama.cpp 与大型语言模型交互。"
#: ../../Qwen/source/run_locally/llama.cpp.md:253
#: c8c19cde1d2a4d9894b7438e00dbb7b5
msgid "The core command is similar to that of llama-cli. In addition, it supports thinking content parsing and tool call parsing."
msgid "By default the server will listen at `http://localhost:8080` which can be changed by passing `--host` and `--port`. The web front end can be assess from a browser at `http://localhost:8080/`. The OpenAI compatible API is at `http://localhost:8080/v1/`."
msgid "If you still find it difficult to use llama.cpp, don't worry, just check out other llama.cpp-based applications. For example, Qwen3 has already been officially part of Ollama and LM Studio, which are platforms for your to search and run local LLMs."
#~ msgid "Previously, Qwen2 models generate nonsense like `GGGG...` with `llama.cpp` on GPUs. The workaround is to enable flash attention (`-fa`), which uses a different implementation, and offload the whole model to the GPU (`-ngl 80`) due to broken partial GPU offloading with flash attention."
#~ msgid "Remember that `llama-cli` is an example program, not a full-blown application. Sometimes it just does not work in the way you would like. This guide could also get quite technical sometimes. If you would like a smooth experience, check out the application mentioned above, which are much easier to \"use\"."
#~ msgid "The command will only compile the parts needed for `llama-cli`. On macOS, it will enable Metal and Accelerate by default, so you can run with GPUs. On Linux, you won't get GPU support by default, but SIMD-optimization is enabled if available."
#~ msgid "There are other [example programs](https://github.com/ggerganov/llama.cpp/tree/master/examples) in llama.cpp. You can build them at once with simply (it may take some time):"
#~ msgid "or you can also compile only the one you need, for example:"
#~ msgstr "你也可以只编译你需要的,例如:"
#~ msgid "Running the Model"
#~ msgstr "运行模型"
#~ msgid "Due to random sampling and source code updates, the generated content with the same command as given in this section may be different from what is shown in the examples."
#~ msgid "`llama-cli` provide multiple \"mode\" to \"interact\" with the model. Here, we demonstrate three ways to run the model, with increasing difficulty."
#~ msgid "For users, to achieve chatbot-like experience, it is recommended to commence in the conversation mode"
#~ msgstr "对于普通用户来说,为了获得类似聊天机器人的体验,建议从对话模式开始。"
#~ msgid "The program will first print metadata to the screen until you see the following:"
#~ msgstr "程序首先会在屏幕上打印元数据,直到你看到以下内容:"
#~ msgid "Now, the model is waiting for your input, and you can chat with the model:"
#~ msgstr "现在,模型正在等待你的输入,你可以与模型进行对话:"
#~ msgid "That's something, isn't it? You can stop the model generation anytime by Ctrl+C or Command+. However, if the model generation is ended and the control is returned to you, pressing the combination will exit the program."
#~ msgid "So what does the command we used actually do? Let's explain a little:"
#~ msgstr "那么,我们使用的命令实际上做了什么呢?让我们来解释一下:"
#~ msgid "-m or --model"
#~ msgstr "-m 或 --model"
#~ msgid "Model path, obviously."
#~ msgstr "显然,这是模型路径。"
#~ msgid "-co or --color"
#~ msgstr "-co 或 --color"
#~ msgid "Colorize output to distinguish prompt and user input from generations. Prompt text is dark yellow; user text is green; generated text is white; error text is red."
#~ msgid "Layers to the GPU for computation if the program is compiled with GPU support."
#~ msgstr "如果程序编译时支持 GPU,则将这么多层分配给 GPU 进行计算。"
#~ msgid "-n or --predict"
#~ msgstr "-n 或 --predict"
#~ msgid "Number of tokens to predict."
#~ msgstr "要预测的token数量。"
#~ msgid "You can also explore other options by"
#~ msgstr "你也可以通过以下方式探索其他选项:"
#~ msgid "Interactive Mode"
#~ msgstr "互动模式"
#~ msgid "The conversation mode hides the inner workings of LLMs. With interactive mode, you are made aware how LLMs work in the way to completion or continuation. The workflow is like"
#~ msgid "Append new texts (with optional prefix and suffix), and then let the model continues the generation."
#~ msgstr "添加新文本(可选前缀和后缀),然后让模型继续生成。"
#~ msgid "Repeat Step 2. and Step 3."
#~ msgstr "重复步骤2和步骤3。"
#~ msgid "This workflow requires a different set of options, since you have to mind the chat template yourselves. To proper run the Qwen2.5 models, try the following:"
#~ msgid "Enter interactive mode. You can interrupt model generation and append new texts."
#~ msgstr "进入互动模式。你可以中断模型生成并添加新文本。"
#~ msgid "-if or --interactive-first"
#~ msgstr "-if 或 --interactive-first"
#~ msgid "Immediately wait for user input. Otherwise, the model will run at once and generate based on the prompt."
#~ msgstr "立即等待用户输入。否则,模型将立即运行并根据提示生成文本。"
#~ msgid "In interactive mode, it is the contexts based on which the model predicts the continuation."
#~ msgstr "在互动模式下,这是模型续写用的上文。"
#~ msgid "--in-prefix"
#~ msgstr ""
#~ msgid "String to prefix user inputs with."
#~ msgstr "用户输入附加的前缀字符串。"
#~ msgid "--in-suffix"
#~ msgstr ""
#~ msgid "String to suffix after user inputs with."
#~ msgstr "用户输入附加的后缀字符串。"
#~ msgid "The result is like this:"
#~ msgstr "结果如下:"
#~ msgid "We use `prompt`, `in-prefix`, and `in-suffix` together to implement the chat template (ChatML-like) used by Qwen2.5 with a system message. So the experience is very similar to the conversation mode: you just need to type in the things you want to ask the model and don't need to worry about the chat template once the program starts. Note that, there should not be a new line after user input according to the template, so remember to end your input with `/`."
#~ msgid "Interactive mode can achieve a lot more flexible workflows, under the condition that the chat template is maintained properly throughout. The following is an example:"
#~ msgid "In the above example, I set `--reverse-prompt` to `\"LLM\"` so that the generation is interrupted whenever the model generates `\"LLM\"`[^rp]. The in prefix and in suffix are also set to empty so that I can add content exactly I want. After every generation of `\"LLM\"`, I added the part `\"...not what you think...\"` which are not likely to be generated by the model. Yet the model can continue generation just as fluent, although the logic is broken the second time around. I think it's fun to play around."
#~ msgstr "在上面的例子中,我将 `--reverse-prompt` 设置为 `\"LLM\"`,以便每当模型生成 `\"LLM\"` 时中断生成过程[^rp]。前缀和后缀也被设置为空,这样我可以精确地添加想要的内容。每次生成 `\"LLM\"` 后,我添加了 `\"...not what you think...\"` 的部分,这部分不太可能由模型生成。然而,模型仍能继续流畅生成,尽管第二次逻辑被破坏。这很有趣,值得探索。"
#~ msgid "Non-interactive Mode"
#~ msgstr "非交互模式"
#~ msgid "You can also use `llama-cli` for text completion by using just the prompt. However, it also means you have to format the input properly and only one turn can be generated."
#~ msgid "There are some gotchas in using `--reverse-prompt` as it matches tokens instead of strings. Since the same string can be tokenized differently in different contexts in BPE tokenization, some reverse prompts are never matched even though the string does exist in generation."
msgid "[mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms) helps you run LLMs locally on Apple Silicon. It is available at MacOS. It has already supported Qwen models and this time, we have also provided checkpoints that you can directly use with it."
msgid "We provide model checkpoints with `mlx-lm` in our Hugging Face organization, and to search for what you need you can search the repo names with `-MLX`."
msgid "[Ollama](https://ollama.com/) helps you run LLMs locally with only a few commands. It is available at MacOS, Linux, and Windows. Now, Qwen2.5 is officially on Ollama, and you can run it with one command:"
msgid "Visit the official website [Ollama](https://ollama.com/) and click download to install Ollama on your device. You can also search models on the website, where you can find the Qwen2.5 models. Except for the default one, you can choose to run Qwen2.5-Instruct models of different sizes by:"
msgid "Sometimes you don't want to pull models and you just want to use Ollama with your own GGUF files. Suppose you have a GGUF file of Qwen2.5, `qwen2.5-7b-instruct-q5_0.gguf`. For the first step, you need to create a file called `Modelfile`. The content of the file is shown below:"
msgid "Tool use is now support Ollama and you should be able to run Qwen2.5 models with it. For more details, see our [function calling guide](../framework/function_call)."
# This file is distributed under the same license as the Qwen package.
#
msgid ""
msgstr ""
"Project-Id-Version: Qwen \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-04-28 19:42+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../Qwen/source/training/llama_factory.rst:2
#: 7a9018d9e7ee41858ac5c59723365a63
msgid "LLaMA-Factory"
msgstr ""
#: ../../Qwen/source/training/llama_factory.rst:5
#: 6e90d8f392914d029783ed85b510063f
msgid "To be updated for Qwen3."
msgstr "仍需为Qwen3更新。"
#: ../../Qwen/source/training/llama_factory.rst:7
#: e82fbe9827774824a4259372afda3240
msgid "Here we provide a script for supervised finetuning Qwen2.5 with `LLaMA-Factory <https://github.com/hiyouga/LLaMA-Factory>`__. This script for supervised finetuning (SFT) has the following features:"
msgid "LLaMA-Factory provides several training datasets in ``data`` folder, you can use it directly. If you are using a custom dataset, please prepare your dataset as follows."
msgid "Organize your data in a **json** file and put your data in ``data`` folder. LLaMA-Factory supports dataset in ``alpaca`` or ``sharegpt`` format."
msgid "and enjoy the training process. To make changes to your training, you can modify the arguments in the training command to adjust the hyperparameters. One argument to note is ``cutoff_len``, which is the maximum length of the training data. Control this parameter to avoid OOM error."
msgid "If you train your model with LoRA, you probably need to merge adapter parameters to the main branch. Run the following command to perform the merging of LoRA adapters."
msgid "ModelScope SWIFT (ms-swift) is the official large model and multimodal model training and deployment framework provided by the ModelScope community."
msgstr "ModelScope SWIFT (ms-swift) 是 ModelScope 社区提供的官方大型模型和多模态模型训练与部署框架。"
msgid "Pre-built datasets are available at: `Supported Datasets <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
msgid "For detailed support information, please refer to: `Supported Features <https://swift.readthedocs.io/en/latest/Instruction/Pre-training-and-Fine-tuning.html#pre-training-and-fine-tuning>`__"
msgid "ms-swift has built-in preprocessing logic for several datasets, which can be directly used for training via the ``--dataset`` parameter. For supported datasets, please refer to: `Supported Datasets <https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets>`__"
msgid "When using the built-in accuracy/cosine reward, the dataset must include a ``solution`` column to compute accuracy. The other columns in the dataset will also be passed to the `kwargs` of the reward function."
msgid "Customizing the Reward Function: To tailor the reward function to your specific needs, you can refer to the following resource: `external reward plugin <https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin>`__"