Commit 317a82e2 authored by chenych

Add QWQ-32B

parent 37b0ad9f
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
@@ -14,8 +14,10 @@
import os

import pytest
from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available

from llamafactory.extras.packages import is_transformers_version_greater_than
from llamafactory.train.test_utils import load_infer_model
......
@@ -27,6 +29,7 @@ INFER_ARGS = {
}


@pytest.mark.xfail(is_transformers_version_greater_than("4.48"), reason="Attention refactor.")
def test_attention():
    attention_available = ["disabled"]
    if is_torch_sdpa_available():
......
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
@@ -12,24 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple
import os

import pytest
import torch
from transformers import AutoConfig, AutoModelForCausalLM

from llamafactory.data.processors.processor_utils import infer_seqlen


@pytest.mark.parametrize(
    "test_input,test_output",
    [
        ((3000, 2000, 1000), (600, 400)),
        ((2000, 3000, 1000), (400, 600)),
        ((1000, 100, 1000), (900, 100)),
        ((100, 1000, 1000), (100, 900)),
        ((100, 500, 1000), (100, 500)),
        ((500, 100, 1000), (500, 100)),
        ((10, 10, 1000), (10, 10)),
    ],
)
def test_infer_seqlen(test_input: Tuple[int, int, int], test_output: Tuple[int, int]):
    assert test_output == infer_seqlen(*test_input)
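These cases pin down the truncation rule: when both sides exceed the cutoff, the budget is split proportionally to the original lengths; when one side is short, it is kept intact and the remainder goes to the other side. A minimal sketch of infer_seqlen that satisfies all seven cases above (reconstructed from the tests for illustration, not necessarily the shipped implementation):

def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
    if target_len * 2 < cutoff_len:  # short target: let the source take the remaining budget
        max_target_len = cutoff_len
    elif source_len * 2 < cutoff_len:  # short source: give the target whatever the source leaves
        max_target_len = cutoff_len - source_len
    else:  # both long: split the budget proportionally to the original lengths
        max_target_len = int(cutoff_len * (target_len / (source_len + target_len)))

    new_target_len = min(max_target_len, target_len)
    new_source_len = min(max(cutoff_len - new_target_len, 0), source_len)
    return new_source_len, new_target_len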
from llamafactory.model.model_utils.misc import find_expanded_modules

HF_TOKEN = os.getenv("HF_TOKEN")


@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
def test_expanded_modules():
    config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
    with torch.device("meta"):
        model = AutoModelForCausalLM.from_config(config)

    expanded_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4)
    assert expanded_modules == [
        "model.layers.7.self_attn.q_proj",
        "model.layers.7.self_attn.v_proj",
        "model.layers.15.self_attn.q_proj",
        "model.layers.15.self_attn.v_proj",
        "model.layers.23.self_attn.q_proj",
        "model.layers.23.self_attn.v_proj",
        "model.layers.31.self_attn.q_proj",
        "model.layers.31.self_attn.v_proj",
    ]
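The expected list encodes the selection rule: Llama-3-8B's 32 decoder layers are divided into num_layer_trainable=4 strides of eight, and the last layer of each stride (7, 15, 23, 31) contributes its q_proj and v_proj modules. A rough sketch of a helper that would satisfy this assertion (a hypothetical reconstruction; the real implementation lives in llamafactory.model.model_utils.misc):

def find_expanded_modules(model, target_modules, num_layer_trainable):
    # e.g. 32 hidden layers / 4 trainable layers -> stride 8 -> layers 7, 15, 23, 31
    num_layers = model.config.num_hidden_layers
    stride = num_layers // num_layer_trainable
    trainable_layer_ids = {stride * (i + 1) - 1 for i in range(num_layer_trainable)}
    module_names = []
    for name, _ in model.named_modules():
        if any(name.endswith(target) for target in target_modules):
            layer_id = int(name.split(".layers.")[-1].split(".")[0])
            if layer_id in trainable_layer_ids:
                module_names.append(name)

    return module_names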
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
import os
import time

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def main():
    client = OpenAI(
        api_key="0",
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    messages = [{"role": "user", "content": "Write a long essay about environment protection as long as possible."}]
    num_tokens = 0
    start_time = time.time()
    for _ in range(8):
        result = client.chat.completions.create(messages=messages, model="test")
        num_tokens += result.usage.completion_tokens

    elapsed_time = time.time() - start_time
    print("Throughput: {:.2f} tokens/s".format(num_tokens / elapsed_time))
    # --infer_backend hf: 27.22 tokens/s (1.0x)
    # --infer_backend vllm: 73.03 tokens/s (2.7x)


if __name__ == "__main__":
    main()
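The script assumes an OpenAI-compatible server is already listening on API_PORT. With LLaMA-Factory such a server is typically started via the api subcommand; the model path, template, and script filename below are placeholders:

# hypothetical launch; substitute your own model path and template
API_PORT=8000 llamafactory-cli api --model_name_or_path Qwen/QwQ-32B --template qwen --infer_backend vllm
python test_throughput.py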
import json
import os
from typing import Sequence

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour

    return round(total_score / total_hour, 2)


def main():
    client = OpenAI(
        api_key="0",
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
                        "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
                    },
                    "required": ["grades", "hours"],
                },
            },
        }
    ]
    tool_map = {"calculate_gpa": calculate_gpa}

    messages = []
    messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    if result.choices[0].message.tool_calls is None:
        raise ValueError("Cannot retrieve function call from the response.")

    messages.append(result.choices[0].message)
    tool_call = result.choices[0].message.tool_calls[0].function
    print(tool_call)
    # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
    name, arguments = tool_call.name, json.loads(tool_call.arguments)
    tool_result = tool_map[name](**arguments)
    messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(result.choices[0].message.content)
    # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.


if __name__ == "__main__":
    main()