Commit 317a82e2 authored by chenych

Add QWQ-32B

parent 37b0ad9f
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
@@ -14,8 +14,10 @@
import os

import pytest
from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available

from llamafactory.extras.packages import is_transformers_version_greater_than
from llamafactory.train.test_utils import load_infer_model
......
@@ -27,6 +29,7 @@ INFER_ARGS = {
}


@pytest.mark.xfail(is_transformers_version_greater_than("4.48"), reason="Attention refactor.")
def test_attention():
    attention_available = ["disabled"]
    if is_torch_sdpa_available():
......
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
@@ -12,24 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple
import os

import pytest
import torch
from transformers import AutoConfig, AutoModelForCausalLM

from llamafactory.data.processors.processor_utils import infer_seqlen


@pytest.mark.parametrize(
    "test_input,test_output",
    [
        ((3000, 2000, 1000), (600, 400)),
        ((2000, 3000, 1000), (400, 600)),
        ((1000, 100, 1000), (900, 100)),
        ((100, 1000, 1000), (100, 900)),
        ((100, 500, 1000), (100, 500)),
        ((500, 100, 1000), (500, 100)),
        ((10, 10, 1000), (10, 10)),
    ],
)
def test_infer_seqlen(test_input: Tuple[int, int, int], test_output: Tuple[int, int]):
    assert test_output == infer_seqlen(*test_input)
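These cases pin down the truncation rule: when both sides exceed the cutoff, the budget is split proportionally to the original lengths; when one side is short, it is kept intact and the remainder goes to the other side. A minimal sketch of infer_seqlen that satisfies all seven cases above (reconstructed from the tests for illustration, not necessarily the shipped implementation):

def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
    if target_len * 2 < cutoff_len:  # short target: let the source take the remaining budget
        max_target_len = cutoff_len
    elif source_len * 2 < cutoff_len:  # short source: give the target whatever the source leaves
        max_target_len = cutoff_len - source_len
    else:  # both long: split the budget proportionally to the original lengths
        max_target_len = int(cutoff_len * (target_len / (source_len + target_len)))

    new_target_len = min(max_target_len, target_len)
    new_source_len = min(max(cutoff_len - new_target_len, 0), source_len)
    return new_source_len, new_target_len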
from llamafactory.model.model_utils.misc import find_expanded_modules

HF_TOKEN = os.getenv("HF_TOKEN")


@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
def test_expanded_modules():
    config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
    with torch.device("meta"):
        model = AutoModelForCausalLM.from_config(config)

    expanded_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4)
    assert expanded_modules == [
        "model.layers.7.self_attn.q_proj",
        "model.layers.7.self_attn.v_proj",
        "model.layers.15.self_attn.q_proj",
        "model.layers.15.self_attn.v_proj",
        "model.layers.23.self_attn.q_proj",
        "model.layers.23.self_attn.v_proj",
        "model.layers.31.self_attn.q_proj",
        "model.layers.31.self_attn.v_proj",
    ]
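The expected list encodes the selection rule: Llama-3-8B's 32 decoder layers are divided into num_layer_trainable=4 strides of eight, and the last layer of each stride (7, 15, 23, 31) contributes its q_proj and v_proj modules. A rough sketch of a helper that would satisfy this assertion (a hypothetical reconstruction; the real implementation lives in llamafactory.model.model_utils.misc):

def find_expanded_modules(model, target_modules, num_layer_trainable):
    # e.g. 32 hidden layers / 4 trainable layers -> stride 8 -> layers 7, 15, 23, 31
    num_layers = model.config.num_hidden_layers
    stride = num_layers // num_layer_trainable
    trainable_layer_ids = {stride * (i + 1) - 1 for i in range(num_layer_trainable)}
    module_names = []
    for name, _ in model.named_modules():
        if any(name.endswith(target) for target in target_modules):
            layer_id = int(name.split(".layers.")[-1].split(".")[0])
            if layer_id in trainable_layer_ids:
                module_names.append(name)

    return module_names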
-# Copyright 2024 the LlamaFactory team.
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
......
import os
import time

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def main():
    client = OpenAI(
        api_key="0",
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    messages = [{"role": "user", "content": "Write a long essay about environment protection as long as possible."}]
    num_tokens = 0
    start_time = time.time()
    for _ in range(8):
        result = client.chat.completions.create(messages=messages, model="test")
        num_tokens += result.usage.completion_tokens

    elapsed_time = time.time() - start_time
    print("Throughput: {:.2f} tokens/s".format(num_tokens / elapsed_time))
    # --infer_backend hf: 27.22 tokens/s (1.0x)
    # --infer_backend vllm: 73.03 tokens/s (2.7x)


if __name__ == "__main__":
    main()
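The script assumes an OpenAI-compatible server is already listening on API_PORT. With LLaMA-Factory such a server is typically started via the api subcommand; the model path, template, and script filename below are placeholders:

# hypothetical launch; substitute your own model path and template
API_PORT=8000 llamafactory-cli api --model_name_or_path Qwen/QwQ-32B --template qwen --infer_backend vllm
python test_throughput.py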
import json
import os
from typing import Sequence

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour

    return round(total_score / total_hour, 2)


def main():
    client = OpenAI(
        api_key="0",
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
                        "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
                    },
                    "required": ["grades", "hours"],
                },
            },
        }
    ]
    tool_map = {"calculate_gpa": calculate_gpa}

    messages = []
    messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    if result.choices[0].message.tool_calls is None:
        raise ValueError("Cannot retrieve function call from the response.")

    messages.append(result.choices[0].message)
    tool_call = result.choices[0].message.tool_calls[0].function
    print(tool_call)
    # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
    name, arguments = tool_call.name, json.loads(tool_call.arguments)
    tool_result = tool_map[name](**arguments)
    messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(result.choices[0].message.content)
    # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.


if __name__ == "__main__":
    main()