#!/usr/bin/env python3
"""Minimal Qwen3 classification demo built on vLLM.

The script follows the shape of the documentation example:

    llm = LLM(model="Qwen/Qwen3-Reranker-0.6B", runner="pooling")
    outputs = llm.classify(["prompt 1", "prompt 2"])  # batched

Prompts can be submitted either as raw text (the default path) or as
token IDs produced by the model's own tokenizer.
"""

import os

from vllm import LLM
from vllm.inputs import token_inputs

# A batch of two sample prompts, shared by both demo paths.
PROMPTS = [
    "你是一个搜索排序专家,请你仔细阅读以下Doc和Query,给出文章满意度评分及具体原因。Query:sdl 概述",
    "请根据以下文档与查询,判断文档是否满足查询意图,并给出理由。Query:sdl 概述",
]


def run_with_prompts(llm: LLM) -> None:
    """Classify ``PROMPTS`` as raw text and print each result.

    Args:
        llm: Engine instance used to run ``classify``.
    """
    for i, out in enumerate(llm.classify(PROMPTS)):
        probs = out.outputs.probs
        print(f"[text] Prompt {i} -> probs: {probs} (num_classes={len(probs)})")


def run_with_token_ids(llm: LLM) -> None:
    """Classify ``PROMPTS`` after encoding them to token IDs and print each result.

    Args:
        llm: Engine instance; its tokenizer is used for encoding so the
            IDs match the loaded model.
    """
    tokenizer = llm.get_tokenizer()
    # Encode with the model's tokenizer, then wrap each ID list as a token input.
    token_prompts = [token_inputs(tokenizer.encode(text)) for text in PROMPTS]
    for i, out in enumerate(llm.classify(token_prompts)):
        probs = out.outputs.probs
        print(f"[tokens] Prompt {i} -> probs: {probs} (num_classes={len(probs)})")


def main() -> None:
    """Build the engine and run the text-prompt demo."""
    # The QWEN3_MODEL environment variable overrides the default checkpoint,
    # e.g. QWEN3_MODEL=Qwen/Qwen3-Reranker-0.6B
    model = os.environ.get("QWEN3_MODEL", "Qwen/Qwen3-Reranker-0.6B")
    llm = LLM(
        model=model,
        task="classify",  # deprecated but still supported; maps to pooling+convert
        trust_remote_code=True,
        enforce_eager=True,
    )

    # Option A: classify using text prompts (recommended).
    run_with_prompts(llm)

    # Option B: classify using token IDs; call run_with_token_ids(llm) to try it.


if __name__ == "__main__":
    main()