"""Embed example queries and documents with vLLM and print their similarity scores.

Queries are wrapped in an instruction prompt, documents are embedded as-is,
and the script prints the matrix of query-by-document dot products.
"""

import argparse

import torch
import vllm
from vllm import LLM

parse = argparse.ArgumentParser()
parse.add_argument("--model_name_or_path", type=str, default="Qwen/Qwen3-Embedding-0.6B")
args = parse.parse_args()


def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return *query* prefixed with its task instruction.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        The prompt ``"Instruct: <task_description>\\nQuery:<query>"``.
    """
    return f'Instruct: {task_description}\nQuery:{query}'


# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity'),
]

# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
]

# Queries come first so the embeddings can be split back apart by position.
input_texts = queries + documents
print("input_texts:", input_texts)

# Initialize the model for embedding
model = LLM(model=args.model_name_or_path, task="embed")

outputs = model.embed(input_texts)
embeddings = torch.tensor([o.outputs.embedding for o in outputs])

# Split at len(queries) rather than a hard-coded index so the script stays
# correct when queries or documents are added or removed.
num_queries = len(queries)

# Rows correspond to queries, columns to documents; each entry is a dot product.
# NOTE(review): this equals cosine similarity only if the model returns
# unit-normalized embeddings - confirm against the vLLM/model configuration.
scores = embeddings[:num_queries] @ embeddings[num_queries:].T
print("scores:", scores.tolist())