"""Embed example queries and documents and print their similarity matrix.

The model to load is chosen with ``--model_name_or_path``.
"""
import argparse

from sentence_transformers import SentenceTransformer

# Command-line interface: a single optional argument selecting the model.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_name_or_path",
    type=str,
    default="Qwen/Qwen3-Embedding-0.6B",
)
args = parser.parse_args()

# Inputs to embed: search-style queries and the passages to match them to.
queries = [
    "What is the capital of China?",
    "Explain gravity",
]
documents = [
    "The capital of China is Beijing.",
    (
        "Gravity is a force that attracts two bodies towards each other. "
        "It gives weight to physical objects and is responsible for the "
        "movement of planets around the sun."
    ),
]

# Load the embedding model.
#
# Recommended alternative: enable flash_attention_2 for better acceleration
# and memory saving, together with left-side padding in the tokenizer:
#
#   model = SentenceTransformer(
#       "Qwen/Qwen3-Embedding-0.6B",
#       model_kwargs={
#           "attn_implementation": "flash_attention_2",
#           "device_map": "auto",
#       },
#       tokenizer_kwargs={"padding_side": "left"},
#   )
model = SentenceTransformer(args.model_name_or_path)

# Queries benefit from a prompt, so they are encoded with the prompt named
# "query" stored under `model.prompts`; a custom prompt can be supplied via
# the `prompt` argument instead. Documents are encoded without a prompt.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Compute the (cosine) similarity between query and document embeddings.
similarity = model.similarity(query_embeddings, document_embeddings)
print(similarity)