# offline_inference_embedding.py
# Example: offline embedding inference with vLLM.
from vllm import LLM

# Sample prompts; the whole list is passed to the model in a single call.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Create an LLM.
model = LLM(
    model="intfloat/e5-mistral-7b-instruct",
    task="embed",  # You should pass task="embed" for embedding models
    enforce_eager=True,
)

# Generate embedding. The output is a list of PoolingRequestOutputs.
# NOTE(review): newer vLLM releases also provide LLM.embed() for this use
# case -- confirm against the installed version before switching.
outputs = model.encode(prompts)

# Print the outputs.
for output in outputs:
    print(output.outputs.embedding)  # list of 4096 floats