{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embedding Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Launch A Server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Embedding server is ready. Proceeding with the next steps.\n"
     ]
    }
   ],
   "source": [
    "import subprocess\n",
    "import time\n",
    "import requests\n",
    "\n",
    "# Equivalent to running this in the shell:\n",
    "# python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --port 30010 --host 0.0.0.0 --is-embedding --log-level error\n",
    "embedding_process = subprocess.Popen(\n",
    "    [\n",
    "        \"python\",\n",
    "        \"-m\",\n",
    "        \"sglang.launch_server\",\n",
    "        \"--model-path\",\n",
    "        \"Alibaba-NLP/gte-Qwen2-7B-instruct\",\n",
    "        \"--port\",\n",
    "        \"30010\",\n",
    "        \"--host\",\n",
    "        \"0.0.0.0\",\n",
    "        \"--is-embedding\",\n",
    "        \"--log-level\",\n",
    "        \"error\",\n",
    "    ],\n",
    "    text=True,\n",
    "    stdout=subprocess.DEVNULL,\n",
    "    stderr=subprocess.DEVNULL,\n",
    ")\n",
    "\n",
    "# Poll the /v1/models endpoint until the server answers with HTTP 200.\n",
    "while True:\n",
    "    # The server's output is discarded above, so if the process dies this loop\n",
    "    # would otherwise wait forever; fail loudly as soon as it has exited.\n",
    "    exit_code = embedding_process.poll()\n",
    "    if exit_code is not None:\n",
    "        raise RuntimeError(\n",
    "            f\"Embedding server exited with code {exit_code} before becoming ready. \"\n",
    "            \"Run the shell command above to see its error output.\"\n",
    "        )\n",
    "    try:\n",
    "        response = requests.get(\n",
    "            \"http://localhost:30010/v1/models\",\n",
    "            headers={\"Authorization\": \"Bearer None\"},\n",
    "            timeout=5,  # never block indefinitely on a single probe\n",
    "        )\n",
    "        if response.status_code == 200:\n",
    "            break\n",
    "    except requests.exceptions.RequestException:\n",
    "        pass  # not reachable yet; retry after the pause below\n",
    "    # Pause after every unsuccessful probe (including non-200 replies) so the\n",
    "    # loop never busy-spins.\n",
    "    time.sleep(1)\n",
    "\n",
    "print(\"Embedding server is ready. Proceeding with the next steps.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use Curl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.0083160400390625, 0.0006804466247558594, -0.00809478759765625, -0.0006995201110839844, 0.0143890380859375, -0.0090179443359375, 0.01238250732421875, 0.00209808349609375, 0.0062103271484375, -0.003047943115234375]\n"
     ]
    }
   ],
   "source": [
    "# Get the first 10 elements of the embedding\n",
    "\n",
    "!curl -s http://localhost:30010/v1/embeddings \\\n",
    "  -H \"Content-Type: application/json\" \\\n",
    "  -H \"Authorization: Bearer None\" \\\n",
    "  -d '{\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"input\": \"Once upon a time\"}' \\\n",
    "  | python3 -c \"import sys, json; print(json.load(sys.stdin)['data'][0]['embedding'][:10])\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using OpenAI Compatible API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00603485107421875, -0.0190582275390625, -0.01273345947265625, 0.01552581787109375, 0.0066680908203125, -0.0135955810546875, 0.01131439208984375, 0.0013713836669921875, -0.0089874267578125, 0.021759033203125]\n"
     ]
    }
   ],
   "source": [
    "import openai\n",
    "\n",
    "client = openai.Client(\n",
    "    base_url=\"http://127.0.0.1:30010/v1\", api_key=\"None\"\n",
    ")\n",
    "\n",
    "# Text embedding example\n",
    "response = client.embeddings.create(\n",
    "    model=\"Alibaba-NLP/gte-Qwen2-7B-instruct\",\n",
    "    input=\"How are you today\",\n",
    ")\n",
    "\n",
    "embedding = response.data[0].embedding[:10]\n",
    "print(embedding)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shut Down the Server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop the background server started in the first cell so it does not keep\n",
    "# running (and holding port 30010) after the notebook has finished.\n",
    "embedding_process.terminate()\n",
    "try:\n",
    "    embedding_process.wait(timeout=30)\n",
    "except subprocess.TimeoutExpired:\n",
    "    # The server ignored the polite request; force it down.\n",
    "    embedding_process.kill()\n",
    "    embedding_process.wait()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "AlphaMeemory",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}