Unverified commit c77c1e05, authored by Chayenne, committed by GitHub

fix black in pre-commit (#1940)

parent dca87ec3
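
This change splits notebook formatting out of the plain `black` hook: instead of installing Black's `[jupyter]` extra as an additional dependency and pointing one hook at both file types, the config now uses the dedicated `black-jupyter` hook id shipped with the same Black release. A minimal sketch of the resulting hook block, assuming the standard psf/black mirror as the `repo` URL (the excerpt below only shows the lines starting at `rev`):

  - repo: https://github.com/psf/black   # assumed standard Black mirror; not shown in this diff
    rev: 24.10.0
    hooks:
      - id: black
        types: [python]
      - id: black-jupyter
        types: [jupyter]

With this split, `black` runs only on .py files while `black-jupyter` reformats .ipynb sources, which is what produced the notebook reformatting hunks below.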
@@ -30,6 +30,6 @@ repos:
     rev: 24.10.0
     hooks:
       - id: black
-        additional_dependencies: ['.[jupyter]']
-        types: [python, jupyter]
-        types_or: [python, jupyter]
+        types: [python]
+      - id: black-jupyter
+        types: [jupyter]
@@ -34,10 +34,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:08.536886Z",
-     "iopub.status.busy": "2024-11-05T05:08:08.536763Z",
-     "iopub.status.idle": "2024-11-05T05:08:34.725831Z",
-     "shell.execute_reply": "2024-11-05T05:08:34.725316Z"
+     "iopub.execute_input": "2024-11-07T18:44:42.063503Z",
+     "iopub.status.busy": "2024-11-07T18:44:42.063379Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.255300Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.254547Z"
     }
    },
    "outputs": [],
@@ -73,10 +73,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:34.727530Z",
-     "iopub.status.busy": "2024-11-05T05:08:34.727333Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.359784Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.359090Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.258292Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.257710Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.611559Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.610842Z"
     }
    },
    "outputs": [],
@@ -101,10 +101,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.362286Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.362140Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.368711Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.368220Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.613911Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.613746Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.620286Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.619779Z"
     }
    },
    "outputs": [],
@@ -132,10 +132,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.371313Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.370877Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.376712Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.376230Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.622407Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.622267Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.628290Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.627793Z"
     }
    },
    "outputs": [],
@@ -164,10 +164,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.378982Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.378597Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.391820Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.391336Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.630585Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.630235Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.643498Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.643007Z"
     }
    },
    "outputs": [],
@@ -183,10 +183,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.393748Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.393606Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.398645Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.398145Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.645336Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.645196Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.650363Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.649837Z"
     }
    },
    "outputs": [],
@@ -211,10 +211,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.400683Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.400419Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.406146Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.405661Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.652212Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.652076Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.658633Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.658119Z"
     }
    },
    "outputs": [],
@@ -241,10 +241,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.408176Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.407884Z",
-     "iopub.status.idle": "2024-11-05T05:08:35.413587Z",
-     "shell.execute_reply": "2024-11-05T05:08:35.413108Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.660468Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.660325Z",
+     "iopub.status.idle": "2024-11-07T18:45:07.666476Z",
+     "shell.execute_reply": "2024-11-07T18:45:07.665984Z"
     }
    },
    "outputs": [],
@@ -271,10 +271,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:35.416090Z",
-     "iopub.status.busy": "2024-11-05T05:08:35.415793Z",
-     "iopub.status.idle": "2024-11-05T05:08:36.552549Z",
-     "shell.execute_reply": "2024-11-05T05:08:36.551870Z"
+     "iopub.execute_input": "2024-11-07T18:45:07.668242Z",
+     "iopub.status.busy": "2024-11-07T18:45:07.668108Z",
+     "iopub.status.idle": "2024-11-07T18:45:08.725709Z",
+     "shell.execute_reply": "2024-11-07T18:45:08.725021Z"
     }
    },
    "outputs": [],
@@ -296,10 +296,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:36.554823Z",
-     "iopub.status.busy": "2024-11-05T05:08:36.554680Z",
-     "iopub.status.idle": "2024-11-05T05:08:38.053945Z",
-     "shell.execute_reply": "2024-11-05T05:08:38.053034Z"
+     "iopub.execute_input": "2024-11-07T18:45:08.727865Z",
+     "iopub.status.busy": "2024-11-07T18:45:08.727721Z",
+     "iopub.status.idle": "2024-11-07T18:45:11.165841Z",
+     "shell.execute_reply": "2024-11-07T18:45:11.165282Z"
     }
    },
    "outputs": [],
@@ -335,10 +335,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:08:38.056783Z",
-     "iopub.status.busy": "2024-11-05T05:08:38.056497Z",
-     "iopub.status.idle": "2024-11-05T05:09:04.436030Z",
-     "shell.execute_reply": "2024-11-05T05:09:04.435311Z"
+     "iopub.execute_input": "2024-11-07T18:45:11.167853Z",
+     "iopub.status.busy": "2024-11-07T18:45:11.167711Z",
+     "iopub.status.idle": "2024-11-07T18:45:39.542988Z",
+     "shell.execute_reply": "2024-11-07T18:45:39.542135Z"
     }
    },
    "outputs": [],
@@ -360,10 +360,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:04.438987Z",
-     "iopub.status.busy": "2024-11-05T05:09:04.438568Z",
-     "iopub.status.idle": "2024-11-05T05:09:04.485291Z",
-     "shell.execute_reply": "2024-11-05T05:09:04.484829Z"
+     "iopub.execute_input": "2024-11-07T18:45:39.545416Z",
+     "iopub.status.busy": "2024-11-07T18:45:39.545005Z",
+     "iopub.status.idle": "2024-11-07T18:45:39.588793Z",
+     "shell.execute_reply": "2024-11-07T18:45:39.588054Z"
     }
    },
    "outputs": [],
@@ -392,10 +392,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:04.487191Z",
-     "iopub.status.busy": "2024-11-05T05:09:04.486929Z",
-     "iopub.status.idle": "2024-11-05T05:09:25.553481Z",
-     "shell.execute_reply": "2024-11-05T05:09:25.552747Z"
+     "iopub.execute_input": "2024-11-07T18:45:39.590729Z",
+     "iopub.status.busy": "2024-11-07T18:45:39.590446Z",
+     "iopub.status.idle": "2024-11-07T18:45:59.660376Z",
+     "shell.execute_reply": "2024-11-07T18:45:59.659992Z"
     }
    },
    "outputs": [],
@@ -419,10 +419,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:25.555813Z",
-     "iopub.status.busy": "2024-11-05T05:09:25.555666Z",
-     "iopub.status.idle": "2024-11-05T05:09:26.354372Z",
-     "shell.execute_reply": "2024-11-05T05:09:26.353693Z"
+     "iopub.execute_input": "2024-11-07T18:45:59.661779Z",
+     "iopub.status.busy": "2024-11-07T18:45:59.661641Z",
+     "iopub.status.idle": "2024-11-07T18:46:00.475726Z",
+     "shell.execute_reply": "2024-11-07T18:46:00.475269Z"
     }
    },
    "outputs": [],
@@ -445,10 +445,7 @@
     "prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n",
     "\n",
     "url = \"http://localhost:30030/classify\"\n",
-    "data = {\n",
-    "    \"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \n",
-    "    \"text\": prompts\n",
-    "}\n",
+    "data = {\"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \"text\": prompts}\n",
     "\n",
     "responses = requests.post(url, json=data).json()\n",
     "for response in responses:\n",
@@ -460,10 +457,10 @@
    "execution_count": 15,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:26.356532Z",
-     "iopub.status.busy": "2024-11-05T05:09:26.356327Z",
-     "iopub.status.idle": "2024-11-05T05:09:26.396590Z",
-     "shell.execute_reply": "2024-11-05T05:09:26.395914Z"
+     "iopub.execute_input": "2024-11-07T18:46:00.477283Z",
+     "iopub.status.busy": "2024-11-07T18:46:00.477025Z",
+     "iopub.status.idle": "2024-11-07T18:46:00.525758Z",
+     "shell.execute_reply": "2024-11-07T18:46:00.525236Z"
     }
    },
    "outputs": [],
......
@@ -35,10 +35,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:21:27.503026Z",
-     "iopub.status.busy": "2024-11-05T05:21:27.502741Z",
-     "iopub.status.idle": "2024-11-05T05:21:49.554631Z",
-     "shell.execute_reply": "2024-11-05T05:21:49.553690Z"
+     "iopub.execute_input": "2024-11-07T18:46:04.789536Z",
+     "iopub.status.busy": "2024-11-07T18:46:04.789418Z",
+     "iopub.status.idle": "2024-11-07T18:46:27.038169Z",
+     "shell.execute_reply": "2024-11-07T18:46:27.037540Z"
     }
    },
    "outputs": [],
@@ -64,10 +64,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:21:49.558275Z",
-     "iopub.status.busy": "2024-11-05T05:21:49.558110Z",
-     "iopub.status.idle": "2024-11-05T05:21:52.717287Z",
-     "shell.execute_reply": "2024-11-05T05:21:52.716842Z"
+     "iopub.execute_input": "2024-11-07T18:46:27.040005Z",
+     "iopub.status.busy": "2024-11-07T18:46:27.039872Z",
+     "iopub.status.idle": "2024-11-07T18:46:30.203840Z",
+     "shell.execute_reply": "2024-11-07T18:46:30.203368Z"
     }
    },
    "outputs": [],
@@ -99,10 +99,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:21:52.721738Z",
-     "iopub.status.busy": "2024-11-05T05:21:52.720908Z",
-     "iopub.status.idle": "2024-11-05T05:22:01.770341Z",
-     "shell.execute_reply": "2024-11-05T05:22:01.769510Z"
+     "iopub.execute_input": "2024-11-07T18:46:30.205880Z",
+     "iopub.status.busy": "2024-11-07T18:46:30.205719Z",
+     "iopub.status.idle": "2024-11-07T18:46:39.256561Z",
+     "shell.execute_reply": "2024-11-07T18:46:39.255880Z"
     }
    },
    "outputs": [],
@@ -137,10 +137,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:01.772662Z",
-     "iopub.status.busy": "2024-11-05T05:22:01.772377Z",
-     "iopub.status.idle": "2024-11-05T05:22:04.897499Z",
-     "shell.execute_reply": "2024-11-05T05:22:04.896867Z"
+     "iopub.execute_input": "2024-11-07T18:46:39.259464Z",
+     "iopub.status.busy": "2024-11-07T18:46:39.259309Z",
+     "iopub.status.idle": "2024-11-07T18:46:42.384955Z",
+     "shell.execute_reply": "2024-11-07T18:46:42.384378Z"
     }
    },
    "outputs": [],
@@ -179,10 +179,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:04.899754Z",
-     "iopub.status.busy": "2024-11-05T05:22:04.899478Z",
-     "iopub.status.idle": "2024-11-05T05:22:13.970245Z",
-     "shell.execute_reply": "2024-11-05T05:22:13.969779Z"
+     "iopub.execute_input": "2024-11-07T18:46:42.387431Z",
+     "iopub.status.busy": "2024-11-07T18:46:42.387279Z",
+     "iopub.status.idle": "2024-11-07T18:46:51.448572Z",
+     "shell.execute_reply": "2024-11-07T18:46:51.447781Z"
     }
    },
    "outputs": [],
@@ -216,10 +216,10 @@
    "execution_count": 6,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:13.972039Z",
-     "iopub.status.busy": "2024-11-05T05:22:13.971846Z",
-     "iopub.status.idle": "2024-11-05T05:22:14.027421Z",
-     "shell.execute_reply": "2024-11-05T05:22:14.027003Z"
+     "iopub.execute_input": "2024-11-07T18:46:51.451177Z",
+     "iopub.status.busy": "2024-11-07T18:46:51.450952Z",
+     "iopub.status.idle": "2024-11-07T18:46:51.497530Z",
+     "shell.execute_reply": "2024-11-07T18:46:51.496850Z"
     }
    },
    "outputs": [],
......
@@ -39,10 +39,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:30.637832Z",
-     "iopub.status.busy": "2024-11-05T05:09:30.637709Z",
-     "iopub.status.idle": "2024-11-05T05:09:58.830158Z",
-     "shell.execute_reply": "2024-11-05T05:09:58.829395Z"
+     "iopub.execute_input": "2024-11-07T18:46:54.813876Z",
+     "iopub.status.busy": "2024-11-07T18:46:54.813741Z",
+     "iopub.status.idle": "2024-11-07T18:47:24.015527Z",
+     "shell.execute_reply": "2024-11-07T18:47:24.014987Z"
     }
    },
    "outputs": [],
@@ -79,10 +79,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:09:58.833008Z",
-     "iopub.status.busy": "2024-11-05T05:09:58.832805Z",
-     "iopub.status.idle": "2024-11-05T05:10:00.187146Z",
-     "shell.execute_reply": "2024-11-05T05:10:00.186657Z"
+     "iopub.execute_input": "2024-11-07T18:47:24.018153Z",
+     "iopub.status.busy": "2024-11-07T18:47:24.017755Z",
+     "iopub.status.idle": "2024-11-07T18:47:25.374821Z",
+     "shell.execute_reply": "2024-11-07T18:47:25.374397Z"
     }
    },
    "outputs": [],
@@ -119,10 +119,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:00.189444Z",
-     "iopub.status.busy": "2024-11-05T05:10:00.189289Z",
-     "iopub.status.idle": "2024-11-05T05:10:03.291891Z",
-     "shell.execute_reply": "2024-11-05T05:10:03.291173Z"
+     "iopub.execute_input": "2024-11-07T18:47:25.376617Z",
+     "iopub.status.busy": "2024-11-07T18:47:25.376495Z",
+     "iopub.status.idle": "2024-11-07T18:47:28.482537Z",
+     "shell.execute_reply": "2024-11-07T18:47:28.482125Z"
     }
    },
    "outputs": [],
@@ -165,10 +165,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:03.294389Z",
-     "iopub.status.busy": "2024-11-05T05:10:03.294237Z",
-     "iopub.status.idle": "2024-11-05T05:10:03.469357Z",
-     "shell.execute_reply": "2024-11-05T05:10:03.468661Z"
+     "iopub.execute_input": "2024-11-07T18:47:28.484819Z",
+     "iopub.status.busy": "2024-11-07T18:47:28.484673Z",
+     "iopub.status.idle": "2024-11-07T18:47:28.659814Z",
+     "shell.execute_reply": "2024-11-07T18:47:28.659435Z"
     }
    },
    "outputs": [],
@@ -198,10 +198,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:03.471573Z",
-     "iopub.status.busy": "2024-11-05T05:10:03.471430Z",
-     "iopub.status.idle": "2024-11-05T05:10:04.977081Z",
-     "shell.execute_reply": "2024-11-05T05:10:04.976391Z"
+     "iopub.execute_input": "2024-11-07T18:47:28.661844Z",
+     "iopub.status.busy": "2024-11-07T18:47:28.661710Z",
+     "iopub.status.idle": "2024-11-07T18:47:30.168922Z",
+     "shell.execute_reply": "2024-11-07T18:47:30.168600Z"
     }
    },
    "outputs": [],
@@ -234,10 +234,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:04.979428Z",
-     "iopub.status.busy": "2024-11-05T05:10:04.979272Z",
-     "iopub.status.idle": "2024-11-05T05:10:08.568761Z",
-     "shell.execute_reply": "2024-11-05T05:10:08.568355Z"
+     "iopub.execute_input": "2024-11-07T18:47:30.171319Z",
+     "iopub.status.busy": "2024-11-07T18:47:30.171176Z",
+     "iopub.status.idle": "2024-11-07T18:47:33.760113Z",
+     "shell.execute_reply": "2024-11-07T18:47:33.759713Z"
     }
    },
    "outputs": [],
@@ -273,10 +273,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:08.571102Z",
-     "iopub.status.busy": "2024-11-05T05:10:08.570964Z",
-     "iopub.status.idle": "2024-11-05T05:10:23.214087Z",
-     "shell.execute_reply": "2024-11-05T05:10:23.213664Z"
+     "iopub.execute_input": "2024-11-07T18:47:33.762729Z",
+     "iopub.status.busy": "2024-11-07T18:47:33.762590Z",
+     "iopub.status.idle": "2024-11-07T18:47:34.255316Z",
+     "shell.execute_reply": "2024-11-07T18:47:34.254907Z"
     }
    },
    "outputs": [],
@@ -297,7 +297,10 @@
     "response = client.chat.completions.create(\n",
     "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
     "    messages=[\n",
-    "        {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
+    "        {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
+    "        },\n",
     "    ],\n",
     "    temperature=0,\n",
     "    max_tokens=128,\n",
@@ -322,10 +325,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:23.216229Z",
-     "iopub.status.busy": "2024-11-05T05:10:23.216076Z",
-     "iopub.status.idle": "2024-11-05T05:10:23.884236Z",
-     "shell.execute_reply": "2024-11-05T05:10:23.883897Z"
+     "iopub.execute_input": "2024-11-07T18:47:34.257393Z",
+     "iopub.status.busy": "2024-11-07T18:47:34.257246Z",
+     "iopub.status.idle": "2024-11-07T18:47:34.413506Z",
+     "shell.execute_reply": "2024-11-07T18:47:34.413172Z"
     }
    },
    "outputs": [],
@@ -365,10 +368,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:23.886276Z",
-     "iopub.status.busy": "2024-11-05T05:10:23.886136Z",
-     "iopub.status.idle": "2024-11-05T05:10:23.905880Z",
-     "shell.execute_reply": "2024-11-05T05:10:23.905529Z"
+     "iopub.execute_input": "2024-11-07T18:47:34.414816Z",
+     "iopub.status.busy": "2024-11-07T18:47:34.414541Z",
+     "iopub.status.idle": "2024-11-07T18:47:34.431341Z",
+     "shell.execute_reply": "2024-11-07T18:47:34.431081Z"
     }
    },
    "outputs": [],
@@ -427,10 +430,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:23.907468Z",
-     "iopub.status.busy": "2024-11-05T05:10:23.907247Z",
-     "iopub.status.idle": "2024-11-05T05:10:26.920212Z",
-     "shell.execute_reply": "2024-11-05T05:10:26.919865Z"
+     "iopub.execute_input": "2024-11-07T18:47:34.432325Z",
+     "iopub.status.busy": "2024-11-07T18:47:34.432208Z",
+     "iopub.status.idle": "2024-11-07T18:47:37.444337Z",
+     "shell.execute_reply": "2024-11-07T18:47:37.444000Z"
     }
    },
    "outputs": [],
@@ -482,10 +485,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:26.922675Z",
-     "iopub.status.busy": "2024-11-05T05:10:26.922413Z",
-     "iopub.status.idle": "2024-11-05T05:10:51.961703Z",
-     "shell.execute_reply": "2024-11-05T05:10:51.960846Z"
+     "iopub.execute_input": "2024-11-07T18:47:37.445894Z",
+     "iopub.status.busy": "2024-11-07T18:47:37.445744Z",
+     "iopub.status.idle": "2024-11-07T18:48:02.482532Z",
+     "shell.execute_reply": "2024-11-07T18:48:02.482042Z"
     }
    },
    "outputs": [],
@@ -565,10 +568,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:10:51.964749Z",
-     "iopub.status.busy": "2024-11-05T05:10:51.964215Z",
-     "iopub.status.idle": "2024-11-05T05:11:05.023450Z",
-     "shell.execute_reply": "2024-11-05T05:11:05.023101Z"
+     "iopub.execute_input": "2024-11-07T18:48:02.485206Z",
+     "iopub.status.busy": "2024-11-07T18:48:02.485064Z",
+     "iopub.status.idle": "2024-11-07T18:48:15.521489Z",
+     "shell.execute_reply": "2024-11-07T18:48:15.521156Z"
     }
    },
    "outputs": [],
@@ -660,10 +663,10 @@
    "execution_count": 13,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:05.024877Z",
-     "iopub.status.busy": "2024-11-05T05:11:05.024561Z",
-     "iopub.status.idle": "2024-11-05T05:11:06.358695Z",
-     "shell.execute_reply": "2024-11-05T05:11:06.357635Z"
+     "iopub.execute_input": "2024-11-07T18:48:15.522794Z",
+     "iopub.status.busy": "2024-11-07T18:48:15.522657Z",
+     "iopub.status.idle": "2024-11-07T18:48:16.875740Z",
+     "shell.execute_reply": "2024-11-07T18:48:16.874847Z"
     }
    },
    "outputs": [],
......
@@ -35,10 +35,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:17.227174Z",
-     "iopub.status.busy": "2024-11-05T05:22:17.226952Z",
-     "iopub.status.idle": "2024-11-05T05:22:42.445791Z",
-     "shell.execute_reply": "2024-11-05T05:22:42.444980Z"
+     "iopub.execute_input": "2024-11-07T18:48:21.128020Z",
+     "iopub.status.busy": "2024-11-07T18:48:21.127898Z",
+     "iopub.status.idle": "2024-11-07T18:48:45.310371Z",
+     "shell.execute_reply": "2024-11-07T18:48:45.309469Z"
     }
    },
    "outputs": [],
@@ -72,10 +72,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:42.448147Z",
-     "iopub.status.busy": "2024-11-05T05:22:42.447775Z",
-     "iopub.status.idle": "2024-11-05T05:22:42.495311Z",
-     "shell.execute_reply": "2024-11-05T05:22:42.495027Z"
+     "iopub.execute_input": "2024-11-07T18:48:45.313506Z",
+     "iopub.status.busy": "2024-11-07T18:48:45.313123Z",
+     "iopub.status.idle": "2024-11-07T18:48:45.364918Z",
+     "shell.execute_reply": "2024-11-07T18:48:45.364155Z"
     }
    },
    "outputs": [],
@@ -106,10 +106,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:42.496666Z",
-     "iopub.status.busy": "2024-11-05T05:22:42.496524Z",
-     "iopub.status.idle": "2024-11-05T05:22:42.540687Z",
-     "shell.execute_reply": "2024-11-05T05:22:42.540060Z"
+     "iopub.execute_input": "2024-11-07T18:48:45.367776Z",
+     "iopub.status.busy": "2024-11-07T18:48:45.367490Z",
+     "iopub.status.idle": "2024-11-07T18:48:45.411386Z",
+     "shell.execute_reply": "2024-11-07T18:48:45.411134Z"
     }
    },
    "outputs": [],
@@ -140,10 +140,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:42.542551Z",
-     "iopub.status.busy": "2024-11-05T05:22:42.542282Z",
-     "iopub.status.idle": "2024-11-05T05:22:42.928542Z",
-     "shell.execute_reply": "2024-11-05T05:22:42.928181Z"
+     "iopub.execute_input": "2024-11-07T18:48:45.412462Z",
+     "iopub.status.busy": "2024-11-07T18:48:45.412351Z",
+     "iopub.status.idle": "2024-11-07T18:48:45.768796Z",
+     "shell.execute_reply": "2024-11-07T18:48:45.768406Z"
     }
    },
    "outputs": [],
@@ -176,10 +176,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:42.930093Z",
-     "iopub.status.busy": "2024-11-05T05:22:42.929954Z",
-     "iopub.status.idle": "2024-11-05T05:22:44.799945Z",
-     "shell.execute_reply": "2024-11-05T05:22:44.799562Z"
+     "iopub.execute_input": "2024-11-07T18:48:45.770227Z",
+     "iopub.status.busy": "2024-11-07T18:48:45.770106Z",
+     "iopub.status.idle": "2024-11-07T18:48:47.447065Z",
+     "shell.execute_reply": "2024-11-07T18:48:47.446733Z"
     }
    },
    "outputs": [],
@@ -208,10 +208,10 @@
    "execution_count": 6,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:44.801418Z",
-     "iopub.status.busy": "2024-11-05T05:22:44.801192Z",
-     "iopub.status.idle": "2024-11-05T05:22:45.094634Z",
-     "shell.execute_reply": "2024-11-05T05:22:45.093950Z"
+     "iopub.execute_input": "2024-11-07T18:48:47.448510Z",
+     "iopub.status.busy": "2024-11-07T18:48:47.448337Z",
+     "iopub.status.idle": "2024-11-07T18:48:47.743336Z",
+     "shell.execute_reply": "2024-11-07T18:48:47.742276Z"
     }
    },
    "outputs": [],
......
@@ -39,10 +39,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:22:49.320999Z",
-     "iopub.status.busy": "2024-11-05T05:22:49.320880Z",
-     "iopub.status.idle": "2024-11-05T05:23:21.537478Z",
-     "shell.execute_reply": "2024-11-05T05:23:21.536956Z"
+     "iopub.execute_input": "2024-11-07T18:43:47.311708Z",
+     "iopub.status.busy": "2024-11-07T18:43:47.311517Z",
+     "iopub.status.idle": "2024-11-07T18:44:18.512576Z",
+     "shell.execute_reply": "2024-11-07T18:44:18.511909Z"
     }
    },
    "outputs": [],
@@ -78,10 +78,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:23:21.539953Z",
-     "iopub.status.busy": "2024-11-05T05:23:21.539100Z",
-     "iopub.status.idle": "2024-11-05T05:23:25.880179Z",
-     "shell.execute_reply": "2024-11-05T05:23:25.879744Z"
+     "iopub.execute_input": "2024-11-07T18:44:18.515678Z",
+     "iopub.status.busy": "2024-11-07T18:44:18.515314Z",
+     "iopub.status.idle": "2024-11-07T18:44:22.880793Z",
+     "shell.execute_reply": "2024-11-07T18:44:22.880303Z"
     }
    },
    "outputs": [],
@@ -129,10 +129,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:23:25.881742Z",
-     "iopub.status.busy": "2024-11-05T05:23:25.881595Z",
-     "iopub.status.idle": "2024-11-05T05:23:26.758503Z",
-     "shell.execute_reply": "2024-11-05T05:23:26.758084Z"
+     "iopub.execute_input": "2024-11-07T18:44:22.883309Z",
+     "iopub.status.busy": "2024-11-07T18:44:22.883160Z",
+     "iopub.status.idle": "2024-11-07T18:44:27.048810Z",
+     "shell.execute_reply": "2024-11-07T18:44:27.048074Z"
     }
    },
    "outputs": [],
@@ -176,10 +176,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:23:26.760098Z",
-     "iopub.status.busy": "2024-11-05T05:23:26.759955Z",
-     "iopub.status.idle": "2024-11-05T05:23:27.849510Z",
-     "shell.execute_reply": "2024-11-05T05:23:27.849117Z"
+     "iopub.execute_input": "2024-11-07T18:44:27.051312Z",
+     "iopub.status.busy": "2024-11-07T18:44:27.051190Z",
+     "iopub.status.idle": "2024-11-07T18:44:32.358097Z",
+     "shell.execute_reply": "2024-11-07T18:44:32.357628Z"
     }
    },
    "outputs": [],
@@ -227,10 +227,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:23:27.850994Z",
-     "iopub.status.busy": "2024-11-05T05:23:27.850864Z",
-     "iopub.status.idle": "2024-11-05T05:23:31.609137Z",
-     "shell.execute_reply": "2024-11-05T05:23:31.608748Z"
+     "iopub.execute_input": "2024-11-07T18:44:32.359532Z",
+     "iopub.status.busy": "2024-11-07T18:44:32.359413Z",
+     "iopub.status.idle": "2024-11-07T18:44:36.164664Z",
+     "shell.execute_reply": "2024-11-07T18:44:36.164005Z"
     }
    },
    "outputs": [],
@@ -276,10 +276,10 @@
    "execution_count": 6,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:23:31.610683Z",
-     "iopub.status.busy": "2024-11-05T05:23:31.610560Z",
-     "iopub.status.idle": "2024-11-05T05:23:32.965146Z",
-     "shell.execute_reply": "2024-11-05T05:23:32.963922Z"
+     "iopub.execute_input": "2024-11-07T18:44:36.167123Z",
+     "iopub.status.busy": "2024-11-07T18:44:36.166535Z",
+     "iopub.status.idle": "2024-11-07T18:44:37.743761Z",
+     "shell.execute_reply": "2024-11-07T18:44:37.742510Z"
     }
    },
    "outputs": [],
......
@@ -31,7 +31,7 @@ extensions = [
 ]
 
 nbsphinx_allow_errors = True
-nbsphinx_execute = 'never'
+nbsphinx_execute = "never"
 
 autosectionlabel_prefix_document = True
 nbsphinx_allow_directives = True
@@ -49,7 +49,7 @@ myst_enable_extensions = [
 myst_heading_anchors = 3
 
-nbsphinx_kernel_name = 'python3'
+nbsphinx_kernel_name = "python3"
 
 nbsphinx_execute_arguments = [
     "--InlineBackend.figure_formats={'svg', 'pdf'}",
     "--InlineBackend.rc={'figure.dpi': 96}",
@@ -130,8 +130,10 @@ html_context = {
 html_static_path = ["_static"]
 html_css_files = ["css/custom_log.css"]
 
+
 def setup(app):
-    app.add_css_file('css/custom_log.css')
+    app.add_css_file("css/custom_log.css")
 
+
 myst_enable_extensions = [
     "dollarmath",
......
@@ -33,10 +33,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:10.680191Z",
-     "iopub.status.busy": "2024-11-05T05:11:10.679710Z",
-     "iopub.status.idle": "2024-11-05T05:11:39.882385Z",
-     "shell.execute_reply": "2024-11-05T05:11:39.881827Z"
+     "iopub.execute_input": "2024-11-07T18:48:52.032229Z",
+     "iopub.status.busy": "2024-11-07T18:48:52.032105Z",
+     "iopub.status.idle": "2024-11-07T18:49:20.226042Z",
+     "shell.execute_reply": "2024-11-07T18:49:20.225562Z"
     }
    },
    "outputs": [],
@@ -49,7 +49,7 @@
     ")\n",
     "\n",
     "server_process = execute_shell_command(\n",
-    "\"\"\"\n",
+    "    \"\"\"\n",
     "python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
     "--port 30000 --host 0.0.0.0\n",
     "\"\"\"\n",
@@ -70,10 +70,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:39.883923Z",
-     "iopub.status.busy": "2024-11-05T05:11:39.883721Z",
-     "iopub.status.idle": "2024-11-05T05:11:40.124980Z",
-     "shell.execute_reply": "2024-11-05T05:11:40.124557Z"
+     "iopub.execute_input": "2024-11-07T18:49:20.228006Z",
+     "iopub.status.busy": "2024-11-07T18:49:20.227572Z",
+     "iopub.status.idle": "2024-11-07T18:49:20.469885Z",
+     "shell.execute_reply": "2024-11-07T18:49:20.469518Z"
     }
    },
    "outputs": [],
@@ -101,10 +101,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:40.126564Z",
-     "iopub.status.busy": "2024-11-05T05:11:40.126369Z",
-     "iopub.status.idle": "2024-11-05T05:11:40.324316Z",
-     "shell.execute_reply": "2024-11-05T05:11:40.323693Z"
+     "iopub.execute_input": "2024-11-07T18:49:20.471956Z",
+     "iopub.status.busy": "2024-11-07T18:49:20.471811Z",
+     "iopub.status.idle": "2024-11-07T18:49:20.667997Z",
+     "shell.execute_reply": "2024-11-07T18:49:20.667630Z"
     }
    },
    "outputs": [],
@@ -115,9 +115,7 @@
     "\n",
     "data = {\n",
     "    \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
-    "    \"messages\": [\n",
-    "        {\"role\": \"user\", \"content\": \"What is the capital of France?\"}\n",
-    "    ]\n",
+    "    \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n",
     "}\n",
     "\n",
     "response = requests.post(url, json=data)\n",
@@ -136,10 +134,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:40.327043Z",
-     "iopub.status.busy": "2024-11-05T05:11:40.326759Z",
-     "iopub.status.idle": "2024-11-05T05:11:41.687336Z",
-     "shell.execute_reply": "2024-11-05T05:11:41.686855Z"
+     "iopub.execute_input": "2024-11-07T18:49:20.669977Z",
+     "iopub.status.busy": "2024-11-07T18:49:20.669826Z",
+     "iopub.status.idle": "2024-11-07T18:49:22.004855Z",
+     "shell.execute_reply": "2024-11-07T18:49:22.004472Z"
     }
    },
    "outputs": [],
@@ -171,10 +169,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:41.688676Z",
-     "iopub.status.busy": "2024-11-05T05:11:41.688527Z",
-     "iopub.status.idle": "2024-11-05T05:11:42.717140Z",
-     "shell.execute_reply": "2024-11-05T05:11:42.716452Z"
+     "iopub.execute_input": "2024-11-07T18:49:22.006983Z",
+     "iopub.status.busy": "2024-11-07T18:49:22.006858Z",
+     "iopub.status.idle": "2024-11-07T18:49:23.029098Z",
+     "shell.execute_reply": "2024-11-07T18:49:23.028697Z"
     }
    },
    "outputs": [],
@@ -197,7 +195,7 @@
     "# Handle the streaming output\n",
     "for chunk in response:\n",
     "    if chunk.choices[0].delta.content:\n",
-    "        print(chunk.choices[0].delta.content, end='', flush=True)"
+    "        print(chunk.choices[0].delta.content, end=\"\", flush=True)"
    ]
   },
   {
@@ -214,10 +212,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:42.720467Z",
-     "iopub.status.busy": "2024-11-05T05:11:42.720182Z",
-     "iopub.status.idle": "2024-11-05T05:11:43.480765Z",
-     "shell.execute_reply": "2024-11-05T05:11:43.480143Z"
+     "iopub.execute_input": "2024-11-07T18:49:23.031712Z",
+     "iopub.status.busy": "2024-11-07T18:49:23.031571Z",
+     "iopub.status.idle": "2024-11-07T18:49:23.787752Z",
+     "shell.execute_reply": "2024-11-07T18:49:23.787368Z"
     }
    },
    "outputs": [],
@@ -250,10 +248,10 @@
    "execution_count": null,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:43.483575Z",
-     "iopub.status.busy": "2024-11-05T05:11:43.483295Z",
-     "iopub.status.idle": "2024-11-05T05:11:44.242950Z",
-     "shell.execute_reply": "2024-11-05T05:11:44.242248Z"
+     "iopub.execute_input": "2024-11-07T18:49:23.789840Z",
+     "iopub.status.busy": "2024-11-07T18:49:23.789702Z",
+     "iopub.status.idle": "2024-11-07T18:49:24.545631Z",
+     "shell.execute_reply": "2024-11-07T18:49:24.545241Z"
     }
    },
    "outputs": [],
@@ -290,10 +288,10 @@
    "execution_count": 8,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-11-05T05:11:44.245660Z",
-     "iopub.status.busy": "2024-11-05T05:11:44.245373Z",
-     "iopub.status.idle": "2024-11-05T05:11:45.591682Z",
-     "shell.execute_reply": "2024-11-05T05:11:45.591184Z"
+     "iopub.execute_input": "2024-11-07T18:49:24.547641Z",
+     "iopub.status.busy": "2024-11-07T18:49:24.547497Z",
+     "iopub.status.idle": "2024-11-07T18:49:25.888864Z",
+     "shell.execute_reply": "2024-11-07T18:49:25.888114Z"
     }
    },
    "outputs": [],
......
@@ -71,7 +71,7 @@
    "source": [
     "import json\n",
     "import os\n",
-    "from typing import List \n",
+    "from typing import List\n",
     "\n",
     "import chromadb\n",
     "\n",
@@ -80,7 +80,7 @@
     "if not os.path.exists(path_qca):\n",
     "    !wget https://virattt.github.io/datasets/abnb-2023-10k.json -O airbnb-2023-10k-qca.json\n",
     "\n",
-    "with open(path_qca, 'r') as f:\n",
+    "with open(path_qca, \"r\") as f:\n",
     "    question_context_answers = json.load(f)\n",
     "\n",
     "chroma_client = chromadb.PersistentClient()\n",
@@ -88,7 +88,7 @@
     "if collection.count() == 0:\n",
     "    collection.add(\n",
     "        documents=[qca[\"context\"] for qca in question_context_answers],\n",
-    "        ids=[str(i) for i in range(len(question_context_answers))]\n",
+    "        ids=[str(i) for i in range(len(question_context_answers))],\n",
     "    )"
    ],
    "metadata": {
@@ -123,7 +123,7 @@
     "\n",
     "load_dotenv()\n",
     "\n",
-    "os.environ['TOKENIZERS_PARALLELISM'] = \"false\"\n",
+    "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
     "\n",
     "p = Parea(api_key=os.getenv(\"PAREA_API_KEY\"), project_name=\"rag_sglang\")\n",
     "p.integrate_with_sglang()\n",
@@ -150,10 +150,7 @@
    "source": [
     "@trace\n",
     "def retrieval(question: str) -> List[str]:\n",
-    "    return collection.query(\n",
-    "        query_texts=[question],\n",
-    "        n_results=1\n",
-    "    )['documents'][0]"
+    "    return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
    ],
    "metadata": {
     "collapsed": false
@@ -176,7 +173,9 @@
     "@function\n",
     "def generation_sglang(s, question: str, *context: str):\n",
     "    context = \"\\n\".join(context)\n",
-    "    s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
+    "    s += user(\n",
+    "        f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
+    "    )\n",
     "    s += assistant(gen(\"answer\"))\n",
     "\n",
     "\n",
@@ -223,7 +222,9 @@
     "    return generation(question, *contexts)\n",
     "\n",
     "\n",
-    "rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
+    "rag_pipeline(\n",
+    "    \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
+    ")"
    ]
   },
   {
@@ -271,7 +272,10 @@
    "execution_count": null,
    "outputs": [],
    "source": [
-    "from parea.evals.rag import context_query_relevancy_factory, percent_target_supported_by_context_factory\n",
+    "from parea.evals.rag import (\n",
+    "    context_query_relevancy_factory,\n",
+    "    percent_target_supported_by_context_factory,\n",
+    ")\n",
     "\n",
     "\n",
     "context_relevancy_eval = context_query_relevancy_factory()\n",
@@ -280,10 +284,7 @@
     "\n",
     "@trace(eval_funcs=[context_relevancy_eval, percent_target_supported_by_context])\n",
     "def retrieval(question: str) -> List[str]:\n",
-    "    return collection.query(\n",
-    "        query_texts=[question],\n",
-    "        n_results=1\n",
-    "    )['documents'][0]"
+    "    return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
    ],
    "metadata": {
     "collapsed": false
@@ -310,10 +311,13 @@
     "answer_context_faithfulness = answer_context_faithfulness_statement_level_factory()\n",
     "answer_matches_target_llm_grader = answer_matches_target_llm_grader_factory()\n",
     "\n",
+    "\n",
     "@function\n",
     "def generation_sglang(s, question: str, *context: str):\n",
     "    context = \"\\n\".join(context)\n",
-    "    s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
+    "    s += user(\n",
+    "        f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
+    "    )\n",
     "    s += assistant(gen(\"answer\", max_tokens=1_000))\n",
     "\n",
     "\n",
@@ -357,7 +361,9 @@
     "    return generation(question, *contexts)\n",
     "\n",
     "\n",
-    "rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
+    "rag_pipeline(\n",
+    "    \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
+    ")"
    ],
    "metadata": {
     "collapsed": false
@@ -402,6 +408,7 @@
    "source": [
     "!pip install nest-asyncio\n",
     "import nest_asyncio\n",
+    "\n",
     "nest_asyncio.apply()"
    ],
    "metadata": {
@@ -461,7 +468,7 @@
    ],
    "source": [
     "e = p.experiment(\n",
-    "    'RAG',\n",
+    "    \"RAG\",\n",
     "    data=[\n",
     "        {\n",
     "            \"question\": qca[\"question\"],\n",
@@ -469,7 +476,7 @@
     "        }\n",
     "        for qca in question_context_answers\n",
     "    ],\n",
-    "    func=rag_pipeline\n",
+    "    func=rag_pipeline,\n",
     ").run()"
    ],
    "metadata": {
......
@@ -7,6 +7,7 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
 
 MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
 
+
 def main():
     # Sample prompts.
     prompts = [
......
@@ -39,7 +39,7 @@ class ModelConfig:
         revision: Optional[str] = None,
         context_length: Optional[int] = None,
         model_override_args: Optional[dict] = None,
-        is_embedding: Optional[bool] = None
+        is_embedding: Optional[bool] = None,
     ) -> None:
         # Parse args
         self.model_override_args = json.loads(model_override_args)
@@ -52,7 +52,9 @@
         self.hf_text_config = get_hf_text_config(self.hf_config)
 
         # Check model type
-        self.is_generation = is_generation_model(self.hf_config.architectures, is_embedding)
+        self.is_generation = is_generation_model(
+            self.hf_config.architectures, is_embedding
+        )
         self.is_multimodal = is_multimodal_model(self.hf_config.architectures)
         self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)
......
@@ -122,16 +122,14 @@ class QuantizationConfig(ABC):
         """
         raise NotImplementedError
 
-def method_has_implemented_embedding(
-        method_class: Type[QuantizeMethodBase]) -> bool:
+
+def method_has_implemented_embedding(method_class: Type[QuantizeMethodBase]) -> bool:
     """
     Not all quant methods have embedding implemented, so we need to check that
     it exists for our given method. We check this by making sure the function
     has been changed from the base implementation.
     """
-    base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding",
-                                            None)
+    base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding", None)
     class_embedding = inspect.getattr_static(method_class, "embedding", None)
-    return (class_embedding is not None
-            and class_embedding is not base_embedding)
+    return class_embedding is not None and class_embedding is not base_embedding
@@ -86,8 +86,10 @@ class GenerateReqInput:
             self.parallel_sample_num = self.sampling_params.get("n", 1)
         else:  # isinstance(self.sampling_params, list):
             self.parallel_sample_num = self.sampling_params[0].get("n", 1)
-            assert all(self.parallel_sample_num == sampling_params.get("n", 1) for sampling_params in self.sampling_params), (
-                "The parallel_sample_num should be the same for all samples in sample params.")
+            assert all(
+                self.parallel_sample_num == sampling_params.get("n", 1)
+                for sampling_params in self.sampling_params
+            ), "The parallel_sample_num should be the same for all samples in sample params."
 
         if self.parallel_sample_num > 1 and self.is_single:
             self.is_single = False
......
@@ -911,8 +911,7 @@ class ScheduleBatch:
         keep_indices = [
             i
             for i in range(len(self.reqs))
-            if not self.reqs[i].finished()
-            and self.reqs[i] is not being_chunked_req
+            if not self.reqs[i].finished() and self.reqs[i] is not being_chunked_req
         ]
 
         if keep_indices is None or len(keep_indices) == 0:
@@ -1043,6 +1042,7 @@ class ScheduleBatch:
         for req in self.reqs:
             req.started_time = time.time()
 
+
 @dataclasses.dataclass
 class ModelWorkerBatch:
     # The batch id
......
@@ -224,8 +224,8 @@ class Scheduler:
         self.forward_ct = 0
         self.forward_ct_decode = 0
         self.num_generated_tokens = 0
-        self.last_stats_tic = time.time() # time of last stats for every iter
-        self.last_log_tic = time.time() # time of last log for print decode log
+        self.last_stats_tic = time.time()  # time of last stats for every iter
+        self.last_log_tic = time.time()  # time of last log for print decode log
         self.stream_interval = server_args.stream_interval
 
         # Init chunked prefill
@@ -566,9 +566,7 @@
             and not self.last_batch.is_empty()
         ):
             if self.being_chunked_req:
-                self.last_batch.filter_batch(
-                    being_chunked_req=self.being_chunked_req
-                )
+                self.last_batch.filter_batch(being_chunked_req=self.being_chunked_req)
                 self.tree_cache.cache_unfinished_req(self.being_chunked_req)
 
                 # Inflight request keeps its rid but will get a new req_pool_idx.
                 self.req_to_token_pool.free(self.being_chunked_req.req_pool_idx)
@@ -628,9 +626,7 @@
         has_inflight = self.being_chunked_req is not None
         if has_inflight:
             self.being_chunked_req.init_next_round_input()
-            self.being_chunked_req = adder.add_inflight_req(
-                self.being_chunked_req
-            )
+            self.being_chunked_req = adder.add_inflight_req(self.being_chunked_req)
 
         if self.lora_paths:
             lora_set = (
@@ -813,7 +809,8 @@
             embeddings = self.tp_worker.forward_batch_embedding(model_worker_batch)
             ret = embeddings, model_worker_batch.bid
         return ret
-    def get_stats(self,batch: ScheduleBatch):
+
+    def get_stats(self, batch: ScheduleBatch):
         # TODO: get stats for chunked prefill
         now = time.time()
@@ -829,8 +826,8 @@
         # set stats from prefill
         if self.stats is not None:
             # new_seq=self.stats.new_seq
-            cache_hit_rate=self.stats.cache_hit_rate
-            token_usage=self.stats.token_usage
+            cache_hit_rate = self.stats.cache_hit_rate
+            token_usage = self.stats.token_usage
 
         # Iteration stats
         num_prompt_tokens_iter = 0
         num_generation_tokens_iter = 0
@@ -851,15 +848,19 @@
             # _, next_token_ids, _ = result
         if batch is not None:
             num_generation_tokens_iter = len(batch.output_ids)
-            gen_throughput = round(num_generation_tokens_iter / (now - self.last_stats_tic), 2)
+            gen_throughput = round(
+                num_generation_tokens_iter / (now - self.last_stats_tic), 2
+            )
 
             for i, req in enumerate(batch.reqs):
                 # NOTE: Batch forward mode is extend befor start decode,
                 if batch.forward_mode.is_extend():
-                    num_prompt_tokens_iter=len(batch.input_ids)+sum(batch.prefix_lens)
+                    num_prompt_tokens_iter = len(batch.input_ids) + sum(
+                        batch.prefix_lens
+                    )
                     time_to_first_tokens_iter.append(now - req.started_time)
                 else:
-                    time_per_output_tokens_iter.append(now-self.last_stats_tic)
+                    time_per_output_tokens_iter.append(now - self.last_stats_tic)
 
                 if req.finished():
                     time_e2e_requests.append(now - req.created_time)
@@ -867,9 +868,10 @@
                     num_prompt_tokens_requests.append(len(req.origin_input_ids))
                     num_generation_tokens_requests.append(len(req.output_ids))
                     finished_reason_requests.append(
-                        req.finished_reason.to_json()
-                        if req.finished_reason is not None
-                        else None)
+                        req.finished_reason.to_json()
+                        if req.finished_reason is not None
+                        else None
+                    )
 
         return Stats(
             new_seq=new_seq,
@@ -893,7 +895,7 @@
             max_running_requests=self.max_running_requests,
         )
 
-    def log_stats(self,stats:Stats):
+    def log_stats(self, stats: Stats):
         self.metrics_collector.log_stats(stats)
 
     def process_batch_result(self, batch: ScheduleBatch, result):
@@ -1003,9 +1005,7 @@
             if req.is_retracted:
                 continue
 
-            if self.server_args.enable_overlap_schedule and (
-                req.finished()
-            ):
+            if self.server_args.enable_overlap_schedule and (req.finished()):
                 self.token_to_kv_pool.free(batch.out_cache_loc[i : i + 1])
                 continue
@@ -1031,7 +1031,10 @@
             self.token_to_kv_pool.free_group_end()
 
         self.forward_ct_decode = (self.forward_ct_decode + 1) % (1 << 30)
-        if self.tp_rank == 0 and self.forward_ct_decode % self.server_args.decode_log_interval == 0:
+        if (
+            self.tp_rank == 0
+            and self.forward_ct_decode % self.server_args.decode_log_interval == 0
+        ):
             self.print_decode_stats()
 
     def add_logprob_return_values(
......
...@@ -215,7 +215,7 @@ class TokenizerManager: ...@@ -215,7 +215,7 @@ class TokenizerManager:
logprob_start_len, logprob_start_len,
top_logprobs_num, top_logprobs_num,
obj.stream, obj.stream,
obj.lora_path obj.lora_path,
) )
elif isinstance(obj, EmbeddingReqInput): elif isinstance(obj, EmbeddingReqInput):
tokenized_obj = TokenizedEmbeddingReqInput( tokenized_obj = TokenizedEmbeddingReqInput(
...@@ -290,7 +290,9 @@ class TokenizerManager: ...@@ -290,7 +290,9 @@ class TokenizerManager:
# Tokenize all requests # Tokenize all requests
objs = [obj[i] for i in range(batch_size)] objs = [obj[i] for i in range(batch_size)]
tokenized_objs = await asyncio.gather(*(self._tokenize_one_request(obj) for obj in objs)) tokenized_objs = await asyncio.gather(
*(self._tokenize_one_request(obj) for obj in objs)
)
# Cache the common prefix for parallel sampling # Cache the common prefix for parallel sampling
for i in range(batch_size): for i in range(batch_size):
@@ -322,7 +324,9 @@ class TokenizerManager:
         rid_to_index = {rid: i for i, rid in enumerate(rids)}
         task_map = {asyncio.create_task(gen.__anext__()): gen for gen in generators}
         while task_map:
-            done, _ = await asyncio.wait(task_map.keys(), return_when=asyncio.FIRST_COMPLETED)
+            done, _ = await asyncio.wait(
+                task_map.keys(), return_when=asyncio.FIRST_COMPLETED
+            )

             for task in done:
                 gen = task_map.pop(task)
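The `task_map` loop that black rewrapped here is the usual way to multiplex several async generators: wrap each generator's `__anext__()` in a task, wait with `return_when=asyncio.FIRST_COMPLETED`, then re-arm whichever generator produced a value. A runnable sketch with toy streams in place of the per-request generators:

```python
import asyncio


async def stream(name: str, n: int, delay: float):
    # Toy async generator standing in for one request's output stream.
    for i in range(n):
        await asyncio.sleep(delay)
        yield f"{name}:{i}"


async def main():
    generators = [stream("a", 3, 0.01), stream("b", 2, 0.02)]
    task_map = {asyncio.create_task(g.__anext__()): g for g in generators}
    while task_map:
        done, _ = await asyncio.wait(
            task_map.keys(), return_when=asyncio.FIRST_COMPLETED
        )
        for task in done:
            gen = task_map.pop(task)
            try:
                print(task.result())
            except StopAsyncIteration:
                continue  # this stream is exhausted
            # Re-arm the generator so its next item can race again.
            task_map[asyncio.create_task(gen.__anext__())] = gen


asyncio.run(main())
```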
@@ -367,7 +371,7 @@ class TokenizerManager:
         if self.server_args.dp_size == 1:
             res = await self.mem_pool_size
             return res.size
-        else: # self.server_args.dp_size > 1
+        else:  # self.server_args.dp_size > 1
             self.mem_pool_size_tmp = []
             res = await self.mem_pool_size
             ret = [r.size for r in res]
@@ -399,7 +403,7 @@ class TokenizerManager:
             self.server_args.load_format = obj.load_format
             self.model_path = obj.model_path
             return result.success, result.message
-        else: # self.server_args.dp_size > 1
+        else:  # self.server_args.dp_size > 1
             self.model_update_tmp = []
             result = await self.model_update_result
@@ -470,7 +474,7 @@ class TokenizerManager:
         if isinstance(recv_obj, UpdateWeightReqOutput):
             if self.server_args.dp_size == 1:
                 self.model_update_result.set_result(recv_obj)
-            else: # self.server_args.dp_size > 1
+            else:  # self.server_args.dp_size > 1
                 self.model_update_tmp.append(recv_obj)
                 # set future if all the results are received
                 if len(self.model_update_tmp) == self.server_args.dp_size:
@@ -479,7 +483,7 @@ class TokenizerManager:
         elif isinstance(recv_obj, GetMemPoolSizeReqOutput):
             if self.server_args.dp_size == 1:
                 self.mem_pool_size.set_result(recv_obj)
-            else: # self.server_args.dp_size > 1
+            else:  # self.server_args.dp_size > 1
                 self.mem_pool_size_tmp.append(recv_obj)
                 # set future if all the results are received
                 if len(self.mem_pool_size_tmp) == self.server_args.dp_size:
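Both `dp_size` branches in this file follow one aggregation pattern: a single replica resolves the pending future directly, while multiple replicas buffer replies and resolve the future only once all `dp_size` of them have arrived. A hedged sketch of that pattern in isolation (class and method names are illustrative, not sglang API):

```python
import asyncio


class ReplyCollector:
    """Resolve a future once all dp_size replica replies arrive."""

    def __init__(self, dp_size: int):
        self.dp_size = dp_size
        self.future = asyncio.get_running_loop().create_future()
        self._buffer = []

    def on_reply(self, reply):
        if self.dp_size == 1:
            self.future.set_result(reply)
        else:  # dp_size > 1: wait for every replica
            self._buffer.append(reply)
            if len(self._buffer) == self.dp_size:
                self.future.set_result(self._buffer)


async def main():
    collector = ReplyCollector(dp_size=2)
    collector.on_reply({"size": 10})
    collector.on_reply({"size": 12})
    print(await collector.future)


asyncio.run(main())
```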
...
@@ -130,27 +130,65 @@ class Metrics:
         self.counter_prompt_tokens = Counter(
             name="sglang:prompt_tokens_total",
             documentation="Number of prefill tokens processed.",
-            labelnames=labelnames)
+            labelnames=labelnames,
+        )
         self.counter_generation_tokens = Counter(
             name="sglang:generation_tokens_total",
             documentation="Number of generation tokens processed.",
-            labelnames=labelnames)
+            labelnames=labelnames,
+        )
         self.histogram_time_to_first_token = Histogram(
             name="sglang:time_to_first_token_seconds",
             documentation="Histogram of time to first token in seconds.",
             labelnames=labelnames,
             buckets=[
-                0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
-                0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 25.0, 30.0
-            ])
+                0.001,
+                0.005,
+                0.01,
+                0.02,
+                0.04,
+                0.06,
+                0.08,
+                0.1,
+                0.25,
+                0.5,
+                0.75,
+                1.0,
+                2.5,
+                5.0,
+                7.5,
+                10.0,
+                15.0,
+                20.0,
+                25.0,
+                30.0,
+            ],
+        )
         self.histogram_time_per_output_token = Histogram(
             name="sglang:time_per_output_token_seconds",
             documentation="Histogram of time per output token in seconds.",
             labelnames=labelnames,
             buckets=[
-                0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
-                1.0, 2.5
-            ])
+                0.005,
+                0.01,
+                0.015,
+                0.02,
+                0.025,
+                0.03,
+                0.04,
+                0.05,
+                0.075,
+                0.1,
+                0.15,
+                0.2,
+                0.3,
+                0.4,
+                0.5,
+                0.75,
+                1.0,
+                2.5,
+            ],
+        )

         # Request Stats
         # Metadata
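The exploded bucket lists are pure formatting, but they are also the full contract of these metrics: `prometheus_client` histograms are cumulative, so an observation is counted in every bucket whose upper bound is at or above it (plus `+Inf`). A minimal sketch of declaring and feeding such a histogram, with a shortened bucket list for illustration:

```python
from prometheus_client import Histogram

# Shortened buckets for illustration; the real metric spans 0.001-30.0s.
ttft = Histogram(
    name="demo:time_to_first_token_seconds",
    documentation="Histogram of time to first token in seconds.",
    labelnames=["model_name"],
    buckets=[0.01, 0.1, 0.5, 1.0, 5.0],
)

# A 0.42s observation increments the 0.5, 1.0, 5.0 and +Inf buckets.
ttft.labels(model_name="gpt2").observe(0.42)
```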
@@ -245,14 +283,19 @@ class PrometheusMetricsCollector(MetricsCollector):
             stats.num_generation_tokens_requests,
         )
-        self._log_counter(self.metrics.counter_prompt_tokens,
-                          stats.num_prompt_tokens_iter)
-        self._log_counter(self.metrics.counter_generation_tokens,
-                          stats.num_generation_tokens_iter)
-        self._log_histogram(self.metrics.histogram_time_to_first_token,
-                            stats.time_to_first_tokens_iter)
-        self._log_histogram(self.metrics.histogram_time_per_output_token,
-                            stats.time_per_output_tokens_iter)
+        self._log_counter(
+            self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter
+        )
+        self._log_counter(
+            self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter
+        )
+        self._log_histogram(
+            self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter
+        )
+        self._log_histogram(
+            self.metrics.histogram_time_per_output_token,
+            stats.time_per_output_tokens_iter,
+        )
         # self._log_gauge(self.metrics.gpu_cache_usage_sys, stats.gpu_cache_usage_sys)
         self._log_gauge(self.metrics.num_running_sys, stats.num_running_req)
...
@@ -28,7 +28,7 @@ from vllm.model_executor.layers.activation import get_act_fn
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

-#from sglang.srt.layers.activation import get_act_fn
+# from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
     QKVParallelLinear,
@@ -47,15 +47,14 @@ class GPT2Attention(nn.Module):
         self,
         layer_id: int,
         config: GPT2Config,
-        cache_config = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
         super().__init__()
         self.hidden_size = config.hidden_size
         total_num_heads = config.num_attention_heads
-        tensor_model_parallel_world_size = (
-            get_tensor_model_parallel_world_size())
+        tensor_model_parallel_world_size = get_tensor_model_parallel_world_size()
         assert total_num_heads % tensor_model_parallel_world_size == 0
         self.num_heads = total_num_heads // tensor_model_parallel_world_size
         self.head_dim = self.hidden_size // total_num_heads
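The assert above is what makes the tensor-parallel split well defined: heads are divided evenly across ranks and each head keeps `hidden_size // total_num_heads` dimensions. Worked numbers for the standard GPT-2 base shape (not part of this diff, but a familiar config):

```python
# GPT-2 base with a hypothetical tensor-parallel world size of 4.
hidden_size = 768
total_num_heads = 12
tp_world_size = 4

assert total_num_heads % tp_world_size == 0
num_heads = total_num_heads // tp_world_size  # 3 heads per rank
head_dim = hidden_size // total_num_heads  # 64 dims per head
scale = head_dim**-0.5  # 0.125, the usual attention scaling
print(num_heads, head_dim, scale)
```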
@@ -76,11 +75,13 @@ class GPT2Attention(nn.Module):
             quant_config=quant_config,
             prefix=f"{prefix}.c_proj",
         )
-        self.attn = RadixAttention(self.num_heads,
-                                   self.head_dim,
-                                   scaling=self.scale,
-                                   num_kv_heads=total_num_heads,
-                                   layer_id=layer_id)
+        self.attn = RadixAttention(
+            self.num_heads,
+            self.head_dim,
+            scaling=self.scale,
+            num_kv_heads=total_num_heads,
+            layer_id=layer_id,
+        )

     def forward(
         self,
@@ -119,10 +120,14 @@ class GPT2MLP(nn.Module):
             quant_config=quant_config,
             prefix=f"{prefix}.c_proj",
         )
-        self.act = get_act_fn(config.activation_function, quant_config,
-                              intermediate_size)
+        self.act = get_act_fn(
+            config.activation_function, quant_config, intermediate_size
+        )

-    def forward(self, hidden_states: torch.Tensor,) -> torch.Tensor:
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
         hidden_states, _ = self.c_fc(hidden_states)
         hidden_states = self.act(hidden_states)
         hidden_states, _ = self.c_proj(hidden_states)
@@ -135,27 +140,20 @@ class GPT2Block(nn.Module):
         self,
         layer_id: int,
         config: GPT2Config,
-        cache_config = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
         super().__init__()
         hidden_size = config.hidden_size
-        inner_dim = (config.n_inner if config.n_inner is not None else 4 *
-                     hidden_size)
+        inner_dim = config.n_inner if config.n_inner is not None else 4 * hidden_size

         self.ln_1 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
-        self.attn = GPT2Attention(layer_id,
-                                  config,
-                                  cache_config,
-                                  quant_config,
-                                  prefix=f"{prefix}.attn")
+        self.attn = GPT2Attention(
+            layer_id, config, cache_config, quant_config, prefix=f"{prefix}.attn"
+        )
         self.ln_2 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
-        self.mlp = GPT2MLP(inner_dim,
-                           config,
-                           quant_config,
-                           prefix=f"{prefix}.mlp")
+        self.mlp = GPT2MLP(inner_dim, config, quant_config, prefix=f"{prefix}.mlp")

     def forward(
         self,
@@ -179,13 +177,12 @@ class GPT2Block(nn.Module):
         return hidden_states


 class GPT2Model(nn.Module):
-
     def __init__(
         self,
         config: GPT2Config,
-        cache_config = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -229,16 +226,15 @@ class GPT2LMHeadModel(nn.Module):
     def __init__(
         self,
         config: GPT2Config,
-        cache_config = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
         self.config = config
         self.quant_config = quant_config
-        self.transformer = GPT2Model(config,
-                                     cache_config,
-                                     quant_config,
-                                     prefix="transformer")
+        self.transformer = GPT2Model(
+            config, cache_config, quant_config, prefix="transformer"
+        )
         self.lm_head = self.transformer.wte
         self.logits_processor = LogitsProcessor(config)
@@ -254,8 +250,6 @@ class GPT2LMHeadModel(nn.Module):
             input_ids, hidden_states, self.lm_head.weight, forward_batch
         )

-
-
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         params_dict = dict(self.named_parameters(remove_duplicate=False))
         for name, loaded_weight in weights:
@@ -280,8 +274,8 @@ class GPT2LMHeadModel(nn.Module):
             if not name.endswith(".weight"):
                 continue
             loaded_weight = loaded_weight.t()
-            weight_loader = getattr(param, "weight_loader",
-                                    default_weight_loader)
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
             weight_loader(param, loaded_weight)


 EntryClass = GPT2LMHeadModel
@@ -419,6 +419,7 @@ def launch_engine(
     for i in range(len(scheduler_pipe_readers)):
         scheduler_pipe_readers[i].recv()

+
 def add_prometheus_middleware(app: FastAPI):
     # Adapted from https://github.com/vllm-project/vllm/blob/v0.6.1/vllm/entrypoints/openai/api_server.py#L216
     from prometheus_client import CollectorRegistry, make_asgi_app, multiprocess
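For context on the imports above: in multiprocess mode each worker writes samples to files under `PROMETHEUS_MULTIPROC_DIR`, and the scrape endpoint aggregates them through a `MultiProcessCollector`. A hedged sketch of the typical wiring (an illustration of the `prometheus_client` API, not the exact sglang middleware):

```python
import os
import tempfile

from fastapi import FastAPI
from prometheus_client import CollectorRegistry, make_asgi_app, multiprocess

# The directory must exist before any metrics are created.
os.environ["PROMETHEUS_MULTIPROC_DIR"] = tempfile.mkdtemp()

app = FastAPI()

# Aggregate samples written by all worker processes, then mount
# the scrape endpoint on the main app.
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)
app.mount("/metrics", make_asgi_app(registry=registry))
```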
@@ -490,6 +491,7 @@ def launch_server(
     finally:
         t.join()

+
 def _set_prometheus_env():
     # Set prometheus multiprocess directory
     # sglang uses prometheus multiprocess mode
@@ -506,6 +508,7 @@ def _set_prometheus_env():
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir.name
     logger.debug(f"PROMETHEUS_MULTIPROC_DIR: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")

+
 def _set_envs_and_config(server_args: ServerArgs):
     # Set global environments
     os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -763,8 +766,8 @@ class Engine:
         # runtime server default log level is log
         # offline engine works in scripts, so we set it to error
-        if 'log_level' not in kwargs:
-            kwargs['log_level'] = 'error'
+        if "log_level" not in kwargs:
+            kwargs["log_level"] = "error"

         server_args = ServerArgs(*args, **kwargs)
         launch_engine(server_args=server_args)
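The quote change above is pure black style; behaviorally the two lines remain the standard default-kwarg guard, which could equally be written with `dict.setdefault`:

```python
kwargs = {"model_path": "dummy-model"}  # hypothetical engine kwargs
kwargs.setdefault("log_level", "error")  # same effect as the if-block above
```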
...