Unverified commit c77c1e05 authored by Chayenne, committed by GitHub

fix black in pre-commit (#1940)

parent dca87ec3
......@@ -30,6 +30,6 @@ repos:
rev: 24.10.0
hooks:
- id: black
additional_dependencies: ['.[jupyter]']
types: [python, jupyter]
types_or: [python, jupyter]
types: [python]
- id: black-jupyter
types: [jupyter]
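The hunk above replaces the single `black` hook, which pulled in the `.[jupyter]` extra so it could claim both Python files and notebooks, with a plain `black` hook for Python and the upstream `black-jupyter` hook for notebooks; that split is why so many of the diffs below are pure black reformatting. To preview what black will enforce on a snippet, its Python API can be called directly; a minimal sketch (the sample source string is made up):

```python
# Minimal sketch: preview black's formatting via its Python API.
# Requires `pip install black`; the sample source string is hypothetical.
import black

src = 'data = {\n    "model": "m",\n    "text": prompts\n}\n'
print(black.format_str(src, mode=black.Mode()))
# -> data = {"model": "m", "text": prompts}
```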
......@@ -34,10 +34,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:08.536886Z",
"iopub.status.busy": "2024-11-05T05:08:08.536763Z",
"iopub.status.idle": "2024-11-05T05:08:34.725831Z",
"shell.execute_reply": "2024-11-05T05:08:34.725316Z"
"iopub.execute_input": "2024-11-07T18:44:42.063503Z",
"iopub.status.busy": "2024-11-07T18:44:42.063379Z",
"iopub.status.idle": "2024-11-07T18:45:07.255300Z",
"shell.execute_reply": "2024-11-07T18:45:07.254547Z"
}
},
"outputs": [],
......@@ -73,10 +73,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:34.727530Z",
"iopub.status.busy": "2024-11-05T05:08:34.727333Z",
"iopub.status.idle": "2024-11-05T05:08:35.359784Z",
"shell.execute_reply": "2024-11-05T05:08:35.359090Z"
"iopub.execute_input": "2024-11-07T18:45:07.258292Z",
"iopub.status.busy": "2024-11-07T18:45:07.257710Z",
"iopub.status.idle": "2024-11-07T18:45:07.611559Z",
"shell.execute_reply": "2024-11-07T18:45:07.610842Z"
}
},
"outputs": [],
......@@ -101,10 +101,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.362286Z",
"iopub.status.busy": "2024-11-05T05:08:35.362140Z",
"iopub.status.idle": "2024-11-05T05:08:35.368711Z",
"shell.execute_reply": "2024-11-05T05:08:35.368220Z"
"iopub.execute_input": "2024-11-07T18:45:07.613911Z",
"iopub.status.busy": "2024-11-07T18:45:07.613746Z",
"iopub.status.idle": "2024-11-07T18:45:07.620286Z",
"shell.execute_reply": "2024-11-07T18:45:07.619779Z"
}
},
"outputs": [],
......@@ -132,10 +132,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.371313Z",
"iopub.status.busy": "2024-11-05T05:08:35.370877Z",
"iopub.status.idle": "2024-11-05T05:08:35.376712Z",
"shell.execute_reply": "2024-11-05T05:08:35.376230Z"
"iopub.execute_input": "2024-11-07T18:45:07.622407Z",
"iopub.status.busy": "2024-11-07T18:45:07.622267Z",
"iopub.status.idle": "2024-11-07T18:45:07.628290Z",
"shell.execute_reply": "2024-11-07T18:45:07.627793Z"
}
},
"outputs": [],
......@@ -164,10 +164,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.378982Z",
"iopub.status.busy": "2024-11-05T05:08:35.378597Z",
"iopub.status.idle": "2024-11-05T05:08:35.391820Z",
"shell.execute_reply": "2024-11-05T05:08:35.391336Z"
"iopub.execute_input": "2024-11-07T18:45:07.630585Z",
"iopub.status.busy": "2024-11-07T18:45:07.630235Z",
"iopub.status.idle": "2024-11-07T18:45:07.643498Z",
"shell.execute_reply": "2024-11-07T18:45:07.643007Z"
}
},
"outputs": [],
......@@ -183,10 +183,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.393748Z",
"iopub.status.busy": "2024-11-05T05:08:35.393606Z",
"iopub.status.idle": "2024-11-05T05:08:35.398645Z",
"shell.execute_reply": "2024-11-05T05:08:35.398145Z"
"iopub.execute_input": "2024-11-07T18:45:07.645336Z",
"iopub.status.busy": "2024-11-07T18:45:07.645196Z",
"iopub.status.idle": "2024-11-07T18:45:07.650363Z",
"shell.execute_reply": "2024-11-07T18:45:07.649837Z"
}
},
"outputs": [],
......@@ -211,10 +211,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.400683Z",
"iopub.status.busy": "2024-11-05T05:08:35.400419Z",
"iopub.status.idle": "2024-11-05T05:08:35.406146Z",
"shell.execute_reply": "2024-11-05T05:08:35.405661Z"
"iopub.execute_input": "2024-11-07T18:45:07.652212Z",
"iopub.status.busy": "2024-11-07T18:45:07.652076Z",
"iopub.status.idle": "2024-11-07T18:45:07.658633Z",
"shell.execute_reply": "2024-11-07T18:45:07.658119Z"
}
},
"outputs": [],
......@@ -241,10 +241,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.408176Z",
"iopub.status.busy": "2024-11-05T05:08:35.407884Z",
"iopub.status.idle": "2024-11-05T05:08:35.413587Z",
"shell.execute_reply": "2024-11-05T05:08:35.413108Z"
"iopub.execute_input": "2024-11-07T18:45:07.660468Z",
"iopub.status.busy": "2024-11-07T18:45:07.660325Z",
"iopub.status.idle": "2024-11-07T18:45:07.666476Z",
"shell.execute_reply": "2024-11-07T18:45:07.665984Z"
}
},
"outputs": [],
......@@ -271,10 +271,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:35.416090Z",
"iopub.status.busy": "2024-11-05T05:08:35.415793Z",
"iopub.status.idle": "2024-11-05T05:08:36.552549Z",
"shell.execute_reply": "2024-11-05T05:08:36.551870Z"
"iopub.execute_input": "2024-11-07T18:45:07.668242Z",
"iopub.status.busy": "2024-11-07T18:45:07.668108Z",
"iopub.status.idle": "2024-11-07T18:45:08.725709Z",
"shell.execute_reply": "2024-11-07T18:45:08.725021Z"
}
},
"outputs": [],
......@@ -296,10 +296,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:36.554823Z",
"iopub.status.busy": "2024-11-05T05:08:36.554680Z",
"iopub.status.idle": "2024-11-05T05:08:38.053945Z",
"shell.execute_reply": "2024-11-05T05:08:38.053034Z"
"iopub.execute_input": "2024-11-07T18:45:08.727865Z",
"iopub.status.busy": "2024-11-07T18:45:08.727721Z",
"iopub.status.idle": "2024-11-07T18:45:11.165841Z",
"shell.execute_reply": "2024-11-07T18:45:11.165282Z"
}
},
"outputs": [],
......@@ -335,10 +335,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:08:38.056783Z",
"iopub.status.busy": "2024-11-05T05:08:38.056497Z",
"iopub.status.idle": "2024-11-05T05:09:04.436030Z",
"shell.execute_reply": "2024-11-05T05:09:04.435311Z"
"iopub.execute_input": "2024-11-07T18:45:11.167853Z",
"iopub.status.busy": "2024-11-07T18:45:11.167711Z",
"iopub.status.idle": "2024-11-07T18:45:39.542988Z",
"shell.execute_reply": "2024-11-07T18:45:39.542135Z"
}
},
"outputs": [],
......@@ -360,10 +360,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:04.438987Z",
"iopub.status.busy": "2024-11-05T05:09:04.438568Z",
"iopub.status.idle": "2024-11-05T05:09:04.485291Z",
"shell.execute_reply": "2024-11-05T05:09:04.484829Z"
"iopub.execute_input": "2024-11-07T18:45:39.545416Z",
"iopub.status.busy": "2024-11-07T18:45:39.545005Z",
"iopub.status.idle": "2024-11-07T18:45:39.588793Z",
"shell.execute_reply": "2024-11-07T18:45:39.588054Z"
}
},
"outputs": [],
......@@ -392,10 +392,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:04.487191Z",
"iopub.status.busy": "2024-11-05T05:09:04.486929Z",
"iopub.status.idle": "2024-11-05T05:09:25.553481Z",
"shell.execute_reply": "2024-11-05T05:09:25.552747Z"
"iopub.execute_input": "2024-11-07T18:45:39.590729Z",
"iopub.status.busy": "2024-11-07T18:45:39.590446Z",
"iopub.status.idle": "2024-11-07T18:45:59.660376Z",
"shell.execute_reply": "2024-11-07T18:45:59.659992Z"
}
},
"outputs": [],
......@@ -419,10 +419,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:25.555813Z",
"iopub.status.busy": "2024-11-05T05:09:25.555666Z",
"iopub.status.idle": "2024-11-05T05:09:26.354372Z",
"shell.execute_reply": "2024-11-05T05:09:26.353693Z"
"iopub.execute_input": "2024-11-07T18:45:59.661779Z",
"iopub.status.busy": "2024-11-07T18:45:59.661641Z",
"iopub.status.idle": "2024-11-07T18:46:00.475726Z",
"shell.execute_reply": "2024-11-07T18:46:00.475269Z"
}
},
"outputs": [],
......@@ -445,10 +445,7 @@
"prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n",
"\n",
"url = \"http://localhost:30030/classify\"\n",
"data = {\n",
" \"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \n",
" \"text\": prompts\n",
"}\n",
"data = {\"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \"text\": prompts}\n",
"\n",
"responses = requests.post(url, json=data).json()\n",
"for response in responses:\n",
......@@ -460,10 +457,10 @@
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:26.356532Z",
"iopub.status.busy": "2024-11-05T05:09:26.356327Z",
"iopub.status.idle": "2024-11-05T05:09:26.396590Z",
"shell.execute_reply": "2024-11-05T05:09:26.395914Z"
"iopub.execute_input": "2024-11-07T18:46:00.477283Z",
"iopub.status.busy": "2024-11-07T18:46:00.477025Z",
"iopub.status.idle": "2024-11-07T18:46:00.525758Z",
"shell.execute_reply": "2024-11-07T18:46:00.525236Z"
}
},
"outputs": [],
......
......@@ -35,10 +35,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:21:27.503026Z",
"iopub.status.busy": "2024-11-05T05:21:27.502741Z",
"iopub.status.idle": "2024-11-05T05:21:49.554631Z",
"shell.execute_reply": "2024-11-05T05:21:49.553690Z"
"iopub.execute_input": "2024-11-07T18:46:04.789536Z",
"iopub.status.busy": "2024-11-07T18:46:04.789418Z",
"iopub.status.idle": "2024-11-07T18:46:27.038169Z",
"shell.execute_reply": "2024-11-07T18:46:27.037540Z"
}
},
"outputs": [],
......@@ -64,10 +64,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:21:49.558275Z",
"iopub.status.busy": "2024-11-05T05:21:49.558110Z",
"iopub.status.idle": "2024-11-05T05:21:52.717287Z",
"shell.execute_reply": "2024-11-05T05:21:52.716842Z"
"iopub.execute_input": "2024-11-07T18:46:27.040005Z",
"iopub.status.busy": "2024-11-07T18:46:27.039872Z",
"iopub.status.idle": "2024-11-07T18:46:30.203840Z",
"shell.execute_reply": "2024-11-07T18:46:30.203368Z"
}
},
"outputs": [],
......@@ -99,10 +99,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:21:52.721738Z",
"iopub.status.busy": "2024-11-05T05:21:52.720908Z",
"iopub.status.idle": "2024-11-05T05:22:01.770341Z",
"shell.execute_reply": "2024-11-05T05:22:01.769510Z"
"iopub.execute_input": "2024-11-07T18:46:30.205880Z",
"iopub.status.busy": "2024-11-07T18:46:30.205719Z",
"iopub.status.idle": "2024-11-07T18:46:39.256561Z",
"shell.execute_reply": "2024-11-07T18:46:39.255880Z"
}
},
"outputs": [],
......@@ -137,10 +137,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:01.772662Z",
"iopub.status.busy": "2024-11-05T05:22:01.772377Z",
"iopub.status.idle": "2024-11-05T05:22:04.897499Z",
"shell.execute_reply": "2024-11-05T05:22:04.896867Z"
"iopub.execute_input": "2024-11-07T18:46:39.259464Z",
"iopub.status.busy": "2024-11-07T18:46:39.259309Z",
"iopub.status.idle": "2024-11-07T18:46:42.384955Z",
"shell.execute_reply": "2024-11-07T18:46:42.384378Z"
}
},
"outputs": [],
......@@ -179,10 +179,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:04.899754Z",
"iopub.status.busy": "2024-11-05T05:22:04.899478Z",
"iopub.status.idle": "2024-11-05T05:22:13.970245Z",
"shell.execute_reply": "2024-11-05T05:22:13.969779Z"
"iopub.execute_input": "2024-11-07T18:46:42.387431Z",
"iopub.status.busy": "2024-11-07T18:46:42.387279Z",
"iopub.status.idle": "2024-11-07T18:46:51.448572Z",
"shell.execute_reply": "2024-11-07T18:46:51.447781Z"
}
},
"outputs": [],
......@@ -216,10 +216,10 @@
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:13.972039Z",
"iopub.status.busy": "2024-11-05T05:22:13.971846Z",
"iopub.status.idle": "2024-11-05T05:22:14.027421Z",
"shell.execute_reply": "2024-11-05T05:22:14.027003Z"
"iopub.execute_input": "2024-11-07T18:46:51.451177Z",
"iopub.status.busy": "2024-11-07T18:46:51.450952Z",
"iopub.status.idle": "2024-11-07T18:46:51.497530Z",
"shell.execute_reply": "2024-11-07T18:46:51.496850Z"
}
},
"outputs": [],
......
......@@ -39,10 +39,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:30.637832Z",
"iopub.status.busy": "2024-11-05T05:09:30.637709Z",
"iopub.status.idle": "2024-11-05T05:09:58.830158Z",
"shell.execute_reply": "2024-11-05T05:09:58.829395Z"
"iopub.execute_input": "2024-11-07T18:46:54.813876Z",
"iopub.status.busy": "2024-11-07T18:46:54.813741Z",
"iopub.status.idle": "2024-11-07T18:47:24.015527Z",
"shell.execute_reply": "2024-11-07T18:47:24.014987Z"
}
},
"outputs": [],
......@@ -79,10 +79,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:09:58.833008Z",
"iopub.status.busy": "2024-11-05T05:09:58.832805Z",
"iopub.status.idle": "2024-11-05T05:10:00.187146Z",
"shell.execute_reply": "2024-11-05T05:10:00.186657Z"
"iopub.execute_input": "2024-11-07T18:47:24.018153Z",
"iopub.status.busy": "2024-11-07T18:47:24.017755Z",
"iopub.status.idle": "2024-11-07T18:47:25.374821Z",
"shell.execute_reply": "2024-11-07T18:47:25.374397Z"
}
},
"outputs": [],
......@@ -119,10 +119,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:00.189444Z",
"iopub.status.busy": "2024-11-05T05:10:00.189289Z",
"iopub.status.idle": "2024-11-05T05:10:03.291891Z",
"shell.execute_reply": "2024-11-05T05:10:03.291173Z"
"iopub.execute_input": "2024-11-07T18:47:25.376617Z",
"iopub.status.busy": "2024-11-07T18:47:25.376495Z",
"iopub.status.idle": "2024-11-07T18:47:28.482537Z",
"shell.execute_reply": "2024-11-07T18:47:28.482125Z"
}
},
"outputs": [],
......@@ -165,10 +165,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:03.294389Z",
"iopub.status.busy": "2024-11-05T05:10:03.294237Z",
"iopub.status.idle": "2024-11-05T05:10:03.469357Z",
"shell.execute_reply": "2024-11-05T05:10:03.468661Z"
"iopub.execute_input": "2024-11-07T18:47:28.484819Z",
"iopub.status.busy": "2024-11-07T18:47:28.484673Z",
"iopub.status.idle": "2024-11-07T18:47:28.659814Z",
"shell.execute_reply": "2024-11-07T18:47:28.659435Z"
}
},
"outputs": [],
......@@ -198,10 +198,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:03.471573Z",
"iopub.status.busy": "2024-11-05T05:10:03.471430Z",
"iopub.status.idle": "2024-11-05T05:10:04.977081Z",
"shell.execute_reply": "2024-11-05T05:10:04.976391Z"
"iopub.execute_input": "2024-11-07T18:47:28.661844Z",
"iopub.status.busy": "2024-11-07T18:47:28.661710Z",
"iopub.status.idle": "2024-11-07T18:47:30.168922Z",
"shell.execute_reply": "2024-11-07T18:47:30.168600Z"
}
},
"outputs": [],
......@@ -234,10 +234,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:04.979428Z",
"iopub.status.busy": "2024-11-05T05:10:04.979272Z",
"iopub.status.idle": "2024-11-05T05:10:08.568761Z",
"shell.execute_reply": "2024-11-05T05:10:08.568355Z"
"iopub.execute_input": "2024-11-07T18:47:30.171319Z",
"iopub.status.busy": "2024-11-07T18:47:30.171176Z",
"iopub.status.idle": "2024-11-07T18:47:33.760113Z",
"shell.execute_reply": "2024-11-07T18:47:33.759713Z"
}
},
"outputs": [],
......@@ -273,10 +273,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:08.571102Z",
"iopub.status.busy": "2024-11-05T05:10:08.570964Z",
"iopub.status.idle": "2024-11-05T05:10:23.214087Z",
"shell.execute_reply": "2024-11-05T05:10:23.213664Z"
"iopub.execute_input": "2024-11-07T18:47:33.762729Z",
"iopub.status.busy": "2024-11-07T18:47:33.762590Z",
"iopub.status.idle": "2024-11-07T18:47:34.255316Z",
"shell.execute_reply": "2024-11-07T18:47:34.254907Z"
}
},
"outputs": [],
......@@ -297,7 +297,10 @@
"response = client.chat.completions.create(\n",
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
" },\n",
" ],\n",
" temperature=0,\n",
" max_tokens=128,\n",
......@@ -322,10 +325,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:23.216229Z",
"iopub.status.busy": "2024-11-05T05:10:23.216076Z",
"iopub.status.idle": "2024-11-05T05:10:23.884236Z",
"shell.execute_reply": "2024-11-05T05:10:23.883897Z"
"iopub.execute_input": "2024-11-07T18:47:34.257393Z",
"iopub.status.busy": "2024-11-07T18:47:34.257246Z",
"iopub.status.idle": "2024-11-07T18:47:34.413506Z",
"shell.execute_reply": "2024-11-07T18:47:34.413172Z"
}
},
"outputs": [],
......@@ -365,10 +368,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:23.886276Z",
"iopub.status.busy": "2024-11-05T05:10:23.886136Z",
"iopub.status.idle": "2024-11-05T05:10:23.905880Z",
"shell.execute_reply": "2024-11-05T05:10:23.905529Z"
"iopub.execute_input": "2024-11-07T18:47:34.414816Z",
"iopub.status.busy": "2024-11-07T18:47:34.414541Z",
"iopub.status.idle": "2024-11-07T18:47:34.431341Z",
"shell.execute_reply": "2024-11-07T18:47:34.431081Z"
}
},
"outputs": [],
......@@ -427,10 +430,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:23.907468Z",
"iopub.status.busy": "2024-11-05T05:10:23.907247Z",
"iopub.status.idle": "2024-11-05T05:10:26.920212Z",
"shell.execute_reply": "2024-11-05T05:10:26.919865Z"
"iopub.execute_input": "2024-11-07T18:47:34.432325Z",
"iopub.status.busy": "2024-11-07T18:47:34.432208Z",
"iopub.status.idle": "2024-11-07T18:47:37.444337Z",
"shell.execute_reply": "2024-11-07T18:47:37.444000Z"
}
},
"outputs": [],
......@@ -482,10 +485,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:26.922675Z",
"iopub.status.busy": "2024-11-05T05:10:26.922413Z",
"iopub.status.idle": "2024-11-05T05:10:51.961703Z",
"shell.execute_reply": "2024-11-05T05:10:51.960846Z"
"iopub.execute_input": "2024-11-07T18:47:37.445894Z",
"iopub.status.busy": "2024-11-07T18:47:37.445744Z",
"iopub.status.idle": "2024-11-07T18:48:02.482532Z",
"shell.execute_reply": "2024-11-07T18:48:02.482042Z"
}
},
"outputs": [],
......@@ -565,10 +568,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:10:51.964749Z",
"iopub.status.busy": "2024-11-05T05:10:51.964215Z",
"iopub.status.idle": "2024-11-05T05:11:05.023450Z",
"shell.execute_reply": "2024-11-05T05:11:05.023101Z"
"iopub.execute_input": "2024-11-07T18:48:02.485206Z",
"iopub.status.busy": "2024-11-07T18:48:02.485064Z",
"iopub.status.idle": "2024-11-07T18:48:15.521489Z",
"shell.execute_reply": "2024-11-07T18:48:15.521156Z"
}
},
"outputs": [],
......@@ -660,10 +663,10 @@
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:05.024877Z",
"iopub.status.busy": "2024-11-05T05:11:05.024561Z",
"iopub.status.idle": "2024-11-05T05:11:06.358695Z",
"shell.execute_reply": "2024-11-05T05:11:06.357635Z"
"iopub.execute_input": "2024-11-07T18:48:15.522794Z",
"iopub.status.busy": "2024-11-07T18:48:15.522657Z",
"iopub.status.idle": "2024-11-07T18:48:16.875740Z",
"shell.execute_reply": "2024-11-07T18:48:16.874847Z"
}
},
"outputs": [],
......
......@@ -35,10 +35,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:17.227174Z",
"iopub.status.busy": "2024-11-05T05:22:17.226952Z",
"iopub.status.idle": "2024-11-05T05:22:42.445791Z",
"shell.execute_reply": "2024-11-05T05:22:42.444980Z"
"iopub.execute_input": "2024-11-07T18:48:21.128020Z",
"iopub.status.busy": "2024-11-07T18:48:21.127898Z",
"iopub.status.idle": "2024-11-07T18:48:45.310371Z",
"shell.execute_reply": "2024-11-07T18:48:45.309469Z"
}
},
"outputs": [],
......@@ -72,10 +72,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:42.448147Z",
"iopub.status.busy": "2024-11-05T05:22:42.447775Z",
"iopub.status.idle": "2024-11-05T05:22:42.495311Z",
"shell.execute_reply": "2024-11-05T05:22:42.495027Z"
"iopub.execute_input": "2024-11-07T18:48:45.313506Z",
"iopub.status.busy": "2024-11-07T18:48:45.313123Z",
"iopub.status.idle": "2024-11-07T18:48:45.364918Z",
"shell.execute_reply": "2024-11-07T18:48:45.364155Z"
}
},
"outputs": [],
......@@ -106,10 +106,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:42.496666Z",
"iopub.status.busy": "2024-11-05T05:22:42.496524Z",
"iopub.status.idle": "2024-11-05T05:22:42.540687Z",
"shell.execute_reply": "2024-11-05T05:22:42.540060Z"
"iopub.execute_input": "2024-11-07T18:48:45.367776Z",
"iopub.status.busy": "2024-11-07T18:48:45.367490Z",
"iopub.status.idle": "2024-11-07T18:48:45.411386Z",
"shell.execute_reply": "2024-11-07T18:48:45.411134Z"
}
},
"outputs": [],
......@@ -140,10 +140,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:42.542551Z",
"iopub.status.busy": "2024-11-05T05:22:42.542282Z",
"iopub.status.idle": "2024-11-05T05:22:42.928542Z",
"shell.execute_reply": "2024-11-05T05:22:42.928181Z"
"iopub.execute_input": "2024-11-07T18:48:45.412462Z",
"iopub.status.busy": "2024-11-07T18:48:45.412351Z",
"iopub.status.idle": "2024-11-07T18:48:45.768796Z",
"shell.execute_reply": "2024-11-07T18:48:45.768406Z"
}
},
"outputs": [],
......@@ -176,10 +176,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:42.930093Z",
"iopub.status.busy": "2024-11-05T05:22:42.929954Z",
"iopub.status.idle": "2024-11-05T05:22:44.799945Z",
"shell.execute_reply": "2024-11-05T05:22:44.799562Z"
"iopub.execute_input": "2024-11-07T18:48:45.770227Z",
"iopub.status.busy": "2024-11-07T18:48:45.770106Z",
"iopub.status.idle": "2024-11-07T18:48:47.447065Z",
"shell.execute_reply": "2024-11-07T18:48:47.446733Z"
}
},
"outputs": [],
......@@ -208,10 +208,10 @@
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:44.801418Z",
"iopub.status.busy": "2024-11-05T05:22:44.801192Z",
"iopub.status.idle": "2024-11-05T05:22:45.094634Z",
"shell.execute_reply": "2024-11-05T05:22:45.093950Z"
"iopub.execute_input": "2024-11-07T18:48:47.448510Z",
"iopub.status.busy": "2024-11-07T18:48:47.448337Z",
"iopub.status.idle": "2024-11-07T18:48:47.743336Z",
"shell.execute_reply": "2024-11-07T18:48:47.742276Z"
}
},
"outputs": [],
......
......@@ -39,10 +39,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:22:49.320999Z",
"iopub.status.busy": "2024-11-05T05:22:49.320880Z",
"iopub.status.idle": "2024-11-05T05:23:21.537478Z",
"shell.execute_reply": "2024-11-05T05:23:21.536956Z"
"iopub.execute_input": "2024-11-07T18:43:47.311708Z",
"iopub.status.busy": "2024-11-07T18:43:47.311517Z",
"iopub.status.idle": "2024-11-07T18:44:18.512576Z",
"shell.execute_reply": "2024-11-07T18:44:18.511909Z"
}
},
"outputs": [],
......@@ -78,10 +78,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:23:21.539953Z",
"iopub.status.busy": "2024-11-05T05:23:21.539100Z",
"iopub.status.idle": "2024-11-05T05:23:25.880179Z",
"shell.execute_reply": "2024-11-05T05:23:25.879744Z"
"iopub.execute_input": "2024-11-07T18:44:18.515678Z",
"iopub.status.busy": "2024-11-07T18:44:18.515314Z",
"iopub.status.idle": "2024-11-07T18:44:22.880793Z",
"shell.execute_reply": "2024-11-07T18:44:22.880303Z"
}
},
"outputs": [],
......@@ -129,10 +129,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:23:25.881742Z",
"iopub.status.busy": "2024-11-05T05:23:25.881595Z",
"iopub.status.idle": "2024-11-05T05:23:26.758503Z",
"shell.execute_reply": "2024-11-05T05:23:26.758084Z"
"iopub.execute_input": "2024-11-07T18:44:22.883309Z",
"iopub.status.busy": "2024-11-07T18:44:22.883160Z",
"iopub.status.idle": "2024-11-07T18:44:27.048810Z",
"shell.execute_reply": "2024-11-07T18:44:27.048074Z"
}
},
"outputs": [],
......@@ -176,10 +176,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:23:26.760098Z",
"iopub.status.busy": "2024-11-05T05:23:26.759955Z",
"iopub.status.idle": "2024-11-05T05:23:27.849510Z",
"shell.execute_reply": "2024-11-05T05:23:27.849117Z"
"iopub.execute_input": "2024-11-07T18:44:27.051312Z",
"iopub.status.busy": "2024-11-07T18:44:27.051190Z",
"iopub.status.idle": "2024-11-07T18:44:32.358097Z",
"shell.execute_reply": "2024-11-07T18:44:32.357628Z"
}
},
"outputs": [],
......@@ -227,10 +227,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:23:27.850994Z",
"iopub.status.busy": "2024-11-05T05:23:27.850864Z",
"iopub.status.idle": "2024-11-05T05:23:31.609137Z",
"shell.execute_reply": "2024-11-05T05:23:31.608748Z"
"iopub.execute_input": "2024-11-07T18:44:32.359532Z",
"iopub.status.busy": "2024-11-07T18:44:32.359413Z",
"iopub.status.idle": "2024-11-07T18:44:36.164664Z",
"shell.execute_reply": "2024-11-07T18:44:36.164005Z"
}
},
"outputs": [],
......@@ -276,10 +276,10 @@
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:23:31.610683Z",
"iopub.status.busy": "2024-11-05T05:23:31.610560Z",
"iopub.status.idle": "2024-11-05T05:23:32.965146Z",
"shell.execute_reply": "2024-11-05T05:23:32.963922Z"
"iopub.execute_input": "2024-11-07T18:44:36.167123Z",
"iopub.status.busy": "2024-11-07T18:44:36.166535Z",
"iopub.status.idle": "2024-11-07T18:44:37.743761Z",
"shell.execute_reply": "2024-11-07T18:44:37.742510Z"
}
},
"outputs": [],
......
......@@ -31,7 +31,7 @@ extensions = [
]
nbsphinx_allow_errors = True
nbsphinx_execute = 'never'
nbsphinx_execute = "never"
autosectionlabel_prefix_document = True
nbsphinx_allow_directives = True
......@@ -49,7 +49,7 @@ myst_enable_extensions = [
myst_heading_anchors = 3
nbsphinx_kernel_name = 'python3'
nbsphinx_kernel_name = "python3"
nbsphinx_execute_arguments = [
"--InlineBackend.figure_formats={'svg', 'pdf'}",
"--InlineBackend.rc={'figure.dpi': 96}",
......@@ -130,8 +130,10 @@ html_context = {
html_static_path = ["_static"]
html_css_files = ["css/custom_log.css"]
def setup(app):
app.add_css_file('css/custom_log.css')
app.add_css_file("css/custom_log.css")
myst_enable_extensions = [
"dollarmath",
......
......@@ -33,10 +33,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:10.680191Z",
"iopub.status.busy": "2024-11-05T05:11:10.679710Z",
"iopub.status.idle": "2024-11-05T05:11:39.882385Z",
"shell.execute_reply": "2024-11-05T05:11:39.881827Z"
"iopub.execute_input": "2024-11-07T18:48:52.032229Z",
"iopub.status.busy": "2024-11-07T18:48:52.032105Z",
"iopub.status.idle": "2024-11-07T18:49:20.226042Z",
"shell.execute_reply": "2024-11-07T18:49:20.225562Z"
}
},
"outputs": [],
......@@ -49,7 +49,7 @@
")\n",
"\n",
"server_process = execute_shell_command(\n",
"\"\"\"\n",
" \"\"\"\n",
"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
"--port 30000 --host 0.0.0.0\n",
"\"\"\"\n",
......@@ -70,10 +70,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:39.883923Z",
"iopub.status.busy": "2024-11-05T05:11:39.883721Z",
"iopub.status.idle": "2024-11-05T05:11:40.124980Z",
"shell.execute_reply": "2024-11-05T05:11:40.124557Z"
"iopub.execute_input": "2024-11-07T18:49:20.228006Z",
"iopub.status.busy": "2024-11-07T18:49:20.227572Z",
"iopub.status.idle": "2024-11-07T18:49:20.469885Z",
"shell.execute_reply": "2024-11-07T18:49:20.469518Z"
}
},
"outputs": [],
......@@ -101,10 +101,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:40.126564Z",
"iopub.status.busy": "2024-11-05T05:11:40.126369Z",
"iopub.status.idle": "2024-11-05T05:11:40.324316Z",
"shell.execute_reply": "2024-11-05T05:11:40.323693Z"
"iopub.execute_input": "2024-11-07T18:49:20.471956Z",
"iopub.status.busy": "2024-11-07T18:49:20.471811Z",
"iopub.status.idle": "2024-11-07T18:49:20.667997Z",
"shell.execute_reply": "2024-11-07T18:49:20.667630Z"
}
},
"outputs": [],
......@@ -115,9 +115,7 @@
"\n",
"data = {\n",
" \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" \"messages\": [\n",
" {\"role\": \"user\", \"content\": \"What is the capital of France?\"}\n",
" ]\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n",
"}\n",
"\n",
"response = requests.post(url, json=data)\n",
......@@ -136,10 +134,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:40.327043Z",
"iopub.status.busy": "2024-11-05T05:11:40.326759Z",
"iopub.status.idle": "2024-11-05T05:11:41.687336Z",
"shell.execute_reply": "2024-11-05T05:11:41.686855Z"
"iopub.execute_input": "2024-11-07T18:49:20.669977Z",
"iopub.status.busy": "2024-11-07T18:49:20.669826Z",
"iopub.status.idle": "2024-11-07T18:49:22.004855Z",
"shell.execute_reply": "2024-11-07T18:49:22.004472Z"
}
},
"outputs": [],
......@@ -171,10 +169,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:41.688676Z",
"iopub.status.busy": "2024-11-05T05:11:41.688527Z",
"iopub.status.idle": "2024-11-05T05:11:42.717140Z",
"shell.execute_reply": "2024-11-05T05:11:42.716452Z"
"iopub.execute_input": "2024-11-07T18:49:22.006983Z",
"iopub.status.busy": "2024-11-07T18:49:22.006858Z",
"iopub.status.idle": "2024-11-07T18:49:23.029098Z",
"shell.execute_reply": "2024-11-07T18:49:23.028697Z"
}
},
"outputs": [],
......@@ -197,7 +195,7 @@
"# Handle the streaming output\n",
"for chunk in response:\n",
" if chunk.choices[0].delta.content:\n",
" print(chunk.choices[0].delta.content, end='', flush=True)"
" print(chunk.choices[0].delta.content, end=\"\", flush=True)"
]
},
{
......@@ -214,10 +212,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:42.720467Z",
"iopub.status.busy": "2024-11-05T05:11:42.720182Z",
"iopub.status.idle": "2024-11-05T05:11:43.480765Z",
"shell.execute_reply": "2024-11-05T05:11:43.480143Z"
"iopub.execute_input": "2024-11-07T18:49:23.031712Z",
"iopub.status.busy": "2024-11-07T18:49:23.031571Z",
"iopub.status.idle": "2024-11-07T18:49:23.787752Z",
"shell.execute_reply": "2024-11-07T18:49:23.787368Z"
}
},
"outputs": [],
......@@ -250,10 +248,10 @@
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:43.483575Z",
"iopub.status.busy": "2024-11-05T05:11:43.483295Z",
"iopub.status.idle": "2024-11-05T05:11:44.242950Z",
"shell.execute_reply": "2024-11-05T05:11:44.242248Z"
"iopub.execute_input": "2024-11-07T18:49:23.789840Z",
"iopub.status.busy": "2024-11-07T18:49:23.789702Z",
"iopub.status.idle": "2024-11-07T18:49:24.545631Z",
"shell.execute_reply": "2024-11-07T18:49:24.545241Z"
}
},
"outputs": [],
......@@ -290,10 +288,10 @@
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2024-11-05T05:11:44.245660Z",
"iopub.status.busy": "2024-11-05T05:11:44.245373Z",
"iopub.status.idle": "2024-11-05T05:11:45.591682Z",
"shell.execute_reply": "2024-11-05T05:11:45.591184Z"
"iopub.execute_input": "2024-11-07T18:49:24.547641Z",
"iopub.status.busy": "2024-11-07T18:49:24.547497Z",
"iopub.status.idle": "2024-11-07T18:49:25.888864Z",
"shell.execute_reply": "2024-11-07T18:49:25.888114Z"
}
},
"outputs": [],
......
......@@ -71,7 +71,7 @@
"source": [
"import json\n",
"import os\n",
"from typing import List\n",
"from typing import List\n",
"\n",
"import chromadb\n",
"\n",
......@@ -80,7 +80,7 @@
"if not os.path.exists(path_qca):\n",
" !wget https://virattt.github.io/datasets/abnb-2023-10k.json -O airbnb-2023-10k-qca.json\n",
"\n",
"with open(path_qca, 'r') as f:\n",
"with open(path_qca, \"r\") as f:\n",
" question_context_answers = json.load(f)\n",
"\n",
"chroma_client = chromadb.PersistentClient()\n",
......@@ -88,7 +88,7 @@
"if collection.count() == 0:\n",
" collection.add(\n",
" documents=[qca[\"context\"] for qca in question_context_answers],\n",
" ids=[str(i) for i in range(len(question_context_answers))]\n",
" ids=[str(i) for i in range(len(question_context_answers))],\n",
" )"
],
"metadata": {
......@@ -123,7 +123,7 @@
"\n",
"load_dotenv()\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = \"false\"\n",
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
"\n",
"p = Parea(api_key=os.getenv(\"PAREA_API_KEY\"), project_name=\"rag_sglang\")\n",
"p.integrate_with_sglang()\n",
......@@ -150,10 +150,7 @@
"source": [
"@trace\n",
"def retrieval(question: str) -> List[str]:\n",
" return collection.query(\n",
" query_texts=[question],\n",
" n_results=1\n",
" )['documents'][0]"
" return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
],
"metadata": {
"collapsed": false
......@@ -176,7 +173,9 @@
"@function\n",
"def generation_sglang(s, question: str, *context: str):\n",
" context = \"\\n\".join(context)\n",
" s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
" s += user(\n",
" f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
" )\n",
" s += assistant(gen(\"answer\"))\n",
"\n",
"\n",
......@@ -223,7 +222,9 @@
" return generation(question, *contexts)\n",
"\n",
"\n",
"rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
"rag_pipeline(\n",
" \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
")"
]
},
{
......@@ -271,7 +272,10 @@
"execution_count": null,
"outputs": [],
"source": [
"from parea.evals.rag import context_query_relevancy_factory, percent_target_supported_by_context_factory\n",
"from parea.evals.rag import (\n",
" context_query_relevancy_factory,\n",
" percent_target_supported_by_context_factory,\n",
")\n",
"\n",
"\n",
"context_relevancy_eval = context_query_relevancy_factory()\n",
......@@ -280,10 +284,7 @@
"\n",
"@trace(eval_funcs=[context_relevancy_eval, percent_target_supported_by_context])\n",
"def retrieval(question: str) -> List[str]:\n",
" return collection.query(\n",
" query_texts=[question],\n",
" n_results=1\n",
" )['documents'][0]"
" return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
],
"metadata": {
"collapsed": false
......@@ -310,10 +311,13 @@
"answer_context_faithfulness = answer_context_faithfulness_statement_level_factory()\n",
"answer_matches_target_llm_grader = answer_matches_target_llm_grader_factory()\n",
"\n",
"\n",
"@function\n",
"def generation_sglang(s, question: str, *context: str):\n",
" context = \"\\n\".join(context)\n",
" s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
" s += user(\n",
" f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
" )\n",
" s += assistant(gen(\"answer\", max_tokens=1_000))\n",
"\n",
"\n",
......@@ -357,7 +361,9 @@
" return generation(question, *contexts)\n",
"\n",
"\n",
"rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
"rag_pipeline(\n",
" \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
")"
],
"metadata": {
"collapsed": false
......@@ -402,6 +408,7 @@
"source": [
"!pip install nest-asyncio\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()"
],
"metadata": {
......@@ -461,7 +468,7 @@
],
"source": [
"e = p.experiment(\n",
" 'RAG',\n",
" \"RAG\",\n",
" data=[\n",
" {\n",
" \"question\": qca[\"question\"],\n",
......@@ -469,7 +476,7 @@
" }\n",
" for qca in question_context_answers\n",
" ],\n",
" func=rag_pipeline\n",
" func=rag_pipeline,\n",
").run()"
],
"metadata": {
......
......@@ -7,6 +7,7 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
def main():
# Sample prompts.
prompts = [
......
......@@ -39,7 +39,7 @@ class ModelConfig:
revision: Optional[str] = None,
context_length: Optional[int] = None,
model_override_args: Optional[dict] = None,
is_embedding: Optional[bool] = None
is_embedding: Optional[bool] = None,
) -> None:
# Parse args
self.model_override_args = json.loads(model_override_args)
......@@ -52,7 +52,9 @@ class ModelConfig:
self.hf_text_config = get_hf_text_config(self.hf_config)
# Check model type
self.is_generation = is_generation_model(self.hf_config.architectures, is_embedding)
self.is_generation = is_generation_model(
self.hf_config.architectures, is_embedding
)
self.is_multimodal = is_multimodal_model(self.hf_config.architectures)
self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)
......
......@@ -122,16 +122,14 @@ class QuantizationConfig(ABC):
"""
raise NotImplementedError
def method_has_implemented_embedding(
method_class: Type[QuantizeMethodBase]) -> bool:
def method_has_implemented_embedding(method_class: Type[QuantizeMethodBase]) -> bool:
"""
Not all quant methods have embedding implemented, so we need to check that
it exists for our given method. We check this by making sure the function
has been changed from the base implementation.
"""
base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding",
None)
base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding", None)
class_embedding = inspect.getattr_static(method_class, "embedding", None)
return (class_embedding is not None
and class_embedding is not base_embedding)
return class_embedding is not None and class_embedding is not base_embedding
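The rewrapped helper above hinges on `inspect.getattr_static`, which looks an attribute up without triggering descriptors or `__getattr__`, so an identity comparison against the base class tells you whether a subclass genuinely overrides the method. A self-contained sketch of the pattern (toy classes, not the real `QuantizeMethodBase`):

```python
import inspect


class Base:
    def embedding(self):
        raise NotImplementedError


class WithEmbedding(Base):
    def embedding(self):
        return [0.0]


class WithoutEmbedding(Base):
    pass


def has_override(cls, name: str = "embedding") -> bool:
    # getattr_static avoids descriptor side effects; the identity
    # check detects whether the attribute was actually redefined.
    base_attr = inspect.getattr_static(Base, name, None)
    cls_attr = inspect.getattr_static(cls, name, None)
    return cls_attr is not None and cls_attr is not base_attr


print(has_override(WithEmbedding))     # True
print(has_override(WithoutEmbedding))  # False (inherits Base.embedding)
```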
......@@ -86,8 +86,10 @@ class GenerateReqInput:
self.parallel_sample_num = self.sampling_params.get("n", 1)
else: # isinstance(self.sampling_params, list):
self.parallel_sample_num = self.sampling_params[0].get("n", 1)
assert all(self.parallel_sample_num == sampling_params.get("n", 1) for sampling_params in self.sampling_params), (
"The parallel_sample_num should be the same for all samples in sample params.")
assert all(
self.parallel_sample_num == sampling_params.get("n", 1)
for sampling_params in self.sampling_params
), "The parallel_sample_num should be the same for all samples in sample params."
if self.parallel_sample_num > 1 and self.is_single:
self.is_single = False
......
......@@ -911,8 +911,7 @@ class ScheduleBatch:
keep_indices = [
i
for i in range(len(self.reqs))
if not self.reqs[i].finished()
and self.reqs[i] is not being_chunked_req
if not self.reqs[i].finished() and self.reqs[i] is not being_chunked_req
]
if keep_indices is None or len(keep_indices) == 0:
......@@ -1043,6 +1042,7 @@ class ScheduleBatch:
for req in self.reqs:
req.started_time = time.time()
@dataclasses.dataclass
class ModelWorkerBatch:
# The batch id
......
......@@ -224,8 +224,8 @@ class Scheduler:
self.forward_ct = 0
self.forward_ct_decode = 0
self.num_generated_tokens = 0
self.last_stats_tic = time.time() # time of last stats for every iter
self.last_log_tic = time.time() # time of last log for print decode log
self.last_stats_tic = time.time() # time of last stats for every iter
self.last_log_tic = time.time() # time of last log for print decode log
self.stream_interval = server_args.stream_interval
# Init chunked prefill
......@@ -566,9 +566,7 @@ class Scheduler:
and not self.last_batch.is_empty()
):
if self.being_chunked_req:
self.last_batch.filter_batch(
being_chunked_req=self.being_chunked_req
)
self.last_batch.filter_batch(being_chunked_req=self.being_chunked_req)
self.tree_cache.cache_unfinished_req(self.being_chunked_req)
# Inflight request keeps its rid but will get a new req_pool_idx.
self.req_to_token_pool.free(self.being_chunked_req.req_pool_idx)
......@@ -628,9 +626,7 @@ class Scheduler:
has_inflight = self.being_chunked_req is not None
if has_inflight:
self.being_chunked_req.init_next_round_input()
self.being_chunked_req = adder.add_inflight_req(
self.being_chunked_req
)
self.being_chunked_req = adder.add_inflight_req(self.being_chunked_req)
if self.lora_paths:
lora_set = (
......@@ -813,7 +809,8 @@ class Scheduler:
embeddings = self.tp_worker.forward_batch_embedding(model_worker_batch)
ret = embeddings, model_worker_batch.bid
return ret
def get_stats(self,batch: ScheduleBatch):
def get_stats(self, batch: ScheduleBatch):
# TODO: get stats for chunked prefill
now = time.time()
......@@ -829,8 +826,8 @@ class Scheduler:
# set stats from prefill
if self.stats is not None:
# new_seq=self.stats.new_seq
cache_hit_rate=self.stats.cache_hit_rate
token_usage=self.stats.token_usage
cache_hit_rate = self.stats.cache_hit_rate
token_usage = self.stats.token_usage
# Iteration stats
num_prompt_tokens_iter = 0
num_generation_tokens_iter = 0
......@@ -851,15 +848,19 @@ class Scheduler:
# _, next_token_ids, _ = result
if batch is not None:
num_generation_tokens_iter = len(batch.output_ids)
gen_throughput = round(num_generation_tokens_iter / (now - self.last_stats_tic), 2)
gen_throughput = round(
num_generation_tokens_iter / (now - self.last_stats_tic), 2
)
for i, req in enumerate(batch.reqs):
# NOTE: Batch forward mode is extend before decode starts,
if batch.forward_mode.is_extend():
num_prompt_tokens_iter=len(batch.input_ids)+sum(batch.prefix_lens)
num_prompt_tokens_iter = len(batch.input_ids) + sum(
batch.prefix_lens
)
time_to_first_tokens_iter.append(now - req.started_time)
else:
time_per_output_tokens_iter.append(now-self.last_stats_tic)
time_per_output_tokens_iter.append(now - self.last_stats_tic)
if req.finished():
time_e2e_requests.append(now - req.created_time)
......@@ -867,9 +868,10 @@ class Scheduler:
num_prompt_tokens_requests.append(len(req.origin_input_ids))
num_generation_tokens_requests.append(len(req.output_ids))
finished_reason_requests.append(
req.finished_reason.to_json()
if req.finished_reason is not None
else None)
req.finished_reason.to_json()
if req.finished_reason is not None
else None
)
return Stats(
new_seq=new_seq,
......@@ -893,7 +895,7 @@ class Scheduler:
max_running_requests=self.max_running_requests,
)
def log_stats(self,stats:Stats):
def log_stats(self, stats: Stats):
self.metrics_collector.log_stats(stats)
def process_batch_result(self, batch: ScheduleBatch, result):
......@@ -1003,9 +1005,7 @@ class Scheduler:
if req.is_retracted:
continue
if self.server_args.enable_overlap_schedule and (
req.finished()
):
if self.server_args.enable_overlap_schedule and (req.finished()):
self.token_to_kv_pool.free(batch.out_cache_loc[i : i + 1])
continue
......@@ -1031,7 +1031,10 @@ class Scheduler:
self.token_to_kv_pool.free_group_end()
self.forward_ct_decode = (self.forward_ct_decode + 1) % (1 << 30)
if self.tp_rank == 0 and self.forward_ct_decode % self.server_args.decode_log_interval == 0:
if (
self.tp_rank == 0
and self.forward_ct_decode % self.server_args.decode_log_interval == 0
):
self.print_decode_stats()
def add_logprob_return_values(
......
......@@ -215,7 +215,7 @@ class TokenizerManager:
logprob_start_len,
top_logprobs_num,
obj.stream,
obj.lora_path
obj.lora_path,
)
elif isinstance(obj, EmbeddingReqInput):
tokenized_obj = TokenizedEmbeddingReqInput(
......@@ -290,7 +290,9 @@ class TokenizerManager:
# Tokenize all requests
objs = [obj[i] for i in range(batch_size)]
tokenized_objs = await asyncio.gather(*(self._tokenize_one_request(obj) for obj in objs))
tokenized_objs = await asyncio.gather(
*(self._tokenize_one_request(obj) for obj in objs)
)
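The rewrapped call above fans the per-request tokenization out concurrently. A tiny sketch of the same `asyncio.gather` fan-out (the tokenize coroutine is a hypothetical stand-in for `_tokenize_one_request`):

```python
import asyncio


async def tokenize_one(text: str) -> list[str]:
    # Hypothetical stand-in for TokenizerManager._tokenize_one_request.
    await asyncio.sleep(0)  # e.g. await an executor or RPC here
    return text.split()


async def tokenize_batch(texts: list[str]) -> list[list[str]]:
    # Fan out all tokenization coroutines and await the results together.
    return await asyncio.gather(*(tokenize_one(t) for t in texts))


print(asyncio.run(tokenize_batch(["hello world", "bonjour le monde"])))
```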
# Cache the common prefix for parallel sampling
for i in range(batch_size):
......@@ -322,7 +324,9 @@ class TokenizerManager:
rid_to_index = {rid: i for i, rid in enumerate(rids)}
task_map = {asyncio.create_task(gen.__anext__()): gen for gen in generators}
while task_map:
done, _ = await asyncio.wait(task_map.keys(), return_when=asyncio.FIRST_COMPLETED)
done, _ = await asyncio.wait(
task_map.keys(), return_when=asyncio.FIRST_COMPLETED
)
for task in done:
gen = task_map.pop(task)
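The reformatted `asyncio.wait` call above drives a fan-in loop over several async generators: each generator's pending `__anext__()` is wrapped in a task, whichever task finishes first is consumed, and a replacement task is queued for that generator until it is exhausted. A runnable sketch of the pattern (the generators are hypothetical stand-ins for per-request output streams):

```python
import asyncio


async def stream(name: str, delay: float, count: int):
    # Hypothetical stand-in for one request's output stream.
    for i in range(count):
        await asyncio.sleep(delay)
        yield f"{name}:{i}"


async def multiplex(generators):
    # Map each pending-next task back to its generator.
    task_map = {asyncio.create_task(g.__anext__()): g for g in generators}
    while task_map:
        done, _ = await asyncio.wait(
            task_map.keys(), return_when=asyncio.FIRST_COMPLETED
        )
        for task in done:
            g = task_map.pop(task)
            try:
                print(task.result())
            except StopAsyncIteration:
                continue  # this generator is exhausted
            task_map[asyncio.create_task(g.__anext__())] = g


asyncio.run(multiplex([stream("a", 0.01, 3), stream("b", 0.02, 2)]))
```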
......@@ -367,7 +371,7 @@ class TokenizerManager:
if self.server_args.dp_size == 1:
res = await self.mem_pool_size
return res.size
else: # self.server_args.dp_size > 1
else: # self.server_args.dp_size > 1
self.mem_pool_size_tmp = []
res = await self.mem_pool_size
ret = [r.size for r in res]
......@@ -399,7 +403,7 @@ class TokenizerManager:
self.server_args.load_format = obj.load_format
self.model_path = obj.model_path
return result.success, result.message
else: # self.server_args.dp_size > 1
else: # self.server_args.dp_size > 1
self.model_update_tmp = []
result = await self.model_update_result
......@@ -470,7 +474,7 @@ class TokenizerManager:
if isinstance(recv_obj, UpdateWeightReqOutput):
if self.server_args.dp_size == 1:
self.model_update_result.set_result(recv_obj)
else: # self.server_args.dp_size > 1
else: # self.server_args.dp_size > 1
self.model_update_tmp.append(recv_obj)
# set future if all the results are received
if len(self.model_update_tmp) == self.server_args.dp_size:
......@@ -479,7 +483,7 @@ class TokenizerManager:
elif isinstance(recv_obj, GetMemPoolSizeReqOutput):
if self.server_args.dp_size == 1:
self.mem_pool_size.set_result(recv_obj)
else:  # self.server_args.dp_size > 1
else: # self.sever_args.dp_size > 1
self.mem_pool_size_tmp.append(recv_obj)
# set future if all the results are received
if len(self.mem_pool_size_tmp) == self.server_args.dp_size:
......
......@@ -130,27 +130,65 @@ class Metrics:
self.counter_prompt_tokens = Counter(
name="sglang:prompt_tokens_total",
documentation="Number of prefill tokens processed.",
labelnames=labelnames)
labelnames=labelnames,
)
self.counter_generation_tokens = Counter(
name="sglang:generation_tokens_total",
documentation="Number of generation tokens processed.",
labelnames=labelnames)
labelnames=labelnames,
)
self.histogram_time_to_first_token = Histogram(
name="sglang:time_to_first_token_seconds",
documentation="Histogram of time to first token in seconds.",
labelnames=labelnames,
buckets=[
0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 25.0, 30.0
])
0.001,
0.005,
0.01,
0.02,
0.04,
0.06,
0.08,
0.1,
0.25,
0.5,
0.75,
1.0,
2.5,
5.0,
7.5,
10.0,
15.0,
20.0,
25.0,
30.0,
],
)
self.histogram_time_per_output_token = Histogram(
name="sglang:time_per_output_token_seconds",
documentation="Histogram of time per output token in seconds.",
labelnames=labelnames,
buckets=[
0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
1.0, 2.5
])
0.005,
0.01,
0.015,
0.02,
0.025,
0.03,
0.04,
0.05,
0.075,
0.1,
0.15,
0.2,
0.3,
0.4,
0.5,
0.75,
1.0,
2.5,
],
)
# Request Stats
# Metadata
......@@ -245,14 +283,19 @@ class PrometheusMetricsCollector(MetricsCollector):
stats.num_generation_tokens_requests,
)
self._log_counter(self.metrics.counter_prompt_tokens,
stats.num_prompt_tokens_iter)
self._log_counter(self.metrics.counter_generation_tokens,
stats.num_generation_tokens_iter)
self._log_histogram(self.metrics.histogram_time_to_first_token,
stats.time_to_first_tokens_iter)
self._log_histogram(self.metrics.histogram_time_per_output_token,
stats.time_per_output_tokens_iter)
self._log_counter(
self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter
)
self._log_counter(
self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter
)
self._log_histogram(
self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter
)
self._log_histogram(
self.metrics.histogram_time_per_output_token,
stats.time_per_output_tokens_iter,
)
# self._log_gauge(self.metrics.gpu_cache_usage_sys, stats.gpu_cache_usage_sys)
self._log_gauge(self.metrics.num_running_sys, stats.num_running_req)
......
......@@ -28,7 +28,7 @@ from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
#from sglang.srt.layers.activation import get_act_fn
# from sglang.srt.layers.activation import get_act_fn
from sglang.srt.layers.linear import (
ColumnParallelLinear,
QKVParallelLinear,
......@@ -47,15 +47,14 @@ class GPT2Attention(nn.Module):
self,
layer_id: int,
config: GPT2Config,
cache_config = None,
cache_config=None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
super().__init__()
self.hidden_size = config.hidden_size
total_num_heads = config.num_attention_heads
tensor_model_parallel_world_size = (
get_tensor_model_parallel_world_size())
tensor_model_parallel_world_size = get_tensor_model_parallel_world_size()
assert total_num_heads % tensor_model_parallel_world_size == 0
self.num_heads = total_num_heads // tensor_model_parallel_world_size
self.head_dim = self.hidden_size // total_num_heads
......@@ -76,11 +75,13 @@ class GPT2Attention(nn.Module):
quant_config=quant_config,
prefix=f"{prefix}.c_proj",
)
self.attn = RadixAttention(self.num_heads,
self.head_dim,
scaling=self.scale,
num_kv_heads=total_num_heads,
layer_id=layer_id)
self.attn = RadixAttention(
self.num_heads,
self.head_dim,
scaling=self.scale,
num_kv_heads=total_num_heads,
layer_id=layer_id,
)
def forward(
self,
......@@ -119,10 +120,14 @@ class GPT2MLP(nn.Module):
quant_config=quant_config,
prefix=f"{prefix}.c_proj",
)
self.act = get_act_fn(config.activation_function, quant_config,
intermediate_size)
self.act = get_act_fn(
config.activation_function, quant_config, intermediate_size
)
def forward(self, hidden_states: torch.Tensor,) -> torch.Tensor:
def forward(
self,
hidden_states: torch.Tensor,
) -> torch.Tensor:
hidden_states, _ = self.c_fc(hidden_states)
hidden_states = self.act(hidden_states)
hidden_states, _ = self.c_proj(hidden_states)
......@@ -135,27 +140,20 @@ class GPT2Block(nn.Module):
self,
layer_id: int,
config: GPT2Config,
cache_config = None,
cache_config=None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
super().__init__()
hidden_size = config.hidden_size
inner_dim = (config.n_inner if config.n_inner is not None else 4 *
hidden_size)
inner_dim = config.n_inner if config.n_inner is not None else 4 * hidden_size
self.ln_1 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
self.attn = GPT2Attention(layer_id,
config,
cache_config,
quant_config,
prefix=f"{prefix}.attn")
self.attn = GPT2Attention(
layer_id, config, cache_config, quant_config, prefix=f"{prefix}.attn"
)
self.ln_2 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
self.mlp = GPT2MLP(inner_dim,
config,
quant_config,
prefix=f"{prefix}.mlp")
self.mlp = GPT2MLP(inner_dim, config, quant_config, prefix=f"{prefix}.mlp")
def forward(
self,
......@@ -179,13 +177,12 @@ class GPT2Block(nn.Module):
return hidden_states
class GPT2Model(nn.Module):
def __init__(
self,
config: GPT2Config,
cache_config = None,
cache_config=None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
......@@ -229,16 +226,15 @@ class GPT2LMHeadModel(nn.Module):
def __init__(
self,
config: GPT2Config,
cache_config = None,
cache_config=None,
quant_config: Optional[QuantizationConfig] = None,
):
super().__init__()
self.config = config
self.quant_config = quant_config
self.transformer = GPT2Model(config,
cache_config,
quant_config,
prefix="transformer")
self.transformer = GPT2Model(
config, cache_config, quant_config, prefix="transformer"
)
self.lm_head = self.transformer.wte
self.logits_processor = LogitsProcessor(config)
......@@ -254,8 +250,6 @@ class GPT2LMHeadModel(nn.Module):
input_ids, hidden_states, self.lm_head.weight, forward_batch
)
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
params_dict = dict(self.named_parameters(remove_duplicate=False))
for name, loaded_weight in weights:
......@@ -280,8 +274,8 @@ class GPT2LMHeadModel(nn.Module):
if not name.endswith(".weight"):
continue
loaded_weight = loaded_weight.t()
weight_loader = getattr(param, "weight_loader",
default_weight_loader)
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight)
EntryClass = GPT2LMHeadModel
......@@ -419,6 +419,7 @@ def launch_engine(
for i in range(len(scheduler_pipe_readers)):
scheduler_pipe_readers[i].recv()
def add_prometheus_middleware(app: FastAPI):
# Adapted from https://github.com/vllm-project/vllm/blob/v0.6.1/vllm/entrypoints/openai/api_server.py#L216
from prometheus_client import CollectorRegistry, make_asgi_app, multiprocess
......@@ -490,6 +491,7 @@ def launch_server(
finally:
t.join()
def _set_prometheus_env():
# Set prometheus multiprocess directory
# sglang uses prometheus multiprocess mode
......@@ -506,6 +508,7 @@ def _set_prometheus_env():
os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir.name
logger.debug(f"PROMETHEUS_MULTIPROC_DIR: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")
def _set_envs_and_config(server_args: ServerArgs):
# Set global environments
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
......@@ -763,8 +766,8 @@ class Engine:
# runtime server default log level is log
# offline engine works in scripts, so we set it to error
if 'log_level' not in kwargs:
kwargs['log_level'] = 'error'
if "log_level" not in kwargs:
kwargs["log_level"] = "error"
server_args = ServerArgs(*args, **kwargs)
launch_engine(server_args=server_args)
......