Unverified Commit f9633fa9 authored by Byron Hsu's avatar Byron Hsu Committed by GitHub
Browse files

[rust] cache-aware DP - approx tree (#1934)

parent 087ab832
import itertools
import json
import random
import string
import threading
import time
from argparse import ArgumentParser

import sglang as sgl
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import (
    add_common_sglang_args_and_parse,
    select_sglang_backend,
)
from sglang.utils import dump_state_text
# Seed the module-level RNG with a fixed value so the randomly sampled
# prompts are the same on every run of this script.
random.seed(42)
def gen_prompt(tokenizer, token_num):
    """Return a random prompt made by decoding `token_num` sampled token ids.

    The ids are drawn with replacement (``random.choices``) from the values of
    ``tokenizer.get_vocab()`` and then handed to ``tokenizer.decode``.

    Args:
        tokenizer: object exposing ``get_vocab()`` (mapping whose values are
            token ids) and ``decode(ids)``.
        token_num: how many token ids to sample.

    Returns:
        Whatever ``tokenizer.decode`` returns for the sampled ids.
    """
    vocab_ids = list(tokenizer.get_vocab().values())
    sampled_ids = random.choices(vocab_ids, k=token_num)
    return tokenizer.decode(sampled_ids)
def gen_arguments(args, tokenizer):
    """Build the per-conversation argument dicts for the benchmark.

    Creates ``args.num_qa`` conversations. Each one is a dict with a random
    ``"system_prompt"`` of ``args.system_prompt_len`` tokens and a ``"qas"``
    list holding ``args.turns`` entries of the form
    ``{"prompt": <random prompt of args.len_q tokens>, "new_tokens": args.len_a}``.

    Args:
        args: namespace providing ``num_qa``, ``system_prompt_len``, ``turns``,
            ``len_q`` and ``len_a``.
        tokenizer: tokenizer forwarded to ``gen_prompt``.

    Returns:
        The list of conversation dicts.
    """
    # Phase 1: every system prompt is generated before any turn prompt, so the
    # order in which random draws are consumed stays the same.
    conversations = []
    for _ in range(args.num_qa):
        conversations.append(
            {
                "system_prompt": gen_prompt(tokenizer, args.system_prompt_len),
                "qas": [],
            }
        )

    # Phase 2: fill in the turns, conversation by conversation.
    for conversation in conversations:
        conversation["qas"].extend(
            {
                "prompt": gen_prompt(tokenizer, args.len_q),
                "new_tokens": args.len_a,
            }
            for _ in range(args.turns)
        )
    return conversations
@sgl.function
def multi_turns(s, system_prompt, qas):
    """Replay one multi-turn conversation on the sglang state `s`.

    Appends `system_prompt` to the state, then for every entry in `qas`
    appends its ``"prompt"`` followed by an ``sgl.gen`` call whose
    ``max_tokens`` is the entry's ``"new_tokens"`` and which is issued with
    ``ignore_eos=True``.
    """
    s += system_prompt
    for turn in qas:
        s += turn["prompt"]
        s += sgl.gen(max_tokens=turn["new_tokens"], ignore_eos=True)
def main(args):
    """Run the multi-turn benchmark once and record the result.

    Steps:
      1. Load the tokenizer named by ``args.tokenizer`` and generate the
         random conversations with ``gen_arguments``.
      2. Run all conversations through ``multi_turns.run_batch`` on the
         backend chosen by ``select_sglang_backend(args)`` and time the batch.
      3. Print the latency, dump the resulting states to
         ``tmp_output_<backend>.txt`` and append one JSON line describing the
         run to ``args.result_file``.

    Args:
        args: parsed command-line namespace; this function reads
            ``tokenizer``, ``trust_remote_code``, ``parallel``, ``backend``,
            ``result_file``, ``num_qa`` and ``turns`` (plus whatever
            ``gen_arguments`` and ``select_sglang_backend`` read).
    """
    tokenizer = get_tokenizer(args.tokenizer, trust_remote_code=args.trust_remote_code)
    multi_qas = gen_arguments(args, tokenizer)
    backend = select_sglang_backend(args)

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments that happen while the batch is running.
    tic = time.perf_counter()
    states = multi_turns.run_batch(
        multi_qas,
        temperature=0,
        backend=backend,
        num_threads=args.parallel,
        progress_bar=True,
    )
    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")
    dump_state_text(f"tmp_output_{args.backend}.txt", states)

    # Append mode: each run adds one JSON line to the result file.
    with open(args.result_file, "a") as fout:
        value = {
            "task": "multi_turn_system_prompt_chat",
            "backend": args.backend,
            "num_gpus": 1,
            "latency": round(latency, 3),
            "num_requests": args.num_qa,
            "num_turns": args.turns,
            "other": {
                "parallel": args.parallel,
            },
        }
        fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
    parser = ArgumentParser()

    # Integer workload options, registered in a fixed order with their defaults.
    for flag, default in (
        ("--turns", 8),
        ("--num-qa", 128),
        ("--system-prompt-len", 2048),
        ("--len-q", 32),
        ("--len-a", 128),
    ):
        parser.add_argument(flag, type=int, default=default)

    parser.add_argument(
        "--tokenizer", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct"
    )
    parser.add_argument("--trust-remote-code", action="store_true")

    # The shared helper adds the common sglang options and performs the parse.
    args = add_common_sglang_args_and_parse(parser)
    print(args)
    main(args)
......@@ -8,7 +8,7 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a"
dependencies = [
"bitflags",
"bitflags 2.6.0",
"bytes",
"futures-core",
"futures-sink",
......@@ -30,8 +30,8 @@ dependencies = [
"actix-service",
"actix-utils",
"ahash",
"base64",
"bitflags",
"base64 0.22.1",
"bitflags 2.6.0",
"brotli",
"bytes",
"bytestring",
......@@ -43,7 +43,7 @@ dependencies = [
"http 0.2.12",
"httparse",
"httpdate",
"itoa 1.0.11",
"itoa",
"language-tags",
"local-channel",
"mime",
......@@ -156,7 +156,7 @@ dependencies = [
"futures-core",
"futures-util",
"impl-more",
"itoa 1.0.11",
"itoa",
"language-tags",
"log",
"mime",
......@@ -239,9 +239,9 @@ dependencies = [
[[package]]
name = "anstream"
version = "0.6.15"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
......@@ -254,36 +254,36 @@ dependencies = [
[[package]]
name = "anstyle"
version = "1.0.8"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.5"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
version = "3.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
dependencies = [
"anstyle",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
......@@ -310,15 +310,27 @@ dependencies = [
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.6.0"
......@@ -384,9 +396,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.1.31"
version = "1.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
checksum = "baee610e9452a8f6f0a1b6194ec09ff9e2d85dea54432acdae41aa0761c95d70"
dependencies = [
"jobserver",
"libc",
......@@ -441,9 +453,22 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "colorchoice"
version = "1.0.2"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.52.0",
]
[[package]]
name = "convert_case"
......@@ -496,6 +521,31 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "crypto-common"
version = "0.1.6"
......@@ -506,6 +556,41 @@ dependencies = [
"typenum",
]
[[package]]
name = "darling"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "deranged"
version = "0.3.11"
......@@ -515,6 +600,37 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "derive_more"
version = "0.99.18"
......@@ -539,16 +655,54 @@ dependencies = [
]
[[package]]
name = "dtoa"
version = "0.4.8"
name = "dirs"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-sys"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.48.0",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.34"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
......@@ -569,6 +723,15 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
dependencies = [
"cc",
]
[[package]]
name = "fastrand"
version = "2.1.1"
......@@ -743,9 +906,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.15.0"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
[[package]]
name = "heck"
......@@ -759,6 +922,23 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "hf-hub"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732"
dependencies = [
"dirs",
"indicatif",
"log",
"native-tls",
"rand",
"serde",
"serde_json",
"thiserror",
"ureq",
]
[[package]]
name = "http"
version = "0.2.12"
......@@ -767,7 +947,7 @@ checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
dependencies = [
"bytes",
"fnv",
"itoa 1.0.11",
"itoa",
]
[[package]]
......@@ -778,7 +958,7 @@ checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
dependencies = [
"bytes",
"fnv",
"itoa 1.0.11",
"itoa",
]
[[package]]
......@@ -829,7 +1009,7 @@ dependencies = [
"http 1.1.0",
"http-body",
"httparse",
"itoa 1.0.11",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
......@@ -871,9 +1051,9 @@ dependencies = [
[[package]]
name = "hyper-util"
version = "0.1.9"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b"
checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4"
dependencies = [
"bytes",
"futures-channel",
......@@ -888,14 +1068,149 @@ dependencies = [
"tracing",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.5.0"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"unicode-bidi",
"unicode-normalization",
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
......@@ -914,12 +1229,34 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "indoc"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]]
name = "ipnet"
version = "2.10.1"
......@@ -933,10 +1270,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "0.3.4"
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
dependencies = [
"either",
]
[[package]]
name = "itoa"
......@@ -968,18 +1317,40 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4345964bb142484797b161f473a503a434de77149dd8c7427788c6e13379388"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.161"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1"
[[package]]
name = "libredox"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [
"bitflags 2.6.0",
"libc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "litemap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
[[package]]
name = "local-channel"
version = "0.1.5"
......@@ -1013,6 +1384,22 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "macro_rules_attribute"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13"
dependencies = [
"macro_rules_attribute-proc_macro",
"paste",
]
[[package]]
name = "macro_rules_attribute-proc_macro"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
[[package]]
name = "memchr"
version = "2.7.4"
......@@ -1034,6 +1421,12 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.0"
......@@ -1056,6 +1449,27 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "monostate"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e"
dependencies = [
"monostate-impl",
"serde",
]
[[package]]
name = "monostate-impl"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "native-tls"
version = "0.2.12"
......@@ -1074,28 +1488,26 @@ dependencies = [
]
[[package]]
name = "num-conv"
version = "0.1.0"
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.1.43"
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
dependencies = [
"num-traits 0.2.19",
]
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "object"
......@@ -1112,13 +1524,35 @@ version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "onig"
version = "6.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
dependencies = [
"bitflags 1.3.2",
"libc",
"once_cell",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "openssl"
version = "0.10.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5"
dependencies = [
"bitflags",
"bitflags 2.6.0",
"cfg-if",
"foreign-types",
"libc",
......@@ -1156,6 +1590,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "option-ext"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "parking_lot"
version = "0.12.3"
......@@ -1176,7 +1616,7 @@ dependencies = [
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
......@@ -1193,9 +1633,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project-lite"
version = "0.2.14"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff"
[[package]]
name = "pin-utils"
......@@ -1241,9 +1681,9 @@ dependencies = [
[[package]]
name = "pyo3"
version = "0.22.5"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51"
checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
dependencies = [
"cfg-if",
"indoc",
......@@ -1259,9 +1699,9 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
version = "0.22.5"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179"
checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
dependencies = [
"once_cell",
"target-lexicon",
......@@ -1269,9 +1709,9 @@ dependencies = [
[[package]]
name = "pyo3-ffi"
version = "0.22.5"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d"
checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
dependencies = [
"libc",
"pyo3-build-config",
......@@ -1279,9 +1719,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.22.5"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e"
checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
......@@ -1291,9 +1731,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.22.5"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce"
checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
dependencies = [
"heck",
"proc-macro2",
......@@ -1341,20 +1781,62 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-cond"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
dependencies = [
"either",
"itertools 0.11.0",
"rayon",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f"
dependencies = [
"bitflags",
"bitflags 2.6.0",
]
[[package]]
name = "redox_users"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
dependencies = [
"getrandom",
"libredox",
"thiserror",
]
[[package]]
name = "regex"
version = "1.11.0"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
......@@ -1387,11 +1869,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.8"
version = "0.12.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b"
checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f"
dependencies = [
"base64",
"base64 0.22.1",
"bytes",
"encoding_rs",
"futures-core",
......@@ -1462,11 +1944,11 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.37"
version = "0.38.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee"
dependencies = [
"bitflags",
"bitflags 2.6.0",
"errno",
"libc",
"linux-raw-sys",
......@@ -1475,11 +1957,13 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.23.15"
version = "0.23.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993"
checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e"
dependencies = [
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki",
"subtle",
......@@ -1539,7 +2023,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags",
"bitflags 2.6.0",
"core-foundation",
"core-foundation-sys",
"libc",
......@@ -1564,18 +2048,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.213"
version = "1.0.214"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1"
checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.213"
version = "1.0.214"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5"
checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766"
dependencies = [
"proc-macro2",
"quote",
......@@ -1584,13 +2068,13 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.1"
version = "1.0.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c62115693d0a9ed8c32d1c760f0fdbe7d4b05cb13c135b9b54137ac0d59fccb"
checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
dependencies = [
"dtoa",
"itoa 0.3.4",
"num-traits 0.1.43",
"itoa",
"memchr",
"ryu",
"serde",
]
......@@ -1601,7 +2085,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa 1.0.11",
"itoa",
"ryu",
"serde",
]
......@@ -1619,6 +2103,7 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"tokenizers",
]
[[package]]
......@@ -1678,6 +2163,24 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "spm_precompiled"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
dependencies = [
"base64 0.13.1",
"nom",
"serde",
"unicode-segmentation",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.11.1"
......@@ -1692,9 +2195,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "2.0.85"
version = "2.0.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
dependencies = [
"proc-macro2",
"quote",
......@@ -1710,13 +2213,24 @@ dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "system-configuration"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
"bitflags",
"bitflags 2.6.0",
"core-foundation",
"system-configuration-sys",
]
......@@ -1750,6 +2264,26 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "thiserror"
version = "1.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "time"
version = "0.3.36"
......@@ -1757,7 +2291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
dependencies = [
"deranged",
"itoa 1.0.11",
"itoa",
"num-conv",
"powerfmt",
"serde",
......@@ -1782,19 +2316,47 @@ dependencies = [
]
[[package]]
name = "tinyvec"
version = "1.8.0"
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"tinyvec_macros",
"displaydoc",
"zerovec",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
name = "tokenizers"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
checksum = "67b67c92f6d705e2a1d106fb0b28c696f9074901a9c656ee5d9f5de204c39bf7"
dependencies = [
"aho-corasick",
"derive_builder",
"esaxx-rs",
"getrandom",
"hf-hub",
"indicatif",
"itertools 0.12.1",
"lazy_static",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand",
"rayon",
"rayon-cond",
"regex",
"regex-syntax",
"serde",
"serde_json",
"spm_precompiled",
"thiserror",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode_categories",
]
[[package]]
name = "tokio"
......@@ -1885,12 +2447,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-bidi"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893"
[[package]]
name = "unicode-ident"
version = "1.0.13"
......@@ -1898,14 +2454,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "unicode-normalization"
version = "0.1.24"
name = "unicode-normalization-alignments"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
dependencies = [
"tinyvec",
"smallvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unindent"
version = "0.2.3"
......@@ -1918,17 +2492,48 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "ureq"
version = "2.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a"
dependencies = [
"base64 0.22.1",
"flate2",
"log",
"native-tls",
"once_cell",
"rustls",
"rustls-pki-types",
"serde",
"serde_json",
"url",
"webpki-roots",
]
[[package]]
name = "url"
version = "2.5.2"
version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c"
checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
......@@ -2031,9 +2636,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d"
[[package]]
name = "wasm-streams"
version = "0.4.1"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
......@@ -2052,6 +2657,15 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "windows-registry"
version = "0.2.0"
......@@ -2060,7 +2674,7 @@ checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
......@@ -2069,7 +2683,7 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
......@@ -2079,7 +2693,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
......@@ -2088,7 +2711,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
......@@ -2097,7 +2720,22 @@ version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
......@@ -2106,28 +2744,46 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc 0.52.6",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
......@@ -2140,30 +2796,90 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "yoke"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
......@@ -2185,12 +2901,55 @@ dependencies = [
"syn",
]
[[package]]
name = "zerofrom"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zstd"
version = "0.13.2"
......
......@@ -21,5 +21,6 @@ bytes = "1.8.0"
rand = "0.8.5"
reqwest = { version = "0.12.8", features = ["stream"] }
futures-util = "0.3"
serde_json = "=1.0.1"
serde_json = "1.0"
pyo3 = { version = "0.22.5", features = ["extension-module"] }
tokenizers = { version = "0.20.3", features = ["http"] }
import argparse
import os
import signal
import subprocess
import sys
import time
from typing import Dict, List
import requests
from sglang_router import PolicyType, Router
# Global processes list for cleanup
_processes: List[subprocess.Popen] = []
def cleanup_processes(signum=None, frame=None):
    """Kill every launched worker process, then exit with status 1.

    Registered as the SIGINT/SIGTERM handler, which is why it accepts the
    ``(signum, frame)`` pair; neither argument is used.
    """
    print("\nCleaning up processes...")
    for process in _processes:
        try:
            # Kill the entire process group the worker belongs to
            pgid = os.getpgid(process.pid)
            os.killpg(pgid, signal.SIGKILL)
            process.wait()
        except OSError:
            # Best-effort cleanup: the worker may already be gone
            # (ProcessLookupError) or not be ours to signal (PermissionError).
            # Only OS-level failures are ignored so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass
    sys.exit(1)
# Register signal handlers
signal.signal(signal.SIGINT, cleanup_processes)
signal.signal(signal.SIGTERM, cleanup_processes)
def parse_args():
    """Build the launcher's CLI parser and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` with ``host``, ``port``, ``dp``,
    ``model_path`` and ``local_tokenizer_path``.
    """
    parser = argparse.ArgumentParser(description="Launch SGLang Router Server")

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = [
        (
            "--host",
            dict(type=str, default="localhost", help="Host address to bind the server"),
        ),
        (
            "--port",
            dict(type=int, default=30000, help="Base port number for workers"),
        ),
        (
            "--dp",
            dict(
                type=int,
                default=2,
                help="Number of worker processes (degree of parallelism)",
            ),
        ),
        (
            "--model-path",
            dict(type=str, required=True, help="Path to the model"),
        ),
        (
            "--local-tokenizer-path",
            dict(type=str, required=True, help="Path to the local tokenizer"),
        ),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
def launch_workers(args) -> tuple[List[subprocess.Popen], List[str]]:
    """Launch all worker processes concurrently using subprocess.

    One ``sglang.launch_server`` process is started per data-parallel rank
    ``i`` in ``range(args.dp)``. Rank ``i`` listens on ``args.port + i`` and is
    given ``CUDA_VISIBLE_DEVICES=i``.

    Returns:
        ``(processes, worker_urls)``: the ``Popen`` handles and the matching
        ``http://host:port`` URLs, in rank order. Every handle is also appended
        to the module-level ``_processes`` list.
    """
    processes = []
    worker_urls = []

    # Launch each worker process
    for i in range(args.dp):
        port = args.port + i
        url = f"http://{args.host}:{port}"
        worker_urls.append(url)

        # TODO: replace this with launch_server, and move this file to sglang/ because it depends on sglang
        # Pass an argv list instead of a shell string: paths containing spaces
        # or shell metacharacters are forwarded verbatim, and the Popen handle
        # refers to the server process itself rather than an intermediate shell.
        argv = [
            "python",
            "-m",
            "sglang.launch_server",
            "--model-path",
            str(args.model_path),
            "--host",
            str(args.host),
            "--port",
            str(port),
        ]
        env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(i))
        print(f"CUDA_VISIBLE_DEVICES={i} {' '.join(argv)}")

        # start_new_session puts the worker in its own process group, so a
        # group-wide kill of the worker cannot also hit this launcher.
        process = subprocess.Popen(argv, env=env, start_new_session=True)
        processes.append(process)
        _processes.append(process)  # Add to global list for cleanup

    return processes, worker_urls
def wait_for_healthy_workers(
    worker_urls: List[str], timeout: int = 300, request_timeout: float = 5.0
) -> bool:
    """Block until all workers are healthy or timeout is reached.

    Args:
        worker_urls: Base URLs of the workers; ``<url>/health`` is polled.
        timeout: Overall budget in seconds for all workers to become healthy.
        request_timeout: Per-request timeout in seconds. Without it a worker
            that accepts the connection but never answers would block the
            poll indefinitely and defeat ``timeout``.

    Returns:
        True once every worker has answered ``/health`` with HTTP 200, False
        if the overall timeout elapsed first.
    """
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    healthy_workers: Dict[str, bool] = {url: False for url in worker_urls}

    while time.monotonic() < deadline:
        print("checking healthiness...")

        for url in worker_urls:
            if healthy_workers[url]:
                # Already confirmed healthy; do not poll it again.
                continue
            try:
                response = requests.get(f"{url}/health", timeout=request_timeout)
            except requests.RequestException:
                # Not reachable (yet); retry on the next round.
                continue
            if response.status_code == 200:
                print(f"Worker at {url} is healthy")
                healthy_workers[url] = True

        if all(healthy_workers.values()):
            print("All workers are healthy!")
            return True

        time.sleep(5)

    # If we get here, we've timed out
    unhealthy_workers = [url for url, healthy in healthy_workers.items() if not healthy]
    print(f"Timeout waiting for workers: {unhealthy_workers}")
    return False
def main():
    """Entry point: start the workers, wait for them, then run the router.

    Any exception raised along the way is printed rather than propagated, and
    the worker processes returned by ``launch_workers`` are killed on exit.
    """
    args = parse_args()
    processes = None

    try:
        # Spawn every worker up front; they start concurrently.
        processes, worker_urls = launch_workers(args)

        # Do not start routing until each worker reports healthy.
        workers_ready = wait_for_healthy_workers(worker_urls)
        if not workers_ready:
            raise RuntimeError("Failed to start all workers")

        # Build the router over the healthy workers and start it.
        router = Router(
            worker_urls=worker_urls,
            policy=PolicyType.ApproxTree,
            tokenizer_path=args.local_tokenizer_path,
        )
        print("Starting router...")
        router.start()

        # Park the main thread until the user interrupts.
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            print("\nShutting down...")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Nothing to kill when launch_workers never returned.
        for process in processes or []:
            process.kill()
if __name__ == "__main__":
main()
# Minimal example: route across two already-running workers with the
# ApproxTree policy.
from sglang_router import PolicyType, Router
router = Router(
# URLs of the worker instances; they must be launched separately.
worker_urls=[
"http://localhost:30000",
"http://localhost:30001",
],
policy=PolicyType.ApproxTree,
# NOTE(review): absolute, machine-specific path to a tokenizer.json;
# replace it with a tokenizer file available on your machine.
tokenizer_path="/shared/public/elr-models/meta-llama/Meta-Llama-3.1-8B-Instruct/07eb05b21d191a58c577b4a45982fe0c049d0693/tokenizer.json",
)
# Start the router with the configuration above.
router.start()
......@@ -2,6 +2,11 @@
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
## Architecture
1. `src/`: rust impl of the router
2. `py_src/`: lightweight Python interface on top of the Rust Python binding. This will be published as the `sglang-router` PyPI package
## Installation
WIP. Ideally just
......@@ -83,6 +88,23 @@ $ maturin develop
🛠 Installed sglang_router-0.0.0
```
4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it
```bash
$ maturin build --interpreter python
...
Compiling pyo3 v0.22.6
Compiling pyo3-macros v0.22.6
Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s
🖨 Copied external shared libraries to package sglang_router.libs directory:
/usr/lib/libssl.so.1.1.1k
/usr/lib/libcrypto.so.1.1.1k
📦 Built wheel for CPython 3.10 to <wheel path>
$ pip install <wheel path>
```
## Usage
1. Launch worker instances
......
sglang @ 760552e0
Subproject commit 760552e068edb58d9cd6e68aa1b714c247027d92
// Python Binding
use pyo3::prelude::*;
pub mod router;
mod server;
pub mod tree;
// Python binding
/// Routing policy exposed to Python as a class; passed as the `policy`
/// argument when constructing a `Router`.
#[pyclass(eq)]
#[derive(Clone, PartialEq)]
pub enum PolicyType {
/// Mapped to `router::PolicyConfig::RandomConfig` when the router starts.
Random,
/// Mapped to `router::PolicyConfig::RoundRobinConfig` when the router starts.
RoundRobin,
/// Mapped to `router::PolicyConfig::ApproxTreeConfig`; `Router::new` rejects
/// this policy with a `ValueError` when no `tokenizer_path` is given.
ApproxTree,
}
#[pyclass]
struct Router {
host: String,
port: u16,
worker_urls: Vec<String>,
policy: String,
policy: PolicyType,
tokenizer_path: Option<String>,
cache_threshold: Option<f32>,
}
#[pymethods]
impl Router {
#[new]
fn new(host: String, port: u16, worker_urls: Vec<String>, policy: String) -> Self {
Router {
#[pyo3(signature = (
worker_urls,
policy = PolicyType::RoundRobin,
host = String::from("127.0.0.1"),
port = 3001,
tokenizer_path = None,
cache_threshold = Some(0.50)
))]
fn new(
worker_urls: Vec<String>,
policy: PolicyType,
host: String,
port: u16,
tokenizer_path: Option<String>,
cache_threshold: Option<f32>,
) -> PyResult<Self> {
// Validate required parameters for approx_tree policy
if matches!(policy, PolicyType::ApproxTree) {
if tokenizer_path.is_none() {
return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
"tokenizer_path is required for approx_tree policy",
));
}
}
Ok(Router {
host,
port,
worker_urls,
policy,
}
tokenizer_path,
cache_threshold,
})
}
fn start(&self) -> PyResult<()> {
let host = self.host.clone();
let port = self.port;
let worker_urls = self.worker_urls.clone();
let policy = self.policy.clone();
let policy_config = match &self.policy {
PolicyType::Random => router::PolicyConfig::RandomConfig,
PolicyType::RoundRobin => router::PolicyConfig::RoundRobinConfig,
PolicyType::ApproxTree => router::PolicyConfig::ApproxTreeConfig {
tokenizer_path: self
.tokenizer_path
.clone()
.expect("tokenizer_path is required for approx_tree policy"),
cache_threshold: self
.cache_threshold
.expect("cache_threshold is required for approx_tree policy"),
},
};
actix_web::rt::System::new().block_on(async move {
server::startup(host, port, worker_urls, policy)
server::startup(host, port, worker_urls, policy_config)
.await
.unwrap();
});
......@@ -40,9 +89,9 @@ impl Router {
}
}
// python usage: `from sglang_router import Router`
/// Defines the `sglang_router` Python extension module and registers the
/// `PolicyType` and `Router` classes on it.
#[pymodule]
fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PolicyType>()?;
m.add_class::<Router>()?;
Ok(())
}
// src/main.rs
use clap::builder::PossibleValuesParser;
use clap::Parser;
use clap::ValueEnum;
// declare child modules
mod router;
mod server;
mod tree;
use crate::router::PolicyConfig;
/// Load-balancing policy accepted by the `--policy` command-line option.
///
/// NOTE(review): clap's `ValueEnum` derive presumably spells these values in
/// kebab-case (e.g. `approx-tree`), while the `--policy` help text and the
/// `required_if_eq("policy", "approx_tree")` attributes in `Args` use
/// snake_case — confirm the spellings agree.
#[derive(Debug, Clone, ValueEnum)]
pub enum PolicyType {
/// Converted to `PolicyConfig::RandomConfig`.
Random,
/// Converted to `PolicyConfig::RoundRobinConfig`.
RoundRobin,
/// Converted to `PolicyConfig::ApproxTreeConfig`, which additionally needs
/// `tokenizer_path` and `cache_threshold`.
ApproxTree,
}
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
#[arg(long, default_value = "127.0.0.1")]
#[arg(
long,
default_value = "127.0.0.1",
help = "Host address to bind the server to"
)]
host: String,
#[arg(long, default_value_t = 3001)]
#[arg(long, default_value_t = 3001, help = "Port number to listen on")]
port: u16,
#[arg(long, value_delimiter = ',')]
#[arg(
long,
value_delimiter = ',',
help = "Comma-separated list of worker URLs to distribute requests to"
)]
worker_urls: Vec<String>,
#[arg(long, default_value = "round_robin", value_parser = PossibleValuesParser::new(&["round_robin", "random"]))]
policy: String,
#[arg(
long,
default_value_t = PolicyType::RoundRobin,
value_enum,
help = "Load balancing policy to use: random, round_robin, or approx_tree"
)]
policy: PolicyType,
#[arg(
long,
requires = "policy",
required_if_eq("policy", "approx_tree"),
help = "Path to the tokenizer file, required when using approx_tree policy"
)]
tokenizer_path: Option<String>,
#[arg(
long,
default_value = "0.50",
requires = "policy",
required_if_eq("policy", "approx_tree"),
help = "Cache threshold (0.0-1.0) for approx_tree routing. Routes to cached worker if match rate exceeds threshold, otherwise routes to shortest queue worker"
)]
cache_threshold: Option<f32>,
}
impl Args {
    /// Converts the parsed command-line options into the router's
    /// `PolicyConfig`.
    ///
    /// # Panics
    ///
    /// Panics when the policy is `ApproxTree` and `tokenizer_path` or
    /// `cache_threshold` is `None`.
    fn get_policy_config(&self) -> PolicyConfig {
        match &self.policy {
            PolicyType::RoundRobin => PolicyConfig::RoundRobinConfig,
            PolicyType::Random => PolicyConfig::RandomConfig,
            PolicyType::ApproxTree => {
                // The optional settings are only required, and only unwrapped,
                // for this policy.
                let tokenizer_path = self
                    .tokenizer_path
                    .clone()
                    .expect("tokenizer_path is required for approx_tree policy");
                let cache_threshold = self
                    .cache_threshold
                    .expect("cache_threshold is required for approx_tree policy");
                PolicyConfig::ApproxTreeConfig {
                    tokenizer_path,
                    cache_threshold,
                }
            }
        }
    }
}
#[actix_web::main]
async fn main() -> std::io::Result<()> {
let args = Args::parse();
server::startup(args.host, args.port, args.worker_urls, args.policy).await
let policy_config = args.get_policy_config();
server::startup(args.host, args.port, args.worker_urls, policy_config).await
}
use crate::tree::RadixTree;
use actix_web::http::header::{HeaderValue, CONTENT_TYPE};
use actix_web::{HttpRequest, HttpResponse};
use bytes::Bytes;
use futures_util::TryStreamExt;
use std::collections::HashMap;
use std::fmt::Debug;
use std::sync::atomic::AtomicUsize;
use std::sync::{Arc, Mutex};
use tokenizers::tokenizer::Tokenizer;
#[derive(Debug)]
pub enum Router {
RoundRobin {
worker_urls: Vec<String>,
current_index: std::sync::atomic::AtomicUsize,
current_index: AtomicUsize,
},
Random {
worker_urls: Vec<String>,
},
ApproxTree {
worker_urls: Vec<String>,
// TODO: don't lock the whole tree
url_to_tree: Arc<Mutex<HashMap<String, RadixTree>>>,
tokenizer: Tokenizer,
url_to_count: Arc<Mutex<HashMap<String, usize>>>,
cache_threshold: f32,
},
}
/// Policy selection plus the policy-specific settings consumed by
/// `Router::new`.
pub enum PolicyConfig {
/// Builds `Router::Random`.
RandomConfig,
/// Builds `Router::RoundRobin`.
RoundRobinConfig,
/// Builds `Router::ApproxTree`.
ApproxTreeConfig {
/// Path handed to `Tokenizer::from_file` to load the tokenizer.
tokenizer_path: String,
/// Threshold the best prefix-match rate must exceed for the matching
/// worker to be chosen; otherwise the worker with the smallest request
/// count is used.
cache_threshold: f32,
},
}
/// Tokenizes the `"text"` field of a JSON request body.
///
/// The body comes straight from the client, so nothing here may panic:
/// - a body that is not valid JSON, or that has no string `"text"` field, is
///   treated as empty text;
/// - a tokenizer failure yields an empty id list.
///
/// Special tokens are not added (`encode(text, false)`).
fn get_token_ids_from_request(body: &Bytes, tokenizer: &Tokenizer) -> Vec<u32> {
    // 1. convert body to json; malformed input degrades to `Null`, which has
    //    no "text" field
    let json = serde_json::from_slice::<serde_json::Value>(body)
        .unwrap_or(serde_json::Value::Null);

    // 2. get the text field
    let text = json.get("text").and_then(|t| t.as_str()).unwrap_or("");

    // 3. tokenize the text field
    tokenizer
        .encode(text, false)
        .map(|encoding| encoding.get_ids().to_vec())
        .unwrap_or_default()
}
impl Router {
pub fn new(worker_urls: Vec<String>, policy: String) -> Self {
match policy.to_lowercase().as_str() {
"random" => Router::Random { worker_urls },
"round_robin" => Router::RoundRobin {
pub fn new(worker_urls: Vec<String>, policy_config: PolicyConfig) -> Self {
match policy_config {
PolicyConfig::RandomConfig => Router::Random { worker_urls },
PolicyConfig::RoundRobinConfig => Router::RoundRobin {
worker_urls,
current_index: std::sync::atomic::AtomicUsize::new(0),
},
_ => panic!(
"Unknown routing policy: {}. The available policies are 'random' and 'round_robin'",
policy
),
PolicyConfig::ApproxTreeConfig {
tokenizer_path,
cache_threshold,
} => {
let mut url_to_tree = HashMap::new();
let mut url_to_count = HashMap::new();
for url in &worker_urls {
url_to_tree.insert(url.clone(), RadixTree::new());
url_to_count.insert(url.clone(), 0);
}
Router::ApproxTree {
worker_urls,
url_to_tree: Arc::new(Mutex::new(url_to_tree)),
// TODO: rust ::from_pretrained cannot load from local file, so use ::from_file to load local file
tokenizer: Tokenizer::from_file(tokenizer_path).unwrap(),
url_to_count: Arc::new(Mutex::new(url_to_count)),
cache_threshold,
}
}
}
}
pub fn get_first(&self) -> Option<String> {
match self {
Router::RoundRobin { worker_urls, .. } | Router::Random { worker_urls } => {
Router::RoundRobin { worker_urls, .. }
| Router::Random { worker_urls }
| Router::ApproxTree { worker_urls, .. } => {
if worker_urls.is_empty() {
None
} else {
......@@ -48,26 +100,96 @@ impl Router {
req: HttpRequest,
body: Bytes,
) -> HttpResponse {
let mut input_ids: Vec<u32> = Vec::new();
if let Router::ApproxTree { tokenizer, .. } = self {
input_ids = get_token_ids_from_request(&body, tokenizer);
}
let worker_url = match self {
Router::RoundRobin {
worker_urls,
current_index,
} => {
current_index
let idx = current_index
.fetch_update(
std::sync::atomic::Ordering::SeqCst,
std::sync::atomic::Ordering::SeqCst,
|x| Some((x + 1) % worker_urls.len()),
)
.expect_err("Error updating index in round robin");
.unwrap();
&worker_urls[current_index.load(std::sync::atomic::Ordering::SeqCst)]
worker_urls[idx].clone()
}
Router::Random { worker_urls } => {
&worker_urls[rand::random::<usize>() % worker_urls.len()]
worker_urls[rand::random::<usize>() % worker_urls.len()].clone()
}
Router::ApproxTree {
worker_urls,
url_to_tree,
url_to_count,
cache_threshold,
..
} => {
// TODO: pipeline the locks. Release one earlier.
let mut max_matched_rate = 0.0;
let mut max_matched_idx = 0;
let locked_url_to_tree = url_to_tree.lock().unwrap();
// 1. Find the highest matched worker
for (i, url) in worker_urls.iter().enumerate() {
let tree = locked_url_to_tree.get(url).unwrap();
let matched = tree.prefix_match(&input_ids[..]).len();
let matched_rate = matched as f32 / input_ids.len() as f32;
if matched_rate > max_matched_rate {
max_matched_rate = matched_rate;
max_matched_idx = i;
}
}
// 2. If the rate is higher than the threshold, select the worker. If not, select the worker with the shortest queue
if max_matched_rate > *cache_threshold {
worker_urls[max_matched_idx].clone()
} else {
// pick the shortest queue from url_to_count
let locked_url_to_count = url_to_count.lock().unwrap();
let mut min_count = std::usize::MAX;
let mut min_count_id = 0;
for (i, url) in worker_urls.iter().enumerate() {
let count = locked_url_to_count.get(url).unwrap();
if *count < min_count {
min_count = *count;
min_count_id = i;
}
}
worker_urls[min_count_id].clone()
}
}
};
if let Router::ApproxTree {
url_to_tree,
url_to_count,
..
} = self
{
// Insert input_ids to the tree
let mut locked_url_to_tree = url_to_tree.lock().unwrap();
let selected_tree = locked_url_to_tree.get_mut(&worker_url).unwrap();
selected_tree.insert(&input_ids[..]);
let mut locked_url_to_count = url_to_count.lock().unwrap();
let count = locked_url_to_count.get_mut(&worker_url).unwrap();
*count += 1;
}
// Check if client requested streaming
let is_stream = serde_json::from_slice::<serde_json::Value>(&body)
.map(|v| v.get("stream").and_then(|s| s.as_bool()).unwrap_or(false))
......@@ -94,11 +216,19 @@ impl Router {
.unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
if !is_stream {
// TODO: do the correction on the tree based on the cached input_ids
if let Router::ApproxTree { url_to_count, .. } = self {
let mut locked_url_to_count = url_to_count.lock().unwrap();
let count = locked_url_to_count.get_mut(&worker_url).unwrap();
*count -= 1;
}
match res.bytes().await {
Ok(body) => HttpResponse::build(status).body(body.to_vec()),
Err(_) => HttpResponse::InternalServerError().finish(),
}
} else {
// TODO: do the correction on the tree based on the cached input_ids. The streaming might be tricker to handle
HttpResponse::build(status)
.insert_header((CONTENT_TYPE, HeaderValue::from_static("text/event-stream")))
.streaming(res.bytes_stream().map_err(|_| {
......
use crate::router::PolicyConfig;
use crate::router::Router;
use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder};
use bytes::Bytes;
......@@ -9,9 +10,13 @@ pub struct AppState {
}
impl AppState {
pub fn new(worker_urls: Vec<String>, policy: String, client: reqwest::Client) -> Self {
pub fn new(
worker_urls: Vec<String>,
client: reqwest::Client,
policy_config: PolicyConfig,
) -> Self {
// Create router based on policy
let router = Router::new(worker_urls, policy);
let router = Router::new(worker_urls, policy_config);
Self { router, client }
}
......@@ -40,7 +45,6 @@ async fn forward_request(
#[get("/v1/models")]
async fn v1_model(data: web::Data<AppState>) -> impl Responder {
// TODO: extract forward_to_route
let worker_url = match data.router.get_first() {
Some(url) => url,
None => return HttpResponse::InternalServerError().finish(),
......@@ -59,7 +63,6 @@ async fn get_model_info(data: web::Data<AppState>) -> impl Responder {
forward_request(&data.client, worker_url, "/get_model_info".to_string()).await
}
// no deser and ser, just forward and return
#[post("/generate")]
async fn generate(req: HttpRequest, body: Bytes, data: web::Data<AppState>) -> impl Responder {
data.router.dispatch(&data.client, req, body).await
......@@ -69,7 +72,7 @@ pub async fn startup(
host: String,
port: u16,
worker_urls: Vec<String>,
routing_policy: String,
policy_config: PolicyConfig,
) -> std::io::Result<()> {
println!("Starting server on {}:{}", host, port);
println!("Worker URLs: {:?}", worker_urls);
......@@ -80,7 +83,7 @@ pub async fn startup(
.expect("Failed to create HTTP client");
// Store both worker_urls and client in AppState
let app_state = web::Data::new(AppState::new(worker_urls, routing_policy, client));
let app_state = web::Data::new(AppState::new(worker_urls, client, policy_config));
HttpServer::new(move || {
App::new()
......
use std::collections::HashMap;
use std::mem;
/// A single node of the radix tree.
#[derive(Clone, Debug)]
pub struct Node {
    /// Child nodes, keyed by the first id of each child; every child of a
    /// node must start with a distinct id, so that id identifies it uniquely.
    pub children: HashMap<u32, Node>,
    /// Ids stored on this node.
    pub ids: Vec<u32>,
    /// Counter kept on the node; `RadixTree::insert` increments it on the
    /// root and `RadixTree::delete` decrements it.
    pub count: u32,
}
/// Radix tree handle. `insert`, `prefix_match` and `delete` all begin their
/// traversal at `root`.
#[derive(Debug)]
pub struct RadixTree {
// Top-level node of the tree; its `count` is bumped on every insert and
// decremented on every delete.
pub root: Node,
}
fn common_prefix_len(a: &[usize], b: &[usize]) -> usize {
fn common_prefix_len(a: &[u32], b: &[u32]) -> usize {
let mut i = 0;
while i < a.len() && i < b.len() && a[i] == b[i] {
i += 1;
......@@ -37,7 +38,7 @@ impl RadixTree {
}
}
pub fn insert(&mut self, input_ids: &[usize]) {
pub fn insert(&mut self, input_ids: &[u32]) {
let mut curr = &mut self.root;
curr.count += 1;
......@@ -93,7 +94,7 @@ impl RadixTree {
}
}
pub fn prefix_match<'a>(&self, input_ids: &'a [usize]) -> &'a [usize] {
pub fn prefix_match<'a>(&self, input_ids: &'a [u32]) -> &'a [u32] {
let mut curr = &self.root;
let mut curr_idx = 0;
......@@ -121,7 +122,7 @@ impl RadixTree {
&input_ids[..curr_idx]
}
pub fn delete(&mut self, input_ids: &[usize]) {
pub fn delete(&mut self, input_ids: &[u32]) {
let mut curr = &mut self.root;
curr.count -= 1;
......
......@@ -67,7 +67,7 @@ fn test_prefix_match_partial() {
// A query that shares no leading id with the stored sequence must yield an
// empty match.
fn test_prefix_match_no_match() {
    let mut tree = RadixTree::new();
    tree.insert(&[1, 2, 3, 4]);
    // Typed binding so the comparison is against an empty `&[u32]` slice.
    let empty_slices: &[u32] = &[];
    assert_eq!(tree.prefix_match(&[5, 6, 7]), empty_slices);
}
......@@ -124,7 +124,7 @@ fn test_delete_nonexistent() {
#[test]
fn test_empty_input() {
let mut tree = RadixTree::new();
let empty_slice: &[usize] = &[];
let empty_slice: &[u32] = &[];
tree.insert(empty_slice);
assert_eq!(tree.prefix_match(empty_slice), empty_slice);
tree.delete(empty_slice); // Should not panic
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment