Unverified commit e520d5b3, authored by Yannic Kilcher and committed by GitHub
parent 1ad3250b
@@ -38,7 +38,7 @@ to power LLMs api-inference widgets.
 ## Features
 
-- Token streaming using Server Side Events (SSE)
+- Token streaming using Server-Sent Events (SSE)
 - [Dynamic batching of incoming requests](https://github.com/huggingface/text-generation-inference/blob/main/router/src/batcher.rs#L88) for increased total throughput
 - Quantization with [bitsandbytes](https://github.com/TimDettmers/bitsandbytes)
 - [Safetensors](https://github.com/huggingface/safetensors) weight loading
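The renamed bullet above is the router's token streaming over Server-Sent Events. As a rough sketch of what a client of that stream can look like (not part of this commit), the snippet below POSTs to `/generate_stream` and prints each `data:` frame; the `localhost:3000` address and the request fields are assumptions based on the OpenAPI fragment further down, and a real client would buffer chunks that split an event across reads.

```rust
// Hypothetical client for the streaming endpoint; assumes the router is
// reachable on localhost:3000 and accepts the GenerateRequest shape from
// the OpenAPI fragment below. Requires reqwest with the "json" and
// "stream" features, plus tokio and futures-util.
use futures_util::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let response = reqwest::Client::new()
        .post("http://localhost:3000/generate_stream")
        .json(&serde_json::json!({
            "inputs": "What is Deep Learning?",
            "parameters": { "max_new_tokens": 20 }
        }))
        .send()
        .await?;

    // SSE rides on a chunked HTTP response: each event is one or more
    // "data: ..." lines followed by a blank line. A robust client would
    // buffer across chunk boundaries; this sketch assumes whole lines.
    let mut body = response.bytes_stream();
    while let Some(chunk) = body.next().await {
        for line in std::str::from_utf8(&chunk?)?.lines() {
            if let Some(payload) = line.strip_prefix("data:") {
                println!("{}", payload.trim());
            }
        }
    }
    Ok(())
}
```

Running `curl -N` against the same endpoint shows the raw `data:` frames directly, which is a quick way to confirm the framing without a client.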
@@ -119,8 +119,8 @@
       "tags": [
         "Text Generation Inference"
       ],
-      "summary": "Generate a stream of token using Server Side Events",
-      "description": "Generate a stream of token using Server Side Events",
+      "summary": "Generate a stream of token using Server-Sent Events",
+      "description": "Generate a stream of token using Server-Sent Events",
       "operationId": "generate_stream",
       "requestBody": {
         "content": {
@@ -151,7 +151,7 @@ async fn generate(
     Ok((headers, Json(response)))
 }
 
-/// Generate a stream of token using Server Side Events
+/// Generate a stream of token using Server-Sent Events
 #[utoipa::path(
     post,
     tag = "Text Generation Inference",
@@ -199,7 +199,7 @@ async fn generate_stream(
     match infer.generate_stream(req.0).await {
         Ok(mut response_stream) => {
-            // Server Side Event stream
+            // Server-Sent Event stream
             while let Some(response) = response_stream.next().await {
                 match response {
                     Ok(response) => {
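For context around the loop in this last hunk: once the handler has a token stream, axum can serve it as Server-Sent Events. A minimal sketch under assumed types (plain `String` tokens and an infallible stream), not the router's actual implementation:

```rust
// Hypothetical reduction of an SSE handler in axum: each token from an
// async stream becomes one "data: ..." event frame on the wire. The
// String item type and Infallible error are assumptions for the sketch.
use std::convert::Infallible;

use axum::response::sse::{Event, KeepAlive, Sse};
use futures_util::stream::{Stream, StreamExt};

fn as_sse(
    tokens: impl Stream<Item = String> + Send + 'static,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
    // Map every token to an SSE event; axum handles the wire framing.
    let events = tokens.map(|token| Ok::<_, Infallible>(Event::default().data(token)));
    // Keep-alive comments stop proxies from closing an idle stream.
    Sse::new(events).keep_alive(KeepAlive::default())
}
```

Wired into a router, a function like this would back a streaming handler in the same spirit as the `generate_stream` shown in the diff.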