"...git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "a474a6f5dbb378a67c7c2692592072c6d2338baf"
Unverified Commit 16310b26 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

refactor: refactor dynamo serve part-1/N (#788)


Co-authored-by: default avatarishandhanani <ishandhanani@gmail.com>
parent dbdbd5e5
...@@ -81,7 +81,7 @@ Now you should see both workers are ready in Node 1's terminal. ...@@ -81,7 +81,7 @@ Now you should see both workers are ready in Node 1's terminal.
- `Response: {"worker_output":"Which team won 2020 World Series_GeneratedBy_NODE2HOSTNAME","request_id":"id_number"}` - `Response: {"worker_output":"Which team won 2020 World Series_GeneratedBy_NODE2HOSTNAME","request_id":"id_number"}`
``` ```
curl -X 'POST' \ curl -X 'POST' \
'http://localhost:3000/generate' \ 'http://localhost:8000/generate' \
-H 'accept: text/event-stream' \ -H 'accept: text/event-stream' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
-d '{ -d '{
...@@ -89,7 +89,7 @@ curl -X 'POST' \ ...@@ -89,7 +89,7 @@ curl -X 'POST' \
"request_id":"id_number" "request_id":"id_number"
}' }'
curl -X 'POST' \ curl -X 'POST' \
'http://localhost:3000/generate' \ 'http://localhost:8000/generate' \
-H 'accept: text/event-stream' \ -H 'accept: text/event-stream' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
-d '{ -d '{
...@@ -100,7 +100,7 @@ curl -X 'POST' \ ...@@ -100,7 +100,7 @@ curl -X 'POST' \
6. Then modify the prompt and you will notice that prompts with a similar prefix are routed to the same worker due to the simple routing algorithm used in this demo. For example, the following query will be routed to the worker that processed the "Tell me a joke" prompt. 6. Then modify the prompt and you will notice that prompts with a similar prefix are routed to the same worker due to the simple routing algorithm used in this demo. For example, the following query will be routed to the worker that processed the "Tell me a joke" prompt.
``` ```
curl -X 'POST' \ curl -X 'POST' \
'http://localhost:3000/generate' \ 'http://localhost:8000/generate' \
-H 'accept: text/event-stream' \ -H 'accept: text/event-stream' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
-d '{ -d '{
...@@ -134,7 +134,7 @@ dynamo serve components.prefill_worker:PrefillWorker ...@@ -134,7 +134,7 @@ dynamo serve components.prefill_worker:PrefillWorker
3. Query the Frontend. This time the decode workers push requests to the prefill queue, and the prefill worker pulls tasks from the queue to simulate the prefill task. The actual prefill is skipped in this demo. 3. Query the Frontend. This time the decode workers push requests to the prefill queue, and the prefill worker pulls tasks from the queue to simulate the prefill task. The actual prefill is skipped in this demo.
``` ```
curl -X 'POST' \ curl -X 'POST' \
'http://localhost:3000/generate' \ 'http://localhost:8000/generate' \
-H 'accept: text/event-stream' \ -H 'accept: text/event-stream' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
-d '{ -d '{
......
...@@ -19,16 +19,21 @@ import sys ...@@ -19,16 +19,21 @@ import sys
from components.processor import Processor from components.processor import Processor
from components.utils import GeneralRequest from components.utils import GeneralRequest
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from dynamo.sdk import api, depends, service from dynamo.sdk import depends, dynamo_endpoint, service
from dynamo.sdk.lib.image import DYNAMO_IMAGE from dynamo.sdk.lib.image import DYNAMO_IMAGE
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
app = FastAPI(title="Hello World LLM")
@service( @service(
workers=1, dynamo={"enabled": True, "namespace": "dynamo-demo"},
image=DYNAMO_IMAGE, image=DYNAMO_IMAGE,
app=app,
) )
class Frontend: class Frontend:
processor = depends(Processor) processor = depends(Processor)
...@@ -41,12 +46,16 @@ class Frontend: ...@@ -41,12 +46,16 @@ class Frontend:
logger.debug(f"Received signal {signum}, shutting down...") logger.debug(f"Received signal {signum}, shutting down...")
sys.exit(0) sys.exit(0)
@api @dynamo_endpoint(is_api=True)
async def generate(self, prompt, request_id): # from request body keys async def generate(self, prompt, request_id): # from request body keys
"""Stream results from the pipeline.""" """Stream results from the pipeline."""
logger.info(f"Received: {prompt=},{request_id=}") logger.info(f"Received: {prompt=},{request_id=}")
frontend_request = GeneralRequest(
prompt=prompt, request_id=request_id async def content_generator():
).model_dump_json() frontend_request = GeneralRequest(
async for response in self.processor.processor_generate(frontend_request): prompt=prompt, request_id=request_id
yield f"Response: {response}\n" ).model_dump_json()
async for response in self.processor.processor_generate(frontend_request):
yield f"Response: {response}\n"
return StreamingResponse(content_generator())
...@@ -15,9 +15,11 @@ ...@@ -15,9 +15,11 @@
import logging import logging
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel from pydantic import BaseModel
from dynamo.sdk import DYNAMO_IMAGE, api, depends, dynamo_endpoint, service from dynamo.sdk import DYNAMO_IMAGE, depends, dynamo_endpoint, service
from dynamo.sdk.lib.config import ServiceConfig from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.logging import configure_server_logging from dynamo.sdk.lib.logging import configure_server_logging
...@@ -102,10 +104,17 @@ class Middle: ...@@ -102,10 +104,17 @@ class Middle:
yield f"Middle: {response}" yield f"Middle: {response}"
app = FastAPI(title="Hello World!")
@service( @service(
dynamo={"enabled": True, "namespace": "inference"},
image=DYNAMO_IMAGE, image=DYNAMO_IMAGE,
) # Regular HTTP API app=app,
)
class Frontend: class Frontend:
"""A simple frontend HTTP API that forwards requests to the dynamo graph."""
middle = depends(Middle) middle = depends(Middle)
def __init__(self) -> None: def __init__(self) -> None:
...@@ -119,12 +128,13 @@ class Frontend: ...@@ -119,12 +128,13 @@ class Frontend:
logger.info(f"Frontend config message: {self.message}") logger.info(f"Frontend config message: {self.message}")
logger.info(f"Frontend config port: {self.port}") logger.info(f"Frontend config port: {self.port}")
@api @dynamo_endpoint(is_api=True)
async def generate(self, text): async def generate(self, request: RequestType):
"""Stream results from the pipeline.""" """Stream results from the pipeline."""
logger.info(f"Frontend received: {text}") logger.info(f"Frontend received: {request.text}")
logger.info(f"Frontend received type: {type(text)}")
txt = RequestType(text=text) async def content_generator():
logger.info(f"Frontend sending: {type(txt)}") async for response in self.middle.generate(request.model_dump_json()):
async for response in self.middle.generate(txt.model_dump_json()): yield f"Frontend: {response}"
yield f"Frontend: {response}"
return StreamingResponse(content_generator())
...@@ -19,6 +19,7 @@ from pathlib import Path ...@@ -19,6 +19,7 @@ from pathlib import Path
from components.processor import Processor from components.processor import Processor
from components.worker import VllmWorker from components.worker import VllmWorker
from fastapi import FastAPI
from pydantic import BaseModel from pydantic import BaseModel
from dynamo import sdk from dynamo import sdk
...@@ -49,9 +50,14 @@ class FrontendConfig(BaseModel): ...@@ -49,9 +50,14 @@ class FrontendConfig(BaseModel):
# todo this should be called ApiServer # todo this should be called ApiServer
@service( @service(
dynamo={
"enabled": True,
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"}, resources={"cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
image=DYNAMO_IMAGE, image=DYNAMO_IMAGE,
app=FastAPI(title="LLM Example"),
) )
class Frontend: class Frontend:
worker = depends(VllmWorker) worker = depends(VllmWorker)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment