Unverified commit 960dc896, authored by Biswa Panda, committed by GitHub
Browse files

fix: replace hard coded dynamo namespace with env var (#3048)


Signed-off-by: Biswa Panda <biswa.panda@gmail.com>
parent f77511ff
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import argparse import argparse
import logging import logging
import os
import sys import sys
from typing import Optional from typing import Optional
...@@ -17,7 +18,8 @@ from dynamo.runtime.logging import configure_dynamo_logging ...@@ -17,7 +18,8 @@ from dynamo.runtime.logging import configure_dynamo_logging
from . import __version__ from . import __version__
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
configure_dynamo_logging() configure_dynamo_logging()
......
...@@ -20,7 +20,9 @@ from dynamo.sglang import __version__ ...@@ -20,7 +20,9 @@ from dynamo.sglang import __version__
configure_dynamo_logging() configure_dynamo_logging()
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
DYNAMO_ARGS: Dict[str, Dict[str, Any]] = { DYNAMO_ARGS: Dict[str, Dict[str, Any]] = {
"endpoint": { "endpoint": {
"flags": ["--endpoint"], "flags": ["--endpoint"],
......
...@@ -176,8 +176,9 @@ class VllmEncodeWorker: ...@@ -176,8 +176,9 @@ class VllmEncodeWorker:
@classmethod @classmethod
def parse_args(cls) -> Tuple[argparse.Namespace, Config]: def parse_args(cls) -> Tuple[argparse.Namespace, Config]:
DEFAULT_ENDPOINT = "dyn://dynamo.encoder.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_DOWNSTREAM_ENDPOINT = "dyn://dynamo.llm.generate" DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.encoder.generate"
DEFAULT_DOWNSTREAM_ENDPOINT = f"dyn://{DYN_NAMESPACE}.llm.generate"
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="vLLM based encoder for Dynamo LLM." description="vLLM based encoder for Dynamo LLM."
......
...@@ -63,8 +63,9 @@ class Processor(ProcessMixIn): ...@@ -63,8 +63,9 @@ class Processor(ProcessMixIn):
@classmethod @classmethod
def parse_args(cls) -> Tuple[argparse.Namespace, Config]: def parse_args(cls) -> Tuple[argparse.Namespace, Config]:
DEFAULT_ENDPOINT = "dyn://dynamo.processor.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_DOWNSTREAM_ENDPOINT = "dyn://dynamo.encoder.generate" DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.processor.generate"
DEFAULT_DOWNSTREAM_ENDPOINT = f"dyn://{DYN_NAMESPACE}.encoder.generate"
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="vLLM based processor for Dynamo LLM." description="vLLM based processor for Dynamo LLM."
......
...@@ -217,8 +217,9 @@ class VllmEncodeWorker: ...@@ -217,8 +217,9 @@ class VllmEncodeWorker:
@classmethod @classmethod
def parse_args(cls) -> Tuple[argparse.Namespace, Config]: def parse_args(cls) -> Tuple[argparse.Namespace, Config]:
DEFAULT_ENDPOINT = "dyn://dynamo.encoder.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_DOWNSTREAM_ENDPOINT = "dyn://dynamo.llm.generate" DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.encoder.generate"
DEFAULT_DOWNSTREAM_ENDPOINT = f"dyn://{DYN_NAMESPACE}.llm.generate"
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="vLLM based encoder for Dynamo LLM." description="vLLM based encoder for Dynamo LLM."
......
...@@ -84,17 +84,23 @@ class VllmBaseWorker: ...@@ -84,17 +84,23 @@ class VllmBaseWorker:
# use endpoint_overwrite to set the default endpoint based on worker type # use endpoint_overwrite to set the default endpoint based on worker type
def endpoint_overwrite(args): def endpoint_overwrite(args):
DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
# default endpoint for this worker # default endpoint for this worker
if args.worker_type == "prefill": if args.worker_type == "prefill":
args.endpoint = args.endpoint or "dyn://dynamo.llm.generate" args.endpoint = args.endpoint or f"dyn://{DYN_NAMESPACE}.llm.generate"
elif args.worker_type == "decode": elif args.worker_type == "decode":
args.endpoint = args.endpoint or "dyn://dynamo.decoder.generate" args.endpoint = (
args.endpoint or f"dyn://{DYN_NAMESPACE}.decoder.generate"
)
elif args.worker_type == "encode_prefill": elif args.worker_type == "encode_prefill":
args.endpoint = args.endpoint or "dyn://dynamo.encoder.generate" args.endpoint = (
args.endpoint or f"dyn://{DYN_NAMESPACE}.encoder.generate"
)
# set downstream endpoint for disaggregated workers # set downstream endpoint for disaggregated workers
if args.enable_disagg: if args.enable_disagg:
args.downstream_endpoint = ( args.downstream_endpoint = (
args.downstream_endpoint or "dyn://dynamo.decoder.generate" args.downstream_endpoint
or f"dyn://{DYN_NAMESPACE}.decoder.generate"
) )
return args return args
......
...@@ -29,7 +29,8 @@ from vllm.engine.arg_utils import AsyncEngineArgs ...@@ -29,7 +29,8 @@ from vllm.engine.arg_utils import AsyncEngineArgs
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
class Config: class Config:
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
import argparse import argparse
import asyncio import asyncio
import os
import sys import sys
import sglang import sglang
...@@ -30,7 +31,8 @@ from sglang.srt.server_args import ServerArgs ...@@ -30,7 +31,8 @@ from sglang.srt.server_args import ServerArgs
from dynamo.llm import ModelInput, ModelType, register_llm from dynamo.llm import ModelInput, ModelType, register_llm
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_TEMPERATURE = 0.7 DEFAULT_TEMPERATURE = 0.7
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
import argparse import argparse
import asyncio import asyncio
import os
import sys import sys
import sglang import sglang
...@@ -21,7 +22,8 @@ from sglang.srt.server_args import ServerArgs ...@@ -21,7 +22,8 @@ from sglang.srt.server_args import ServerArgs
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_TEMPERATURE = 0.7 DEFAULT_TEMPERATURE = 0.7
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
import argparse import argparse
import asyncio import asyncio
import os
import sys import sys
import time import time
...@@ -34,7 +35,8 @@ from sglang.srt.server_args import ServerArgs ...@@ -34,7 +35,8 @@ from sglang.srt.server_args import ServerArgs
from dynamo.llm import ModelInput, ModelType, register_llm from dynamo.llm import ModelInput, ModelType, register_llm
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
import argparse import argparse
import asyncio import asyncio
import os
import sys import sys
import uvloop import uvloop
...@@ -42,7 +43,8 @@ from vllm.inputs import TokensPrompt ...@@ -42,7 +43,8 @@ from vllm.inputs import TokensPrompt
from dynamo.llm import ModelInput, ModelType, register_llm from dynamo.llm import ModelInput, ModelType, register_llm
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
DEFAULT_ENDPOINT = f"dyn://{DYN_NAMESPACE}.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_TEMPERATURE = 0.7 DEFAULT_TEMPERATURE = 0.7
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment