Unverified commit c675fd1b, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: prefillqueue stream name in load-planner (#1377)

parent 5c9a2d49
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
# Source of truth for planner defaults # Source of truth for planner defaults
class PlannerDefaults: class PlannerDefaults:
namespace = "dynamo" namespace = "dynamo"
served_model_name = "vllm"
environment = "local" environment = "local"
no_operation = False no_operation = False
log_dir = None log_dir = None
......
...@@ -110,7 +110,6 @@ dynamo serve graphs.disagg:Frontend -f disagg.yaml --Planner.environment=local - ...@@ -110,7 +110,6 @@ dynamo serve graphs.disagg:Frontend -f disagg.yaml --Planner.environment=local -
Configuration options: Configuration options:
* `namespace` (str, default: "dynamo"): Target namespace for planner operations * `namespace` (str, default: "dynamo"): Target namespace for planner operations
* `environment` (str, default: "local"): Target environment (local, kubernetes) * `environment` (str, default: "local"): Target environment (local, kubernetes)
* `served-model-name` (str, default: "vllm"): Target model name
* `no-operation` (bool, default: false): Run in observation mode only * `no-operation` (bool, default: false): Run in observation mode only
* `log-dir` (str, default: None): Tensorboard log directory * `log-dir` (str, default: None): Tensorboard log directory
* `adjustment-interval` (int, default: 30): Seconds between adjustments * `adjustment-interval` (int, default: 30): Seconds between adjustments
......
...@@ -54,7 +54,6 @@ dynamo serve graphs.disagg_router:Frontend -f disagg_1p1d.yml ...@@ -54,7 +54,6 @@ dynamo serve graphs.disagg_router:Frontend -f disagg_1p1d.yml
genai-perf profile \ genai-perf profile \
--tokenizer deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --tokenizer deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
-m deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ -m deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--service-kind openai \
--endpoint-type chat \ --endpoint-type chat \
--url http://localhost:8000 \ --url http://localhost:8000 \
--streaming \ --streaming \
......
...@@ -64,7 +64,7 @@ class Planner: ...@@ -64,7 +64,7 @@ class Planner:
self._prefill_queue_nats_server = os.getenv( self._prefill_queue_nats_server = os.getenv(
"NATS_SERVER", "nats://localhost:4222" "NATS_SERVER", "nats://localhost:4222"
) )
self._prefill_queue_stream_name = self.args.served_model_name self._prefill_queue_stream_name = f"{self.namespace}_prefill_queue"
self.prefill_client: Any | None = None self.prefill_client: Any | None = None
self.workers_client: Any | None = None self.workers_client: Any | None = None
...@@ -411,12 +411,6 @@ if __name__ == "__main__": ...@@ -411,12 +411,6 @@ if __name__ == "__main__":
default=PlannerDefaults.namespace, default=PlannerDefaults.namespace,
help="Namespace planner will look at", help="Namespace planner will look at",
) )
parser.add_argument(
"--served-model-name",
type=str,
default=PlannerDefaults.served_model_name,
help="Model name that is being served (used for prefill queue name)",
)
parser.add_argument( parser.add_argument(
"--no-operation", "--no-operation",
action="store_true", action="store_true",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment