Commit 32cd0048 authored by xuxz
Browse files

Adapt Dynamo for DCU

parent b950ec54
...@@ -148,7 +148,7 @@ class LocalConnector(PlannerConnector): ...@@ -148,7 +148,7 @@ class LocalConnector(PlannerConnector):
if not available_gpus: if not available_gpus:
raise ValueError("No GPUs available for allocation") raise ValueError("No GPUs available for allocation")
gpu_id = available_gpus[0] gpu_id = available_gpus[0]
watcher_env["CUDA_VISIBLE_DEVICES"] = gpu_id watcher_env["HIP_VISIBLE_DEVICES"] = gpu_id
watcher_env["DYNAMO_SERVICE_CONFIG"] = service_config watcher_env["DYNAMO_SERVICE_CONFIG"] = service_config
......
...@@ -207,7 +207,7 @@ class ResourceAllocator: ...@@ -207,7 +207,7 @@ class ResourceAllocator:
# Generate environment variables for each worker # Generate environment variables for each worker
for _ in range(num_workers): for _ in range(num_workers):
env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))} env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
resource_envs.append(env_vars) resource_envs.append(env_vars)
else: else:
logger.info( logger.info(
...@@ -221,7 +221,7 @@ class ResourceAllocator: ...@@ -221,7 +221,7 @@ class ResourceAllocator:
) )
# Generate environment variables for this worker # Generate environment variables for this worker
env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))} env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
# If we have comprehensive GPU stats, log them # If we have comprehensive GPU stats, log them
try: try:
...@@ -242,7 +242,8 @@ class ResourceAllocator: ...@@ -242,7 +242,8 @@ class ResourceAllocator:
logger.debug(f"Failed to get GPU stats: {e}") logger.debug(f"Failed to get GPU stats: {e}")
resource_envs.append(env_vars) resource_envs.append(env_vars)
# else:
# resource_envs = config["envs"]
logger.info( logger.info(
f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}" f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}"
) )
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common: Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
block-size: 64 block-size: 16
max-model-len: 16384 max-model-len: 16384
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo.Processor.chat/completions endpoint: dynamo.Processor.chat/completions
port: 8000 port: 8000
......
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common: Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
block-size: 64 block-size: 16
max-model-len: 16384 max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}' kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo.Processor.chat/completions endpoint: dynamo.Processor.chat/completions
port: 8000 port: 8000
...@@ -29,21 +29,26 @@ Processor: ...@@ -29,21 +29,26 @@ Processor:
VllmWorker: VllmWorker:
remote-prefill: true remote-prefill: true
conditional-disagg: true conditional-disagg: false
max-local-prefill-length: 10 max-local-prefill-length: 10
max-prefill-queue-size: 2 max-prefill-queue-size: 64
tensor-parallel-size: 1
enable-prefix-caching: false
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config] common-configs: [model, block-size, max-model-len, kv-transfer-config]
PrefillWorker: PrefillWorker:
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
tensor-parallel-size: 1
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config] common-configs: [model, block-size, max-model-len, kv-transfer-config]
Planner: Planner:
......
...@@ -13,14 +13,14 @@ ...@@ -13,14 +13,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common: Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
block-size: 64 block-size: 16
max-model-len: 16384 max-model-len: 16384
router: kv router: kv
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}' kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo.Processor.chat/completions endpoint: dynamo.Processor.chat/completions
port: 8000 port: 8000
...@@ -34,9 +34,9 @@ Router: ...@@ -34,9 +34,9 @@ Router:
VllmWorker: VllmWorker:
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
remote-prefill: true remote-prefill: true
conditional-disagg: true conditional-disagg: false
max-local-prefill-length: 10 max-local-prefill-length: 10
max-prefill-queue-size: 2 max-prefill-queue-size: 64
tensor-parallel-size: 1 tensor-parallel-size: 1
enable-prefix-caching: true enable-prefix-caching: true
ServiceArgs: ServiceArgs:
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
Common: Common:
model: deepseek-ai/DeepSeek-R1 model: deepseek-ai/DeepSeek-R1
block-size: 64 block-size: 16
max-model-len: 16384 max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}' kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
tensor-parallel-size: 16 tensor-parallel-size: 16
......
...@@ -228,9 +228,9 @@ async fn add_model( ...@@ -228,9 +228,9 @@ async fn add_model(
endpoint_name endpoint_name
); );
if model_name.starts_with('/') { // if model_name.starts_with('/') {
raise!("Model name '{}' cannot start with a slash", model_name); // raise!("Model name '{}' cannot start with a slash", model_name);
} // }
let parts: Vec<&str> = endpoint_name.split('.').collect(); let parts: Vec<&str> = endpoint_name.split('.').collect();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment