Commit 32cd0048 authored by xuxz

Adapt Dynamo for DCU

parent b950ec54
@@ -148,7 +148,7 @@ class LocalConnector(PlannerConnector):
         if not available_gpus:
             raise ValueError("No GPUs available for allocation")
         gpu_id = available_gpus[0]
-        watcher_env["CUDA_VISIBLE_DEVICES"] = gpu_id
+        watcher_env["HIP_VISIBLE_DEVICES"] = gpu_id
         watcher_env["DYNAMO_SERVICE_CONFIG"] = service_config
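On DCU the device stack is ROCm/HIP, which selects visible devices through `HIP_VISIBLE_DEVICES` rather than `CUDA_VISIBLE_DEVICES`, hence the swap above. Hardcoding the HIP name does drop the NVIDIA path; if both stacks need to keep working, a minimal sketch of a runtime switch could look like this (my illustration, assuming a PyTorch build, where `torch.version.hip` is a version string on ROCm/DCU and `None` on CUDA):

```python
import torch

def visible_devices_key() -> str:
    """Return the device-visibility env var for the current runtime.

    torch.version.hip is a version string on ROCm/HIP (DCU) builds
    and None on CUDA builds, which distinguishes the two stacks.
    """
    return "HIP_VISIBLE_DEVICES" if torch.version.hip else "CUDA_VISIBLE_DEVICES"

# Hypothetical usage at the call site changed above:
#   watcher_env[visible_devices_key()] = gpu_id
```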
@@ -207,7 +207,7 @@ class ResourceAllocator:
             # Generate environment variables for each worker
             for _ in range(num_workers):
-                env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
+                env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
                 resource_envs.append(env_vars)
         else:
             logger.info(
@@ -221,7 +221,7 @@ class ResourceAllocator:
             )
             # Generate environment variables for this worker
-            env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
+            env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
             # If we have comprehensive GPU stats, log them
             try:
@@ -242,7 +242,8 @@ class ResourceAllocator:
                 logger.debug(f"Failed to get GPU stats: {e}")
             resource_envs.append(env_vars)
         # else:
         #     resource_envs = config["envs"]
+        logger.info(
+            f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}"
+        )
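All three hunks follow the same pattern: the allocator builds one environment dict per worker and pins that worker to its assigned DCU indices via `HIP_VISIBLE_DEVICES`. A self-contained sketch of that per-worker pinning (round-robin assignment; the function name and signature are illustrative, not the actual `ResourceAllocator` API):

```python
from typing import Dict, List

def allocate_worker_envs(
    num_workers: int, gpu_ids: List[int], gpus_per_worker: int = 1
) -> List[Dict[str, str]]:
    """Assign GPUs round-robin and emit one env dict per worker."""
    if num_workers * gpus_per_worker > len(gpu_ids):
        raise ValueError("Not enough GPUs for the requested workers")
    envs = []
    for w in range(num_workers):
        assigned = gpu_ids[w * gpus_per_worker : (w + 1) * gpus_per_worker]
        envs.append({"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))})
    return envs

# allocate_worker_envs(2, [0, 1, 2, 3], gpus_per_worker=2)
# -> [{'HIP_VISIBLE_DEVICES': '0,1'}, {'HIP_VISIBLE_DEVICES': '2,3'}]
```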
@@ -13,12 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384
 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000
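The config changes are uniform across the example files: `model` and `served_model_name` now point at a local directory instead of a Hugging Face repo id (the Rust change at the end of this commit relaxes the name check accordingly), and `block-size` drops from 64 to 16, presumably the KV-cache block size the DCU attention kernels support (the commit message does not say). Once a deployment is up, the frontend on port 8000 can be smoke-tested; a minimal client, assuming it exposes the OpenAI-compatible `/v1/chat/completions` route:

```python
import json
import urllib.request

# Illustrative smoke test; "model" must match served_model_name.
payload = {
    "model": "/models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B",
    "messages": [{"role": "user", "content": "Say hello."}],
    "max_tokens": 32,
}
req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```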
@@ -13,13 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000
@@ -29,21 +29,26 @@ Processor:
 VllmWorker:
   remote-prefill: true
-  conditional-disagg: true
+  conditional-disagg: false
   max-local-prefill-length: 10
-  max-prefill-queue-size: 2
+  max-prefill-queue-size: 64
   tensor-parallel-size: 1
   enable-prefix-caching: false
   ServiceArgs:
     workers: 1
     resources:
       gpu: 1
+  common-configs: [model, block-size, max-model-len, kv-transfer-config]
 PrefillWorker:
   max-num-batched-tokens: 16384
   tensor-parallel-size: 1
   ServiceArgs:
     workers: 1
     resources:
       gpu: 1
+  common-configs: [model, block-size, max-model-len, kv-transfer-config]
 Planner:
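The added `common-configs` entries let each worker inherit the listed keys from the `Common` block instead of repeating them per service. A sketch of that merge semantics (hypothetical helper; I assume inherited keys never override values a service sets explicitly):

```python
from typing import Any, Dict

def apply_common_configs(common: Dict[str, Any], service: Dict[str, Any]) -> Dict[str, Any]:
    """Copy each key named in service['common-configs'] from Common,
    keeping any value the service already defines for that key."""
    merged = dict(service)
    for key in service.get("common-configs", []):
        merged.setdefault(key, common[key])
    return merged

common = {
    "model": "/models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B",
    "block-size": 16,
    "max-model-len": 16384,
    "kv-transfer-config": '{"kv_connector":"DynamoNixlConnector"}',
}
worker = {"tensor-parallel-size": 1,
          "common-configs": ["model", "block-size", "max-model-len"]}
print(apply_common_configs(common, worker)["block-size"])  # -> 16
```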
@@ -13,14 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384
   router: kv
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000
@@ -34,9 +34,9 @@ Router:
 VllmWorker:
   max-num-batched-tokens: 16384
   remote-prefill: true
-  conditional-disagg: true
+  conditional-disagg: false
   max-local-prefill-length: 10
-  max-prefill-queue-size: 2
+  max-prefill-queue-size: 64
   tensor-parallel-size: 1
   enable-prefix-caching: true
   ServiceArgs:
@@ -14,7 +14,7 @@
 # limitations under the License.
 Common:
   model: deepseek-ai/DeepSeek-R1
-  block-size: 64
+  block-size: 16
   max-model-len: 16384
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
   tensor-parallel-size: 16
@@ -228,9 +228,9 @@ async fn add_model(
         endpoint_name
     );
-    if model_name.starts_with('/') {
-        raise!("Model name '{}' cannot start with a slash", model_name);
-    }
+    // if model_name.starts_with('/') {
+    //     raise!("Model name '{}' cannot start with a slash", model_name);
+    // }
     let parts: Vec<&str> = endpoint_name.split('.').collect();
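Disabling this guard in `add_model` is what lets the absolute `/models/...` paths above register as model names. If some validation is still wanted, one option is to accept a leading slash only when it names an existing directory; a sketch of that relaxed check (hypothetical helper, written in Python for consistency with the examples above, not the repository's actual Rust API):

```python
import os

def validate_model_name(model_name: str) -> None:
    """Accept repo-style ids, and absolute paths only if they exist on disk."""
    if model_name.startswith("/") and not os.path.isdir(model_name):
        raise ValueError(
            f"Model name '{model_name}' looks like a path but is not a directory"
        )

# validate_model_name("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")  # always ok
# validate_model_name("/models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B")
#   -> ok only where that directory is mounted
```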