# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from tests.utils.deployment_graph import (
    DeploymentGraph,
    Payload,
    chat_completions_response_handler,
)

# Initial payload used for testing
# initial deployment readiness.

# Deliberately short prompt.
text_prompt = "Tell me a short joke about AI."

# Chat-completions request body sent with the payload.
# NOTE(review): "ignore_eos" together with min_tokens == max_tokens presumably
# pins the completion length to 150 tokens -- confirm against the backend's
# sampling parameters.
_chat_request = {
    "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "messages": [{"role": "user", "content": text_prompt}],
    "max_tokens": 150,
    "temperature": 0.1,
    # "seed": 10,
    "ignore_eos": True,
    "min_tokens": 150,
    "stream": False,
}

text_payload = Payload(
    payload_chat=_chat_request,
    expected_log=[],
    expected_response=["AI"],
)

# Directory that holds one YAML config per deployment graph.
_CONFIG_DIR = "/workspace/tests/fault_tolerance/configs"

# Entry-point modules for the two graph families used below.
_AGG = "graphs.agg:Frontend"
_DISAGG = "graphs.disagg:Frontend"


def _vllm_case(name, module, gpu_mark):
    """Return the ``(DeploymentGraph, Payload)`` pair registered under *name*.

    The config file is ``<name with '-' replaced by '_'>.yaml`` inside
    ``_CONFIG_DIR``. Every graph shares the same working directory, endpoint,
    response handler and ``text_payload``; only the module, the config file
    and the GPU mark differ between entries.
    """
    graph = DeploymentGraph(
        module=module,
        config=f"{_CONFIG_DIR}/{name.replace('-', '_')}.yaml",
        directory="/workspace/examples/llm",
        endpoints=["v1/chat/completions"],
        response_handlers=[chat_completions_response_handler],
        marks=[gpu_mark, pytest.mark.vllm],
    )
    return graph, text_payload


# (graph name, entry-point module, GPU mark) -- order here is the order of
# the resulting dictionary and therefore of the parametrized tests.
_GRAPH_TABLE = (
    ("agg-tp-1-dp-1", _AGG, pytest.mark.gpu_1),
    ("agg-tp-1-dp-8", _AGG, pytest.mark.gpu_8),
    ("agg-tp-1-dp-4", _AGG, pytest.mark.gpu_4),
    ("agg-tp-2-dp-1", _AGG, pytest.mark.gpu_2),
    ("agg-tp-2-dp-2", _AGG, pytest.mark.gpu_4),
    ("agg-tp-2-dp-4", _AGG, pytest.mark.gpu_8),
    ("disagg-p-tp-1-dp-1-d-tp-1-dp-1", _DISAGG, pytest.mark.gpu_2),
    ("disagg-p-tp-1-dp-4-d-tp-4-dp-1", _DISAGG, pytest.mark.gpu_8),
    ("disagg-p-tp-2-dp-2-d-tp-4-dp-1", _DISAGG, pytest.mark.gpu_8),
    ("disagg-p-tp-2-dp-1-d-tp-4-dp-1", _DISAGG, pytest.mark.gpu_8),
    ("disagg-p-tp-1-dp-2-d-tp-2-dp-1", _DISAGG, pytest.mark.gpu_4),
    ("disagg-p-tp-1-dp-1-d-tp-2-dp-1", _DISAGG, pytest.mark.gpu_4),
)

# Each Deployment Graph contains
# the dynamo serve module and configuration as well
# as the endpoint for interaction

deployment_graphs = {
    name: _vllm_case(name, module, gpu_mark)
    for name, module, gpu_mark in _GRAPH_TABLE
}

# Each failure scenario contains a list of failure injections.
# Each failure injection has a time in seconds after the previous injection and
# a list of failures to inject, including the number of failures for each type.
# Failures are currently process termination.
#
# Example:
#
#   "prefill_worker": [[30, [("dynamo_prefillworker", 1)]]],
#
# terminates 1 prefill worker after 30 seconds


def _terminate_one(process_name, delay_s=30):
    """Build a scenario with one injection that terminates one process.

    The result has the shape ``[[delay_s, [(process_name, 1)]]]``: a single
    injection, fired ``delay_s`` seconds in, naming one failure type with a
    count of 1.
    """
    return [[delay_s, [(process_name, 1)]]]


failure_scenarios = {
    "decode_worker": _terminate_one("dynamo_vllmworker"),
    "prefill_worker": _terminate_one("dynamo_prefillworker"),
    "frontend": _terminate_one("dynamo_frontend"),
    "processor": _terminate_one("dynamo_processor"),
    "vllm_worker": _terminate_one("vllm_worker"),
    # Baseline scenario: no failures are injected.
    "none": [],
}


@pytest.fixture(params=list(failure_scenarios))
def failures(request):
    """
    Fixture that yields the injection list of each failure scenario in turn.
    """
    scenario_name = request.param
    return failure_scenarios[scenario_name]


@pytest.fixture(params=list(deployment_graphs))
def deployment_graph_test(request):
    """
    Fixture that provides different deployment graph test configurations.

    Parametrized over the keys of ``deployment_graphs``; returns the value
    stored under the selected key.
    """
    graph_name = request.param
    return deployment_graphs[graph_name]