Unverified Commit a462280e authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

fix: use resource and workers hints from decorators and service args (#1044)

parent 641234cd
......@@ -80,6 +80,10 @@ class BentoServiceAdapter(ServiceMixin, ServiceInterface[T]):
)
image = kwargs.get("image")
envs = kwargs.get("envs", [])
# attributes from decorators
for attr in ["workers", "resources"]:
if attr in kwargs:
config[attr] = kwargs[attr]
self.image = image
# Get service args from environment if available
service_args = self._get_service_args(name)
......@@ -90,13 +94,14 @@ class BentoServiceAdapter(ServiceMixin, ServiceInterface[T]):
config[key] = value
# Extract and apply specific args if needed
if "resources" in service_args:
config["resources"] = service_args["resources"]
if "workers" in service_args:
config["workers"] = service_args["workers"]
if "envs" in service_args and envs:
envs.extend(service_args["envs"])
elif "envs" in service_args:
envs = service_args["envs"]
# Initialize BentoML service
self._bentoml_service = BentoService(
config=config,
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Middle:
ServiceArgs:
workers: 2
resources:
cpu: "1"
Backend:
ServiceArgs:
workers: 3
resources:
cpu: "1"
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import subprocess
import time
from collections import Counter
import pytest
from typer.testing import CliRunner
from dynamo.sdk.cli.cli import cli
pytestmark = pytest.mark.pre_merge
runner = CliRunner()
@pytest.fixture(scope="module", autouse=True)
def setup_and_teardown():
# Setup code
nats_server = subprocess.Popen(["nats-server", "-js"])
etcd = subprocess.Popen(["etcd"])
print("Setting up resources")
# Run the serve command in dry-run mode with CLI runner to check it's working
result = runner.invoke(
cli,
[
"serve",
"pipeline:Frontend",
"--working-dir",
"deploy/sdk/src/dynamo/sdk/tests",
"-f",
"deploy/sdk/src/dynamo/sdk/tests/config.yaml",
"--dry-run",
],
)
# Now start the actual server using subprocess for the real integration test
server = subprocess.Popen(
[
"dynamo",
"serve",
"pipeline:Frontend",
"--working-dir",
"deploy/sdk/src/dynamo/sdk/tests",
"-f",
"deploy/sdk/src/dynamo/sdk/tests/config.yaml",
]
)
time.sleep(3)
yield result
# Teardown code
print("Tearing down resources")
server.terminate()
server.wait()
nats_server.terminate()
nats_server.wait()
etcd.terminate()
etcd.wait()
async def test_pipeline(setup_and_teardown):
import asyncio
import aiohttp
# Check the CLI command ran successfully
result = setup_and_teardown
assert result.exit_code == 0
import psutil
# Capture list of subprocesses (children of current process)
current_process = psutil.Process()
child_processes = list(current_process.children(recursive=True))
# Assert their name and command line
service_count: Counter[str] = Counter()
for proc in child_processes:
try:
cmd = proc.cmdline()
if "--service-name" in " ".join(cmd):
idx = cmd.index("--service-name")
service_name = cmd[idx + 1]
service_count[service_name] += 1
# assert "dynamo" in name.lower() or "dynamo" in " ".join(cmdline).lower()
# assert any("serve" in arg for arg in cmdline)
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
assert service_count["Frontend"] == 1
assert service_count["Backend"] == 3
assert service_count["Middle"] == 2
# Clean the output to check for expected content
clean_output = re.sub(r"\x1b\[[0-9;]*m", "", result.output)
assert "Service Configuration:" in clean_output
max_retries = 5
for attempt in range(max_retries):
try:
async with aiohttp.ClientSession() as session:
async with session.post(
"http://localhost:8000/generate",
json={"text": "federer-is-the-greatest-tennis-player-of-all-time"},
headers={"accept": "text/event-stream"},
) as resp:
assert resp.status == 200
text = await resp.text()
assert (
"federer-is-the-greatest-tennis-player-of-all-time-mid-back"
in text
)
break
except Exception as e:
if attempt == max_retries - 1:
raise
print(f"Attempt {attempt + 1} failed, retrying... {e}")
await asyncio.sleep(3)
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from dynamo.sdk.cli.utils import configure_target_environment
from dynamo.sdk.core.runner import TargetEnum
pytestmark = pytest.mark.pre_merge
@pytest.fixture(scope="module", autouse=True)
def setup_and_teardown():
configure_target_environment(TargetEnum.BENTO)
yield
configure_target_environment(TargetEnum.DYNAMO)
def test_gpu_resources(setup_and_teardown):
"""Test resource configurations"""
from _bentoml_sdk import Service as BentoService
from dynamo.sdk import service
@service(
resources={"cpu": "2", "gpu": "1", "memory": "4Gi"},
dynamo={"namespace": "test"},
)
class MyService:
def __init__(self) -> None:
pass
svc: BentoService = MyService.get_bentoml_service() # type: ignore
assert svc.config["resources"]["cpu"] == "2"
assert svc.config["resources"]["gpu"] == "1"
assert svc.config["resources"]["memory"] == "4Gi"
......@@ -24,7 +24,7 @@ Frontend:
Middle:
message: "moon"
ServiceArgs:
workers: 1
workers: 2
resources:
cpu: "1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment