# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Docker Compose stack with four services:
#   * nats-server and etcd-server are always started.
#   * prometheus and grafana are only started when the "metrics" profile is
#     enabled (e.g. `docker compose --profile metrics up`).
#
# Port mappings are quoted so they are always read as strings, never as
# YAML 1.1 base-60 integers.

services:
  nats-server:
    # NOTE(review): image tag is not pinned; consider pinning for
    # reproducible environments.
    image: nats
    # "-js" turns on JetStream; "--trace" turns on protocol trace logging.
    command: ["-js", "--trace"]
    ports:
      # Presumably the conventional NATS client / cluster / monitoring
      # ports - confirm against the server configuration in use.
      - "4222:4222"
      - "6222:6222"
      - "8222:8222"

  etcd-server:
    # NOTE(review): image tag is not pinned; consider pinning for
    # reproducible environments.
    image: bitnami/etcd
    environment:
      # NOTE(review): runs etcd without authentication; presumably intended
      # for local development only - confirm before exposing these ports.
      - ALLOW_NONE_AUTHENTICATION=yes
    ports:
      # Presumably the conventional etcd client (2379) and peer (2380) ports.
      - "2379:2379"
      - "2380:2380"

  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    volumes:
      # Scrape configuration is read from the repository-local metrics/ dir.
      - ./metrics/prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # These provide the web console functionality
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
    restart: unless-stopped
    # TODO: Use more explicit networking setup when metrics is containerized
    # ports:
    #   - "9090:9090"
    # networks:
    #   - monitoring
    network_mode: "host"
    profiles: [metrics]

  grafana:
    image: grafana/grafana-enterprise:latest
    container_name: grafana
    volumes:
      # Dashboard, datasource and dashboard-provider provisioning files.
      - ./metrics/grafana.json:/etc/grafana/provisioning/dashboards/llm-worker-dashboard.json
      - ./metrics/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
      - ./metrics/grafana-dashboard-providers.yml:/etc/grafana/provisioning/dashboards/dashboard-providers.yml
    environment:
      # Port 3000 is used by "dynamo serve", so use 3001
      - GF_SERVER_HTTP_PORT=3001
      # NOTE(review): default admin/admin credentials; acceptable only for
      # local development - override before exposing Grafana to a network.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
      # Default min interval is 5s, but can be configured lower
      - GF_DASHBOARDS_MIN_REFRESH_INTERVAL=2s
    restart: unless-stopped
    # TODO: Use more explicit networking setup when metrics is containerized
    # ports:
    #   - "3001:3001"
    # networks:
    #   - monitoring
    network_mode: "host"
    profiles: [metrics]
    depends_on:
      - prometheus