# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
# Single-replica Deployment that runs the "epp" container for the
# "dynamo-deepseek" pool in the "default" namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dynamo-deepseek-epp
  namespace: default
  labels:
    app: dynamo-deepseek-epp
spec:
  replicas: 1
  # The selector must stay identical to the pod template labels below.
  selector:
    matchLabels:
      app: dynamo-deepseek-epp
  template:
    metadata:
      labels:
        app: dynamo-deepseek-epp
    spec:
      # Conservatively, keep this timeout in line with the longest
      # termination grace period among the pods within the pool.
      terminationGracePeriodSeconds: 130
      containers:
        - name: epp
          image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
          # The image is referenced by the "main" tag, so it is re-pulled
          # on every container start.
          imagePullPolicy: Always
          # Each flag and each value is its own argv entry. Numeric values
          # are quoted so they are passed as strings.
          args:
            - "-poolName"
            - "dynamo-deepseek"
            - "-poolNamespace"
            - "default"
            - "-v"
            - "4"
            - "--zap-encoder"
            - "json"
            - "-grpcPort"
            - "9002"
            - "-grpcHealthPort"
            - "9003"
          ports:
            # 9002 and 9003 match the -grpcPort and -grpcHealthPort
            # arguments above.
            - containerPort: 9002
            - containerPort: 9003
            # NOTE(review): no argument above sets 9090; presumably it is
            # the container's default metrics port - confirm.
            - name: metrics
              containerPort: 9090
          # Both probes run a gRPC health check for the
          # "inference-extension" service on the health port (9003),
          # starting 5s after container start and repeating every 10s.
          livenessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
          readinessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10