# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import time
import uuid

import uvloop

from dynamo.llm import KserveGrpcService, PythonAsyncEngine
from dynamo.runtime import DistributedRuntime, dynamo_worker


class MockCompletionEngine:
    """Engine stub that answers every request with one canned text completion."""

    def __init__(self, model_name: str) -> None:
        # Reported back in the "model" field of each response.
        self.model_name = model_name

    def generate(self, request):
        """Log ``request`` and return an async generator yielding one response dict.

        The timestamp and the ``cmpl-<uuid4>`` id are fixed when this method is
        called, not when the returned generator is first iterated. The content
        of ``request`` is only printed; it does not influence the response.
        """
        received_at = int(time.time())
        completion_id = f"cmpl-{uuid.uuid4()}"
        print(f"{received_at} | Received request: {request}")

        async def _single_response():
            # The payload is assembled lazily, on first iteration.
            only_choice = {
                "index": 0,
                "text": "Mock completion response from Dynamo.",
                "logprobs": None,
                "finish_reason": "stop",
            }
            token_usage = {
                "prompt_tokens": 5,
                "completion_tokens": 5,
                "total_tokens": 10,
            }
            yield {
                "id": completion_id,
                "object": "text_completion",
                "created": received_at,
                "model": self.model_name,
                "choices": [only_choice],
                "usage": token_usage,
            }

        return _single_response()


@dynamo_worker()
async def worker(runtime: DistributedRuntime):
    """Register the mock completion engine on a KServe gRPC service and serve it.

    Builds a ``MockCompletionEngine``, wraps its ``generate`` callable in a
    ``PythonAsyncEngine`` bound to the running event loop, registers it under
    the model name ``mock_model``, starts the service and then awaits the
    object returned by ``service.run``. ``runtime.shutdown()`` is always
    called on the way out.
    """
    bind_host, bind_port = "0.0.0.0", 8787
    served_model, model_checksum = "mock_model", "mdcsum"

    # The engine wrapper receives the callable together with the current loop.
    event_loop = asyncio.get_running_loop()
    mock_engine = MockCompletionEngine(served_model)
    async_engine = PythonAsyncEngine(mock_engine.generate, event_loop)

    grpc_service = KserveGrpcService(host=bind_host, port=bind_port)
    grpc_service.add_completions_model(served_model, model_checksum, async_engine)

    print("Starting KServe gRPC service...")
    stopped = grpc_service.run(runtime.child_token())

    try:
        print(
            f"Serving endpoint: {bind_host}:{bind_port} "
            "inference.GRPCInferenceService/ModelInfer"
        )
        registered = grpc_service.list_completions_models()
        print(f"Serving completions models: {registered}")
        await stopped
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the example; go straight to cleanup.
        pass
    except Exception as exc:  # pragma: no cover - example logging
        print(f"Unexpected error occurred: {exc}")
    finally:
        print("Shutting down worker...")
        runtime.shutdown()


# Script entry point: the block below runs only when this file is executed directly.
if __name__ == "__main__":
    # Called before asyncio.run(); presumably so the loop that asyncio.run()
    # creates is uvloop-backed — confirm against the uvloop documentation.
    uvloop.install()
    # worker is invoked with no arguments even though it declares `runtime`;
    # presumably the dynamo_worker decorator supplies it — confirm in dynamo.runtime.
    asyncio.run(worker())