"vscode:/vscode.git/clone" did not exist on "1e37c1033fac4e4bf7406342ce7805a38ed132db"
rpc.py 12.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""GPU Memory Service RPC Client.

Low-level RPC client stub. The client provides a simple interface for acquiring
locks and performing allocation operations. The socket connection IS the lock.

This module has NO PyTorch dependency.

Usage:
    # Writer (acquires RW lock in constructor)
    with GMSRPCClient(socket_path, lock_type=RequestedLockType.RW) as client:
        alloc_id, aligned_size = client.allocate(size=1024*1024)
        fd = client.export(alloc_id)
        # ... write weights using fd ...
        client.commit()
    # Lock released on exit

    # Reader (acquires RO lock in constructor)
    client = GMSRPCClient(socket_path, lock_type=RequestedLockType.RO)
    if client.committed:  # Check if weights are valid
        allocations = client.list_allocations()
        for alloc in allocations:
            fd = client.export(alloc["allocation_id"])
            # ... import and map fd ...
    # Keep connection open during inference!
    # client.close() only when done with inference
"""

import logging
import socket
from typing import Dict, List, Optional, Tuple, Type, TypeVar

from gpu_memory_service.common.protocol.messages import (
    AllocateRequest,
    AllocateResponse,
    ClearAllRequest,
    ClearAllResponse,
    CommitRequest,
    CommitResponse,
    ErrorResponse,
    ExportRequest,
    FreeRequest,
    FreeResponse,
    GetAllocationRequest,
    GetAllocationResponse,
    GetAllocationStateRequest,
    GetAllocationStateResponse,
    GetLockStateRequest,
    GetLockStateResponse,
    GetStateHashRequest,
    GetStateHashResponse,
    HandshakeRequest,
    HandshakeResponse,
    ListAllocationsRequest,
    ListAllocationsResponse,
    MetadataDeleteRequest,
    MetadataDeleteResponse,
    MetadataGetRequest,
    MetadataGetResponse,
    MetadataListRequest,
    MetadataListResponse,
    MetadataPutRequest,
    MetadataPutResponse,
)
from gpu_memory_service.common.protocol.wire import recv_message_sync, send_message_sync
from gpu_memory_service.common.types import (
    RW_REQUIRED,
    GrantedLockType,
    RequestedLockType,
)

T = TypeVar("T")

logger = logging.getLogger(__name__)


class GMSRPCClient:
    """GPU Memory Service RPC Client.

    CRITICAL: Socket connection IS the lock.
    - Constructor blocks until lock is acquired
    - close() releases the lock
    - committed property tells readers if weights are valid

    For writers (lock_type=RequestedLockType.RW):
        - Use context manager (with statement) for automatic lock release
        - Call commit() after weights are written
        - Call clear_all() before loading new model

    For readers (lock_type=RequestedLockType.RO):
        - Check committed property after construction
        - Keep connection open during inference lifetime
        - Only call close() when shutting down or allowing weight updates
    """

    def __init__(
        self,
        socket_path: str,
        lock_type: RequestedLockType = RequestedLockType.RO,
        timeout_ms: Optional[int] = None,
    ):
        """Connect to Allocation Server and acquire lock.

        Args:
            socket_path: Path to server's Unix domain socket
            lock_type: Requested lock type (RW, RO, or RW_OR_RO)
            timeout_ms: Timeout in milliseconds for lock acquisition.
                        None means wait indefinitely.

        Raises:
            ConnectionError: If connection fails
            TimeoutError: If timeout_ms expires waiting for lock
        """
        self.socket_path = socket_path
        self._requested_lock_type = lock_type
        self._socket: Optional[socket.socket] = None
        self._recv_buffer = bytearray()
        self._committed = False
        self._granted_lock_type: Optional[GrantedLockType] = None

        # Connect and acquire lock
        self._connect(timeout_ms=timeout_ms)

    def _connect(self, timeout_ms: Optional[int]) -> None:
        """Connect to server and perform handshake (lock acquisition)."""
        self._socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            self._socket.connect(self.socket_path)
        except FileNotFoundError:
132
133
            self._socket.close()
            self._socket = None
134
135
            raise ConnectionError(f"Server not running at {self.socket_path}") from None
        except Exception as e:
136
137
            self._socket.close()
            self._socket = None
138
139
            raise ConnectionError(f"Failed to connect: {e}") from e

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
        # Handshake I/O — clean up socket on any failure
        try:
            request = HandshakeRequest(
                lock_type=self._requested_lock_type,
                timeout_ms=timeout_ms,
            )
            send_message_sync(self._socket, request)

            # May block waiting for lock
            response, _, self._recv_buffer = recv_message_sync(
                self._socket, self._recv_buffer
            )
        except Exception:
            self._socket.close()
            self._socket = None
            raise
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

        if isinstance(response, ErrorResponse):
            self._socket.close()
            self._socket = None
            raise ConnectionError(f"Handshake error: {response.error}")

        if not isinstance(response, HandshakeResponse):
            self._socket.close()
            self._socket = None
            raise ConnectionError(f"Unexpected response: {type(response)}")

        if not response.success:
            self._socket.close()
            self._socket = None
            raise TimeoutError("Timeout waiting for lock")

        self._committed = response.committed
        # Store granted lock type (may differ from requested for rw_or_ro mode)
        if response.granted_lock_type is not None:
            self._granted_lock_type = response.granted_lock_type
        elif self._requested_lock_type == RequestedLockType.RW:
            self._granted_lock_type = GrantedLockType.RW
        else:
            self._granted_lock_type = GrantedLockType.RO
        logger.info(
            f"Connected with {self._requested_lock_type.value} lock (granted={self._granted_lock_type.value}), "
            f"committed={self._committed}"
        )

    @property
    def committed(self) -> bool:
        """Check if weights are committed (valid)."""
        return self._committed

    @property
    def lock_type(self) -> Optional[GrantedLockType]:
        """Get the lock type actually granted by the server.

        For rw_or_ro mode, this tells you whether RW or RO was granted.
        """
        return self._granted_lock_type

    @property
    def is_connected(self) -> bool:
        """Check if client is connected."""
        return self._socket is not None

    def _send_recv(self, request) -> Tuple[object, int]:
        """Send request and receive response. Returns (response, fd)."""
        if not self._socket:
            raise RuntimeError("Client not connected")

        send_message_sync(self._socket, request)
        response, fd, self._recv_buffer = recv_message_sync(
            self._socket, self._recv_buffer
        )

        if isinstance(response, ErrorResponse):
            raise RuntimeError(f"Server error: {response.error}")

        return response, fd

    def _call(self, request, response_type: Type[T]) -> T:
        """Send ``request`` and return the reply, checked against ``response_type``.

        Raises:
            RuntimeError: If the request's type is listed in ``RW_REQUIRED``
                but the granted lock is not RW, if the round trip itself fails
                with a RuntimeError, or if the reply is not an instance of
                ``response_type``.
        """
        needs_rw = type(request) in RW_REQUIRED
        if needs_rw and self.lock_type != GrantedLockType.RW:
            raise RuntimeError("Operation requires RW connection")

        reply, _fd = self._send_recv(request)
        if isinstance(reply, response_type):
            return reply
        raise RuntimeError(f"Unexpected response: {type(reply)}")

    def get_lock_state(self) -> GetLockStateResponse:
        """Send a GetLockStateRequest and return the server's typed response."""
        request = GetLockStateRequest()
        return self._call(request, GetLockStateResponse)

    def get_allocation_state(self) -> GetAllocationStateResponse:
        """Send a GetAllocationStateRequest and return the server's typed response."""
        request = GetAllocationStateRequest()
        return self._call(request, GetAllocationStateResponse)

    def is_ready(self) -> bool:
        return self.committed

    def commit(self) -> bool:
        """Commit weights and release the RW lock.

        Sends a ``CommitRequest``. If the socket fails during the round trip
        (the server closes the RW socket as part of commit), the outcome is
        verified by opening a short-lived RO connection (1000 ms lock timeout)
        and reading its ``committed`` flag.

        Returns:
            True if the commit was confirmed; the connection is then closed.
            False otherwise.

        Raises:
            RuntimeError: If ``CommitRequest`` is in ``RW_REQUIRED`` and the
                granted lock is not RW, or the server returned an error.
        """
        if CommitRequest in RW_REQUIRED and self.lock_type != GrantedLockType.RW:
            raise RuntimeError("Operation requires RW connection")

        try:
            reply, _fd = self._send_recv(CommitRequest())
        except OSError as exc:
            # ConnectionResetError and BrokenPipeError are OSError subclasses,
            # so catching OSError covers the same set of errors.
            # Server closes RW socket as part of commit
            logger.debug(
                f"Commit saw socket error ({type(exc).__name__}); verifying via RO connect"
            )
            self.close()
            try:
                verifier = GMSRPCClient(
                    self.socket_path, lock_type=RequestedLockType.RO, timeout_ms=1000
                )
                try:
                    ok = verifier.committed
                finally:
                    verifier.close()
            except TimeoutError:
                # Could not get the RO lock in time: treat as unconfirmed.
                ok = False
        else:
            ok = isinstance(reply, CommitResponse) and reply.success

        if not ok:
            return False

        self._committed = True
        self.close()  # no-op if the error path already closed the socket
        logger.info("Committed weights and released RW connection")
        return True

    def allocate(self, size: int, tag: str = "default") -> Tuple[str, int]:
        """Request a new allocation from the server.

        Args:
            size: Requested size, forwarded in the AllocateRequest.
            tag: Tag forwarded in the AllocateRequest.

        Returns:
            (allocation_id, aligned_size) taken from the AllocateResponse.
        """
        reply = self._call(AllocateRequest(size=size, tag=tag), AllocateResponse)
        allocation_id, aligned_size = reply.allocation_id, reply.aligned_size
        return allocation_id, aligned_size

    def export(self, allocation_id: str) -> int:
        """Export an allocation as a POSIX file descriptor.

        The caller owns the returned descriptor and must close it.

        Raises:
            RuntimeError: If the reply carried no descriptor (fd < 0), the
                client is not connected, or the server returned an error.
        """
        request = ExportRequest(allocation_id=allocation_id)
        _reply, fd = self._send_recv(request)
        if fd >= 0:
            return fd
        raise RuntimeError("No FD received from server")

    def get_allocation(self, allocation_id: str) -> GetAllocationResponse:
        """Send a GetAllocationRequest for ``allocation_id`` and return the typed response."""
        request = GetAllocationRequest(allocation_id=allocation_id)
        return self._call(request, GetAllocationResponse)

    def list_allocations(self, tag: Optional[str] = None) -> List[Dict]:
        """Return the ``allocations`` list from the server's ListAllocationsResponse.

        Args:
            tag: Forwarded in the request as-is (including None).
        """
        reply = self._call(ListAllocationsRequest(tag=tag), ListAllocationsResponse)
        return reply.allocations

    def free(self, allocation_id: str) -> bool:
        """Send a FreeRequest for ``allocation_id``; return the response's ``success`` flag."""
        reply = self._call(FreeRequest(allocation_id=allocation_id), FreeResponse)
        return reply.success

    def clear_all(self) -> int:
        """Send a ClearAllRequest; return the ``cleared_count`` reported by the server."""
        reply = self._call(ClearAllRequest(), ClearAllResponse)
        return reply.cleared_count

    def metadata_put(
        self, key: str, allocation_id: str, offset_bytes: int, value: bytes
    ) -> bool:
        """Store a metadata entry on the server.

        All four arguments are forwarded unchanged in a MetadataPutRequest.

        Returns:
            The ``success`` flag of the MetadataPutResponse.
        """
        reply = self._call(
            MetadataPutRequest(
                key=key,
                allocation_id=allocation_id,
                offset_bytes=offset_bytes,
                value=value,
            ),
            MetadataPutResponse,
        )
        return reply.success

    def metadata_get(self, key: str) -> Optional[tuple[str, int, bytes]]:
        """Look up a metadata entry by key.

        Returns:
            (allocation_id, offset_bytes, value) when the response's ``found``
            flag is set, otherwise None.
        """
        reply = self._call(MetadataGetRequest(key=key), MetadataGetResponse)
        if not reply.found:
            return None
        return (reply.allocation_id, reply.offset_bytes, reply.value)

    def metadata_delete(self, key: str) -> bool:
        """Send a MetadataDeleteRequest for ``key``; return the response's ``deleted`` flag."""
        reply = self._call(MetadataDeleteRequest(key=key), MetadataDeleteResponse)
        return reply.deleted

    def metadata_list(self, prefix: str = "") -> List[str]:
        """Return the ``keys`` list from the server's MetadataListResponse.

        Args:
            prefix: Forwarded in the MetadataListRequest; defaults to "".
        """
        reply = self._call(MetadataListRequest(prefix=prefix), MetadataListResponse)
        return reply.keys

    def get_memory_layout_hash(self) -> str:
        """Get state hash (hash of allocations + metadata). Empty if not committed.

        The value is the ``memory_layout_hash`` field of the server's
        GetStateHashResponse.
        """
        reply = self._call(GetStateHashRequest(), GetStateHashResponse)
        return reply.memory_layout_hash

    def close(self) -> None:
        """Close connection and release lock."""
        if self._socket:
            try:
                self._socket.close()
            except Exception:
                pass
            self._socket = None
            lock_str = self.lock_type.value if self.lock_type else "unknown"
            logger.info(f"Closed {lock_str} connection")

    def __enter__(self) -> "GMSRPCClient":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit."""
        self.close()

    def __del__(self):
        """Destructor: warn if connection not closed."""
        if self._socket:
            logger.warning("GMSRPCClient not closed properly")