Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4636ecaf
Unverified
Commit
4636ecaf
authored
Jan 27, 2026
by
Qi Wang
Committed by
GitHub
Jan 27, 2026
Browse files
feat: implement EncoderCacheManager (#5632)
parent
bc76247d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
427 additions
and
0 deletions
+427
-0
components/src/dynamo/common/memory/__init__.py
components/src/dynamo/common/memory/__init__.py
+8
-0
components/src/dynamo/common/memory/encoder_cache_manager.py
components/src/dynamo/common/memory/encoder_cache_manager.py
+176
-0
components/src/dynamo/common/tests/memory/test_encoder_cache_manager.py
.../dynamo/common/tests/memory/test_encoder_cache_manager.py
+243
-0
No files found.
components/src/dynamo/common/memory/__init__.py
0 → 100644
View file @
4636ecaf
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Memory management utilities for Dynamo components."""
from
dynamo.common.memory.encoder_cache_manager
import
EncoderCacheManager
__all__
=
[
"EncoderCacheManager"
]
components/src/dynamo/common/memory/encoder_cache_manager.py
0 → 100644
View file @
4636ecaf
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Encoder Cache Manager
A simple LRU cache for encoder embeddings (tensors).
Maps content hash keys to tensors with capacity-based eviction.
Usage:
cache = EncoderCacheManager(capacity_bytes=4 * 1024**3) # 4GB
# Store embedding
cache.set("abc123", embedding_tensor)
# Retrieve embedding
tensor = cache.get("abc123") # Returns None if not found
"""
import
logging
from
collections
import
OrderedDict
from
typing
import
Optional
import
torch
logger
=
logging
.
getLogger
(
__name__
)
class
EncoderCacheManager
:
"""
LRU cache for encoder embeddings.
Stores tensors keyed by content hash with automatic eviction
when capacity is exceeded.
Thread Safety:
This class is NOT thread-safe. It is designed to run within a single
thread (e.g., an asyncio event loop). All access must be from the same
thread to avoid race conditions. This is intentional to keep the
implementation simple and avoid locking overhead.
"""
def
__init__
(
self
,
capacity_bytes
:
int
):
"""
Initialize the encoder cache.
Args:
capacity_bytes: Maximum cache capacity in bytes.
"""
if
capacity_bytes
<=
0
:
raise
ValueError
(
"capacity_bytes must be positive"
)
self
.
_cache
:
OrderedDict
[
str
,
torch
.
Tensor
]
=
OrderedDict
()
self
.
_capacity_bytes
=
capacity_bytes
self
.
_current_bytes
=
0
# Stats
self
.
_hits
=
0
self
.
_misses
=
0
logger
.
info
(
f
"EncoderCacheManager initialized: capacity=
{
capacity_bytes
/
1024
**
3
:.
2
f
}
GB"
)
@
staticmethod
def
_tensor_size
(
tensor
:
torch
.
Tensor
)
->
int
:
"""Calculate tensor size in bytes.
Args:
tensor: Must be a contiguous tensor.
Returns:
Size of the tensor in bytes.
Raises:
AssertionError: If tensor is not contiguous.
"""
assert
(
tensor
.
is_contiguous
()
),
"Tensor must be contiguous for accurate size calculation"
return
tensor
.
element_size
()
*
tensor
.
numel
()
def
get
(
self
,
key
:
str
)
->
Optional
[
torch
.
Tensor
]:
"""
Get a tensor from the cache.
If found, the entry is moved to the end (most recently used).
Args:
key: Cache key (typically content hash).
Returns:
The cached tensor, or None if not found.
"""
if
key
not
in
self
.
_cache
:
self
.
_misses
+=
1
return
None
# Move to end (most recently used)
self
.
_cache
.
move_to_end
(
key
)
self
.
_hits
+=
1
return
self
.
_cache
[
key
]
def
set
(
self
,
key
:
str
,
tensor
:
torch
.
Tensor
)
->
bool
:
"""
Store a tensor in the cache.
If the key already exists, the old value is replaced.
If adding the tensor would exceed capacity, LRU entries are evicted.
If the tensor itself is larger than capacity, it is not stored.
Args:
key: Cache key (typically content hash).
tensor: Tensor to cache.
Returns:
True if the tensor was stored, False if it was too large.
"""
size
=
self
.
_tensor_size
(
tensor
)
# Don't cache if single tensor exceeds capacity
if
size
>
self
.
_capacity_bytes
:
logger
.
warning
(
f
"Tensor too large to cache:
{
size
/
1024
**
2
:.
1
f
}
MB > "
f
"
{
self
.
_capacity_bytes
/
1024
**
3
:.
2
f
}
GB capacity"
)
return
False
# If key exists, remove old entry first
if
key
in
self
.
_cache
:
old_tensor
=
self
.
_cache
.
pop
(
key
)
self
.
_current_bytes
-=
self
.
_tensor_size
(
old_tensor
)
# Evict LRU entries until we have space
while
self
.
_current_bytes
+
size
>
self
.
_capacity_bytes
and
self
.
_cache
:
evicted_key
,
evicted_tensor
=
self
.
_cache
.
popitem
(
last
=
False
)
evicted_size
=
self
.
_tensor_size
(
evicted_tensor
)
self
.
_current_bytes
-=
evicted_size
logger
.
debug
(
f
"Evicted key=
{
evicted_key
[:
16
]
}
..., size=
{
evicted_size
/
1024
**
2
:.
2
f
}
MB"
)
# Store new entry
self
.
_cache
[
key
]
=
tensor
self
.
_current_bytes
+=
size
logger
.
debug
(
f
"Cached key=
{
key
[:
16
]
if
len
(
key
)
>
16
else
key
}
, "
f
"size=
{
size
/
1024
**
2
:.
2
f
}
MB, "
f
"total=
{
self
.
_current_bytes
/
1024
**
3
:.
3
f
}
GB"
)
return
True
@
property
def
stats
(
self
)
->
dict
:
"""
Get cache statistics.
Returns:
Dictionary with cache stats including entries, memory usage,
hit/miss counts, and hit rate.
"""
total_requests
=
self
.
_hits
+
self
.
_misses
hit_rate
=
self
.
_hits
/
total_requests
if
total_requests
>
0
else
0.0
return
{
"entries"
:
len
(
self
.
_cache
),
"current_bytes"
:
self
.
_current_bytes
,
"capacity_bytes"
:
self
.
_capacity_bytes
,
"utilization"
:
self
.
_current_bytes
/
self
.
_capacity_bytes
if
self
.
_capacity_bytes
>
0
else
0
,
"hits"
:
self
.
_hits
,
"misses"
:
self
.
_misses
,
"hit_rate"
:
hit_rate
,
}
components/src/dynamo/common/tests/memory/test_encoder_cache_manager.py
0 → 100644
View file @
4636ecaf
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests for EncoderCacheManager."""
import
pytest
import
torch
from
dynamo.common.memory.encoder_cache_manager
import
EncoderCacheManager
class
TestEncoderCacheManagerInit
:
"""Tests for initialization."""
def
test_init_valid_capacity
(
self
):
"""Test initialization with valid capacity."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
)
assert
cache
.
stats
[
"capacity_bytes"
]
==
1024
assert
cache
.
stats
[
"current_bytes"
]
==
0
assert
cache
.
stats
[
"entries"
]
==
0
def
test_init_invalid_capacity_zero
(
self
):
"""Test initialization with zero capacity raises error."""
with
pytest
.
raises
(
ValueError
,
match
=
"capacity_bytes must be positive"
):
EncoderCacheManager
(
capacity_bytes
=
0
)
def
test_init_invalid_capacity_negative
(
self
):
"""Test initialization with negative capacity raises error."""
with
pytest
.
raises
(
ValueError
,
match
=
"capacity_bytes must be positive"
):
EncoderCacheManager
(
capacity_bytes
=-
100
)
class
TestEncoderCacheManagerBasicOperations
:
"""Tests for basic get/set operations."""
def
test_set_and_get
(
self
):
"""Test basic set and get operations."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
# 1MB
tensor
=
torch
.
randn
(
100
,
100
)
# ~40KB for float32
result
=
cache
.
set
(
"key1"
,
tensor
)
assert
result
is
True
retrieved
=
cache
.
get
(
"key1"
)
assert
retrieved
is
not
None
assert
torch
.
equal
(
retrieved
,
tensor
)
def
test_get_nonexistent_key
(
self
):
"""Test get returns None for nonexistent key."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
result
=
cache
.
get
(
"nonexistent"
)
assert
result
is
None
def
test_set_overwrites_existing_key
(
self
):
"""Test set overwrites existing key."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
tensor1
=
torch
.
randn
(
10
,
10
)
tensor2
=
torch
.
randn
(
10
,
10
)
cache
.
set
(
"key1"
,
tensor1
)
cache
.
set
(
"key1"
,
tensor2
)
retrieved
=
cache
.
get
(
"key1"
)
assert
torch
.
equal
(
retrieved
,
tensor2
)
assert
cache
.
stats
[
"entries"
]
==
1
class
TestEncoderCacheManagerLRUEviction
:
"""Tests for LRU eviction behavior."""
def
test_eviction_when_full
(
self
):
"""Test LRU eviction when cache is full."""
# Small capacity to force eviction
tensor_size
=
10
*
10
*
4
# 400 bytes for float32
capacity
=
tensor_size
*
2
+
100
# Room for ~2 tensors
cache
=
EncoderCacheManager
(
capacity_bytes
=
capacity
)
t1
=
torch
.
randn
(
10
,
10
)
t2
=
torch
.
randn
(
10
,
10
)
t3
=
torch
.
randn
(
10
,
10
)
cache
.
set
(
"key1"
,
t1
)
cache
.
set
(
"key2"
,
t2
)
# Adding third should evict first (LRU)
cache
.
set
(
"key3"
,
t3
)
assert
cache
.
get
(
"key1"
)
is
None
# Evicted
assert
cache
.
get
(
"key2"
)
is
not
None
assert
cache
.
get
(
"key3"
)
is
not
None
def
test_get_updates_lru_order
(
self
):
"""Test that get() updates LRU order."""
tensor_size
=
10
*
10
*
4
# 400 bytes
capacity
=
tensor_size
*
2
+
100
# Room for ~2 tensors
cache
=
EncoderCacheManager
(
capacity_bytes
=
capacity
)
t1
=
torch
.
randn
(
10
,
10
)
t2
=
torch
.
randn
(
10
,
10
)
t3
=
torch
.
randn
(
10
,
10
)
cache
.
set
(
"key1"
,
t1
)
cache
.
set
(
"key2"
,
t2
)
# Access key1, making key2 the LRU
cache
.
get
(
"key1"
)
# Adding third should evict key2 (now LRU)
cache
.
set
(
"key3"
,
t3
)
assert
cache
.
get
(
"key1"
)
is
not
None
# Not evicted (recently accessed)
assert
cache
.
get
(
"key2"
)
is
None
# Evicted (LRU)
assert
cache
.
get
(
"key3"
)
is
not
None
def
test_tensor_too_large_for_cache
(
self
):
"""Test that tensor larger than capacity is not cached."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
100
)
# Very small
tensor
=
torch
.
randn
(
100
,
100
)
# ~40KB, way larger than capacity
result
=
cache
.
set
(
"key1"
,
tensor
)
assert
result
is
False
assert
cache
.
get
(
"key1"
)
is
None
assert
cache
.
stats
[
"entries"
]
==
0
class
TestEncoderCacheManagerSizeTracking
:
"""Tests for memory size tracking."""
def
test_current_bytes_tracking
(
self
):
"""Test that current_bytes is tracked correctly."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
t1
=
torch
.
randn
(
10
,
10
)
# 400 bytes
t2
=
torch
.
randn
(
20
,
20
)
# 1600 bytes
expected_size_1
=
t1
.
element_size
()
*
t1
.
numel
()
expected_size_2
=
t2
.
element_size
()
*
t2
.
numel
()
cache
.
set
(
"key1"
,
t1
)
assert
cache
.
stats
[
"current_bytes"
]
==
expected_size_1
cache
.
set
(
"key2"
,
t2
)
assert
cache
.
stats
[
"current_bytes"
]
==
expected_size_1
+
expected_size_2
def
test_size_updated_on_overwrite
(
self
):
"""Test that size is updated correctly when overwriting."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
small_tensor
=
torch
.
randn
(
10
,
10
)
# 400 bytes
large_tensor
=
torch
.
randn
(
20
,
20
)
# 1600 bytes
cache
.
set
(
"key1"
,
small_tensor
)
initial_size
=
cache
.
stats
[
"current_bytes"
]
cache
.
set
(
"key1"
,
large_tensor
)
expected_size
=
large_tensor
.
element_size
()
*
large_tensor
.
numel
()
assert
cache
.
stats
[
"current_bytes"
]
==
expected_size
assert
cache
.
stats
[
"current_bytes"
]
>
initial_size
class
TestEncoderCacheManagerStats
:
"""Tests for statistics tracking."""
def
test_hit_miss_tracking
(
self
):
"""Test hit and miss counting."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
tensor
=
torch
.
randn
(
10
,
10
)
cache
.
set
(
"key1"
,
tensor
)
# Misses
cache
.
get
(
"nonexistent1"
)
cache
.
get
(
"nonexistent2"
)
# Hits
cache
.
get
(
"key1"
)
cache
.
get
(
"key1"
)
cache
.
get
(
"key1"
)
stats
=
cache
.
stats
assert
stats
[
"hits"
]
==
3
assert
stats
[
"misses"
]
==
2
assert
stats
[
"hit_rate"
]
==
3
/
5
def
test_stats_content
(
self
):
"""Test stats dictionary contains expected keys."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
tensor
=
torch
.
randn
(
10
,
10
)
cache
.
set
(
"key1"
,
tensor
)
stats
=
cache
.
stats
assert
"entries"
in
stats
assert
"current_bytes"
in
stats
assert
"capacity_bytes"
in
stats
assert
"utilization"
in
stats
assert
"hits"
in
stats
assert
"misses"
in
stats
assert
"hit_rate"
in
stats
assert
stats
[
"entries"
]
==
1
assert
stats
[
"capacity_bytes"
]
==
1024
*
1024
def
test_utilization_calculation
(
self
):
"""Test utilization is calculated correctly."""
capacity
=
1000
cache
=
EncoderCacheManager
(
capacity_bytes
=
capacity
)
# Create tensor of known size
# float32 = 4 bytes, so 25 elements = 100 bytes
tensor
=
torch
.
zeros
(
25
,
dtype
=
torch
.
float32
)
cache
.
set
(
"key1"
,
tensor
)
stats
=
cache
.
stats
expected_utilization
=
100
/
capacity
assert
abs
(
stats
[
"utilization"
]
-
expected_utilization
)
<
0.001
class
TestEncoderCacheManagerContiguousTensor
:
"""Tests for contiguous tensor requirement."""
def
test_set_contiguous_tensor_succeeds
(
self
):
"""Test that contiguous tensors can be cached."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
tensor
=
torch
.
randn
(
10
,
10
)
assert
tensor
.
is_contiguous
()
result
=
cache
.
set
(
"key1"
,
tensor
)
assert
result
is
True
def
test_set_non_contiguous_tensor_raises
(
self
):
"""Test that non-contiguous tensors raise AssertionError."""
cache
=
EncoderCacheManager
(
capacity_bytes
=
1024
*
1024
)
# Create a non-contiguous tensor via transpose
tensor
=
torch
.
randn
(
10
,
20
).
t
()
assert
not
tensor
.
is_contiguous
()
with
pytest
.
raises
(
AssertionError
,
match
=
"Tensor must be contiguous"
):
cache
.
set
(
"key1"
,
tensor
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment