# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

"""
TeaCache: Timestep Embedding Aware Cache for diffusion model acceleration.

TeaCache speeds up diffusion inference by reusing transformer block computations
when consecutive timestep embeddings are similar.

This implementation uses a hooks-based approach that requires zero changes to
model code. Model developers only need to add an extractor function to support
new models.

Usage:
    from vllm_omni import Omni

    omni = Omni(
        model="Qwen/Qwen-Image",
        cache_backend="tea_cache",
        cache_config={"rel_l1_thresh": 0.2}
    )
    images = omni.generate("a cat")

    # Alternative: Using environment variable
    # export DIFFUSION_CACHE_BACKEND=tea_cache
"""

from vllm_omni.diffusion.cache.teacache.backend import TeaCacheBackend
from vllm_omni.diffusion.cache.teacache.config import TeaCacheConfig
from vllm_omni.diffusion.cache.teacache.extractors import (
    CacheContext,
    register_extractor,
)
from vllm_omni.diffusion.cache.teacache.hook import TeaCacheHook, apply_teacache_hook
from vllm_omni.diffusion.cache.teacache.state import TeaCacheState

# Public API surface of the teacache package, re-exported from the
# submodules imported above. Keep in sync when adding new modules.
__all__ = [
    # Backend and its configuration/state
    "TeaCacheBackend",
    "TeaCacheConfig",
    "TeaCacheState",
    # Hook-based integration (zero model-code changes)
    "TeaCacheHook",
    "apply_teacache_hook",
    # Per-model extractor registration
    "register_extractor",
    "CacheContext",
]