Commit c3598d02 (unverified), authored by Martin Hickey, committed by GitHub
Browse files

[Misc] Remove deprecated items that are due for removal (#36006)


Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
parent 57c629e9
...@@ -92,24 +92,6 @@ class CacheConfig: ...@@ -92,24 +92,6 @@ class CacheConfig:
benefits before turning this on.\n benefits before turning this on.\n
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
reproducible hashing. Requires the optional ``xxhash`` package.""" reproducible hashing. Requires the optional ``xxhash`` package."""
cpu_offload_gb: float = Field(default=0, ge=0)
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
no offloading. Intuitively, this argument can be seen as a virtual way to
increase the GPU memory size. For example, if you have one 24 GB GPU and
set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
Note that this requires fast CPU-GPU interconnect, as part of the model is
loaded from CPU memory to GPU memory on the fly in each model forward pass.
DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_gb instead.
"""
cpu_offload_params: set[str] = Field(default_factory=set)
"""The set of parameter name segments to target for CPU offloading.
DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_params instead.
"""
calculate_kv_scales: bool = False calculate_kv_scales: bool = False
"""This enables dynamic calculation of `k_scale` and `v_scale` when """This enables dynamic calculation of `k_scale` and `v_scale` when
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
......
...@@ -381,13 +381,6 @@ class CompilationConfig: ...@@ -381,13 +381,6 @@ class CompilationConfig:
certain small batchsizes, where inductor is good at optimizing. certain small batchsizes, where inductor is good at optimizing.
""" """
# Top-level Compilation control
level: int = Field(default=None)
"""
Level is deprecated and will be removed in the next release,
either 0.12.0 or 0.11.2 whichever is soonest.
Please use mode. Currently all levels are mapped to mode.
"""
# Top-level Compilation control # Top-level Compilation control
mode: CompilationMode = Field(default=None) mode: CompilationMode = Field(default=None)
"""The compilation approach used for torch.compile-based compilation of the """The compilation approach used for torch.compile-based compilation of the
...@@ -801,17 +794,6 @@ class CompilationConfig: ...@@ -801,17 +794,6 @@ class CompilationConfig:
return handler(value) return handler(value)
def __post_init__(self) -> None: def __post_init__(self) -> None:
if self.level is not None:
logger.warning(
"Level is deprecated and will be removed in the next release,"
"either 0.12.0 or 0.11.2 whichever is soonest."
"Use mode instead."
"If both level and mode are given,"
"only mode will be used."
)
if self.mode is None:
self.mode = self.level
count_none = self.custom_ops.count("none") count_none = self.custom_ops.count("none")
count_all = self.custom_ops.count("all") count_all = self.custom_ops.count("all")
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'" assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
......
...@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]): ...@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
mm_items.get_all_counts(), mm_items.get_all_counts(),
) )
for modality, prompt_updates in mm_prompt_updates.items():
for item_idx, item_prompt_updates in enumerate(prompt_updates):
if len(item_prompt_updates) > 1:
logger.warning_once(
"Detected %d prompt updates for `mm_items[%r][%s]`. "
"Multiple prompt updates per item is now "
"deprecated and may be removed in v0.13. "
"Instead, please specify dynamic update targets "
"in the same prompt update definition by passing "
"a function to `PromptUpdate.target`.",
len(prompt_updates),
modality,
item_idx,
)
return mm_prompt_updates return mm_prompt_updates
def _find_mm_placeholders( def _find_mm_placeholders(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment