Unverified Commit 59dcea3f authored by Younes Belkada, committed by GitHub

[`PreTrainedModel`] Wrap `cuda` and `to` method correctly (#25206)

wrap `cuda` and `to` method correctly
parent 67b85f24
@@ -25,7 +25,7 @@ import tempfile
 import warnings
 from contextlib import contextmanager
 from dataclasses import dataclass
-from functools import partial
+from functools import partial, wraps
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import torch
@@ -1912,6 +1912,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
             mem = mem + mem_bufs
         return mem
 
+    @wraps(torch.nn.Module.cuda)
     def cuda(self, *args, **kwargs):
         # Checks if the model has been loaded in 8-bit
         if getattr(self, "is_quantized", False):
@@ -1922,6 +1923,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
         else:
             return super().cuda(*args, **kwargs)
 
+    @wraps(torch.nn.Module.to)
     def to(self, *args, **kwargs):
         # Checks if the model has been loaded in 8-bit
         if getattr(self, "is_quantized", False):
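
A note on why `functools.wraps` helps here: without it, the overridden `cuda` and `to` methods expose their own (empty) docstrings and metadata rather than those of `torch.nn.Module`, so `help(model.to)` and other introspection would lose the upstream documentation. Below is a minimal sketch of the effect; `TinyModel` and its error message are illustrative stand-ins, not part of this diff.

```python
# Minimal sketch (assumption: TinyModel stands in for PreTrainedModel).
# functools.wraps copies __doc__, __name__, and related metadata from the
# wrapped torch.nn.Module method onto the override, so the upstream docs
# survive the quantization guard placed around .cuda().
from functools import wraps

import torch


class TinyModel(torch.nn.Module):
    @wraps(torch.nn.Module.cuda)
    def cuda(self, *args, **kwargs):
        # Illustrative guard mirroring the is_quantized check in the diff
        # above (the exact error message here is hypothetical).
        if getattr(self, "is_quantized", False):
            raise ValueError("Quantized models cannot be moved with `.cuda()`.")
        return super().cuda(*args, **kwargs)


model = TinyModel()
# Thanks to @wraps, the override reports torch's original documentation.
assert model.cuda.__doc__ == torch.nn.Module.cuda.__doc__
print(model.cuda.__name__)  # -> "cuda"
```

The same reasoning applies to the wrapped `to` override.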