chenpangpang / transformers
Commit d9e848c1 (unverified), authored Jan 05, 2021 by Stas Bekman, committed by GitHub on Jan 05, 2021

add experimental warning (#9412)

Parent: 29acabd8
Showing 2 changed files with 4 additions and 0 deletions.
src/transformers/models/gpt2/modeling_gpt2.py  +2 -0
src/transformers/models/t5/modeling_t5.py      +2 -0
src/transformers/models/gpt2/modeling_gpt2.py (view file @ d9e848c1)

@@ -480,6 +480,8 @@ GPT2_INPUTS_DOCSTRING = r"""
...
            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
"""

PARALLELIZE_DOCSTRING = r"""
    This is an experimental feature and is a subject to change at a moment's notice.

    Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
    it will evenly distribute blocks across all devices.
...
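For context, the docstring being amended here describes the experimental model-parallelism API on the GPT-2 classes. A minimal usage sketch, not part of this commit; the checkpoint name and device split below are illustrative assumptions, and the API may change as the new warning says:

from transformers import GPT2LMHeadModel

# Illustrative checkpoint and split, chosen only to show the device-map format:
# keys are CUDA device ids, values are lists of transformer block indices
# placed on that device (gpt2-xl has 48 blocks).
model = GPT2LMHeadModel.from_pretrained("gpt2-xl")
device_map = {
    0: list(range(0, 24)),
    1: list(range(24, 48)),
}
model.parallelize(device_map)   # with no argument, blocks are split evenly across visible GPUs

# ... run generation or training with input tensors on the first device ("cuda:0") ...

model.deparallelize()           # moves the model back to the CPU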
src/transformers/models/t5/modeling_t5.py (view file @ d9e848c1)

@@ -179,6 +179,8 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
...
# - PreTrainedModel for the models (it-self a sub-class of torch.nn.Module)
####################################################

PARALLELIZE_DOCSTRING = r"""
    This is an experimental feature and is a subject to change at a moment's notice.

    Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
    it will evenly distribute blocks across all devices.
...
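The T5 classes expose the same experimental API, so the warning is added to the equivalent docstring there. A usage sketch under the same assumptions (illustrative checkpoint and split, subject to change):

from transformers import T5ForConditionalGeneration

# Illustrative checkpoint and split: t5-3b has 24 blocks per stack; the map
# assigns block indices to CUDA device ids.
model = T5ForConditionalGeneration.from_pretrained("t5-3b")
device_map = {
    0: list(range(0, 12)),
    1: list(range(12, 24)),
}
model.parallelize(device_map)   # omit the map for an even split across GPUs

model.deparallelize()           # undo the split, back to CPU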