# NOTE(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes;
# waiting_alloc_memory computes the memory space occupied by 't', rounded
# up to the next 256-byte unit. Coefficient 1.2 is used to avoid OOM that
# may occur in this critical state when the memory is just enough.
waiting_alloc_memory = (
    ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2
)
gpu_memory_available = core.gpu_memory_available()
if gpu_memory_available < waiting_alloc_memory:
    # Not enough free GPU memory for the upcoming cast/copy:
    # stage the param / Tensor on CPU first.
    t_used = t._copy_to(
        paddle.CPUPlace(), blocking
    )  # k-v type will error
    # Release mem of t
    t.value().get_tensor()._clear()
else:
    ...  # NOTE(review): else branch elided in this fragment — restore from full source
@@ -1653,7 +1795,8 @@ class Layer(object):
# 2. cast param / Tensor to dtype
if dtype is not None and dtype != t_used.dtype:
    # Perform the cast on the place where 't_used' currently lives.
    with paddle.fluid.framework._dygraph_place_guard(
        place=t_used.place
    ):
        t_casted = t_used.cast(dtype=dtype)
else:
    # No target dtype given, or dtype already matches: no cast needed.
    t_casted = t_used
...
...
@@ -1671,12 +1814,14 @@ class Layer(object):
returnt
def_to_impl(self,
device=None,
dtype=None,
blocking=None,
include_sublayers=True,
floating_only=False):
def_to_impl(
self,
device=None,
dtype=None,
blocking=None,
include_sublayers=True,
floating_only=False,
):
'''
Cast the parameters and buffers of Layer by the given device, dtype and blocking.
...
...
@@ -1689,7 +1834,7 @@ class Layer(object):
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.
floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the given device, dtype and blocking.