# Note(zhangbo): Paddle's GPU minimum memory allocation unit is 256 bytes; waiting_alloc_memory will compute the memory space occupied by 't'.
# Note(zhangbo): Paddle's GPU minimum memory allocation unit is 256 bytes; waiting_alloc_memory will compute the memory space occupied by 't'.
# The coefficient 1.2 is used to avoid an OOM that may occur in the critical state where memory is only just sufficient.
# The coefficient 1.2 is used to avoid an OOM that may occur in the critical state where memory is only just sufficient.
waiting_alloc_memory=(
waiting_alloc_memory=(
(np.prod(t.shape)*size_dtype)/256+1)*256*1.2
((np.prod(t.shape)*size_dtype)/256+1)*256*1.2
)
gpu_memory_available=core.gpu_memory_available()
gpu_memory_available=core.gpu_memory_available()
ifgpu_memory_available<waiting_alloc_memory:
ifgpu_memory_available<waiting_alloc_memory:
# Copy param / Tensor to cpu
# Copy param / Tensor to cpu
t_used=t._copy_to(paddle.CPUPlace(),
t_used=t._copy_to(
blocking)# k-v type will error
paddle.CPUPlace(),blocking
)# k-v type will error
# Release mem of t
# Release mem of t
t.value().get_tensor()._clear()
t.value().get_tensor()._clear()
else:
else:
...
@@ -1653,7 +1795,8 @@ class Layer(object):
...
@@ -1653,7 +1795,8 @@ class Layer(object):
# 2. cast param / Tensor to dtype
# 2. cast param / Tensor to dtype
ifdtypeisnotNoneanddtype!=t_used.dtype:
ifdtypeisnotNoneanddtype!=t_used.dtype:
withpaddle.fluid.framework._dygraph_place_guard(
withpaddle.fluid.framework._dygraph_place_guard(
place=t_used.place):
place=t_used.place
):
t_casted=t_used.cast(dtype=dtype)
t_casted=t_used.cast(dtype=dtype)
else:
else:
t_casted=t_used
t_casted=t_used
...
@@ -1671,12 +1814,14 @@ class Layer(object):
...
@@ -1671,12 +1814,14 @@ class Layer(object):
returnt
returnt
def_to_impl(self,
def_to_impl(
device=None,
self,
dtype=None,
device=None,
blocking=None,
dtype=None,
include_sublayers=True,
blocking=None,
floating_only=False):
include_sublayers=True,
floating_only=False,
):
'''
'''
Cast the parameters and buffers of Layer by the give device, dtype and blocking.
Cast the parameters and buffers of Layer by the give device, dtype and blocking.
...
@@ -1689,7 +1834,7 @@ class Layer(object):
...
@@ -1689,7 +1834,7 @@ class Layer(object):
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.
include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.
floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the give device, dtype and blocking.
floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the give device, dtype and blocking.