[gemini] polish code (#855)

f0e65455 · HELSON · GitHub · 29159d9b · f0e65455 · f0e65455
Unverified commit f0e65455, authored Apr 25, 2022 by HELSON; committed by GitHub on Apr 25, 2022.
Showing 2 changed files with 6 additions and 8 deletions

colossalai/gemini/stateful_tensor_mgr.py +2 -3

colossalai/gemini/tensor_placement_policy.py +4 -5

No files found.
--- a/colossalai/gemini/stateful_tensor_mgr.py
+++ b/colossalai/gemini/stateful_tensor_mgr.py
@@ -42,7 +42,7 @@ class StatefulTensorMgr(object):
        by mem_stats_collector, which should belongs to a Sharded Model.
        """
        # find stateful tensor in state COMPUTE
-        cuda_demand = 0
+        cuda_demand = StatefulTensor.GST_MGR.state_mem['cpu'][TensorState.COMPUTE]
        move_to_cuda_tensor_list = []
        hold_cuda_tensor_list = []
        for tensor in self._stateful_tensor_list:
@@ -55,7 +55,6 @@ class StatefulTensorMgr(object):
            elif tensor.device.type == 'cpu':
                if tensor.state == TensorState.COMPUTE:
                    move_to_cuda_tensor_list.append(tensor)
-                    cuda_demand += colo_tensor_mem_usage(tensor.payload)[1]
            else:
                raise RuntimeError
        self._cpu_gpu_move_volume += self._tensor_placement_policy.evict_tensors(hold_cuda_tensor_list,
@@ -66,7 +65,7 @@ class StatefulTensorMgr(object):
        # move COMPUTE tensors to CUDA
        for t in move_to_cuda_tensor_list:
            colo_model_data_tensor_move_inline(t, get_current_device())
-            self._cpu_gpu_move_volume += t.payload.numel() * t.payload.element_size()
+            self._cpu_gpu_move_volume += t.payload_size

    @property
    def cpu_gpu_move_volume(self):

--- a/colossalai/gemini/tensor_placement_policy.py
+++ b/colossalai/gemini/tensor_placement_policy.py
@@ -76,7 +76,6 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
        Returns:
            int: the volume of memory that is evicted
        """
-        volume = 0
        cuda_capacity = colo_device_memory_capacity(get_current_device())
        used_cuda_model_data = StatefulTensor.GST_MGR.total_mem['cuda']
        if warmup:
@@ -88,11 +87,12 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
            cuda_capacity *= self._steady_cuda_cap_ratio
        total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period
        avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data
+
+        freed_cuda_model_data = 0
        if avail_cuda_model_data < cuda_demand:
            # Move cuda_demand - avail_cuda_model_data volume of tensors
            # to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
            to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
-            freed_cuda_model_data = 0
            to_free_tensor_list = hold_cuda_tensor_list
            if not warmup:
                next_compute_idx = {t: len(compute_list) for t in hold_cuda_tensor_list}
@@ -104,15 +104,14 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
            for t in to_free_tensor_list:
                if freed_cuda_model_data >= to_free_cuda_model_data:
                    break
-                freed_cuda_model_data += colo_tensor_mem_usage(t)[0]
+                freed_cuda_model_data += t.payload_size
                colo_model_data_tensor_move_inline(t, torch.device('cpu'))
-                volume += t.payload.numel() * t.payload.element_size()
            if freed_cuda_model_data < to_free_cuda_model_data:
                raise RuntimeError(
                    f"Adjust layout failed! No enough CUDA memory! Need {to_free_cuda_model_data}, freed {freed_cuda_model_data}"
                )

-        return volume
+        return freed_cuda_model_data


 class TensorPlacementPolicyFactory: