import numpy as np import torch import random import logging import os import torch.multiprocessing as mp import torch.distributed as dist import subprocess import pickle import shutil def check_numpy_to_torch(x): if isinstance(x, np.ndarray): return torch.from_numpy(x).float(), True return x, False def limit_period(val, offset=0.5, period=np.pi): val, is_numpy = check_numpy_to_torch(val) ans = val - torch.floor(val / period + offset) * period return ans.numpy() if is_numpy else ans def drop_info_with_name(info, name): ret_info = {} keep_indices = [i for i, x in enumerate(info['name']) if x != name] for key in info.keys(): ret_info[key] = info[key][keep_indices] return ret_info def rotate_points_along_z(points, angle): """ Args: points: (B, N, 3 + C) angle: (B), angle along z-axis, angle increases x ==> y Returns: """ points, is_numpy = check_numpy_to_torch(points) angle, _ = check_numpy_to_torch(angle) cosa = torch.cos(angle) sina = torch.sin(angle) zeros = angle.new_zeros(points.shape[0]) ones = angle.new_ones(points.shape[0]) rot_matrix = torch.stack(( cosa, sina, zeros, -sina, cosa, zeros, zeros, zeros, ones ), dim=1).view(-1, 3, 3).float() points_rot = torch.matmul(points[:, :, 0:3], rot_matrix) points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) return points_rot.numpy() if is_numpy else points_rot def mask_points_by_range(points, limit_range): mask = (points[:, 0] >= limit_range[0]) & (points[:, 0] <= limit_range[3]) \ & (points[:, 1] >= limit_range[1]) & (points[:, 1] <= limit_range[4]) return mask def get_voxel_centers(voxel_coords, downsample_times, voxel_size, point_cloud_range): """ Args: voxel_coords: (N, 3) downsample_times: voxel_size: point_cloud_range: Returns: """ assert voxel_coords.shape[1] == 3 voxel_centers = voxel_coords[:, [2, 1, 0]].float() # (xyz) voxel_size = torch.tensor(voxel_size, device=voxel_centers.device).float() * downsample_times pc_range = torch.tensor(point_cloud_range[0:3], device=voxel_centers.device).float() voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range return voxel_centers def create_logger(log_file=None, rank=0, log_level=logging.INFO): logger = logging.getLogger(__name__) logger.setLevel(log_level if rank == 0 else 'ERROR') formatter = logging.Formatter('%(asctime)s %(levelname)5s %(message)s') console = logging.StreamHandler() console.setLevel(log_level if rank == 0 else 'ERROR') console.setFormatter(formatter) logger.addHandler(console) if log_file is not None: file_handler = logging.FileHandler(filename=log_file) file_handler.setLevel(log_level if rank == 0 else 'ERROR') file_handler.setFormatter(formatter) logger.addHandler(file_handler) return logger def set_random_seed(seed): random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False def keep_arrays_by_name(gt_names, used_classes): inds = [i for i, x in enumerate(gt_names) if x in used_classes] inds = np.array(inds, dtype=np.int64) return inds def init_dist_slurm(batch_size, tcp_port, local_rank, backend='nccl'): """ modified from https://github.com/open-mmlab/mmdetection Args: batch_size: tcp_port: backend: Returns: """ proc_id = int(os.environ['SLURM_PROCID']) ntasks = int(os.environ['SLURM_NTASKS']) node_list = os.environ['SLURM_NODELIST'] num_gpus = torch.cuda.device_count() torch.cuda.set_device(proc_id % num_gpus) addr = subprocess.getoutput('scontrol show hostname {} | head -n1'.format(node_list)) os.environ['MASTER_PORT'] = str(tcp_port) os.environ['MASTER_ADDR'] = addr os.environ['WORLD_SIZE'] = str(ntasks) os.environ['RANK'] = str(proc_id) dist.init_process_group(backend=backend) total_gpus = dist.get_world_size() assert batch_size % total_gpus == 0, 'Batch size should be matched with GPUS: (%d, %d)' % (batch_size, total_gpus) batch_size_each_gpu = batch_size // total_gpus rank = dist.get_rank() return batch_size_each_gpu, rank def init_dist_pytorch(batch_size, tcp_port, local_rank, backend='nccl'): if mp.get_start_method(allow_none=True) is None: mp.set_start_method('spawn') num_gpus = torch.cuda.device_count() torch.cuda.set_device(local_rank % num_gpus) dist.init_process_group( backend=backend, init_method='tcp://127.0.0.1:%d' % tcp_port, rank=local_rank, world_size=num_gpus ) assert batch_size % num_gpus == 0, 'Batch size should be matched with GPUS: (%d, %d)' % (batch_size, num_gpus) batch_size_each_gpu = batch_size // num_gpus rank = dist.get_rank() return batch_size_each_gpu, rank def get_dist_info(): if torch.__version__ < '1.0': initialized = dist._initialized else: if dist.is_available(): initialized = dist.is_initialized() else: initialized = False if initialized: rank = dist.get_rank() world_size = dist.get_world_size() else: rank = 0 world_size = 1 return rank, world_size def merge_results_dist(result_part, size, tmpdir): rank, world_size = get_dist_info() os.makedirs(tmpdir, exist_ok=True) dist.barrier() pickle.dump(result_part, open(os.path.join(tmpdir, 'result_part_{}.pkl'.format(rank)), 'wb')) dist.barrier() if rank != 0: return None part_list = [] for i in range(world_size): part_file = os.path.join(tmpdir, 'result_part_{}.pkl'.format(i)) part_list.append(pickle.load(open(part_file, 'rb'))) ordered_results = [] for res in zip(*part_list): ordered_results.extend(list(res)) ordered_results = ordered_results[:size] shutil.rmtree(tmpdir) return ordered_results