"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "7b0ba4820a7546520da4b099fc6c523d5b6d3383"
Unverified Commit c60a17b6 authored by Zaida Zhou's avatar Zaida Zhou Committed by GitHub
Browse files

[Docs] Fix the format of return (#1462)

* [Docs] Fix the format of return

* replace List with list

* format the documentation of optimizer

* Update ops docstring (#2

)

* update ops docstring

* fix typos
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
parent 44e7eee8
...@@ -246,7 +246,7 @@ class PetrelBackend(BaseStorageBackend): ...@@ -246,7 +246,7 @@ class PetrelBackend(BaseStorageBackend):
Returns: Returns:
bool: Return ``True`` if ``filepath`` points to a directory, bool: Return ``True`` if ``filepath`` points to a directory,
``False`` otherwise. ``False`` otherwise.
""" """
if not has_method(self._client, 'isdir'): if not has_method(self._client, 'isdir'):
raise NotImplementedError( raise NotImplementedError(
...@@ -266,7 +266,7 @@ class PetrelBackend(BaseStorageBackend): ...@@ -266,7 +266,7 @@ class PetrelBackend(BaseStorageBackend):
Returns: Returns:
bool: Return ``True`` if ``filepath`` points to a file, ``False`` bool: Return ``True`` if ``filepath`` points to a file, ``False``
otherwise. otherwise.
""" """
if not has_method(self._client, 'contains'): if not has_method(self._client, 'contains'):
raise NotImplementedError( raise NotImplementedError(
...@@ -598,7 +598,7 @@ class HardDiskBackend(BaseStorageBackend): ...@@ -598,7 +598,7 @@ class HardDiskBackend(BaseStorageBackend):
Returns: Returns:
bool: Return ``True`` if ``filepath`` points to a directory, bool: Return ``True`` if ``filepath`` points to a directory,
``False`` otherwise. ``False`` otherwise.
""" """
return osp.isdir(filepath) return osp.isdir(filepath)
...@@ -610,7 +610,7 @@ class HardDiskBackend(BaseStorageBackend): ...@@ -610,7 +610,7 @@ class HardDiskBackend(BaseStorageBackend):
Returns: Returns:
bool: Return ``True`` if ``filepath`` points to a file, ``False`` bool: Return ``True`` if ``filepath`` points to a file, ``False``
otherwise. otherwise.
""" """
return osp.isfile(filepath) return osp.isfile(filepath)
...@@ -839,8 +839,8 @@ class FileClient: ...@@ -839,8 +839,8 @@ class FileClient:
's3' 's3'
Returns: Returns:
str | None: Return the prefix of uri if the uri contains '://' str | None: Return the prefix of uri if the uri contains '://' else
else ``None``. ``None``.
""" """
assert is_filepath(uri) assert is_filepath(uri)
uri = str(uri) uri = str(uri)
...@@ -987,7 +987,7 @@ class FileClient: ...@@ -987,7 +987,7 @@ class FileClient:
Returns: Returns:
bytes | memoryview: Expected bytes object or a memory view of the bytes | memoryview: Expected bytes object or a memory view of the
bytes object. bytes object.
""" """
return self.client.get(filepath) return self.client.get(filepath)
...@@ -1060,7 +1060,7 @@ class FileClient: ...@@ -1060,7 +1060,7 @@ class FileClient:
Returns: Returns:
bool: Return ``True`` if ``filepath`` points to a directory, bool: Return ``True`` if ``filepath`` points to a directory,
``False`` otherwise. ``False`` otherwise.
""" """
return self.client.isdir(filepath) return self.client.isdir(filepath)
......
...@@ -84,9 +84,12 @@ class AssignScoreWithK(Function): ...@@ -84,9 +84,12 @@ class AssignScoreWithK(Function):
grad_out (torch.Tensor): (B, out_dim, npoint, K) grad_out (torch.Tensor): (B, out_dim, npoint, K)
Returns: Returns:
grad_scores (torch.Tensor): (B, npoint, K, M) tuple[torch.Tensor]: A tuple contains five elements. The first one
grad_point_features (torch.Tensor): (B, N, M, out_dim) is the gradient of ``scores`` whose shape is (B, npoint, K, M). The
grad_center_features (torch.Tensor): (B, N, M, out_dim) second is the gradient of ``point_features`` whose shape is
(B, N, M, out_dim). The third is the gradient of
``center_features`` with the shape of (B, N, M, out_dim). The last
two are ``None``.
""" """
_, point_features, center_features, scores, knn_idx = ctx.saved_tensors _, point_features, center_features, scores, knn_idx = ctx.saved_tensors
......
...@@ -19,11 +19,12 @@ class BallQuery(Function): ...@@ -19,11 +19,12 @@ class BallQuery(Function):
max_radius (float): maximum radius of the balls. max_radius (float): maximum radius of the balls.
sample_num (int): maximum number of features in the balls. sample_num (int): maximum number of features in the balls.
xyz (Tensor): (B, N, 3) xyz coordinates of the features. xyz (Tensor): (B, N, 3) xyz coordinates of the features.
center_xyz (Tensor): (B, npoint, 3) centers of the ball query. center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball
query.
Returns: Returns:
Tensor: (B, npoint, nsample) tensor with the indices of torch.Tensor: (B, npoint, nsample) tensor with the indices of the
the features that form the query balls. features that form the query balls.
""" """
assert center_xyz.is_contiguous() assert center_xyz.is_contiguous()
assert xyz.is_contiguous() assert xyz.is_contiguous()
......
...@@ -12,14 +12,16 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): ...@@ -12,14 +12,16 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
bboxes1 and bboxes2. bboxes1 and bboxes2.
Args: Args:
bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format or empty. bboxes1 (torch.Tensor): shape (m, 4) in <x1, y1, x2, y2> format or
bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format or empty. empty.
If aligned is ``True``, then m and n must be equal. bboxes2 (torch.Tensor): shape (n, 4) in <x1, y1, x2, y2> format or
empty. If aligned is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union) or "iof" (intersection over mode (str): "iou" (intersection over union) or "iof" (intersection over
foreground). foreground).
Returns: Returns:
ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) torch.Tensor: Return the ious between boxes. If ``aligned`` is
``False``, the shape of ious is (m, n) else (m, 1).
Example: Example:
>>> bboxes1 = torch.FloatTensor([ >>> bboxes1 = torch.FloatTensor([
......
...@@ -72,17 +72,17 @@ class BorderAlign(nn.Module): ...@@ -72,17 +72,17 @@ class BorderAlign(nn.Module):
For each border line (e.g. top, left, bottom or right) of each box, For each border line (e.g. top, left, bottom or right) of each box,
border_align does the following: border_align does the following:
1. uniformly samples `pool_size`+1 positions on this line, involving \
the start and end points. 1. uniformly samples ``pool_size`` +1 positions on this line, involving
2. the corresponding features on these points are computed by \ the start and end points.
bilinear interpolation. 2. the corresponding features on these points are computed by bilinear
3. max pooling over all the `pool_size`+1 positions are used for \ interpolation.
computing pooled feature. 3. max pooling over all the ``pool_size`` +1 positions are used for
computing pooled feature.
Args: Args:
pool_size (int): number of positions sampled over the boxes' borders pool_size (int): number of positions sampled over the boxes' borders
(e.g. top, bottom, left, right). (e.g. top, bottom, left, right).
""" """
def __init__(self, pool_size): def __init__(self, pool_size):
...@@ -98,8 +98,8 @@ class BorderAlign(nn.Module): ...@@ -98,8 +98,8 @@ class BorderAlign(nn.Module):
boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).
Returns: Returns:
Tensor: Pooled features with shape [N,C,H*W,4]. The order is torch.Tensor: Pooled features with shape [N,C,H*W,4]. The order is
(top,left,bottom,right) for the last dimension. (top,left,bottom,right) for the last dimension.
""" """
return border_align(input, boxes, self.pool_size) return border_align(input, boxes, self.pool_size)
......
...@@ -14,18 +14,19 @@ def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): ...@@ -14,18 +14,19 @@ def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False):
of bboxes1 and bboxes2, otherwise the ious between each aligned pair of of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
bboxes1 and bboxes2. bboxes1 and bboxes2.
Arguments: Args:
boxes1 (Tensor): rotated bboxes 1. \ boxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5),
It has shape (N, 5), indicating (x, y, w, h, theta) for each row. indicating (x, y, w, h, theta) for each row. Note that theta is in
Note that theta is in radian. radian.
boxes2 (Tensor): rotated bboxes 2. \ boxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5),
It has shape (M, 5), indicating (x, y, w, h, theta) for each row. indicating (x, y, w, h, theta) for each row. Note that theta is in
Note that theta is in radian. radian.
mode (str): "iou" (intersection over union) or "iof" (intersection over mode (str): "iou" (intersection over union) or "iof" (intersection over
foreground). foreground).
Returns: Returns:
ious(Tensor): shape (N, M) if aligned == False else shape (N,) torch.Tensor: Return the ious between boxes. If ``aligned`` is
``False``, the shape of ious is (N, M) else (N,).
""" """
assert mode in ['iou', 'iof'] assert mode in ['iou', 'iof']
mode_dict = {'iou': 0, 'iof': 1} mode_dict = {'iou': 0, 'iof': 1}
......
...@@ -180,7 +180,8 @@ carafe = CARAFEFunction.apply ...@@ -180,7 +180,8 @@ carafe = CARAFEFunction.apply
class CARAFE(Module): class CARAFE(Module):
""" CARAFE: Content-Aware ReAssembly of FEatures """ CARAFE: Content-Aware ReAssembly of FEatures
Please refer to https://arxiv.org/abs/1905.02188 for more details. Please refer to `CARAFE: Content-Aware ReAssembly of FEatures
<https://arxiv.org/abs/1905.02188>`_ for more details.
Args: Args:
kernel_size (int): reassemble kernel size kernel_size (int): reassemble kernel size
...@@ -211,8 +212,8 @@ class CARAFEPack(nn.Module): ...@@ -211,8 +212,8 @@ class CARAFEPack(nn.Module):
compressor 2) content encoder 3) CARAFE op. compressor 2) content encoder 3) CARAFE op.
Official implementation of ICCV 2019 paper Official implementation of ICCV 2019 paper
CARAFE: Content-Aware ReAssembly of FEatures `CARAFE: Content-Aware ReAssembly of FEatures
Please refer to https://arxiv.org/abs/1905.02188 for more details. <https://arxiv.org/abs/1905.02188>`_.
Args: Args:
channels (int): input feature channels channels (int): input feature channels
......
...@@ -53,10 +53,11 @@ class CrissCrossAttention(nn.Module): ...@@ -53,10 +53,11 @@ class CrissCrossAttention(nn.Module):
"""forward function of Criss-Cross Attention. """forward function of Criss-Cross Attention.
Args: Args:
x (Tensor): Input feature. \ x (torch.Tensor): Input feature with the shape of
shape (batch_size, in_channels, height, width) (batch_size, in_channels, height, width).
Returns: Returns:
Tensor: Output of the layer, with shape of \ torch.Tensor: Output of the layer, with the shape of
(batch_size, in_channels, height, width) (batch_size, in_channels, height, width)
""" """
B, C, H, W = x.size() B, C, H, W = x.size()
......
...@@ -12,16 +12,16 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, ...@@ -12,16 +12,16 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
"""Expand kernel contours so that foreground pixels are assigned into """Expand kernel contours so that foreground pixels are assigned into
instances. instances.
Arguments: Args:
kernel_mask (np.array or Tensor): The instance kernel mask with kernel_mask (np.array or torch.Tensor): The instance kernel mask with
size hxw. size hxw.
internal_kernel_label (np.array or Tensor): The instance internal internal_kernel_label (np.array or torch.Tensor): The instance internal
kernel label with size hxw. kernel label with size hxw.
min_kernel_area (int): The minimum kernel area. min_kernel_area (int): The minimum kernel area.
kernel_num (int): The instance kernel number. kernel_num (int): The instance kernel number.
Returns: Returns:
label (list): The instance index map with size hxw. list: The instance index map with size hxw.
""" """
assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
......
...@@ -104,11 +104,13 @@ class CornerPool(nn.Module): ...@@ -104,11 +104,13 @@ class CornerPool(nn.Module):
Corner Pooling is a new type of pooling layer that helps a Corner Pooling is a new type of pooling layer that helps a
convolutional network better localize corners of bounding boxes. convolutional network better localize corners of bounding boxes.
Please refer to https://arxiv.org/abs/1808.01244 for more details. Please refer to `CornerNet: Detecting Objects as Paired Keypoints
<https://arxiv.org/abs/1808.01244>`_ for more details.
Code is modified from https://github.com/princeton-vl/CornerNet-Lite. Code is modified from https://github.com/princeton-vl/CornerNet-Lite.
Args: Args:
mode(str): Pooling orientation for the pooling layer mode (str): Pooling orientation for the pooling layer
- 'bottom': Bottom Pooling - 'bottom': Bottom Pooling
- 'left': Left Pooling - 'left': Left Pooling
......
...@@ -18,11 +18,11 @@ class FurthestPointSampling(Function): ...@@ -18,11 +18,11 @@ class FurthestPointSampling(Function):
num_points: int) -> torch.Tensor: num_points: int) -> torch.Tensor:
""" """
Args: Args:
points_xyz (Tensor): (B, N, 3) where N > num_points. points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
num_points (int): Number of points in the sampled set. num_points (int): Number of points in the sampled set.
Returns: Returns:
Tensor: (B, num_points) indices of the sampled points. torch.Tensor: (B, num_points) indices of the sampled points.
""" """
assert points_xyz.is_contiguous() assert points_xyz.is_contiguous()
...@@ -56,11 +56,12 @@ class FurthestPointSamplingWithDist(Function): ...@@ -56,11 +56,12 @@ class FurthestPointSamplingWithDist(Function):
num_points: int) -> torch.Tensor: num_points: int) -> torch.Tensor:
""" """
Args: Args:
points_dist (Tensor): (B, N, N) Distance between each point pair. points_dist (torch.Tensor): (B, N, N) Distance between each point
pair.
num_points (int): Number of points in the sampled set. num_points (int): Number of points in the sampled set.
Returns: Returns:
Tensor: (B, num_points) indices of the sampled points. torch.Tensor: (B, num_points) indices of the sampled points.
""" """
assert points_dist.is_contiguous() assert points_dist.is_contiguous()
......
...@@ -191,7 +191,8 @@ class FusedBiasLeakyReLU(nn.Module): ...@@ -191,7 +191,8 @@ class FusedBiasLeakyReLU(nn.Module):
"""Fused bias leaky ReLU. """Fused bias leaky ReLU.
This function is introduced in the StyleGAN2: This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958 `Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a the variance of the feature map or gradients unchanged, they also adopt a
...@@ -226,7 +227,8 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): ...@@ -226,7 +227,8 @@ def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
"""Fused bias leaky ReLU function. """Fused bias leaky ReLU function.
This function is introduced in the StyleGAN2: This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958 `Analyzing and Improving the Image Quality of StyleGAN
<http://arxiv.org/abs/1912.04958>`_
The bias term comes from the convolution operation. In addition, to keep The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a the variance of the feature map or gradients unchanged, they also adopt a
......
...@@ -15,11 +15,11 @@ class GatherPoints(Function): ...@@ -15,11 +15,11 @@ class GatherPoints(Function):
indices: torch.Tensor) -> torch.Tensor: indices: torch.Tensor) -> torch.Tensor:
""" """
Args: Args:
features (Tensor): (B, C, N) features to gather. features (torch.Tensor): (B, C, N) features to gather.
indices (Tensor): (B, M) where M is the number of points. indices (torch.Tensor): (B, M) where M is the number of points.
Returns: Returns:
Tensor: (B, C, M) where M is the number of points. torch.Tensor: (B, C, M) where M is the number of points.
""" """
assert features.is_contiguous() assert features.is_contiguous()
assert indices.is_contiguous() assert indices.is_contiguous()
......
...@@ -67,12 +67,16 @@ class QueryAndGroup(nn.Module): ...@@ -67,12 +67,16 @@ class QueryAndGroup(nn.Module):
def forward(self, points_xyz, center_xyz, features=None): def forward(self, points_xyz, center_xyz, features=None):
""" """
Args: Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the
center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. points.
features (Tensor): (B, C, N) Descriptors of the features. center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the
centroids.
features (torch.Tensor): (B, C, N) The features of grouped
points.
Returns: Returns:
Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped
concatenated coordinates and features of points.
""" """
# if self.max_radius is None, we will perform kNN instead of ball query # if self.max_radius is None, we will perform kNN instead of ball query
# idx is of shape [B, npoint, sample_num] # idx is of shape [B, npoint, sample_num]
......
...@@ -17,7 +17,7 @@ def boxes_iou_bev(boxes_a, boxes_b): ...@@ -17,7 +17,7 @@ def boxes_iou_bev(boxes_a, boxes_b):
boxes_b (torch.Tensor): Input boxes b with shape (N, 5). boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
Returns: Returns:
ans_iou (torch.Tensor): IoU result with shape (M, N). torch.Tensor: IoU result with shape (M, N).
""" """
ans_iou = boxes_a.new_zeros( ans_iou = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
......
...@@ -8,6 +8,7 @@ ext_module = ext_loader.load_ext('_ext', ['knn_forward']) ...@@ -8,6 +8,7 @@ ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
class KNN(Function): class KNN(Function):
r"""KNN (CUDA) based on heap data structure. r"""KNN (CUDA) based on heap data structure.
Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/ Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
scene_seg/lib/pointops/src/knnquery_heap>`_. scene_seg/lib/pointops/src/knnquery_heap>`_.
...@@ -23,10 +24,10 @@ class KNN(Function): ...@@ -23,10 +24,10 @@ class KNN(Function):
""" """
Args: Args:
k (int): number of nearest neighbors. k (int): number of nearest neighbors.
xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). xyz (torch.Tensor): (B, N, 3) if transposed == False, else
xyz coordinates of the features. (B, 3, N). xyz coordinates of the features.
center_xyz (Tensor, optional): (B, npoint, 3) if transposed == center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
False, else (B, 3, npoint). centers of the knn query. is False, else (B, 3, npoint). centers of the knn query.
Default: None. Default: None.
transposed (bool, optional): whether the input tensors are transposed (bool, optional): whether the input tensors are
transposed. Should not explicitly use this keyword when transposed. Should not explicitly use this keyword when
...@@ -34,8 +35,8 @@ class KNN(Function): ...@@ -34,8 +35,8 @@ class KNN(Function):
Default: False. Default: False.
Returns: Returns:
Tensor: (B, k, npoint) tensor with the indices of torch.Tensor: (B, k, npoint) tensor with the indices of the
the features that form k-nearest neighbours. features that form k-nearest neighbours.
""" """
assert (k > 0) & (k < 100), 'k should be in range(0, 100)' assert (k > 0) & (k < 100), 'k should be in range(0, 100)'
......
...@@ -25,22 +25,22 @@ class MultiScaleDeformableAttnFunction(Function): ...@@ -25,22 +25,22 @@ class MultiScaleDeformableAttnFunction(Function):
"""GPU version of multi-scale deformable attention. """GPU version of multi-scale deformable attention.
Args: Args:
value (Tensor): The value has shape value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads) (bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2), each feature map, has shape (num_levels, 2),
last dimension 2 represent (h, w) last dimension 2 represent (h, w)
sampling_locations (Tensor): The location of sampling points, sampling_locations (torch.Tensor): The location of sampling points,
has shape has shape
(bs ,num_queries, num_heads, num_levels, num_points, 2), (bs ,num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represent (x, y). the last dimension 2 represent (x, y).
attention_weights (Tensor): The weight of sampling points used attention_weights (torch.Tensor): The weight of sampling points
when calculate the attention, has shape used when calculate the attention, has shape
(bs ,num_queries, num_heads, num_levels, num_points), (bs ,num_queries, num_heads, num_levels, num_points),
im2col_step (Tensor): The step used in image to column. im2col_step (Tensor): The step used in image to column.
Returns: Returns:
Tensor: has shape (bs, num_queries, embed_dims) torch.Tensor: has shape (bs, num_queries, embed_dims)
""" """
ctx.im2col_step = im2col_step ctx.im2col_step = im2col_step
...@@ -62,12 +62,10 @@ class MultiScaleDeformableAttnFunction(Function): ...@@ -62,12 +62,10 @@ class MultiScaleDeformableAttnFunction(Function):
"""GPU version of backward function. """GPU version of backward function.
Args: Args:
grad_output (Tensor): Gradient grad_output (torch.Tensor): Gradient of output tensor of forward.
of output tensor of forward.
Returns: Returns:
Tuple[Tensor]: Gradient tuple[torch.Tensor]: Gradient of input tensors in forward.
of input tensors in forward.
""" """
value, value_spatial_shapes, value_level_start_index,\ value, value_spatial_shapes, value_level_start_index,\
sampling_locations, attention_weights = ctx.saved_tensors sampling_locations, attention_weights = ctx.saved_tensors
...@@ -96,21 +94,21 @@ def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, ...@@ -96,21 +94,21 @@ def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
"""CPU version of multi-scale deformable attention. """CPU version of multi-scale deformable attention.
Args: Args:
value (Tensor): The value has shape value (torch.Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads) (bs, num_keys, num_heads, embed_dims//num_heads)
value_spatial_shapes (Tensor): Spatial shape of value_spatial_shapes (torch.Tensor): Spatial shape of
each feature map, has shape (num_levels, 2), each feature map, has shape (num_levels, 2),
last dimension 2 represent (h, w) last dimension 2 represent (h, w)
sampling_locations (Tensor): The location of sampling points, sampling_locations (torch.Tensor): The location of sampling points,
has shape has shape
(bs ,num_queries, num_heads, num_levels, num_points, 2), (bs ,num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represent (x, y). the last dimension 2 represent (x, y).
attention_weights (Tensor): The weight of sampling points used attention_weights (torch.Tensor): The weight of sampling points used
when calculate the attention, has shape when calculate the attention, has shape
(bs ,num_queries, num_heads, num_levels, num_points), (bs ,num_queries, num_heads, num_levels, num_points),
Returns: Returns:
Tensor: has shape (bs, num_queries, embed_dims) torch.Tensor: has shape (bs, num_queries, embed_dims)
""" """
bs, _, num_heads, embed_dims = value.shape bs, _, num_heads, embed_dims = value.shape
...@@ -263,37 +261,38 @@ class MultiScaleDeformableAttention(BaseModule): ...@@ -263,37 +261,38 @@ class MultiScaleDeformableAttention(BaseModule):
"""Forward Function of MultiScaleDeformAttention. """Forward Function of MultiScaleDeformAttention.
Args: Args:
query (Tensor): Query of Transformer with shape query (torch.Tensor): Query of Transformer with shape
(num_query, bs, embed_dims). (num_query, bs, embed_dims).
key (Tensor): The key tensor with shape key (torch.Tensor): The key tensor with shape
`(num_key, bs, embed_dims)`. `(num_key, bs, embed_dims)`.
value (Tensor): The value tensor with shape value (torch.Tensor): The value tensor with shape
`(num_key, bs, embed_dims)`. `(num_key, bs, embed_dims)`.
identity (Tensor): The tensor used for addition, with the identity (torch.Tensor): The tensor used for addition, with the
same shape as `query`. Default None. If None, same shape as `query`. Default None. If None,
`query` will be used. `query` will be used.
query_pos (Tensor): The positional encoding for `query`. query_pos (torch.Tensor): The positional encoding for `query`.
Default: None. Default: None.
key_pos (Tensor): The positional encoding for `key`. Default key_pos (torch.Tensor): The positional encoding for `key`. Default
None. None.
reference_points (Tensor): The normalized reference reference_points (torch.Tensor): The normalized reference
points with shape (bs, num_query, num_levels, 2), points with shape (bs, num_query, num_levels, 2),
all elements are in the range [0, 1], top-left (0,0), all elements are in the range [0, 1], top-left (0,0),
bottom-right (1, 1), including padding area. bottom-right (1, 1), including padding area.
or (N, Length_{query}, num_levels, 4), add or (N, Length_{query}, num_levels, 4), add
additional two dimensions is (w, h) to additional two dimensions is (w, h) to
form reference boxes. form reference boxes.
key_padding_mask (Tensor): ByteTensor for `query`, with key_padding_mask (torch.Tensor): ByteTensor for `query`, with
shape [bs, num_key]. shape [bs, num_key].
spatial_shapes (Tensor): Spatial shape of features in spatial_shapes (torch.Tensor): Spatial shape of features in
different levels. With shape (num_levels, 2), different levels. With shape (num_levels, 2),
last dimension represents (h, w). last dimension represents (h, w).
level_start_index (Tensor): The start index of each level. level_start_index (torch.Tensor): The start index of each level.
A tensor has shape ``(num_levels, )`` and can be represented A tensor has shape ``(num_levels, )`` and can be represented
as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
Returns: Returns:
Tensor: forwarded results with shape [num_query, bs, embed_dims]. torch.Tensor: forwarded results with shape
[num_query, bs, embed_dims].
""" """
if value is None: if value is None:
......
...@@ -131,8 +131,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): ...@@ -131,8 +131,8 @@ def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
max_num (int): maximum number of boxes after NMS. max_num (int): maximum number of boxes after NMS.
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets (boxes and scores) and indices, which always have
same data type as the input. the same data type as the input.
Example: Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
...@@ -190,7 +190,7 @@ def soft_nms(boxes, ...@@ -190,7 +190,7 @@ def soft_nms(boxes,
The input can be either a torch tensor or numpy array. The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs. The returned type will always be the same as inputs.
Arguments: Args:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ). scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS. iou_threshold (float): IoU threshold for NMS.
...@@ -200,8 +200,8 @@ def soft_nms(boxes, ...@@ -200,8 +200,8 @@ def soft_nms(boxes,
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets (boxes and scores) and indices, which always have
same data type as the input. the same data type as the input.
Example: Example:
>>> boxes = np.array([[4., 3., 5., 3.], >>> boxes = np.array([[4., 3., 5., 3.],
...@@ -258,15 +258,15 @@ def soft_nms(boxes, ...@@ -258,15 +258,15 @@ def soft_nms(boxes,
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
"""Performs non-maximum suppression in a batched fashion. r"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob Modified from https://github.com/pytorch/vision/blob\
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
In order to perform NMS independently per class, we add an offset to all In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap. enough so that boxes from different classes do not overlap.
Arguments: Args:
boxes (torch.Tensor): boxes in shape (N, 4). boxes (torch.Tensor): boxes in shape (N, 4).
scores (torch.Tensor): scores in shape (N, ). scores (torch.Tensor): scores in shape (N, ).
idxs (torch.Tensor): each index value correspond to a bbox cluster, idxs (torch.Tensor): each index value correspond to a bbox cluster,
...@@ -343,14 +343,14 @@ def nms_match(dets, iou_threshold): ...@@ -343,14 +343,14 @@ def nms_match(dets, iou_threshold):
record the indice of suppressed bbox and form a group with the indice of record the indice of suppressed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order. kept bbox. In each group, indice is sorted as score order.
Arguments: Args:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
iou_thr (float): IoU thresh for NMS. iou_thr (float): IoU thresh for NMS.
Returns: Returns:
List[torch.Tensor | np.ndarray]: The outer list corresponds different list[torch.Tensor | np.ndarray]: The outer list corresponds different
matched group, the inner Tensor corresponds the indices for a group matched group, the inner Tensor corresponds the indices for a group
in score order. in score order.
""" """
if dets.shape[0] == 0: if dets.shape[0] == 0:
matched = [] matched = []
...@@ -381,15 +381,15 @@ def nms_rotated(dets, scores, iou_threshold, labels=None): ...@@ -381,15 +381,15 @@ def nms_rotated(dets, scores, iou_threshold, labels=None):
IoU greater than iou_threshold with another (higher scoring) rotated box. IoU greater than iou_threshold with another (higher scoring) rotated box.
Args: Args:
boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to
be in (x_ctr, y_ctr, width, height, angle_radian) format. be in (x_ctr, y_ctr, width, height, angle_radian) format.
scores (Tensor): scores in shape (N, ). scores (Tensor): scores in shape (N, ).
iou_threshold (float): IoU thresh for NMS. iou_threshold (float): IoU thresh for NMS.
labels (Tensor): boxes' label in shape (N,). labels (Tensor): boxes' label in shape (N,).
Returns: Returns:
tuple: kept dets(boxes and scores) and indice, which is always the \ tuple: kept dets(boxes and scores) and indice, which is always the
same data type as the input. same data type as the input.
""" """
if dets.shape[0] == 0: if dets.shape[0] == 0:
return dets, None return dets, None
......
...@@ -13,21 +13,22 @@ def pixel_group(score, mask, embedding, kernel_label, kernel_contour, ...@@ -13,21 +13,22 @@ def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
methods. methods.
Arguments: Arguments:
score (np.array or Tensor): The foreground score with size hxw. score (np.array or torch.Tensor): The foreground score with size hxw.
mask (np.array or Tensor): The foreground mask with size hxw. mask (np.array or Tensor): The foreground mask with size hxw.
embedding (np.array or Tensor): The embedding with size hxwxc to embedding (np.array or torch.Tensor): The embedding with size hxwxc to
distinguish instances. distinguish instances.
kernel_label (np.array or Tensor): The instance kernel index with kernel_label (np.array or torch.Tensor): The instance kernel index with
size hxw.
kernel_contour (np.array or torch.Tensor): The kernel contour with
size hxw. size hxw.
kernel_contour (np.array or Tensor): The kernel contour with size hxw.
kernel_region_num (int): The instance kernel region number. kernel_region_num (int): The instance kernel region number.
distance_threshold (float): The embedding distance threshold between distance_threshold (float): The embedding distance threshold between
kernel and pixel in one instance. kernel and pixel in one instance.
Returns: Returns:
pixel_assignment (List[List[float]]): The instance coordinate list. list[list[float]]: The instance coordinates and attributes list. Each
Each element consists of averaged confidence, pixel number, and element consists of averaged confidence, pixel number, and coordinates
coordinates (x_i, y_i for all pixels) in order. (x_i, y_i for all pixels) in order.
""" """
assert isinstance(score, (torch.Tensor, np.ndarray)) assert isinstance(score, (torch.Tensor, np.ndarray))
assert isinstance(mask, (torch.Tensor, np.ndarray)) assert isinstance(mask, (torch.Tensor, np.ndarray))
......
...@@ -22,6 +22,7 @@ def bilinear_grid_sample(im, grid, align_corners=False): ...@@ -22,6 +22,7 @@ def bilinear_grid_sample(im, grid, align_corners=False):
corner pixels. If set to False, they are instead considered as corner pixels. If set to False, they are instead considered as
referring to the corner points of the input’s corner pixels, referring to the corner points of the input’s corner pixels,
making the sampling more resolution agnostic. making the sampling more resolution agnostic.
Returns: Returns:
torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
""" """
...@@ -93,10 +94,12 @@ def is_in_onnx_export_without_custom_ops(): ...@@ -93,10 +94,12 @@ def is_in_onnx_export_without_custom_ops():
def normalize(grid): def normalize(grid):
"""Normalize input grid from [-1, 1] to [0, 1] """Normalize input grid from [-1, 1] to [0, 1]
Args: Args:
grid (Tensor): The grid to be normalize, range [-1, 1]. grid (torch.Tensor): The grid to be normalize, range [-1, 1].
Returns: Returns:
Tensor: Normalized grid, range [0, 1]. torch.Tensor: Normalized grid, range [0, 1].
""" """
return (grid + 1.0) / 2.0 return (grid + 1.0) / 2.0
...@@ -104,10 +107,12 @@ def normalize(grid): ...@@ -104,10 +107,12 @@ def normalize(grid):
def denormalize(grid): def denormalize(grid):
"""Denormalize input grid from range [0, 1] to [-1, 1] """Denormalize input grid from range [0, 1] to [-1, 1]
Args: Args:
grid (Tensor): The grid to be denormalize, range [0, 1]. grid (torch.Tensor): The grid to be denormalize, range [0, 1].
Returns: Returns:
Tensor: Denormalized grid, range [-1, 1]. torch.Tensor: Denormalized grid, range [-1, 1].
""" """
return grid * 2.0 - 1.0 return grid * 2.0 - 1.0
...@@ -119,12 +124,12 @@ def generate_grid(num_grid, size, device): ...@@ -119,12 +124,12 @@ def generate_grid(num_grid, size, device):
Args: Args:
num_grid (int): The number of grids to sample, one for each region. num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid. size (tuple[int, int]): The side size of the regular grid.
device (torch.device): Desired device of returned tensor. device (torch.device): Desired device of returned tensor.
Returns: Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids. contains coordinates for the regular grids.
""" """
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
...@@ -139,11 +144,11 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points): ...@@ -139,11 +144,11 @@ def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
point coordinates. point coordinates.
Args: Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
RoI, location, range (0, 1), shape (N, P, 2) to RoI, location, range (0, 1), shape (N, P, 2)
Returns: Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2) torch.Tensor: Image based absolute point coordinates, shape (N, P, 2)
""" """
with torch.no_grad(): with torch.no_grad():
...@@ -171,6 +176,7 @@ def get_shape_from_feature_map(x): ...@@ -171,6 +176,7 @@ def get_shape_from_feature_map(x):
Args: Args:
x (torch.Tensor): Input tensor, shape (N, C, H, W) x (torch.Tensor): Input tensor, shape (N, C, H, W)
Returns: Returns:
torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
""" """
...@@ -188,14 +194,15 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): ...@@ -188,14 +194,15 @@ def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
coordinates for sampling. coordinates for sampling.
Args: Args:
abs_img_points (Tensor): Image based absolute point coordinates, abs_img_points (torch.Tensor): Image based absolute point coordinates,
shape (N, P, 2) shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map. img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1. spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns: Returns:
Tensor: Image based relative point coordinates for sampling, Tensor: Image based relative point coordinates for sampling, shape
shape (N, P, 2) (N, P, 2).
""" """
assert (isinstance(img, tuple) and len(img) == 2) or \ assert (isinstance(img, tuple) and len(img) == 2) or \
...@@ -221,15 +228,16 @@ def rel_roi_point_to_rel_img_point(rois, ...@@ -221,15 +228,16 @@ def rel_roi_point_to_rel_img_point(rois,
point coordinates. point coordinates.
Args: Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
RoI, location, range (0, 1), shape (N, P, 2) to RoI, location, range (0, 1), shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map. img (tuple or torch.Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1. spatial_scale (float, optional): Scale points by this factor.
Default: 1.
Returns: Returns:
Tensor: Image based relative point coordinates for sampling, torch.Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2) shape (N, P, 2).
""" """
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
...@@ -245,14 +253,16 @@ def point_sample(input, points, align_corners=False, **kwargs): ...@@ -245,14 +253,16 @@ def point_sample(input, points, align_corners=False, **kwargs):
lie inside ``[0, 1] x [0, 1]`` square. lie inside ``[0, 1] x [0, 1]`` square.
Args: Args:
input (Tensor): Feature map, shape (N, C, H, W). input (torch.Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized), points (torch.Tensor): Image based absolute point coordinates
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). (normalized), range [0, 1] x [0, 1], shape (N, P, 2) or
align_corners (bool): Whether align_corners. Default: False (N, Hgrid, Wgrid, 2).
align_corners (bool, optional): Whether align_corners.
Default: False
Returns: Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or torch.Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid). (N, C, Hgrid, Wgrid).
""" """
add_dim = False add_dim = False
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment