ModelZoo / InstructBLIP_pytorch · Commits

Commit c04f261a — authored Aug 22, 2024 by dongchy920
Commit message: InstructBLIP
Pipeline #1594: canceled with stages
Showing 20 changed files with 3714 additions and 0 deletions (+3714, -0)
lavis/common/annotator/midas/__init__.py               +38   -0
lavis/common/annotator/midas/api.py                     +169  -0
lavis/common/annotator/midas/midas/__init__.py          +0    -0
lavis/common/annotator/midas/midas/base_model.py        +16   -0
lavis/common/annotator/midas/midas/blocks.py            +342  -0
lavis/common/annotator/midas/midas/dpt_depth.py         +109  -0
lavis/common/annotator/midas/midas/midas_net.py         +76   -0
lavis/common/annotator/midas/midas/midas_net_custom.py  +129  -0
lavis/common/annotator/midas/midas/transforms.py        +234  -0
lavis/common/annotator/midas/midas/vit.py               +491  -0
lavis/common/annotator/midas/utils.py                   +189  -0
lavis/common/annotator/mlsd/__init__.py                 +39   -0
lavis/common/annotator/mlsd/models/mbv2_mlsd_large.py   +293  -0
lavis/common/annotator/mlsd/models/mbv2_mlsd_tiny.py    +276  -0
lavis/common/annotator/mlsd/utils.py                    +580  -0
lavis/common/annotator/openpose/__init__.py             +44   -0
lavis/common/annotator/openpose/body.py                 +219  -0
lavis/common/annotator/openpose/hand.py                 +87   -0
lavis/common/annotator/openpose/model.py                +219  -0
lavis/common/annotator/openpose/util.py                 +164  -0
Too many changes to show: only 421 of 421+ files are displayed.
lavis/common/annotator/midas/__init__.py (new file, 0 → 100644)
import cv2
import numpy as np
import torch

from einops import rearrange
from .api import MiDaSInference


class MidasDetector:
    def __init__(self):
        self.model = MiDaSInference(model_type="dpt_hybrid").cuda()

    def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
        assert input_image.ndim == 3
        image_depth = input_image
        with torch.no_grad():
            image_depth = torch.from_numpy(image_depth).float().cuda()
            image_depth = image_depth / 127.5 - 1.0
            image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
            depth = self.model(image_depth)[0]

            depth_pt = depth.clone()
            depth_pt -= torch.min(depth_pt)
            depth_pt /= torch.max(depth_pt)
            depth_pt = depth_pt.cpu().numpy()
            depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)

            depth_np = depth.cpu().numpy()
            x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
            y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
            z = np.ones_like(x) * a
            x[depth_pt < bg_th] = 0
            y[depth_pt < bg_th] = 0
            normal = np.stack([x, y, z], axis=2)
            normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
            normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)

            return depth_image, normal_image
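For orientation, a minimal usage sketch of the MidasDetector above (not part of this commit; the input path and import path are illustrative, and a CUDA device plus the dpt_hybrid checkpoint are assumed):

# Hypothetical usage sketch, not part of this commit.
import cv2
from lavis.common.annotator.midas import MidasDetector

detector = MidasDetector()                  # loads the dpt_hybrid weights onto the GPU
img = cv2.imread("example.jpg")             # HWC uint8 RGB image (assumed input path)
depth_map, normal_map = detector(img)       # uint8 depth image and pseudo normal map
cv2.imwrite("depth.png", depth_map)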
lavis/common/annotator/midas/api.py (new file, 0 → 100644)
# based on https://github.com/isl-org/MiDaS

import cv2
import os
import torch
import torch.nn as nn
from torchvision.transforms import Compose

from .midas.dpt_depth import DPTDepthModel
from .midas.midas_net import MidasNet
from .midas.midas_net_custom import MidasNet_small
from .midas.transforms import Resize, NormalizeImage, PrepareForNet
from annotator.util import annotator_ckpts_path


ISL_PATHS = {
    "dpt_large": os.path.join(annotator_ckpts_path, "dpt_large-midas-2f21e586.pt"),
    "dpt_hybrid": os.path.join(annotator_ckpts_path, "dpt_hybrid-midas-501f0c75.pt"),
    "midas_v21": "",
    "midas_v21_small": "",
}

remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"


def disabled_train(self, mode=True):
    """Overwrite model.train with this function to make sure train/eval mode
    does not change anymore."""
    return self


def load_midas_transform(model_type):
    # https://github.com/isl-org/MiDaS/blob/master/run.py
    # load transform only
    if model_type == "dpt_large":  # DPT-Large
        net_w, net_h = 384, 384
        resize_mode = "minimal"
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    elif model_type == "dpt_hybrid":  # DPT-Hybrid
        net_w, net_h = 384, 384
        resize_mode = "minimal"
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    elif model_type == "midas_v21":
        net_w, net_h = 384, 384
        resize_mode = "upper_bound"
        normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    elif model_type == "midas_v21_small":
        net_w, net_h = 256, 256
        resize_mode = "upper_bound"
        normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    else:
        assert False, f"model_type '{model_type}' not implemented, use: --model_type large"

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method=resize_mode,
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            normalization,
            PrepareForNet(),
        ]
    )

    return transform


def load_model(model_type):
    # https://github.com/isl-org/MiDaS/blob/master/run.py
    # load network
    model_path = ISL_PATHS[model_type]
    if model_type == "dpt_large":  # DPT-Large
        model = DPTDepthModel(
            path=model_path,
            backbone="vitl16_384",
            non_negative=True,
        )
        net_w, net_h = 384, 384
        resize_mode = "minimal"
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    elif model_type == "dpt_hybrid":  # DPT-Hybrid
        if not os.path.exists(model_path):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)

        model = DPTDepthModel(
            path=model_path,
            backbone="vitb_rn50_384",
            non_negative=True,
        )
        net_w, net_h = 384, 384
        resize_mode = "minimal"
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    elif model_type == "midas_v21":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
        resize_mode = "upper_bound"
        normalization = NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )

    elif model_type == "midas_v21_small":
        model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
                               non_negative=True, blocks={'expand': True})
        net_w, net_h = 256, 256
        resize_mode = "upper_bound"
        normalization = NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )

    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method=resize_mode,
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            normalization,
            PrepareForNet(),
        ]
    )

    return model.eval(), transform


class MiDaSInference(nn.Module):
    MODEL_TYPES_TORCH_HUB = [
        "DPT_Large",
        "DPT_Hybrid",
        "MiDaS_small"
    ]
    MODEL_TYPES_ISL = [
        "dpt_large",
        "dpt_hybrid",
        "midas_v21",
        "midas_v21_small",
    ]

    def __init__(self, model_type):
        super().__init__()
        assert (model_type in self.MODEL_TYPES_ISL)
        model, _ = load_model(model_type)
        self.model = model
        self.model.train = disabled_train

    def forward(self, x):
        with torch.no_grad():
            prediction = self.model(x)
        return prediction
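A hedged sketch of how the pieces in api.py fit together: load_midas_transform prepares the dict-based preprocessing pipeline and MiDaSInference wraps the frozen network (not part of this commit; the random array stands in for a real image, and the dpt_hybrid checkpoint is downloaded on first use, which requires basicsr):

# Hypothetical sketch of api.py in use, not part of this commit.
import numpy as np
import torch
from lavis.common.annotator.midas.api import MiDaSInference, load_midas_transform

transform = load_midas_transform("dpt_hybrid")           # Resize -> NormalizeImage -> PrepareForNet
model = MiDaSInference(model_type="dpt_hybrid").cuda()

img = np.random.rand(480, 640, 3).astype(np.float32)     # placeholder RGB image in [0, 1]
sample = transform({"image": img})                        # CHW float32, sides padded to multiples of 32
x = torch.from_numpy(sample["image"]).unsqueeze(0).cuda()
depth = model(x)                                          # (1, H', W') inverse-depth prediction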
lavis/common/annotator/midas/midas/__init__.py (new file, 0 → 100644; empty)
lavis/common/annotator/midas/midas/base_model.py (new file, 0 → 100644)
import torch


class BaseModel(torch.nn.Module):
    def load(self, path):
        """Load model from file.

        Args:
            path (str): file path
        """
        parameters = torch.load(path, map_location=torch.device('cpu'))

        if "optimizer" in parameters:
            parameters = parameters["model"]

        self.load_state_dict(parameters)
lavis/common/annotator/midas/midas/blocks.py (new file, 0 → 100644)
import torch
import torch.nn as nn

from .vit import (
    _make_pretrained_vitb_rn50_384,
    _make_pretrained_vitl16_384,
    _make_pretrained_vitb16_384,
    forward_vit,
)


def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True,
                  hooks=None, use_vit_only=False, use_readout="ignore",):
    if backbone == "vitl16_384":
        pretrained = _make_pretrained_vitl16_384(
            use_pretrained, hooks=hooks, use_readout=use_readout
        )
        scratch = _make_scratch(
            [256, 512, 1024, 1024], features, groups=groups, expand=expand
        )  # ViT-L/16 - 85.0% Top1 (backbone)
    elif backbone == "vitb_rn50_384":
        pretrained = _make_pretrained_vitb_rn50_384(
            use_pretrained,
            hooks=hooks,
            use_vit_only=use_vit_only,
            use_readout=use_readout,
        )
        scratch = _make_scratch(
            [256, 512, 768, 768], features, groups=groups, expand=expand
        )  # ViT-H/16 - 85.0% Top1 (backbone)
    elif backbone == "vitb16_384":
        pretrained = _make_pretrained_vitb16_384(
            use_pretrained, hooks=hooks, use_readout=use_readout
        )
        scratch = _make_scratch(
            [96, 192, 384, 768], features, groups=groups, expand=expand
        )  # ViT-B/16 - 84.6% Top1 (backbone)
    elif backbone == "resnext101_wsl":
        pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
        scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand)  # efficientnet_lite3
    elif backbone == "efficientnet_lite3":
        pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable)
        scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand)  # efficientnet_lite3
    else:
        print(f"Backbone '{backbone}' not implemented")
        assert False

    return pretrained, scratch


def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    out_shape4 = out_shape
    if expand == True:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer4_rn = nn.Conv2d(
        in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )

    return scratch


def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False):
    efficientnet = torch.hub.load(
        "rwightman/gen-efficientnet-pytorch",
        "tf_efficientnet_lite3",
        pretrained=use_pretrained,
        exportable=exportable
    )
    return _make_efficientnet_backbone(efficientnet)


def _make_efficientnet_backbone(effnet):
    pretrained = nn.Module()

    pretrained.layer1 = nn.Sequential(
        effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2]
    )
    pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3])
    pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5])
    pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9])

    return pretrained


def _make_resnet_backbone(resnet):
    pretrained = nn.Module()
    pretrained.layer1 = nn.Sequential(
        resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1
    )

    pretrained.layer2 = resnet.layer2
    pretrained.layer3 = resnet.layer3
    pretrained.layer4 = resnet.layer4

    return pretrained


def _make_pretrained_resnext101_wsl(use_pretrained):
    resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
    return _make_resnet_backbone(resnet)


class Interpolate(nn.Module):
    """Interpolation module.
    """

    def __init__(self, scale_factor, mode, align_corners=False):
        """Init.

        Args:
            scale_factor (float): scaling
            mode (str): interpolation mode
        """
        super(Interpolate, self).__init__()

        self.interp = nn.functional.interpolate
        self.scale_factor = scale_factor
        self.mode = mode
        self.align_corners = align_corners

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: interpolated data
        """

        x = self.interp(
            x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners
        )

        return x


class ResidualConvUnit(nn.Module):
    """Residual convolution module.
    """

    def __init__(self, features):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.conv1 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True
        )

        self.conv2 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True
        )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """
        out = self.relu(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)

        return out + x


class FeatureFusionBlock(nn.Module):
    """Feature fusion block.
    """

    def __init__(self, features):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock, self).__init__()

        self.resConfUnit1 = ResidualConvUnit(features)
        self.resConfUnit2 = ResidualConvUnit(features)

    def forward(self, *xs):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            output += self.resConfUnit1(xs[1])

        output = self.resConfUnit2(output)

        output = nn.functional.interpolate(
            output, scale_factor=2, mode="bilinear", align_corners=True
        )

        return output


class ResidualConvUnit_custom(nn.Module):
    """Residual convolution module.
    """

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
        )

        self.conv2 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
        )

        if self.bn == True:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """

        out = self.activation(x)
        out = self.conv1(out)
        if self.bn == True:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn == True:
            out = self.bn2(out)

        if self.groups > 1:
            out = self.conv_merge(out)

        return self.skip_add.add(out, x)

        # return out + x


class FeatureFusionBlock_custom(nn.Module):
    """Feature fusion block.
    """

    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock_custom, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners

        self.groups = 1

        self.expand = expand
        out_features = features
        if self.expand == True:
            out_features = features // 2

        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, *xs):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            res = self.resConfUnit1(xs[1])
            output = self.skip_add.add(output, res)
            # output += res

        output = self.resConfUnit2(output)

        output = nn.functional.interpolate(
            output, scale_factor=2, mode="bilinear", align_corners=self.align_corners
        )

        output = self.out_conv(output)

        return output
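To illustrate the decoder pattern used above, a small sketch (not part of this commit; shapes and the import path are illustrative): FeatureFusionBlock_custom adds an optional skip path through resConfUnit1, refines the sum, then upsamples by a factor of 2.

# Hypothetical sketch of the fusion-block behaviour, not part of this commit.
import torch
import torch.nn as nn
from lavis.common.annotator.midas.midas.blocks import FeatureFusionBlock_custom

fuse = FeatureFusionBlock_custom(features=64, activation=nn.ReLU(False), bn=False)
deep = torch.randn(1, 64, 12, 12)   # coarser decoder path
skip = torch.randn(1, 64, 12, 12)   # refined encoder feature at the same resolution
out = fuse(deep, skip)               # fused, refined, and upsampled 2x -> (1, 64, 24, 24)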
lavis/common/annotator/midas/midas/dpt_depth.py (new file, 0 → 100644)
import torch
import torch.nn as nn
import torch.nn.functional as F

from .base_model import BaseModel
from .blocks import (
    FeatureFusionBlock,
    FeatureFusionBlock_custom,
    Interpolate,
    _make_encoder,
    forward_vit,
)


def _make_fusion_block(features, use_bn):
    return FeatureFusionBlock_custom(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
    )


class DPT(BaseModel):
    def __init__(
        self,
        head,
        features=256,
        backbone="vitb_rn50_384",
        readout="project",
        channels_last=False,
        use_bn=False,
    ):

        super(DPT, self).__init__()

        self.channels_last = channels_last

        hooks = {
            "vitb_rn50_384": [0, 1, 8, 11],
            "vitb16_384": [2, 5, 8, 11],
            "vitl16_384": [5, 11, 17, 23],
        }

        # Instantiate backbone and reassemble blocks
        self.pretrained, self.scratch = _make_encoder(
            backbone,
            features,
            False,  # Set to true of you want to train from scratch, uses ImageNet weights
            groups=1,
            expand=False,
            exportable=False,
            hooks=hooks[backbone],
            use_readout=readout,
        )

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        self.scratch.output_conv = head

    def forward(self, x):
        if self.channels_last == True:
            x.contiguous(memory_format=torch.channels_last)

        layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn)
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv(path_1)

        return out


class DPTDepthModel(DPT):
    def __init__(self, path=None, non_negative=True, **kwargs):
        features = kwargs["features"] if "features" in kwargs else 256

        head = nn.Sequential(
            nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
            Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
            nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
            nn.ReLU(True) if non_negative else nn.Identity(),
            nn.Identity(),
        )

        super().__init__(head, **kwargs)

        if path is not None:
            self.load(path)

    def forward(self, x):
        return super().forward(x).squeeze(dim=1)
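A minimal shape check for DPTDepthModel as defined above (not part of this commit; it assumes a timm version that still provides the vit_base_resnet50_384 model, and path=None skips loading a checkpoint, so only the shapes are meaningful):

# Hypothetical shape check, not part of this commit.
import torch
from lavis.common.annotator.midas.midas.dpt_depth import DPTDepthModel

model = DPTDepthModel(path=None, backbone="vitb_rn50_384", non_negative=True).eval()
with torch.no_grad():
    pred = model(torch.randn(1, 3, 384, 384))
print(pred.shape)  # expected: torch.Size([1, 384, 384]) after squeeze(dim=1)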
lavis/common/annotator/midas/midas/midas_net.py (new file, 0 → 100644)
"""MidashNet: Network for monocular depth estimation trained by mixing several datasets.
This file contains code that is adapted from
https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
"""
import
torch
import
torch.nn
as
nn
from
.base_model
import
BaseModel
from
.blocks
import
FeatureFusionBlock
,
Interpolate
,
_make_encoder
class
MidasNet
(
BaseModel
):
"""Network for monocular depth estimation.
"""
def
__init__
(
self
,
path
=
None
,
features
=
256
,
non_negative
=
True
):
"""Init.
Args:
path (str, optional): Path to saved model. Defaults to None.
features (int, optional): Number of features. Defaults to 256.
backbone (str, optional): Backbone network for encoder. Defaults to resnet50
"""
print
(
"Loading weights: "
,
path
)
super
(
MidasNet
,
self
).
__init__
()
use_pretrained
=
False
if
path
is
None
else
True
self
.
pretrained
,
self
.
scratch
=
_make_encoder
(
backbone
=
"resnext101_wsl"
,
features
=
features
,
use_pretrained
=
use_pretrained
)
self
.
scratch
.
refinenet4
=
FeatureFusionBlock
(
features
)
self
.
scratch
.
refinenet3
=
FeatureFusionBlock
(
features
)
self
.
scratch
.
refinenet2
=
FeatureFusionBlock
(
features
)
self
.
scratch
.
refinenet1
=
FeatureFusionBlock
(
features
)
self
.
scratch
.
output_conv
=
nn
.
Sequential
(
nn
.
Conv2d
(
features
,
128
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
),
Interpolate
(
scale_factor
=
2
,
mode
=
"bilinear"
),
nn
.
Conv2d
(
128
,
32
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
),
nn
.
ReLU
(
True
),
nn
.
Conv2d
(
32
,
1
,
kernel_size
=
1
,
stride
=
1
,
padding
=
0
),
nn
.
ReLU
(
True
)
if
non_negative
else
nn
.
Identity
(),
)
if
path
:
self
.
load
(
path
)
def
forward
(
self
,
x
):
"""Forward pass.
Args:
x (tensor): input data (image)
Returns:
tensor: depth
"""
layer_1
=
self
.
pretrained
.
layer1
(
x
)
layer_2
=
self
.
pretrained
.
layer2
(
layer_1
)
layer_3
=
self
.
pretrained
.
layer3
(
layer_2
)
layer_4
=
self
.
pretrained
.
layer4
(
layer_3
)
layer_1_rn
=
self
.
scratch
.
layer1_rn
(
layer_1
)
layer_2_rn
=
self
.
scratch
.
layer2_rn
(
layer_2
)
layer_3_rn
=
self
.
scratch
.
layer3_rn
(
layer_3
)
layer_4_rn
=
self
.
scratch
.
layer4_rn
(
layer_4
)
path_4
=
self
.
scratch
.
refinenet4
(
layer_4_rn
)
path_3
=
self
.
scratch
.
refinenet3
(
path_4
,
layer_3_rn
)
path_2
=
self
.
scratch
.
refinenet2
(
path_3
,
layer_2_rn
)
path_1
=
self
.
scratch
.
refinenet1
(
path_2
,
layer_1_rn
)
out
=
self
.
scratch
.
output_conv
(
path_1
)
return
torch
.
squeeze
(
out
,
dim
=
1
)
lavis/common/annotator/midas/midas/midas_net_custom.py (new file, 0 → 100644)
"""MidashNet: Network for monocular depth estimation trained by mixing several datasets.
This file contains code that is adapted from
https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
"""
import
torch
import
torch.nn
as
nn
from
.base_model
import
BaseModel
from
.blocks
import
FeatureFusionBlock
,
FeatureFusionBlock_custom
,
Interpolate
,
_make_encoder
class
MidasNet_small
(
BaseModel
):
"""Network for monocular depth estimation.
"""
def
__init__
(
self
,
path
=
None
,
features
=
64
,
backbone
=
"efficientnet_lite3"
,
non_negative
=
True
,
exportable
=
True
,
channels_last
=
False
,
align_corners
=
True
,
blocks
=
{
'expand'
:
True
}):
"""Init.
Args:
path (str, optional): Path to saved model. Defaults to None.
features (int, optional): Number of features. Defaults to 256.
backbone (str, optional): Backbone network for encoder. Defaults to resnet50
"""
print
(
"Loading weights: "
,
path
)
super
(
MidasNet_small
,
self
).
__init__
()
use_pretrained
=
False
if
path
else
True
self
.
channels_last
=
channels_last
self
.
blocks
=
blocks
self
.
backbone
=
backbone
self
.
groups
=
1
features1
=
features
features2
=
features
features3
=
features
features4
=
features
self
.
expand
=
False
if
"expand"
in
self
.
blocks
and
self
.
blocks
[
'expand'
]
==
True
:
self
.
expand
=
True
features1
=
features
features2
=
features
*
2
features3
=
features
*
4
features4
=
features
*
8
self
.
pretrained
,
self
.
scratch
=
_make_encoder
(
self
.
backbone
,
features
,
use_pretrained
,
groups
=
self
.
groups
,
expand
=
self
.
expand
,
exportable
=
exportable
)
self
.
scratch
.
activation
=
nn
.
ReLU
(
False
)
self
.
scratch
.
refinenet4
=
FeatureFusionBlock_custom
(
features4
,
self
.
scratch
.
activation
,
deconv
=
False
,
bn
=
False
,
expand
=
self
.
expand
,
align_corners
=
align_corners
)
self
.
scratch
.
refinenet3
=
FeatureFusionBlock_custom
(
features3
,
self
.
scratch
.
activation
,
deconv
=
False
,
bn
=
False
,
expand
=
self
.
expand
,
align_corners
=
align_corners
)
self
.
scratch
.
refinenet2
=
FeatureFusionBlock_custom
(
features2
,
self
.
scratch
.
activation
,
deconv
=
False
,
bn
=
False
,
expand
=
self
.
expand
,
align_corners
=
align_corners
)
self
.
scratch
.
refinenet1
=
FeatureFusionBlock_custom
(
features1
,
self
.
scratch
.
activation
,
deconv
=
False
,
bn
=
False
,
align_corners
=
align_corners
)
self
.
scratch
.
output_conv
=
nn
.
Sequential
(
nn
.
Conv2d
(
features
,
features
//
2
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
,
groups
=
self
.
groups
),
Interpolate
(
scale_factor
=
2
,
mode
=
"bilinear"
),
nn
.
Conv2d
(
features
//
2
,
32
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
),
self
.
scratch
.
activation
,
nn
.
Conv2d
(
32
,
1
,
kernel_size
=
1
,
stride
=
1
,
padding
=
0
),
nn
.
ReLU
(
True
)
if
non_negative
else
nn
.
Identity
(),
nn
.
Identity
(),
)
if
path
:
self
.
load
(
path
)
def
forward
(
self
,
x
):
"""Forward pass.
Args:
x (tensor): input data (image)
Returns:
tensor: depth
"""
if
self
.
channels_last
==
True
:
print
(
"self.channels_last = "
,
self
.
channels_last
)
x
.
contiguous
(
memory_format
=
torch
.
channels_last
)
layer_1
=
self
.
pretrained
.
layer1
(
x
)
layer_2
=
self
.
pretrained
.
layer2
(
layer_1
)
layer_3
=
self
.
pretrained
.
layer3
(
layer_2
)
layer_4
=
self
.
pretrained
.
layer4
(
layer_3
)
layer_1_rn
=
self
.
scratch
.
layer1_rn
(
layer_1
)
layer_2_rn
=
self
.
scratch
.
layer2_rn
(
layer_2
)
layer_3_rn
=
self
.
scratch
.
layer3_rn
(
layer_3
)
layer_4_rn
=
self
.
scratch
.
layer4_rn
(
layer_4
)
path_4
=
self
.
scratch
.
refinenet4
(
layer_4_rn
)
path_3
=
self
.
scratch
.
refinenet3
(
path_4
,
layer_3_rn
)
path_2
=
self
.
scratch
.
refinenet2
(
path_3
,
layer_2_rn
)
path_1
=
self
.
scratch
.
refinenet1
(
path_2
,
layer_1_rn
)
out
=
self
.
scratch
.
output_conv
(
path_1
)
return
torch
.
squeeze
(
out
,
dim
=
1
)
def
fuse_model
(
m
):
prev_previous_type
=
nn
.
Identity
()
prev_previous_name
=
''
previous_type
=
nn
.
Identity
()
previous_name
=
''
for
name
,
module
in
m
.
named_modules
():
if
prev_previous_type
==
nn
.
Conv2d
and
previous_type
==
nn
.
BatchNorm2d
and
type
(
module
)
==
nn
.
ReLU
:
# print("FUSED ", prev_previous_name, previous_name, name)
torch
.
quantization
.
fuse_modules
(
m
,
[
prev_previous_name
,
previous_name
,
name
],
inplace
=
True
)
elif
prev_previous_type
==
nn
.
Conv2d
and
previous_type
==
nn
.
BatchNorm2d
:
# print("FUSED ", prev_previous_name, previous_name)
torch
.
quantization
.
fuse_modules
(
m
,
[
prev_previous_name
,
previous_name
],
inplace
=
True
)
# elif previous_type == nn.Conv2d and type(module) == nn.ReLU:
# print("FUSED ", previous_name, name)
# torch.quantization.fuse_modules(m, [previous_name, name], inplace=True)
prev_previous_type
=
previous_type
prev_previous_name
=
previous_name
previous_type
=
type
(
module
)
previous_name
=
name
\ No newline at end of file
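For context, a hedged sketch of how fuse_model might be applied before quantizing MidasNet_small (not part of this commit; MidasNet_small(path=None) pulls the EfficientNet-Lite3 backbone via torch.hub, and the full quantization workflow is out of scope here):

# Hypothetical sketch: conv+bn(+relu) fusion ahead of quantization, not part of this commit.
import torch
from lavis.common.annotator.midas.midas.midas_net_custom import MidasNet_small, fuse_model

net = MidasNet_small(path=None, exportable=True).eval()  # downloads the backbone via torch.hub
fuse_model(net)                                          # folds Conv2d+BatchNorm2d(+ReLU) runs in place
with torch.no_grad():
    _ = net(torch.randn(1, 3, 256, 256))                 # sanity check that the fused graph still runs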
lavis/common/annotator/midas/midas/transforms.py (new file, 0 → 100644)
import numpy as np
import cv2
import math


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Rezise the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as least as possbile
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

            sample["mask"] = cv2.resize(
                sample["mask"].astype(np.float32),
                (width, height),
                interpolation=cv2.INTER_NEAREST,
            )
            sample["mask"] = sample["mask"].astype(bool)

        return sample


class NormalizeImage(object):
    """Normlize image by given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample
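A small illustration of the Resize sizing rules defined above (not part of this commit; the import path is illustrative): with keep_aspect_ratio=True and resize_method="minimal", get_size scales the input as little as possible and snaps both sides to multiples of ensure_multiple_of.

# Hypothetical illustration of Resize.get_size, not part of this commit.
import cv2
from lavis.common.annotator.midas.midas.transforms import Resize

r = Resize(384, 384, resize_target=None, keep_aspect_ratio=True,
           ensure_multiple_of=32, resize_method="minimal",
           image_interpolation_method=cv2.INTER_CUBIC)
# For a 640x480 input: scale 0.8 (fit height), then snap to multiples of 32.
print(r.get_size(640, 480))  # -> (512, 384)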
lavis/common/annotator/midas/midas/vit.py (new file, 0 → 100644)
import torch
import torch.nn as nn
import timm
import types
import math
import torch.nn.functional as F


class Slice(nn.Module):
    def __init__(self, start_index=1):
        super(Slice, self).__init__()
        self.start_index = start_index

    def forward(self, x):
        return x[:, self.start_index:]


class AddReadout(nn.Module):
    def __init__(self, start_index=1):
        super(AddReadout, self).__init__()
        self.start_index = start_index

    def forward(self, x):
        if self.start_index == 2:
            readout = (x[:, 0] + x[:, 1]) / 2
        else:
            readout = x[:, 0]
        return x[:, self.start_index:] + readout.unsqueeze(1)


class ProjectReadout(nn.Module):
    def __init__(self, in_features, start_index=1):
        super(ProjectReadout, self).__init__()
        self.start_index = start_index

        self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())

    def forward(self, x):
        readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index:])
        features = torch.cat((x[:, self.start_index:], readout), -1)

        return self.project(features)


class Transpose(nn.Module):
    def __init__(self, dim0, dim1):
        super(Transpose, self).__init__()
        self.dim0 = dim0
        self.dim1 = dim1

    def forward(self, x):
        x = x.transpose(self.dim0, self.dim1)
        return x


def forward_vit(pretrained, x):
    b, c, h, w = x.shape

    glob = pretrained.model.forward_flex(x)

    layer_1 = pretrained.activations["1"]
    layer_2 = pretrained.activations["2"]
    layer_3 = pretrained.activations["3"]
    layer_4 = pretrained.activations["4"]

    layer_1 = pretrained.act_postprocess1[0:2](layer_1)
    layer_2 = pretrained.act_postprocess2[0:2](layer_2)
    layer_3 = pretrained.act_postprocess3[0:2](layer_3)
    layer_4 = pretrained.act_postprocess4[0:2](layer_4)

    unflatten = nn.Sequential(
        nn.Unflatten(
            2,
            torch.Size(
                [
                    h // pretrained.model.patch_size[1],
                    w // pretrained.model.patch_size[0],
                ]
            ),
        )
    )

    if layer_1.ndim == 3:
        layer_1 = unflatten(layer_1)
    if layer_2.ndim == 3:
        layer_2 = unflatten(layer_2)
    if layer_3.ndim == 3:
        layer_3 = unflatten(layer_3)
    if layer_4.ndim == 3:
        layer_4 = unflatten(layer_4)

    layer_1 = pretrained.act_postprocess1[3: len(pretrained.act_postprocess1)](layer_1)
    layer_2 = pretrained.act_postprocess2[3: len(pretrained.act_postprocess2)](layer_2)
    layer_3 = pretrained.act_postprocess3[3: len(pretrained.act_postprocess3)](layer_3)
    layer_4 = pretrained.act_postprocess4[3: len(pretrained.act_postprocess4)](layer_4)

    return layer_1, layer_2, layer_3, layer_4


def _resize_pos_embed(self, posemb, gs_h, gs_w):
    posemb_tok, posemb_grid = (
        posemb[:, : self.start_index],
        posemb[0, self.start_index:],
    )

    gs_old = int(math.sqrt(len(posemb_grid)))

    posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
    posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
    posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)

    posemb = torch.cat([posemb_tok, posemb_grid], dim=1)

    return posemb


def forward_flex(self, x):
    b, c, h, w = x.shape

    pos_embed = self._resize_pos_embed(
        self.pos_embed, h // self.patch_size[1], w // self.patch_size[0]
    )

    B = x.shape[0]

    if hasattr(self.patch_embed, "backbone"):
        x = self.patch_embed.backbone(x)
        if isinstance(x, (list, tuple)):
            x = x[-1]  # last feature if backbone outputs list/tuple of features

    x = self.patch_embed.proj(x).flatten(2).transpose(1, 2)

    if getattr(self, "dist_token", None) is not None:
        cls_tokens = self.cls_token.expand(
            B, -1, -1
        )  # stole cls_tokens impl from Phil Wang, thanks
        dist_token = self.dist_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, dist_token, x), dim=1)
    else:
        cls_tokens = self.cls_token.expand(
            B, -1, -1
        )  # stole cls_tokens impl from Phil Wang, thanks
        x = torch.cat((cls_tokens, x), dim=1)

    x = x + pos_embed
    x = self.pos_drop(x)

    for blk in self.blocks:
        x = blk(x)

    x = self.norm(x)

    return x


activations = {}


def get_activation(name):
    def hook(model, input, output):
        activations[name] = output

    return hook


def get_readout_oper(vit_features, features, use_readout, start_index=1):
    if use_readout == "ignore":
        readout_oper = [Slice(start_index)] * len(features)
    elif use_readout == "add":
        readout_oper = [AddReadout(start_index)] * len(features)
    elif use_readout == "project":
        readout_oper = [
            ProjectReadout(vit_features, start_index) for out_feat in features
        ]
    else:
        assert (
            False
        ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'"

    return readout_oper


def _make_vit_b16_backbone(
    model,
    features=[96, 192, 384, 768],
    size=[384, 384],
    hooks=[2, 5, 8, 11],
    vit_features=768,
    use_readout="ignore",
    start_index=1,
):
    pretrained = nn.Module()

    pretrained.model = model
    pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
    pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
    pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
    pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))

    pretrained.activations = activations

    readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)

    # 32, 48, 136, 384
    pretrained.act_postprocess1 = nn.Sequential(
        readout_oper[0],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[0],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.ConvTranspose2d(
            in_channels=features[0],
            out_channels=features[0],
            kernel_size=4,
            stride=4,
            padding=0,
            bias=True,
            dilation=1,
            groups=1,
        ),
    )

    pretrained.act_postprocess2 = nn.Sequential(
        readout_oper[1],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[1],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.ConvTranspose2d(
            in_channels=features[1],
            out_channels=features[1],
            kernel_size=2,
            stride=2,
            padding=0,
            bias=True,
            dilation=1,
            groups=1,
        ),
    )

    pretrained.act_postprocess3 = nn.Sequential(
        readout_oper[2],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[2],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
    )

    pretrained.act_postprocess4 = nn.Sequential(
        readout_oper[3],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[3],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.Conv2d(
            in_channels=features[3],
            out_channels=features[3],
            kernel_size=3,
            stride=2,
            padding=1,
        ),
    )

    pretrained.model.start_index = start_index
    pretrained.model.patch_size = [16, 16]

    # We inject this function into the VisionTransformer instances so that
    # we can use it with interpolated position embeddings without modifying the library source.
    pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
    pretrained.model._resize_pos_embed = types.MethodType(
        _resize_pos_embed, pretrained.model
    )

    return pretrained


def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None):
    model = timm.create_model("vit_large_patch16_384", pretrained=pretrained)

    hooks = [5, 11, 17, 23] if hooks == None else hooks
    return _make_vit_b16_backbone(
        model,
        features=[256, 512, 1024, 1024],
        hooks=hooks,
        vit_features=1024,
        use_readout=use_readout,
    )


def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None):
    model = timm.create_model("vit_base_patch16_384", pretrained=pretrained)

    hooks = [2, 5, 8, 11] if hooks == None else hooks
    return _make_vit_b16_backbone(
        model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
    )


def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None):
    model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained)

    hooks = [2, 5, 8, 11] if hooks == None else hooks
    return _make_vit_b16_backbone(
        model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
    )


def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None):
    model = timm.create_model("vit_deit_base_distilled_patch16_384", pretrained=pretrained)

    hooks = [2, 5, 8, 11] if hooks == None else hooks
    return _make_vit_b16_backbone(
        model,
        features=[96, 192, 384, 768],
        hooks=hooks,
        use_readout=use_readout,
        start_index=2,
    )


def _make_vit_b_rn50_backbone(
    model,
    features=[256, 512, 768, 768],
    size=[384, 384],
    hooks=[0, 1, 8, 11],
    vit_features=768,
    use_vit_only=False,
    use_readout="ignore",
    start_index=1,
):
    pretrained = nn.Module()

    pretrained.model = model

    if use_vit_only == True:
        pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
        pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
    else:
        pretrained.model.patch_embed.backbone.stages[0].register_forward_hook(
            get_activation("1")
        )
        pretrained.model.patch_embed.backbone.stages[1].register_forward_hook(
            get_activation("2")
        )

    pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
    pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))

    pretrained.activations = activations

    readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)

    if use_vit_only == True:
        pretrained.act_postprocess1 = nn.Sequential(
            readout_oper[0],
            Transpose(1, 2),
            nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
            nn.Conv2d(
                in_channels=vit_features,
                out_channels=features[0],
                kernel_size=1,
                stride=1,
                padding=0,
            ),
            nn.ConvTranspose2d(
                in_channels=features[0],
                out_channels=features[0],
                kernel_size=4,
                stride=4,
                padding=0,
                bias=True,
                dilation=1,
                groups=1,
            ),
        )

        pretrained.act_postprocess2 = nn.Sequential(
            readout_oper[1],
            Transpose(1, 2),
            nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
            nn.Conv2d(
                in_channels=vit_features,
                out_channels=features[1],
                kernel_size=1,
                stride=1,
                padding=0,
            ),
            nn.ConvTranspose2d(
                in_channels=features[1],
                out_channels=features[1],
                kernel_size=2,
                stride=2,
                padding=0,
                bias=True,
                dilation=1,
                groups=1,
            ),
        )
    else:
        pretrained.act_postprocess1 = nn.Sequential(
            nn.Identity(), nn.Identity(), nn.Identity()
        )
        pretrained.act_postprocess2 = nn.Sequential(
            nn.Identity(), nn.Identity(), nn.Identity()
        )

    pretrained.act_postprocess3 = nn.Sequential(
        readout_oper[2],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[2],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
    )

    pretrained.act_postprocess4 = nn.Sequential(
        readout_oper[3],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[3],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.Conv2d(
            in_channels=features[3],
            out_channels=features[3],
            kernel_size=3,
            stride=2,
            padding=1,
        ),
    )

    pretrained.model.start_index = start_index
    pretrained.model.patch_size = [16, 16]

    # We inject this function into the VisionTransformer instances so that
    # we can use it with interpolated position embeddings without modifying the library source.
    pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)

    # We inject this function into the VisionTransformer instances so that
    # we can use it with interpolated position embeddings without modifying the library source.
    pretrained.model._resize_pos_embed = types.MethodType(
        _resize_pos_embed, pretrained.model
    )

    return pretrained


def _make_pretrained_vitb_rn50_384(pretrained, use_readout="ignore", hooks=None, use_vit_only=False):
    model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained)

    hooks = [0, 1, 8, 11] if hooks == None else hooks
    return _make_vit_b_rn50_backbone(
        model,
        features=[256, 512, 768, 768],
        size=[384, 384],
        hooks=hooks,
        use_vit_only=use_vit_only,
        use_readout=use_readout,
    )
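The backbone wrappers above rely on forward hooks to capture intermediate ViT activations under string keys; a self-contained mini-example of that pattern (not part of this commit):

# Minimal illustration of the forward-hook pattern, not part of this commit.
import torch
import torch.nn as nn

activations = {}

def get_activation(name):
    def hook(model, input, output):
        activations[name] = output      # stash the intermediate feature under a string key
    return hook

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
net[0].register_forward_hook(get_activation("1"))
_ = net(torch.randn(3, 4))
print(activations["1"].shape)           # torch.Size([3, 8]) captured from the first layer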
lavis/common/annotator/midas/utils.py (new file, 0 → 100644)
"""Utils for monoDepth."""
import
sys
import
re
import
numpy
as
np
import
cv2
import
torch
def
read_pfm
(
path
):
"""Read pfm file.
Args:
path (str): path to file
Returns:
tuple: (data, scale)
"""
with
open
(
path
,
"rb"
)
as
file
:
color
=
None
width
=
None
height
=
None
scale
=
None
endian
=
None
header
=
file
.
readline
().
rstrip
()
if
header
.
decode
(
"ascii"
)
==
"PF"
:
color
=
True
elif
header
.
decode
(
"ascii"
)
==
"Pf"
:
color
=
False
else
:
raise
Exception
(
"Not a PFM file: "
+
path
)
dim_match
=
re
.
match
(
r
"^(\d+)\s(\d+)\s$"
,
file
.
readline
().
decode
(
"ascii"
))
if
dim_match
:
width
,
height
=
list
(
map
(
int
,
dim_match
.
groups
()))
else
:
raise
Exception
(
"Malformed PFM header."
)
scale
=
float
(
file
.
readline
().
decode
(
"ascii"
).
rstrip
())
if
scale
<
0
:
# little-endian
endian
=
"<"
scale
=
-
scale
else
:
# big-endian
endian
=
">"
data
=
np
.
fromfile
(
file
,
endian
+
"f"
)
shape
=
(
height
,
width
,
3
)
if
color
else
(
height
,
width
)
data
=
np
.
reshape
(
data
,
shape
)
data
=
np
.
flipud
(
data
)
return
data
,
scale
def
write_pfm
(
path
,
image
,
scale
=
1
):
"""Write pfm file.
Args:
path (str): pathto file
image (array): data
scale (int, optional): Scale. Defaults to 1.
"""
with
open
(
path
,
"wb"
)
as
file
:
color
=
None
if
image
.
dtype
.
name
!=
"float32"
:
raise
Exception
(
"Image dtype must be float32."
)
image
=
np
.
flipud
(
image
)
if
len
(
image
.
shape
)
==
3
and
image
.
shape
[
2
]
==
3
:
# color image
color
=
True
elif
(
len
(
image
.
shape
)
==
2
or
len
(
image
.
shape
)
==
3
and
image
.
shape
[
2
]
==
1
):
# greyscale
color
=
False
else
:
raise
Exception
(
"Image must have H x W x 3, H x W x 1 or H x W dimensions."
)
file
.
write
(
"PF
\n
"
if
color
else
"Pf
\n
"
.
encode
())
file
.
write
(
"%d %d
\n
"
.
encode
()
%
(
image
.
shape
[
1
],
image
.
shape
[
0
]))
endian
=
image
.
dtype
.
byteorder
if
endian
==
"<"
or
endian
==
"="
and
sys
.
byteorder
==
"little"
:
scale
=
-
scale
file
.
write
(
"%f
\n
"
.
encode
()
%
scale
)
image
.
tofile
(
file
)
def
read_image
(
path
):
"""Read image and output RGB image (0-1).
Args:
path (str): path to file
Returns:
array: RGB image (0-1)
"""
img
=
cv2
.
imread
(
path
)
if
img
.
ndim
==
2
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2RGB
)
/
255.0
return
img
def
resize_image
(
img
):
"""Resize image and make it fit for network.
Args:
img (array): image
Returns:
tensor: data ready for network
"""
height_orig
=
img
.
shape
[
0
]
width_orig
=
img
.
shape
[
1
]
if
width_orig
>
height_orig
:
scale
=
width_orig
/
384
else
:
scale
=
height_orig
/
384
height
=
(
np
.
ceil
(
height_orig
/
scale
/
32
)
*
32
).
astype
(
int
)
width
=
(
np
.
ceil
(
width_orig
/
scale
/
32
)
*
32
).
astype
(
int
)
img_resized
=
cv2
.
resize
(
img
,
(
width
,
height
),
interpolation
=
cv2
.
INTER_AREA
)
img_resized
=
(
torch
.
from_numpy
(
np
.
transpose
(
img_resized
,
(
2
,
0
,
1
))).
contiguous
().
float
()
)
img_resized
=
img_resized
.
unsqueeze
(
0
)
return
img_resized
def
resize_depth
(
depth
,
width
,
height
):
"""Resize depth map and bring to CPU (numpy).
Args:
depth (tensor): depth
width (int): image width
height (int): image height
Returns:
array: processed depth
"""
depth
=
torch
.
squeeze
(
depth
[
0
,
:,
:,
:]).
to
(
"cpu"
)
depth_resized
=
cv2
.
resize
(
depth
.
numpy
(),
(
width
,
height
),
interpolation
=
cv2
.
INTER_CUBIC
)
return
depth_resized
def
write_depth
(
path
,
depth
,
bits
=
1
):
"""Write depth map to pfm and png file.
Args:
path (str): filepath without extension
depth (array): depth
"""
write_pfm
(
path
+
".pfm"
,
depth
.
astype
(
np
.
float32
))
depth_min
=
depth
.
min
()
depth_max
=
depth
.
max
()
max_val
=
(
2
**
(
8
*
bits
))
-
1
if
depth_max
-
depth_min
>
np
.
finfo
(
"float"
).
eps
:
out
=
max_val
*
(
depth
-
depth_min
)
/
(
depth_max
-
depth_min
)
else
:
out
=
np
.
zeros
(
depth
.
shape
,
dtype
=
depth
.
type
)
if
bits
==
1
:
cv2
.
imwrite
(
path
+
".png"
,
out
.
astype
(
"uint8"
))
elif
bits
==
2
:
cv2
.
imwrite
(
path
+
".png"
,
out
.
astype
(
"uint16"
))
return
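A hedged end-to-end sketch using these utilities with a MiDaS model (not part of this commit; the image path is illustrative, and the NormalizeImage step from api.py is omitted for brevity, so the prediction would be unnormalized):

# Hypothetical sketch of utils.py in use, not part of this commit.
import torch
from lavis.common.annotator.midas.utils import read_image, resize_image, resize_depth, write_depth
from lavis.common.annotator.midas.api import load_model

model, _ = load_model("dpt_hybrid")              # downloads the checkpoint on first use
img = read_image("example.jpg")                  # RGB float image in [0, 1] (assumed path)
x = resize_image(img)                             # 1x3xH'xW' tensor, sides multiples of 32
with torch.no_grad():
    depth = model(x).unsqueeze(1)                 # back to 1x1xH'xW' so resize_depth can index it
depth_np = resize_depth(depth, img.shape[1], img.shape[0])
write_depth("example_depth", depth_np, bits=2)   # writes example_depth.pfm and example_depth.png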
lavis/common/annotator/mlsd/__init__.py (new file, 0 → 100644)
import cv2
import numpy as np
import torch
import os

from einops import rearrange
from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
from .utils import pred_lines

from annotator.util import annotator_ckpts_path


remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"


class MLSDdetector:
    def __init__(self):
        model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth")
        if not os.path.exists(model_path):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
        model = MobileV2_MLSD_Large()
        model.load_state_dict(torch.load(model_path), strict=True)
        self.model = model.cuda().eval()

    def __call__(self, input_image, thr_v, thr_d):
        assert input_image.ndim == 3
        img = input_image
        img_output = np.zeros_like(img)
        try:
            with torch.no_grad():
                lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
                for line in lines:
                    x_start, y_start, x_end, y_end = [int(val) for val in line]
                    cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
        except Exception as e:
            pass

        return img_output[:, :, 0]
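A minimal usage sketch for MLSDdetector (not part of this commit; the input path is illustrative, and a CUDA device plus the MLSD checkpoint download are assumed):

# Hypothetical usage sketch, not part of this commit.
import cv2
from lavis.common.annotator.mlsd import MLSDdetector

mlsd = MLSDdetector()                        # downloads mlsd_large_512_fp32.pth on first use
img = cv2.imread("room.jpg")                 # HWC uint8 image (assumed input path)
line_map = mlsd(img, thr_v=0.1, thr_d=0.1)   # single-channel map with detected line segments drawn in white
cv2.imwrite("lines.png", line_map)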
lavis/common/annotator/mlsd/models/mbv2_mlsd_large.py (new file, 0 → 100644)
import os
import sys
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torch.nn import functional as F


class BlockTypeA(nn.Module):
    def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale=True):
        super(BlockTypeA, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c2, out_c2, kernel_size=1),
            nn.BatchNorm2d(out_c2),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c1, out_c1, kernel_size=1),
            nn.BatchNorm2d(out_c1),
            nn.ReLU(inplace=True)
        )
        self.upscale = upscale

    def forward(self, a, b):
        b = self.conv1(b)
        a = self.conv2(a)
        if self.upscale:
            b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
        return torch.cat((a, b), dim=1)


class BlockTypeB(nn.Module):
    def __init__(self, in_c, out_c):
        super(BlockTypeB, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.conv1(x) + x
        x = self.conv2(x)
        return x


class BlockTypeC(nn.Module):
    def __init__(self, in_c, out_c):
        super(BlockTypeC, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        return x


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        self.channel_pad = out_planes - in_planes
        self.stride = stride
        #padding = (kernel_size - 1) // 2

        # TFLite uses slightly different padding than PyTorch
        if stride == 2:
            padding = 0
        else:
            padding = (kernel_size - 1) // 2

        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True)
        )
        self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)

    def forward(self, x):
        # TFLite uses different padding
        if self.stride == 2:
            x = F.pad(x, (0, 1, 0, 1), "constant", 0)
            #print(x.shape)

        for module in self:
            if not isinstance(module, nn.MaxPool2d):
                x = module(x)
        return x


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, pretrained=True):
        """
        MobileNet V2 main class
        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
            Set to 1 to turn off rounding
            block: Module specifying inverted residual building block for mobilenet
        """
        super(MobileNetV2, self).__init__()

        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        width_mult = 1.0
        round_nearest = 8

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            #[6, 160, 3, 2],
            #[6, 320, 1, 1],
        ]

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(4, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel

        self.features = nn.Sequential(*features)
        self.fpn_selected = [1, 3, 6, 10, 13]
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
        if pretrained:
            self._load_pretrained_model()

    def _forward_impl(self, x):
        # This exists since TorchScript doesn't support inheritance, so the superclass method
        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
        fpn_features = []
        for i, f in enumerate(self.features):
            if i > self.fpn_selected[-1]:
                break
            x = f(x)
            if i in self.fpn_selected:
                fpn_features.append(x)

        c1, c2, c3, c4, c5 = fpn_features
        return c1, c2, c3, c4, c5

    def forward(self, x):
        return self._forward_impl(x)

    def _load_pretrained_model(self):
        pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
        model_dict = {}
        state_dict = self.state_dict()
        for k, v in pretrain_dict.items():
            if k in state_dict:
                model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)


class MobileV2_MLSD_Large
(
nn
.
Module
):
def
__init__
(
self
):
super
(
MobileV2_MLSD_Large
,
self
).
__init__
()
self
.
backbone
=
MobileNetV2
(
pretrained
=
False
)
## A, B
self
.
block15
=
BlockTypeA
(
in_c1
=
64
,
in_c2
=
96
,
out_c1
=
64
,
out_c2
=
64
,
upscale
=
False
)
self
.
block16
=
BlockTypeB
(
128
,
64
)
## A, B
self
.
block17
=
BlockTypeA
(
in_c1
=
32
,
in_c2
=
64
,
out_c1
=
64
,
out_c2
=
64
)
self
.
block18
=
BlockTypeB
(
128
,
64
)
## A, B
self
.
block19
=
BlockTypeA
(
in_c1
=
24
,
in_c2
=
64
,
out_c1
=
64
,
out_c2
=
64
)
self
.
block20
=
BlockTypeB
(
128
,
64
)
## A, B, C
self
.
block21
=
BlockTypeA
(
in_c1
=
16
,
in_c2
=
64
,
out_c1
=
64
,
out_c2
=
64
)
self
.
block22
=
BlockTypeB
(
128
,
64
)
self
.
block23
=
BlockTypeC
(
64
,
16
)
def
forward
(
self
,
x
):
c1
,
c2
,
c3
,
c4
,
c5
=
self
.
backbone
(
x
)
x
=
self
.
block15
(
c4
,
c5
)
x
=
self
.
block16
(
x
)
x
=
self
.
block17
(
c3
,
x
)
x
=
self
.
block18
(
x
)
x
=
self
.
block19
(
c2
,
x
)
x
=
self
.
block20
(
x
)
x
=
self
.
block21
(
c1
,
x
)
x
=
self
.
block22
(
x
)
x
=
self
.
block23
(
x
)
x
=
x
[:,
7
:,
:,
:]
return
x
\ No newline at end of file
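
A quick way to sanity-check the large M-LSD head above is a random-weight forward pass. This is a hypothetical smoke test, not part of the committed file; the import path mirrors the repository layout and is an assumption, and no checkpoint or GPU is needed since the constructor builds the backbone with pretrained=False.

import torch
from lavis.common.annotator.mlsd.models.mbv2_mlsd_large import MobileV2_MLSD_Large  # assumed path

# The backbone takes a 4-channel input (resized RGB plus a constant ones channel,
# as built in mlsd/utils.py); the head emits 16 maps, of which the last 9 survive
# the x[:, 7:, :, :] slice at half the input resolution.
model = MobileV2_MLSD_Large().eval()
with torch.no_grad():
    tp_map = model(torch.randn(1, 4, 512, 512))
assert tp_map.shape == (1, 9, 256, 256)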
lavis/common/annotator/mlsd/models/mbv2_mlsd_tiny.py
0 → 100644
import os
import sys
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torch.nn import functional as F


class BlockTypeA(nn.Module):
    def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale=True):
        super(BlockTypeA, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c2, out_c2, kernel_size=1),
            nn.BatchNorm2d(out_c2),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c1, out_c1, kernel_size=1),
            nn.BatchNorm2d(out_c1),
            nn.ReLU(inplace=True)
        )
        self.upscale = upscale

    def forward(self, a, b):
        b = self.conv1(b)
        a = self.conv2(a)
        b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
        return torch.cat((a, b), dim=1)


class BlockTypeB(nn.Module):
    def __init__(self, in_c, out_c):
        super(BlockTypeB, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.conv1(x) + x
        x = self.conv2(x)
        return x


class BlockTypeC(nn.Module):
    def __init__(self, in_c, out_c):
        super(BlockTypeC, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_c),
            nn.ReLU()
        )
        self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        return x


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        self.channel_pad = out_planes - in_planes
        self.stride = stride
        # padding = (kernel_size - 1) // 2

        # TFLite uses slightly different padding than PyTorch
        if stride == 2:
            padding = 0
        else:
            padding = (kernel_size - 1) // 2

        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True)
        )
        self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)

    def forward(self, x):
        # TFLite uses different padding
        if self.stride == 2:
            x = F.pad(x, (0, 1, 0, 1), "constant", 0)
            # print(x.shape)

        for module in self:
            if not isinstance(module, nn.MaxPool2d):
                x = module(x)
        return x


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, pretrained=True):
        """
        MobileNet V2 main class
        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
            Set to 1 to turn off rounding
            block: Module specifying inverted residual building block for mobilenet
        """
        super(MobileNetV2, self).__init__()

        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        width_mult = 1.0
        round_nearest = 8

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            # [6, 96, 3, 1],
            # [6, 160, 3, 2],
            # [6, 320, 1, 1],
        ]

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(4, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel

        self.features = nn.Sequential(*features)
        self.fpn_selected = [3, 6, 10]
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

        # if pretrained:
        #     self._load_pretrained_model()

    def _forward_impl(self, x):
        # This exists since TorchScript doesn't support inheritance, so the superclass method
        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
        fpn_features = []
        for i, f in enumerate(self.features):
            if i > self.fpn_selected[-1]:
                break
            x = f(x)
            if i in self.fpn_selected:
                fpn_features.append(x)

        c2, c3, c4 = fpn_features
        return c2, c3, c4

    def forward(self, x):
        return self._forward_impl(x)

    def _load_pretrained_model(self):
        pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
        model_dict = {}
        state_dict = self.state_dict()
        for k, v in pretrain_dict.items():
            if k in state_dict:
                model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)


class MobileV2_MLSD_Tiny(nn.Module):
    def __init__(self):
        super(MobileV2_MLSD_Tiny, self).__init__()

        self.backbone = MobileNetV2(pretrained=True)

        self.block12 = BlockTypeA(in_c1=32, in_c2=64,
                                  out_c1=64, out_c2=64)
        self.block13 = BlockTypeB(128, 64)

        self.block14 = BlockTypeA(in_c1=24, in_c2=64,
                                  out_c1=32, out_c2=32)
        self.block15 = BlockTypeB(64, 64)

        self.block16 = BlockTypeC(64, 16)

    def forward(self, x):
        c2, c3, c4 = self.backbone(x)

        x = self.block12(c3, c4)
        x = self.block13(x)
        x = self.block14(c2, x)
        x = self.block15(x)
        x = self.block16(x)
        x = x[:, 7:, :, :]
        # print(x.shape)
        x = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True)

        return x
\ No newline at end of file
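
The tiny variant taps only three backbone stages and restores half resolution with the final bilinear upsample, so it hands the decoding utilities the same map geometry as the large head. A hypothetical shape check under the same assumptions as the sketch above (assumed import path, random weights; the pretrained load inside the backbone is commented out, so nothing is downloaded):

import torch
from lavis.common.annotator.mlsd.models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny  # assumed path

model = MobileV2_MLSD_Tiny().eval()
with torch.no_grad():
    tp_map = model(torch.randn(1, 4, 512, 512))
# same 9-channel, half-resolution TP map as MobileV2_MLSD_Large
assert tp_map.shape == (1, 9, 256, 256)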
lavis/common/annotator/mlsd/utils.py
0 → 100644
'''
modified by lihaoweicv
pytorch version
'''

'''
M-LSD
Copyright 2021-present NAVER Corp.
Apache License v2.0
'''

import os
import numpy as np
import cv2
import torch
from torch.nn import functional as F


def deccode_output_score_and_ptss(tpMap, topk_n=200, ksize=5):
    '''
    tpMap:
    center: tpMap[1, 0, :, :]
    displacement: tpMap[1, 1:5, :, :]
    '''
    b, c, h, w = tpMap.shape
    assert b == 1, 'only support bsize==1'
    displacement = tpMap[:, 1:5, :, :][0]
    center = tpMap[:, 0, :, :]
    heat = torch.sigmoid(center)
    hmax = F.max_pool2d(heat, (ksize, ksize), stride=1, padding=(ksize - 1) // 2)
    keep = (hmax == heat).float()
    heat = heat * keep
    heat = heat.reshape(-1, )

    scores, indices = torch.topk(heat, topk_n, dim=-1, largest=True)
    yy = torch.floor_divide(indices, w).unsqueeze(-1)
    xx = torch.fmod(indices, w).unsqueeze(-1)
    ptss = torch.cat((yy, xx), dim=-1)

    ptss = ptss.detach().cpu().numpy()
    scores = scores.detach().cpu().numpy()
    displacement = displacement.detach().cpu().numpy()
    displacement = displacement.transpose((1, 2, 0))
    return ptss, scores, displacement


def pred_lines(image, model,
               input_shape=[512, 512],
               score_thr=0.10,
               dist_thr=20.0):
    h, w, _ = image.shape
    h_ratio, w_ratio = [h / input_shape[0], w / input_shape[1]]

    resized_image = np.concatenate([cv2.resize(image, (input_shape[1], input_shape[0]), interpolation=cv2.INTER_AREA),
                                    np.ones([input_shape[0], input_shape[1], 1])], axis=-1)

    resized_image = resized_image.transpose((2, 0, 1))
    batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
    batch_image = (batch_image / 127.5) - 1.0

    batch_image = torch.from_numpy(batch_image).float().cuda()
    outputs = model(batch_image)
    pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
    start = vmap[:, :, :2]
    end = vmap[:, :, 2:]
    dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))

    segments_list = []
    for center, score in zip(pts, pts_score):
        y, x = center
        distance = dist_map[y, x]
        if score > score_thr and distance > dist_thr:
            disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
            x_start = x + disp_x_start
            y_start = y + disp_y_start
            x_end = x + disp_x_end
            y_end = y + disp_y_end
            segments_list.append([x_start, y_start, x_end, y_end])

    lines = 2 * np.array(segments_list)  # 256 > 512
    lines[:, 0] = lines[:, 0] * w_ratio
    lines[:, 1] = lines[:, 1] * h_ratio
    lines[:, 2] = lines[:, 2] * w_ratio
    lines[:, 3] = lines[:, 3] * h_ratio

    return lines


def pred_squares(image,
                 model,
                 input_shape=[512, 512],
                 params={'score': 0.06,
                         'outside_ratio': 0.28,
                         'inside_ratio': 0.45,
                         'w_overlap': 0.0,
                         'w_degree': 1.95,
                         'w_length': 0.0,
                         'w_area': 1.86,
                         'w_center': 0.14}):
    '''
    shape = [height, width]
    '''
    h, w, _ = image.shape
    original_shape = [h, w]

    resized_image = np.concatenate([cv2.resize(image, (input_shape[0], input_shape[1]), interpolation=cv2.INTER_AREA),
                                    np.ones([input_shape[0], input_shape[1], 1])], axis=-1)
    resized_image = resized_image.transpose((2, 0, 1))
    batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
    batch_image = (batch_image / 127.5) - 1.0

    batch_image = torch.from_numpy(batch_image).float().cuda()
    outputs = model(batch_image)

    pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
    start = vmap[:, :, :2]  # (x, y)
    end = vmap[:, :, 2:]  # (x, y)
    dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))

    junc_list = []
    segments_list = []
    for junc, score in zip(pts, pts_score):
        y, x = junc
        distance = dist_map[y, x]
        if score > params['score'] and distance > 20.0:
            junc_list.append([x, y])
            disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
            d_arrow = 1.0
            x_start = x + d_arrow * disp_x_start
            y_start = y + d_arrow * disp_y_start
            x_end = x + d_arrow * disp_x_end
            y_end = y + d_arrow * disp_y_end
            segments_list.append([x_start, y_start, x_end, y_end])

    segments = np.array(segments_list)

    ####### post processing for squares
    # 1. get unique lines
    point = np.array([[0, 0]])
    point = point[0]
    start = segments[:, :2]
    end = segments[:, 2:]
    diff = start - end
    a = diff[:, 1]
    b = -diff[:, 0]
    c = a * start[:, 0] + b * start[:, 1]

    d = np.abs(a * point[0] + b * point[1] - c) / np.sqrt(a ** 2 + b ** 2 + 1e-10)
    theta = np.arctan2(diff[:, 0], diff[:, 1]) * 180 / np.pi
    theta[theta < 0.0] += 180
    hough = np.concatenate([d[:, None], theta[:, None]], axis=-1)

    d_quant = 1
    theta_quant = 2
    hough[:, 0] //= d_quant
    hough[:, 1] //= theta_quant
    _, indices, counts = np.unique(hough, axis=0, return_index=True, return_counts=True)

    acc_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='float32')
    idx_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='int32') - 1
    yx_indices = hough[indices, :].astype('int32')
    acc_map[yx_indices[:, 0], yx_indices[:, 1]] = counts
    idx_map[yx_indices[:, 0], yx_indices[:, 1]] = indices

    acc_map_np = acc_map
    # acc_map = acc_map[None, :, :, None]
    #
    # ### fast suppression using tensorflow op
    # acc_map = tf.constant(acc_map, dtype=tf.float32)
    # max_acc_map = tf.keras.layers.MaxPool2D(pool_size=(5, 5), strides=1, padding='same')(acc_map)
    # acc_map = acc_map * tf.cast(tf.math.equal(acc_map, max_acc_map), tf.float32)
    # flatten_acc_map = tf.reshape(acc_map, [1, -1])
    # topk_values, topk_indices = tf.math.top_k(flatten_acc_map, k=len(pts))
    # _, h, w, _ = acc_map.shape
    # y = tf.expand_dims(topk_indices // w, axis=-1)
    # x = tf.expand_dims(topk_indices % w, axis=-1)
    # yx = tf.concat([y, x], axis=-1)

    ### fast suppression using pytorch op
    acc_map = torch.from_numpy(acc_map_np).unsqueeze(0).unsqueeze(0)
    _, _, h, w = acc_map.shape
    max_acc_map = F.max_pool2d(acc_map, kernel_size=5, stride=1, padding=2)
    acc_map = acc_map * ((acc_map == max_acc_map).float())
    flatten_acc_map = acc_map.reshape([-1, ])

    scores, indices = torch.topk(flatten_acc_map, len(pts), dim=-1, largest=True)
    yy = torch.div(indices, w, rounding_mode='floor').unsqueeze(-1)
    xx = torch.fmod(indices, w).unsqueeze(-1)
    yx = torch.cat((yy, xx), dim=-1)

    yx = yx.detach().cpu().numpy()

    topk_values = scores.detach().cpu().numpy()
    indices = idx_map[yx[:, 0], yx[:, 1]]
    basis = 5 // 2

    merged_segments = []
    for yx_pt, max_indice, value in zip(yx, indices, topk_values):
        y, x = yx_pt
        if max_indice == -1 or value == 0:
            continue
        segment_list = []
        for y_offset in range(-basis, basis + 1):
            for x_offset in range(-basis, basis + 1):
                indice = idx_map[y + y_offset, x + x_offset]
                cnt = int(acc_map_np[y + y_offset, x + x_offset])
                if indice != -1:
                    segment_list.append(segments[indice])
                if cnt > 1:
                    check_cnt = 1
                    current_hough = hough[indice]
                    for new_indice, new_hough in enumerate(hough):
                        if (current_hough == new_hough).all() and indice != new_indice:
                            segment_list.append(segments[new_indice])
                            check_cnt += 1
                        if check_cnt == cnt:
                            break
        group_segments = np.array(segment_list).reshape([-1, 2])
        sorted_group_segments = np.sort(group_segments, axis=0)
        x_min, y_min = sorted_group_segments[0, :]
        x_max, y_max = sorted_group_segments[-1, :]

        deg = theta[max_indice]
        if deg >= 90:
            merged_segments.append([x_min, y_max, x_max, y_min])
        else:
            merged_segments.append([x_min, y_min, x_max, y_max])

    # 2. get intersections
    new_segments = np.array(merged_segments)  # (x1, y1, x2, y2)
    start = new_segments[:, :2]  # (x1, y1)
    end = new_segments[:, 2:]  # (x2, y2)
    new_centers = (start + end) / 2.0
    diff = start - end
    dist_segments = np.sqrt(np.sum(diff ** 2, axis=-1))

    # ax + by = c
    a = diff[:, 1]
    b = -diff[:, 0]
    c = a * start[:, 0] + b * start[:, 1]
    pre_det = a[:, None] * b[None, :]
    det = pre_det - np.transpose(pre_det)

    pre_inter_y = a[:, None] * c[None, :]
    inter_y = (pre_inter_y - np.transpose(pre_inter_y)) / (det + 1e-10)
    pre_inter_x = c[:, None] * b[None, :]
    inter_x = (pre_inter_x - np.transpose(pre_inter_x)) / (det + 1e-10)
    inter_pts = np.concatenate([inter_x[:, :, None], inter_y[:, :, None]], axis=-1).astype('int32')

    # 3. get corner information
    # 3.1 get distance
    '''
    dist_segments:
        | dist(0), dist(1), dist(2), ...|
    dist_inter_to_segment1:
        | dist(inter,0), dist(inter,0), dist(inter,0), ... |
        | dist(inter,1), dist(inter,1), dist(inter,1), ... |
        ...
    dist_inter_to_semgnet2:
        | dist(inter,0), dist(inter,1), dist(inter,2), ... |
        | dist(inter,0), dist(inter,1), dist(inter,2), ... |
        ...
    '''

    dist_inter_to_segment1_start = np.sqrt(
        np.sum(((inter_pts - start[:, None, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
    dist_inter_to_segment1_end = np.sqrt(
        np.sum(((inter_pts - end[:, None, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
    dist_inter_to_segment2_start = np.sqrt(
        np.sum(((inter_pts - start[None, :, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
    dist_inter_to_segment2_end = np.sqrt(
        np.sum(((inter_pts - end[None, :, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]

    # sort ascending
    dist_inter_to_segment1 = np.sort(
        np.concatenate([dist_inter_to_segment1_start, dist_inter_to_segment1_end], axis=-1),
        axis=-1)  # [n_batch, n_batch, 2]
    dist_inter_to_segment2 = np.sort(
        np.concatenate([dist_inter_to_segment2_start, dist_inter_to_segment2_end], axis=-1),
        axis=-1)  # [n_batch, n_batch, 2]

    # 3.2 get degree
    inter_to_start = new_centers[:, None, :] - inter_pts
    deg_inter_to_start = np.arctan2(inter_to_start[:, :, 1], inter_to_start[:, :, 0]) * 180 / np.pi
    deg_inter_to_start[deg_inter_to_start < 0.0] += 360
    inter_to_end = new_centers[None, :, :] - inter_pts
    deg_inter_to_end = np.arctan2(inter_to_end[:, :, 1], inter_to_end[:, :, 0]) * 180 / np.pi
    deg_inter_to_end[deg_inter_to_end < 0.0] += 360

    '''
    B -- G
    |    |
    C -- R
    B : blue / G: green / C: cyan / R: red
    0 -- 1
    |    |
    3 -- 2
    '''
    # rename variables
    deg1_map, deg2_map = deg_inter_to_start, deg_inter_to_end
    # sort deg ascending
    deg_sort = np.sort(np.concatenate([deg1_map[:, :, None], deg2_map[:, :, None]], axis=-1), axis=-1)

    deg_diff_map = np.abs(deg1_map - deg2_map)
    # we only consider the smallest degree of intersect
    deg_diff_map[deg_diff_map > 180] = 360 - deg_diff_map[deg_diff_map > 180]

    # define available degree range
    deg_range = [60, 120]

    corner_dict = {corner_info: [] for corner_info in range(4)}
    inter_points = []
    for i in range(inter_pts.shape[0]):
        for j in range(i + 1, inter_pts.shape[1]):
            # i, j > line index, always i < j
            x, y = inter_pts[i, j, :]
            deg1, deg2 = deg_sort[i, j, :]
            deg_diff = deg_diff_map[i, j]

            check_degree = deg_diff > deg_range[0] and deg_diff < deg_range[1]

            outside_ratio = params['outside_ratio']  # over ratio >>> drop it!
            inside_ratio = params['inside_ratio']  # over ratio >>> drop it!
            check_distance = ((dist_inter_to_segment1[i, j, 1] >= dist_segments[i] and \
                               dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * outside_ratio) or \
                              (dist_inter_to_segment1[i, j, 1] <= dist_segments[i] and \
                               dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * inside_ratio)) and \
                             ((dist_inter_to_segment2[i, j, 1] >= dist_segments[j] and \
                               dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * outside_ratio) or \
                              (dist_inter_to_segment2[i, j, 1] <= dist_segments[j] and \
                               dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * inside_ratio))

            if check_degree and check_distance:
                corner_info = None

                if (deg1 >= 0 and deg1 <= 45 and deg2 >= 45 and deg2 <= 120) or \
                        (deg2 >= 315 and deg1 >= 45 and deg1 <= 120):
                    corner_info, color_info = 0, 'blue'
                elif (deg1 >= 45 and deg1 <= 125 and deg2 >= 125 and deg2 <= 225):
                    corner_info, color_info = 1, 'green'
                elif (deg1 >= 125 and deg1 <= 225 and deg2 >= 225 and deg2 <= 315):
                    corner_info, color_info = 2, 'black'
                elif (deg1 >= 0 and deg1 <= 45 and deg2 >= 225 and deg2 <= 315) or \
                        (deg2 >= 315 and deg1 >= 225 and deg1 <= 315):
                    corner_info, color_info = 3, 'cyan'
                else:
                    corner_info, color_info = 4, 'red'  # we don't use it
                    continue

                corner_dict[corner_info].append([x, y, i, j])
                inter_points.append([x, y])

    square_list = []
    connect_list = []
    segments_list = []
    for corner0 in corner_dict[0]:
        for corner1 in corner_dict[1]:
            connect01 = False
            for corner0_line in corner0[2:]:
                if corner0_line in corner1[2:]:
                    connect01 = True
                    break
            if connect01:
                for corner2 in corner_dict[2]:
                    connect12 = False
                    for corner1_line in corner1[2:]:
                        if corner1_line in corner2[2:]:
                            connect12 = True
                            break
                    if connect12:
                        for corner3 in corner_dict[3]:
                            connect23 = False
                            for corner2_line in corner2[2:]:
                                if corner2_line in corner3[2:]:
                                    connect23 = True
                                    break
                            if connect23:
                                for corner3_line in corner3[2:]:
                                    if corner3_line in corner0[2:]:
                                        # SQUARE!!!
                                        '''
                                        0 -- 1
                                        |    |
                                        3 -- 2
                                        square_list:
                                            order: 0 > 1 > 2 > 3
                                            | x0, y0, x1, y1, x2, y2, x3, y3 |
                                            | x0, y0, x1, y1, x2, y2, x3, y3 |
                                            ...
                                        connect_list:
                                            order: 01 > 12 > 23 > 30
                                            | line_idx01, line_idx12, line_idx23, line_idx30 |
                                            | line_idx01, line_idx12, line_idx23, line_idx30 |
                                            ...
                                        segments_list:
                                            order: 0 > 1 > 2 > 3
                                            | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
                                            | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
                                            ...
                                        '''
                                        square_list.append(corner0[:2] + corner1[:2] + corner2[:2] + corner3[:2])
                                        connect_list.append([corner0_line, corner1_line, corner2_line, corner3_line])
                                        segments_list.append(corner0[2:] + corner1[2:] + corner2[2:] + corner3[2:])

    def check_outside_inside(segments_info, connect_idx):
        # return 'outside or inside', min distance, cover_param, peri_param
        if connect_idx == segments_info[0]:
            check_dist_mat = dist_inter_to_segment1
        else:
            check_dist_mat = dist_inter_to_segment2

        i, j = segments_info
        min_dist, max_dist = check_dist_mat[i, j, :]
        connect_dist = dist_segments[connect_idx]
        if max_dist > connect_dist:
            return 'outside', min_dist, 0, 1
        else:
            return 'inside', min_dist, -1, -1

    top_square = None

    try:
        map_size = input_shape[0] / 2
        squares = np.array(square_list).reshape([-1, 4, 2])
        score_array = []
        connect_array = np.array(connect_list)
        segments_array = np.array(segments_list).reshape([-1, 4, 2])

        # get degree of corners:
        squares_rollup = np.roll(squares, 1, axis=1)
        squares_rolldown = np.roll(squares, -1, axis=1)
        vec1 = squares_rollup - squares
        normalized_vec1 = vec1 / (np.linalg.norm(vec1, axis=-1, keepdims=True) + 1e-10)
        vec2 = squares_rolldown - squares
        normalized_vec2 = vec2 / (np.linalg.norm(vec2, axis=-1, keepdims=True) + 1e-10)
        inner_products = np.sum(normalized_vec1 * normalized_vec2, axis=-1)  # [n_squares, 4]
        squares_degree = np.arccos(inner_products) * 180 / np.pi  # [n_squares, 4]

        # get square score
        overlap_scores = []
        degree_scores = []
        length_scores = []

        for connects, segments, square, degree in zip(connect_array, segments_array, squares, squares_degree):
            '''
            0 -- 1
            |    |
            3 -- 2

            # segments: [4, 2]
            # connects: [4]
            '''

            ###################################### OVERLAP SCORES
            cover = 0
            perimeter = 0
            # check 0 > 1 > 2 > 3
            square_length = []

            for start_idx in range(4):
                end_idx = (start_idx + 1) % 4

                connect_idx = connects[start_idx]  # segment idx of segment01
                start_segments = segments[start_idx]
                end_segments = segments[end_idx]

                start_point = square[start_idx]
                end_point = square[end_idx]

                # check whether outside or inside
                start_position, start_min, start_cover_param, start_peri_param = check_outside_inside(start_segments,
                                                                                                      connect_idx)
                end_position, end_min, end_cover_param, end_peri_param = check_outside_inside(end_segments,
                                                                                              connect_idx)

                cover += dist_segments[connect_idx] + start_cover_param * start_min + end_cover_param * end_min
                perimeter += dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min

                square_length.append(
                    dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min)

            overlap_scores.append(cover / perimeter)
            ######################################
            ###################################### DEGREE SCORES
            '''
            deg0 vs deg2
            deg1 vs deg3
            '''
            deg0, deg1, deg2, deg3 = degree
            deg_ratio1 = deg0 / deg2
            if deg_ratio1 > 1.0:
                deg_ratio1 = 1 / deg_ratio1
            deg_ratio2 = deg1 / deg3
            if deg_ratio2 > 1.0:
                deg_ratio2 = 1 / deg_ratio2
            degree_scores.append((deg_ratio1 + deg_ratio2) / 2)
            ######################################
            ###################################### LENGTH SCORES
            '''
            len0 vs len2
            len1 vs len3
            '''
            len0, len1, len2, len3 = square_length
            len_ratio1 = len0 / len2 if len2 > len0 else len2 / len0
            len_ratio2 = len1 / len3 if len3 > len1 else len3 / len1
            length_scores.append((len_ratio1 + len_ratio2) / 2)

            ######################################

        overlap_scores = np.array(overlap_scores)
        overlap_scores /= np.max(overlap_scores)
        degree_scores = np.array(degree_scores)
        # degree_scores /= np.max(degree_scores)
        length_scores = np.array(length_scores)

        ###################################### AREA SCORES
        area_scores = np.reshape(squares, [-1, 4, 2])
        area_x = area_scores[:, :, 0]
        area_y = area_scores[:, :, 1]
        correction = area_x[:, -1] * area_y[:, 0] - area_y[:, -1] * area_x[:, 0]
        area_scores = np.sum(area_x[:, :-1] * area_y[:, 1:], axis=-1) - np.sum(area_y[:, :-1] * area_x[:, 1:], axis=-1)
        area_scores = 0.5 * np.abs(area_scores + correction)
        area_scores /= (map_size * map_size)  # np.max(area_scores)
        ######################################

        ###################################### CENTER SCORES
        centers = np.array([[256 // 2, 256 // 2]], dtype='float32')  # [1, 2]
        # squares: [n, 4, 2]
        square_centers = np.mean(squares, axis=1)  # [n, 2]
        center2center = np.sqrt(np.sum((centers - square_centers) ** 2))
        center_scores = center2center / (map_size / np.sqrt(2.0))

        '''
        score_w = [overlap, degree, area, center, length]
        '''
        score_w = [0.0, 1.0, 10.0, 0.5, 1.0]
        score_array = params['w_overlap'] * overlap_scores \
                      + params['w_degree'] * degree_scores \
                      + params['w_area'] * area_scores \
                      - params['w_center'] * center_scores \
                      + params['w_length'] * length_scores

        best_square = []

        sorted_idx = np.argsort(score_array)[::-1]
        score_array = score_array[sorted_idx]
        squares = squares[sorted_idx]

    except Exception as e:
        pass

    '''return list
    merged_lines, squares, scores
    '''

    try:
        new_segments[:, 0] = new_segments[:, 0] * 2 / input_shape[1] * original_shape[1]
        new_segments[:, 1] = new_segments[:, 1] * 2 / input_shape[0] * original_shape[0]
        new_segments[:, 2] = new_segments[:, 2] * 2 / input_shape[1] * original_shape[1]
        new_segments[:, 3] = new_segments[:, 3] * 2 / input_shape[0] * original_shape[0]
    except:
        new_segments = []

    try:
        squares[:, :, 0] = squares[:, :, 0] * 2 / input_shape[1] * original_shape[1]
        squares[:, :, 1] = squares[:, :, 1] * 2 / input_shape[0] * original_shape[0]
    except:
        squares = []
        score_array = []

    try:
        inter_points = np.array(inter_points)
        inter_points[:, 0] = inter_points[:, 0] * 2 / input_shape[1] * original_shape[1]
        inter_points[:, 1] = inter_points[:, 1] * 2 / input_shape[0] * original_shape[0]
    except:
        inter_points = []

    return new_segments, squares, score_array, inter_points
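
Putting the pieces together, pred_lines is the entry point that turns an image plus one of the M-LSD heads above into line segments in original image coordinates. A hypothetical wiring sketch, not repository code: the checkpoint filename is an assumption, and a CUDA device is assumed because pred_lines moves the batch to .cuda() unconditionally.

import cv2
import torch
from lavis.common.annotator.mlsd.models.mbv2_mlsd_large import MobileV2_MLSD_Large  # assumed path
from lavis.common.annotator.mlsd.utils import pred_lines                            # assumed path

model = MobileV2_MLSD_Large().cuda().eval()
model.load_state_dict(torch.load('mlsd_large_512_fp32.pth'))  # hypothetical checkpoint name

image = cv2.imread('room.jpg')  # H x W x 3 as read by OpenCV
lines = pred_lines(image, model, input_shape=[512, 512], score_thr=0.1, dist_thr=20.0)
# lines: N x 4 array of (x_start, y_start, x_end, y_end), already rescaled from the
# 256x256 TP map ("# 256 > 512" above) back to the original image size.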
lavis/common/annotator/openpose/__init__.py
0 → 100644
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import numpy as np
from . import util
from .body import Body
from .hand import Hand
from annotator.util import annotator_ckpts_path


body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"


class OpenposeDetector:
    def __init__(self):
        body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
        hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")

        if not os.path.exists(hand_modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
            load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)

        self.body_estimation = Body(body_modelpath)
        self.hand_estimation = Hand(hand_modelpath)

    def __call__(self, oriImg, hand=False):
        oriImg = oriImg[:, :, ::-1].copy()
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = np.zeros_like(oriImg)
            canvas = util.draw_bodypose(canvas, candidate, subset)
            if hand:
                hands_list = util.handDetect(candidate, subset, oriImg)
                all_hand_peaks = []
                for x, y, w, is_left in hands_list:
                    peaks = self.hand_estimation(oriImg[y:y + w, x:x + w, :])
                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                    all_hand_peaks.append(peaks)
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
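
A short usage sketch for the detector defined above; hypothetical, assuming the annotator package is importable under the path shown and that the body/hand checkpoints can be fetched into annotator_ckpts_path on first use.

import cv2
from lavis.common.annotator.openpose import OpenposeDetector  # assumed import path

detector = OpenposeDetector()
img = cv2.cvtColor(cv2.imread('person.jpg'), cv2.COLOR_BGR2RGB)  # __call__ reverses channels internally
pose_map, keypoints = detector(img, hand=True)
# pose_map: stick-figure canvas drawn by util.draw_bodypose / draw_handpose;
# keypoints: dict with the raw 'candidate' and 'subset' lists returned by Body.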
lavis/common/annotator/openpose/body.py
0 → 100644
import cv2
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from torchvision import transforms

from . import util
from .model import bodypose_model


class Body(object):
    def __init__(self, model_path):
        self.model = bodypose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        # scale_search = [0.5, 1.0, 1.5, 2.0]
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0))  # output 1 is heatmaps
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0))  # output 0 is PAFs
            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap_avg + heatmap / len(multiplier)
            paf_avg += + paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right,
                 one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correpondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior,
                                 score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete some rows of subset which has few parts occur
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset


if __name__ == "__main__":
    body_estimation = Body('../model/body_pose_model.pth')

    test_image = '../images/ski.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    candidate, subset = body_estimation(oriImg)
    canvas = util.draw_bodypose(oriImg, candidate, subset)
    plt.imshow(canvas[:, :, [2, 1, 0]])
    plt.show()
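
The shifted-map comparison inside the per-part loop of Body.__call__ is a 4-neighbour non-maximum suppression on the smoothed heatmap: a pixel survives when it is at least as large as its four axis-aligned neighbours and above thre1. A standalone illustration of that idea (not repository code, variable names are mine):

import numpy as np

def local_peaks(heat, thre1=0.1):
    # shift the map one pixel in each direction, padding with zeros, exactly as
    # the map_left / map_right / map_up / map_down buffers do above
    up = np.zeros_like(heat); up[1:, :] = heat[:-1, :]
    down = np.zeros_like(heat); down[:-1, :] = heat[1:, :]
    left = np.zeros_like(heat); left[:, 1:] = heat[:, :-1]
    right = np.zeros_like(heat); right[:, :-1] = heat[:, 1:]
    mask = (heat >= up) & (heat >= down) & (heat >= left) & (heat >= right) & (heat > thre1)
    ys, xs = np.nonzero(mask)
    return list(zip(xs, ys))  # (x, y) order, matching the "note reverse" comment above

peaks = local_peaks(np.random.rand(32, 32))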
lavis/common/annotator/openpose/hand.py
0 → 100644
import cv2
import json
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from skimage.measure import label

from .model import handpose_model
from . import util


class Hand(object):
    def __init__(self, model_path):
        self.model = handpose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        scale_search = [0.5, 1.0, 1.5, 2.0]
        # scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
        # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                output = self.model(data).cpu().numpy()
                # output = self.model(data).numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
            # everything below the threshold
            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            all_peaks.append([x, y])
        return np.array(all_peaks)


if __name__ == "__main__":
    hand_estimation = Hand('../model/hand_pose_model.pth')

    # test_image = '../images/hand.jpg'
    test_image = '../images/hand.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    peaks = hand_estimation(oriImg)
    canvas = util.draw_handpose(oriImg, peaks, True)
    cv2.imshow('', canvas)
    cv2.waitKey(0)
\ No newline at end of file
lavis/common/annotator/openpose/model.py
0 → 100644
import torch
from collections import OrderedDict

import torch
import torch.nn as nn


def make_layers(block, no_relu_layers):
    layers = []
    for layer_name, v in block.items():
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])
            layers.append((layer_name, layer))
        else:
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(layers))


class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1', \
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2', \
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1', \
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']

    def forward(self, x):

        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2


class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3', \
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stage 2-6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
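
make_layers above builds a named nn.Sequential from an OrderedDict spec: conv entries are [in_ch, out_ch, kernel, stride, pad], entries whose name contains 'pool' are [kernel, stride, pad], and names listed in no_relu_layers get no trailing ReLU. A small illustration with a made-up spec (not repository code; the import path is an assumption):

import torch
from collections import OrderedDict
from lavis.common.annotator.openpose.model import make_layers  # assumed path

spec = OrderedDict([
    ('conv1_1', [3, 16, 3, 1, 1]),      # conv + relu
    ('pool1_stage1', [2, 2, 0]),        # 2x2 max pool, halves the resolution
    ('conv_out', [16, 8, 1, 1, 0]),     # no relu: listed below
])
head = make_layers(spec, no_relu_layers=['conv_out'])
y = head(torch.randn(1, 3, 64, 64))
assert y.shape == (1, 8, 32, 32)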
lavis/common/annotator/openpose/util.py
0 → 100644
View file @
c04f261a
import
math
import
numpy
as
np
import
matplotlib
import
cv2
def
padRightDownCorner
(
img
,
stride
,
padValue
):
h
=
img
.
shape
[
0
]
w
=
img
.
shape
[
1
]
pad
=
4
*
[
None
]
pad
[
0
]
=
0
# up
pad
[
1
]
=
0
# left
pad
[
2
]
=
0
if
(
h
%
stride
==
0
)
else
stride
-
(
h
%
stride
)
# down
pad
[
3
]
=
0
if
(
w
%
stride
==
0
)
else
stride
-
(
w
%
stride
)
# right
img_padded
=
img
pad_up
=
np
.
tile
(
img_padded
[
0
:
1
,
:,
:]
*
0
+
padValue
,
(
pad
[
0
],
1
,
1
))
img_padded
=
np
.
concatenate
((
pad_up
,
img_padded
),
axis
=
0
)
pad_left
=
np
.
tile
(
img_padded
[:,
0
:
1
,
:]
*
0
+
padValue
,
(
1
,
pad
[
1
],
1
))
img_padded
=
np
.
concatenate
((
pad_left
,
img_padded
),
axis
=
1
)
pad_down
=
np
.
tile
(
img_padded
[
-
2
:
-
1
,
:,
:]
*
0
+
padValue
,
(
pad
[
2
],
1
,
1
))
img_padded
=
np
.
concatenate
((
img_padded
,
pad_down
),
axis
=
0
)
pad_right
=
np
.
tile
(
img_padded
[:,
-
2
:
-
1
,
:]
*
0
+
padValue
,
(
1
,
pad
[
3
],
1
))
img_padded
=
np
.
concatenate
((
img_padded
,
pad_right
),
axis
=
1
)
return
img_padded
,
pad
# transfer caffe model to pytorch which will match the layer name
def
transfer
(
model
,
model_weights
):
transfered_model_weights
=
{}
for
weights_name
in
model
.
state_dict
().
keys
():
transfered_model_weights
[
weights_name
]
=
model_weights
[
'.'
.
join
(
weights_name
.
split
(
'.'
)[
1
:])]
return
transfered_model_weights
# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset):
    stickwidth = 4
    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
               [1, 16], [16, 18], [3, 17], [6, 18]]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

    for i in range(18):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
    # plt.imshow(canvas[:, :, [2, 1, 0]])
    return canvas
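Illustrative call (not part of this commit; all coordinates invented): candidate rows are [x, y, score, id] and each subset row stores, per body keypoint, an index into candidate or -1 when that keypoint is missing.

import numpy as np

canvas = np.zeros((256, 256, 3), dtype=np.uint8)
# two detected keypoints: nose (candidate row 0) and neck (candidate row 1)
candidate = np.array([[128.0,  60.0, 0.9, 0.0],
                      [128.0, 110.0, 0.8, 1.0]])
subset = -1 * np.ones((1, 20))
subset[0, 0] = 0   # keypoint 1 (nose) -> candidate row 0
subset[0, 1] = 1   # keypoint 2 (neck) -> candidate row 1

canvas = draw_bodypose(canvas, candidate, subset)  # draws two dots and the neck-nose limb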
# image drawn by opencv is not good.
def draw_handpose(canvas, all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                cv2.line(canvas, (x1, y1), (x2, y2),
                         matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
            if show_number:
                cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
    return canvas
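Illustrative call (not part of this commit): all_hand_peaks is a list of 21x2 integer keypoint arrays; a point at (0, 0) marks a missing keypoint, so the random points below are kept away from zero.

import numpy as np

canvas = np.zeros((128, 128, 3), dtype=np.uint8)
rng = np.random.default_rng(0)
peaks = rng.integers(10, 118, size=(21, 2))   # one synthetic hand, all keypoints present
canvas = draw_handpose(canvas, [peaks], show_number=True)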
# detect hand according to body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []
    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip the side if any of the three keypoints is not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
            # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
            # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x-y refers to the center --> offset to topLeft point
            # handRectangle.x -= handRectangle.width / 2.f;
            # handRectangle.y -= handRectangle.height / 2.f;
            x -= width / 2
            y -= width / 2  # width = height
            # clip the box where it overflows the image
            if x < 0: x = 0
            if y < 0: y = 0
            width1 = width
            width2 = width
            if x + width > image_width: width1 = image_width - x
            if y + width > image_height: width2 = image_height - y
            width = min(width1, width2)
            # discard hand boxes smaller than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width = height since the network requires a square input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result
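Illustrative call (not part of this commit; the keypoints are invented): a person with a detected left shoulder, elbow, and wrist yields one square box, which is typically cropped out of the original image and passed to the hand keypoint network.

import numpy as np

oriImg = np.zeros((480, 640, 3), dtype=np.uint8)
# one person with left shoulder/elbow/wrist (body keypoints 5, 6, 7); values are invented
candidate = np.zeros((8, 4))
candidate[5, :2] = [300, 100]      # shoulder
candidate[6, :2] = [300, 200]      # elbow
candidate[7, :2] = [300, 300]      # wrist
subset = -1 * np.ones((1, 20))
subset[0, [5, 6, 7]] = [5, 6, 7]   # indices into `candidate`

for x, y, w, is_left in handDetect(candidate, subset, oriImg):
    crop = oriImg[y:y + w, x:x + w, :]   # square hand patch
    print(x, y, w, is_left)              # 225 258 150 True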
# get max index of 2d array
def npmax(array):
    arrayindex = array.argmax(1)
    arrayvalue = array.max(1)
    i = arrayvalue.argmax()
    j = arrayindex[i]
    return i, j
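Illustrative call (not part of this commit): npmax returns the (row, column) position of the global maximum of a 2-D array, e.g. the peak location of a heatmap.

import numpy as np

heatmap = np.zeros((46, 46))
heatmap[12, 30] = 1.0          # pretend peak
row, col = npmax(heatmap)
print(row, col)                # 12 30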