Commit 17bc28d5 authored by sunxx1

Merge branch 'main' into 'main'

yolov5: added MPI single-node multi-GPU and multi-node multi-GPU launch modes and updated its README; removed maskrcnn's debug log output and updated that model's README.

See merge request dcutoolkit/deeplearing/dlexamples_new!46
parents 7143f128 5a567950
......@@ -5,7 +5,6 @@ PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
Usage:
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx') # file from branch
"""
import torch
......@@ -28,35 +27,36 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
"""
from pathlib import Path
from models.common import AutoShape, DetectMultiBackend
from models.yolo import Model
from models.experimental import attempt_load
from utils.general import check_requirements, set_logging
from utils.downloads import attempt_download
from utils.general import check_requirements, intersect_dicts, set_logging
from utils.torch_utils import select_device
file = Path(__file__).resolve()
check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
set_logging(verbose=verbose)
name = Path(name)
path = name.with_suffix('.pt') if name.suffix == '' else name # checkpoint path
save_dir = Path('') if str(name).endswith('.pt') else file.parent
path = (save_dir / name).with_suffix('.pt') # checkpoint path
try:
device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)
if pretrained and channels == 3 and classes == 80:
model = DetectMultiBackend(path, device=device) # download/load FP32 model
# model = models.experimental.attempt_load(path, map_location=device) # download/load FP32 model
model = attempt_load(path, map_location=device) # download/load FP32 model
else:
cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path
cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0] # model.yaml path
model = Model(cfg, channels, classes) # create model
if pretrained:
ckpt = torch.load(attempt_download(path), map_location=device) # load
msd = model.state_dict() # model state_dict
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect
csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape} # filter
model.load_state_dict(csd, strict=False) # load
if len(ckpt['model'].names) == classes:
model.names = ckpt['model'].names # set class names attribute
if autoshape:
model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS
model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS
return model.to(device)
except Exception as e:
......@@ -125,11 +125,10 @@ if __name__ == '__main__':
# model = custom(path='path/to/model.pt') # custom
# Verify inference
from pathlib import Path
import cv2
import numpy as np
from PIL import Image
from pathlib import Path
imgs = ['data/images/zidane.jpg', # filename
Path('data/images/zidane.jpg'), # Path
......@@ -138,6 +137,6 @@ if __name__ == '__main__':
Image.open('data/images/bus.jpg'), # PIL
np.zeros((320, 640, 3))] # numpy
results = model(imgs, size=320) # batched inference
results = model(imgs) # batched inference
results.print()
results.save()
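Reviewer note on the `intersect_dicts`/shape-filter lines in `_create` above: they implement partial checkpoint loading. A minimal standalone sketch of the same idea, using a toy `nn.Linear` rather than the repo's helpers (the mismatched 'bias' entry is fabricated for illustration):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
ckpt = {'weight': torch.rand(2, 4), 'bias': torch.rand(3)}  # 'bias' shape mismatches on purpose
msd = model.state_dict()
csd = {k: v for k, v in ckpt.items() if k in msd and msd[k].shape == v.shape}  # intersect + filter
model.load_state_dict(csd, strict=False)  # loads 'weight', silently skips 'bias'
print(list(csd))  # ['weight']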
......@@ -2,7 +2,6 @@
"""
Experimental modules
"""
import math
import numpy as np
import torch
......@@ -33,7 +32,7 @@ class Sum(nn.Module):
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
......@@ -49,27 +48,26 @@ class Sum(nn.Module):
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super().__init__()
n = len(k) # number of convolutions
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList(
[nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
......@@ -99,6 +97,7 @@ def attempt_load(weights, map_location=None, inplace=True, fuse=True):
else:
model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse
# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
......
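Note on the MixConv2d hunk above: the equal-channel branch splits the c2 output channels evenly across the kernel sizes via a floor-of-linspace trick. A standalone sketch of just that split (toy numbers, not the repo's API):

import torch

c2, k = 8, (1, 3)                             # 8 output channels, two kernel sizes
n = len(k)
i = torch.linspace(0, n - 1E-6, c2).floor()   # tensor([0., 0., 0., 0., 1., 1., 1., 1.])
c_ = [int((i == g).sum()) for g in range(n)]  # [4, 4] intermediate channels per group
print(c_)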
......@@ -9,22 +9,22 @@ anchors:
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 9, C3, [512]]
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 v6.0 BiFPN head
# YOLOv5 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......@@ -37,7 +37,7 @@ head:
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[[-1, 14, 6], 1, Concat, [1]], # cat P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
......
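The `Focus` / `Conv(64, 6, 2, 2)` pair in the backbone above reflects the v6.0 swap of the slice-and-concat stem for a single 6x6 stride-2 convolution. A minimal sketch of what the Focus slicing does (shapes only; the trailing Conv is omitted):

import torch

x = torch.rand(1, 3, 8, 8)  # N,C,H,W input
patches = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
                     x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
print(patches.shape)  # torch.Size([1, 12, 4, 4]): 4x channels at half resolution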
......@@ -9,34 +9,34 @@ anchors:
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 v6.0 FPN head
# YOLOv5 FPN head
head:
[[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
......@@ -4,24 +4,24 @@
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
anchors: 3
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......
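A note on the recurring `SPPF(1024, 5)` / `SPP(1024, [5, 9, 13])` pairs in these configs: the two pooling stages are numerically identical, because chaining 5x5 stride-1 max-pools grows the effective window to 9x9 and then 13x13. A quick standalone check:

import torch
import torch.nn as nn

x = torch.rand(1, 4, 16, 16)
m5 = nn.MaxPool2d(5, 1, 2)                       # 5x5, stride 1, 'same' padding
y1 = m5(x); y2 = m5(y1)                          # chained pools: 9x9, then 13x13 coverage
sppf = torch.cat([x, y1, y2, m5(y2)], 1)
spp = torch.cat([x, m5(x), nn.MaxPool2d(9, 1, 4)(x), nn.MaxPool2d(13, 1, 6)(x)], 1)
print(torch.allclose(spp, sppf))                 # True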
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]
......@@ -4,26 +4,26 @@
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
anchors: 3
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......@@ -50,7 +50,7 @@ head:
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[-1, 3, C3, [1024, False]], # 32 (P5/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
......@@ -4,16 +4,16 @@
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
anchors: 3
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
......@@ -21,11 +21,11 @@ backbone:
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
[-1, 1, SPP, [1280, [3, 5]]],
[-1, 3, C3, [1280, False]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
# YOLOv5 head
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......
......@@ -9,40 +9,40 @@ anchors:
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 v6.0 PANet head
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
......@@ -9,22 +9,22 @@ anchors:
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 9, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3Ghost, [1024, False]], # 9
]
# YOLOv5 v6.0 head
# YOLOv5 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......
......@@ -9,22 +9,22 @@ anchors:
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 9 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 v6.0 head
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
......
......@@ -11,6 +11,7 @@ Export:
"""
import argparse
import logging
import sys
from copy import deepcopy
from pathlib import Path
......@@ -27,17 +28,19 @@ import torch
import torch.nn as nn
from tensorflow import keras
from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3
from models.experimental import CrossConv, MixConv2d, attempt_load
from models.yolo import Detect
from utils.general import make_divisible, print_args, set_logging
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
LOGGER = logging.getLogger(__name__)
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
super().__init__()
super(TFBN, self).__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
......@@ -51,7 +54,7 @@ class TFBN(keras.layers.Layer):
class TFPad(keras.layers.Layer):
def __init__(self, pad):
super().__init__()
super(TFPad, self).__init__()
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
def call(self, inputs):
......@@ -62,7 +65,7 @@ class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, weights, kernel, stride, padding, groups
super().__init__()
super(TFConv, self).__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
......@@ -93,11 +96,11 @@ class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
super(TFFocus, self).__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
# inputs = inputs / 255 # normalize 0-255 to 0-1
# inputs = inputs / 255. # normalize 0-255 to 0-1
return self.conv(tf.concat([inputs[:, ::2, ::2, :],
inputs[:, 1::2, ::2, :],
inputs[:, ::2, 1::2, :],
......@@ -107,7 +110,7 @@ class TFFocus(keras.layers.Layer):
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
super(TFBottleneck, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
......@@ -120,7 +123,7 @@ class TFBottleneck(keras.layers.Layer):
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2D
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
super().__init__()
super(TFConv2d, self).__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(
c2, k, s, 'VALID', use_bias=bias,
......@@ -135,7 +138,7 @@ class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
super(TFBottleneckCSP, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
......@@ -155,7 +158,7 @@ class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
super(TFC3, self).__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
......@@ -169,7 +172,7 @@ class TFC3(keras.layers.Layer):
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
super().__init__()
super(TFSPP, self).__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
......@@ -180,25 +183,9 @@ class TFSPP(keras.layers.Layer):
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
def call(self, inputs):
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
super().__init__()
super(TFDetect, self).__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
......@@ -226,13 +213,13 @@ class TFDetect(keras.layers.Layer):
if not self.training: # inference
y = tf.sigmoid(x[i])
xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, y[..., 4:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no]))
return x if self.training else (tf.concat(z, 1), x)
......@@ -246,7 +233,7 @@ class TFDetect(keras.layers.Layer):
class TFUpsample(keras.layers.Layer):
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
super().__init__()
super(TFUpsample, self).__init__()
assert scale_factor == 2, "scale_factor must be 2"
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
......@@ -260,7 +247,7 @@ class TFUpsample(keras.layers.Layer):
class TFConcat(keras.layers.Layer):
def __init__(self, dimension=1, w=None):
super().__init__()
super(TFConcat, self).__init__()
assert dimension == 1, "convert only NCHW to NHWC concat"
self.d = 3
......@@ -269,7 +256,7 @@ class TFConcat(keras.layers.Layer):
def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
......@@ -285,7 +272,7 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
......@@ -296,7 +283,7 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
elif m is Detect:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
......@@ -309,11 +296,11 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
else tf_m(*args, w=model.model[i]) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
np = sum([x.numel() for x in torch_m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
......@@ -322,7 +309,7 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
class TFModel:
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
super().__init__()
super(TFModel, self).__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
......@@ -333,7 +320,7 @@ class TFModel:
# Define model
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
self.yaml['nc'] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
......@@ -410,10 +397,10 @@ class AgnosticNMS(keras.layers.Layer):
def representative_dataset_gen(dataset, ncalib=100):
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
for n, (path, img, im0s, vid_cap) in enumerate(dataset):
input = np.transpose(img, [1, 2, 0])
input = np.expand_dims(input, axis=0).astype(np.float32)
input /= 255
input /= 255.0
yield [input]
if n >= ncalib:
break
......@@ -440,8 +427,6 @@ def run(weights=ROOT / 'yolov5s.pt', # weights path
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def parse_opt():
parser = argparse.ArgumentParser()
......@@ -456,6 +441,7 @@ def parse_opt():
def main(opt):
set_logging()
run(**vars(opt))
......
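Context for the `TFConcat` class above (`self.d = 3`): the Keras layers in this file run NHWC while the PyTorch weights they wrap are NCHW, so PyTorch's channel dim 1 maps to TF dim 3. A one-line sketch of the layout mapping:

import numpy as np

nchw = np.zeros((1, 8, 4, 4))        # PyTorch layout: N,C,H,W
nhwc = nchw.transpose(0, 2, 3, 1)    # TF/Keras layout: N,H,W,C
print(nhwc.shape)                    # (1, 4, 4, 8) -> channels now on axis 3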
......@@ -20,15 +20,18 @@ if str(ROOT) not in sys.path:
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.general import check_yaml, make_divisible, print_args, set_logging
from utils.plots import feature_visualization
from utils.torch_utils import fuse_conv_and_bn, initialize_weights, model_info, scale_img, select_device, time_sync
from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
select_device, time_sync
try:
import thop # for FLOPs computation
except ImportError:
thop = None
LOGGER = logging.getLogger(__name__)
class Detect(nn.Module):
stride = None # strides computed during build
......@@ -54,15 +57,15 @@ class Detect(nn.Module):
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
y = x[i].sigmoid()
if self.inplace:
y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i] # xy
xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, y[..., 4:]), -1)
z.append(y.view(bs, -1, self.no))
......@@ -71,10 +74,7 @@ class Detect(nn.Module):
def _make_grid(self, nx=20, ny=20, i=0):
d = self.anchors[i].device
if check_version(torch.__version__, '1.10.0'): # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)], indexing='ij')
else:
yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)])
yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float()
anchor_grid = (self.anchors[i].clone() * self.stride[i]) \
.view((1, self.na, 1, 1, 2)).expand((1, self.na, ny, nx, 2)).float()
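A compact standalone sketch (toy shapes, assumed values) of the xy/wh decoding in `Detect.forward` above: sigmoid outputs are rescaled so box centers may drift half a cell outside their grid cell and widths/heights stay within 4x the anchor. The `indexing='ij'` argument requires torch>=1.10, which is what the `check_version` branch above guards:

import torch

ny, nx, stride = 2, 2, 8.0                    # tiny 2x2 grid at stride 8
yv, xv = torch.meshgrid(torch.arange(ny), torch.arange(nx), indexing='ij')
grid = torch.stack((xv, yv), 2).float()       # (ny, nx, 2) cell offsets
anchor = torch.tensor([10.0, 13.0])           # one anchor, in pixels

y = torch.randn(ny, nx, 4).sigmoid()          # stand-in for the network output
xy = (y[..., 0:2] * 2 - 0.5 + grid) * stride  # centers in pixels
wh = (y[..., 2:4] * 2) ** 2 * anchor          # wh in (0, 4*anchor)
print(xy.shape, wh.shape)                     # torch.Size([2, 2, 2]) twice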
......@@ -89,7 +89,7 @@ class Model(nn.Module):
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding='ascii', errors='ignore') as f:
with open(cfg, errors='ignore') as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
......@@ -200,7 +200,7 @@ class Model(nn.Module):
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
def _print_biases(self):
......@@ -225,6 +225,12 @@ class Model(nn.Module):
self.info()
return self
def autoshape(self): # add AutoShape module
LOGGER.info('Adding AutoShape... ')
m = AutoShape(self) # wrap model
copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes
return m
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
......@@ -241,7 +247,7 @@ class Model(nn.Module):
def parse_model(d, ch): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
......@@ -269,7 +275,7 @@ def parse_model(d, ch): # model_dict, input_channels(3)
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
c2 = sum([ch[x] for x in f])
elif m is Detect:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
......@@ -281,11 +287,11 @@ def parse_model(d, ch): # model_dict, input_channels(3)
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in m_.parameters()) # number params
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
......@@ -299,10 +305,10 @@ if __name__ == '__main__':
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--profile', action='store_true', help='profile model speed')
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(FILE.stem, opt)
set_logging()
device = select_device(opt.device)
# Create model
......@@ -314,14 +320,6 @@ if __name__ == '__main__':
img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
y = model(img, profile=True)
# Test all models
if opt.test:
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
try:
_ = Model(cfg)
except Exception as e:
print(f'Error in {cfg}: {e}')
# Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter('.')
......
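One more note on this file: the bias-seeding hunk above (`b.data[:, 4] += math.log(8 / (640 / s) ** 2)`) encodes a prior of roughly 8 objects spread over the (640/s)^2 grid cells at each stride. A quick numeric check, assuming the P3 stride of 8:

import math

s = 8                                  # P3/8 stride
b_obj = math.log(8 / (640 / s) ** 2)   # objectness prior logit
p = 1 / (1 + math.exp(-b_obj))         # sigmoid(b_obj)
print(b_obj, p)                        # ~-6.68 -> ~0.00125 objectness prior per cell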
......@@ -27,7 +27,6 @@ seaborn>=0.11.0
# scikit-learn==0.19.2 # CoreML quantization
# tensorflow>=2.4.1 # TFLite export
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev # OpenVINO export
# Extras --------------------------------------
# albumentations>=1.0.3
......
# Project-wide configuration file, can be used for package metadata and other tool configurations
# Example usage: global configuration for PEP8 (via flake8) setting or default pytest arguments
[metadata]
license_file = LICENSE
description-file = README.md
[tool:pytest]
norecursedirs =
.git
dist
build
addopts =
--doctest-modules
--durations=25
--color=yes
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore =
E731 # Do not assign a lambda expression, use a def
F405
E402
F841
E741
F821
E722
F401
W504
E127
E231
E501
F403
E302
F541
[isort]
# https://pycqa.github.io/isort/docs/configuration/options.html
line_length = 120
multi_line_output = 0
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=ib0
export HSA_USERPTR_FOR_PAGED_MEM=0
module rm compiler/dtk/21.10
module load compiler/dtk/22.04.2
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE
echo $lrank
echo $comm_rank
echo $comm_size
APP="python3 `pwd`/train_multi.py --batch 128 --dist-url tcp://${1}:34567 --dist-backend nccl --world-size=${comm_size} --rank=${comm_rank} --local_rank=${lrank} --data coco.yaml --weight yolov5m.pt --project yolov5m/train --hyp data/hyps/hyp.scratch-high.yaml --cfg yolov5m.yaml --epochs 5000"
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_0:1
export UCX_IB_PCI_BW=mlx5_0:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#echo GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_1:1
export UCX_IB_PCI_BW=mlx5_1:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_2:1
export UCX_IB_PCI_BW=mlx5_2:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_3:1
export UCX_IB_PCI_BW=mlx5_3:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
;;
esac
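The wrapper script above passes `--dist-url/--dist-backend/--world-size/--rank/--local_rank` into `train_multi.py`, which is not part of this diff. A hedged sketch of how such a script typically consumes those flags (names mirror the command line; everything below is an assumption about train_multi.py, not its actual code):

import argparse
import torch
import torch.distributed as dist

parser = argparse.ArgumentParser()
parser.add_argument('--dist-url', default='tcp://127.0.0.1:34567')
parser.add_argument('--dist-backend', default='nccl')
parser.add_argument('--world-size', type=int, default=1)
parser.add_argument('--rank', type=int, default=0)
parser.add_argument('--local_rank', type=int, default=0)
args, _ = parser.parse_known_args()    # tolerate the extra training flags

dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                        world_size=args.world_size, rank=args.rank)
torch.cuda.set_device(args.local_rank)  # one GPU per MPI local rank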
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 model on a custom dataset.
Models and datasets download automatically from the latest YOLOv5 release.
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
Train a YOLOv5 model on a custom dataset
Usage:
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED)
$ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
"""
import argparse
import logging
import math
import os
import random
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
......@@ -29,7 +23,7 @@ import torch.nn as nn
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import SGD, Adam, AdamW, lr_scheduler
from torch.optim import Adam, SGD, lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
......@@ -42,21 +36,21 @@ import val # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.datasets import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
from utils.downloads import attempt_download
from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
print_args, print_mutation, strip_optimizer)
from utils.loggers import Loggers
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.loss import ComputeLoss
from utils.plots import plot_labels, plot_evolve
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
torch_distributed_zero_first
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.metrics import fitness
from utils.plots import plot_evolve, plot_labels
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
from utils.loggers import Loggers
from utils.callbacks import Callbacks
LOGGER = logging.getLogger(__name__)
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
......@@ -67,7 +61,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
device,
callbacks
):
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
......@@ -83,14 +77,13 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
# Save run settings
if not evolve:
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(opt), f, sort_keys=False)
data_dict = None
# Loggers
data_dict = None
if RANK in [-1, 0]:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
if loggers.wandb:
......@@ -112,7 +105,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
# Model
check_suffix(weights, '.pt') # check weights
......@@ -131,22 +124,13 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
# Freeze
freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
freeze = [f'model.{x}.' for x in range(freeze)] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
LOGGER.info(f'freezing {k}')
print(f'freezing {k}')
v.requires_grad = False
# Image size
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# Batch size
if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size
batch_size = check_train_batch_size(model, imgsz)
loggers.on_params_update({"batch_size": batch_size})
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
......@@ -162,10 +146,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
g1.append(v.weight)
if opt.optimizer == 'Adam':
if opt.adam:
optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
elif opt.optimizer == 'AdamW':
optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
......@@ -208,9 +190,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
del ckpt, csd
# Image sizes
gs = max(int(model.stride.max()), 32) # grid size (max stride)
nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj'])
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# DP mode
if cuda and RANK == -1 and torch.cuda.device_count() > 1:
LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
model = torch.nn.DataParallel(model)
......@@ -223,7 +210,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
workers=workers, image_weights=opt.image_weights, quad=opt.quad,
prefix=colorstr('train: '), shuffle=True)
prefix=colorstr('train: '))
mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class
nb = len(train_loader) # number of batches
assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
......@@ -254,11 +241,10 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
if cuda and RANK != -1:
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
# Model attributes
nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps)
hyp['box'] *= 3 / nl # scale to layers
hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
# Model parameters
hyp['box'] *= 3. / nl # scale to layers
hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
hyp['label_smoothing'] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
......@@ -277,7 +263,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
f'Using {train_loader.num_workers} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
......@@ -299,11 +285,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if RANK in [-1, 0]:
pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
......@@ -390,8 +376,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
'ema': deepcopy(ema.ema).half(),
'updates': ema.updates,
'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
'date': datetime.now().isoformat()}
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
# Save last, best and delete
torch.save(ckpt, last)
......@@ -438,10 +423,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
plots=True,
callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots
if is_coco:
callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
callbacks.run('on_train_end', last, best, plots, epoch, results)
callbacks.run('on_train_end', last, best, plots, epoch)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache()
......@@ -455,13 +438,13 @@ def parse_opt(known=False):
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
......@@ -469,9 +452,9 @@ def parse_opt(known=False):
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
......@@ -479,13 +462,13 @@ def parse_opt(known=False):
parser.add_argument('--linear-lr', action='store_true', help='linear LR')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
parser.add_argument('--save-period', type=int, default=1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
......@@ -495,6 +478,7 @@ def parse_opt(known=False):
def main(opt, callbacks=Callbacks()):
# Checks
set_logging(RANK)
if RANK in [-1, 0]:
print_args(FILE.stem, opt)
check_git_status()
......@@ -618,7 +602,7 @@ def main(opt, callbacks=Callbacks()):
# Plot results
plot_evolve(evolve_csv)
LOGGER.info(f'Hyperparameter evolution finished\n'
print(f'Hyperparameter evolution finished\n'
f"Results saved to {colorstr('bold', save_dir)}\n"
f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
......
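On the hyperparameter-scaling hunk earlier in this file's diff (`hyp['box'] *= 3 / nl`, etc.): loss gains are normalized for the number of detection layers, class count, and image area. A tiny numeric illustration (toy hyp values assumed, not the shipped hyps):

nl, nc, imgsz = 3, 80, 640
hyp = {'box': 0.05, 'cls': 0.5, 'obj': 1.0}   # toy values
hyp['box'] *= 3 / nl                          # unchanged for the default 3 layers
hyp['cls'] *= nc / 80 * 3 / nl                # unchanged for 80 classes
hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl     # grows with image area
print(hyp)  # {'box': 0.05, 'cls': 0.5, 'obj': 1.0}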