Commit 5a567950 authored by lidc

yolov5: added MPI single-node multi-GPU and multi-node multi-GPU launch modes and updated its README accordingly; removed the debug log output from maskrcnn and updated that model's README.

parent a30b77fe
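The MPI launch modes mentioned in the commit message live in the updated yolov5 README, which is not part of the diff below. As a rough sketch only (none of these names are taken from this commit), a training script launched via mpirun typically bootstraps torch.distributed from the OpenMPI environment variables:

import os
import torch.distributed as dist

def init_from_mpi(backend='nccl'):
    # mpirun exports these; MASTER_ADDR/MASTER_PORT must also be set in the environment
    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])              # global rank across all nodes
    world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])        # total number of processes
    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])  # GPU index on this node
    dist.init_process_group(backend, rank=rank, world_size=world_size)
    return local_rank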
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
-Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
-
-Format                | `export.py --include` | Model
----                   | ---                   | ---
-PyTorch               | -                     | yolov5s.pt
-TorchScript           | `torchscript`         | yolov5s.torchscript
-ONNX                  | `onnx`                | yolov5s.onnx
-OpenVINO              | `openvino`            | yolov5s_openvino_model/
-TensorRT              | `engine`              | yolov5s.engine
-CoreML                | `coreml`              | yolov5s.mlmodel
-TensorFlow SavedModel | `saved_model`         | yolov5s_saved_model/
-TensorFlow GraphDef   | `pb`                  | yolov5s.pb
-TensorFlow Lite       | `tflite`              | yolov5s.tflite
-TensorFlow Edge TPU   | `edgetpu`             | yolov5s_edgetpu.tflite
-TensorFlow.js         | `tfjs`                | yolov5s_web_model/
+Export a YOLOv5 PyTorch model to TorchScript, ONNX, CoreML, TensorFlow (saved_model, pb, TFLite, TF.js,) formats
+TensorFlow exports authored by https://github.com/zldrobit

Usage:
-    $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
+    $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs

Inference:
-    $ python path/to/detect.py --weights yolov5s.pt              # PyTorch
-                                         yolov5s.torchscript     # TorchScript
-                                         yolov5s.onnx            # ONNX Runtime or OpenCV DNN with --dnn
-                                         yolov5s.xml             # OpenVINO
-                                         yolov5s.engine          # TensorRT
-                                         yolov5s.mlmodel         # CoreML (MacOS-only)
-                                         yolov5s_saved_model     # TensorFlow SavedModel
-                                         yolov5s.pb              # TensorFlow GraphDef
-                                         yolov5s.tflite          # TensorFlow Lite
-                                         yolov5s_edgetpu.tflite  # TensorFlow Edge TPU
+    $ python path/to/detect.py --weights yolov5s.pt
+                                         yolov5s.onnx (must export with --dynamic)
+                                         yolov5s_saved_model
+                                         yolov5s.pb
+                                         yolov5s.tflite

TensorFlow.js:
    $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example

@@ -39,9 +21,7 @@ TensorFlow.js:
"""
import argparse
-import json
import os
-import platform
import subprocess
import sys
import time

@@ -62,29 +42,23 @@ from models.experimental import attempt_load
from models.yolo import Detect
from utils.activations import SiLU
from utils.datasets import LoadImages
-from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_version, colorstr,
-                           file_size, print_args, url2file)
+from utils.general import colorstr, check_dataset, check_img_size, check_requirements, file_size, print_args, \
+    set_logging, url2file
from utils.torch_utils import select_device


def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    # YOLOv5 TorchScript model export
    try:
-        LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
-        f = file.with_suffix('.torchscript')
+        print(f'\n{prefix} starting export with torch {torch.__version__}...')
+        f = file.with_suffix('.torchscript.pt')

        ts = torch.jit.trace(model, im, strict=False)
-        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
-        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
-        if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
-            optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
-        else:
-            ts.save(str(f), _extra_files=extra_files)
+        (optimize_for_mobile(ts) if optimize else ts).save(f)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
+        print(f'{prefix} export failure: {e}')
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
@@ -93,7 +67,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
        check_requirements(('onnx',))
        import onnx

-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+        print(f'\n{prefix} starting export with onnx {onnx.__version__}...')
        f = file.with_suffix('.onnx')

        torch.onnx.export(model, im, f, verbose=False, opset_version=opset,
@@ -108,7 +82,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
-        # LOGGER.info(onnx.helper.printable_graph(model_onnx.graph))  # print
+        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if simplify:
@@ -116,7 +90,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
                check_requirements(('onnx-simplifier',))
                import onnxsim

-                LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
+                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(
                    model_onnx,
                    dynamic_input_shape=dynamic,
@@ -124,115 +98,46 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
-                LOGGER.info(f'{prefix} simplifier failure: {e}')
+                print(f'{prefix} simplifier failure: {e}')

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
-
-
-def export_openvino(model, im, file, prefix=colorstr('OpenVINO:')):
-    # YOLOv5 OpenVINO export
-    try:
-        check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
-        import openvino.inference_engine as ie
-
-        LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
-        f = str(file).replace('.pt', '_openvino_model' + os.sep)
-
-        cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f}"
-        subprocess.check_output(cmd, shell=True)
-
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
+        print(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'")
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'{prefix} export failure: {e}')
def export_coreml(model, im, file, prefix=colorstr('CoreML:')):
    # YOLOv5 CoreML export
+    ct_model = None
    try:
        check_requirements(('coremltools',))
        import coremltools as ct

-        LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
+        print(f'\n{prefix} starting export with coremltools {ct.__version__}...')
        f = file.with_suffix('.mlmodel')

+        model.train()  # CoreML exports should be placed in model.train() mode
        ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
-        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
+        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        ct_model.save(f)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return ct_model, f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-        return None, None
+        print(f'\n{prefix} export failure: {e}')
+
+    return ct_model
-def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
-    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
-    try:
-        check_requirements(('tensorrt',))
-        import tensorrt as trt
-
-        if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
-            grid = model.model[-1].anchor_grid
-            model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
-            export_onnx(model, im, file, 12, train, False, simplify)  # opset 12
-            model.model[-1].anchor_grid = grid
-        else:  # TensorRT >= 8
-            check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
-            export_onnx(model, im, file, 13, train, False, simplify)  # opset 13
-        onnx = file.with_suffix('.onnx')
-        assert onnx.exists(), f'failed to export ONNX file: {onnx}'
-
-        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
-        f = file.with_suffix('.engine')  # TensorRT engine file
-        logger = trt.Logger(trt.Logger.INFO)
-        if verbose:
-            logger.min_severity = trt.Logger.Severity.VERBOSE
-
-        builder = trt.Builder(logger)
-        config = builder.create_builder_config()
-        config.max_workspace_size = workspace * 1 << 30
-
-        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
-        network = builder.create_network(flag)
-        parser = trt.OnnxParser(network, logger)
-        if not parser.parse_from_file(str(onnx)):
-            raise RuntimeError(f'failed to load ONNX file: {onnx}')
-
-        inputs = [network.get_input(i) for i in range(network.num_inputs)]
-        outputs = [network.get_output(i) for i in range(network.num_outputs)]
-        LOGGER.info(f'{prefix} Network Description:')
-        for inp in inputs:
-            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
-        for out in outputs:
-            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
-
-        half &= builder.platform_has_fast_fp16
-        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
-        if half:
-            config.set_flag(trt.BuilderFlag.FP16)
-        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
-            t.write(engine.serialize())
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
def export_saved_model(model, im, file, dynamic,
                       tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
-                       conf_thres=0.25, prefix=colorstr('TensorFlow SavedModel:')):
-    # YOLOv5 TensorFlow SavedModel export
+                       conf_thres=0.25, prefix=colorstr('TensorFlow saved_model:')):
+    # YOLOv5 TensorFlow saved_model export
+    keras_model = None
    try:
        import tensorflow as tf
        from tensorflow import keras
-        from models.tf import TFDetect, TFModel
+        from models.tf import TFModel, TFDetect

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        f = str(file).replace('.pt', '_saved_model')
        batch_size, ch, *imgsz = list(im.shape)  # BCHW
@@ -246,11 +151,11 @@ def export_saved_model(model, im, file, dynamic,
        keras_model.summary()
        keras_model.save(f, save_format='tf')

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return keras_model, f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-        return None, None
+        print(f'\n{prefix} export failure: {e}')
+
+    return keras_model
def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
@@ -259,7 +164,7 @@ def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
        import tensorflow as tf
        from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        f = file.with_suffix('.pb')

        m = tf.function(lambda x: keras_model(x))  # full model
@@ -268,18 +173,18 @@ def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
        frozen_func.graph.as_graph_def()
        tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('TensorFlow Lite:')):
    # YOLOv5 TensorFlow Lite export
    try:
        import tensorflow as tf
+        from models.tf import representative_dataset_gen

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        batch_size, ch, *imgsz = list(im.shape)  # BCHW
        f = str(file).replace('.pt', '-fp16.tflite')
@@ -288,7 +193,6 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
        converter.target_spec.supported_types = [tf.float16]
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if int8:
-            from models.tf import representative_dataset_gen
            dataset = LoadImages(check_dataset(data)['train'], img_size=imgsz, auto=False)  # representative data
            converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib)
            converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
@@ -300,38 +204,10 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
        tflite_model = converter.convert()
        open(f, "wb").write(tflite_model)
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-
-
-def export_edgetpu(keras_model, im, file, prefix=colorstr('Edge TPU:')):
-    # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
-    try:
-        cmd = 'edgetpu_compiler --version'
-        help_url = 'https://coral.ai/docs/edgetpu/compiler/'
-        assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
-        if subprocess.run(cmd, shell=True).returncode != 0:
-            LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
-            for c in ['curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
-                      'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
-                      'sudo apt-get update',
-                      'sudo apt-get install edgetpu-compiler']:
-                subprocess.run(c, shell=True, check=True)
-        ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
-
-        LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
-        f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
-        f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model
-
-        cmd = f"edgetpu_compiler -s {f_tfl}"
-        subprocess.run(cmd, shell=True, check=True)
-
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
@@ -339,16 +215,15 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
    try:
        check_requirements(('tensorflowjs',))
        import re

        import tensorflowjs as tfjs

-        LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
+        print(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
        f = str(file).replace('.pt', '_web_model')  # js dir
        f_pb = file.with_suffix('.pb')  # *.pb path
        f_json = f + '/model.json'  # *.json path

-        cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \
-              f'--output_node_names="Identity,Identity_1,Identity_2,Identity_3" {f_pb} {f}'
+        cmd = f"tensorflowjs_converter --input_format=tf_frozen_model " \
+              f"--output_node_names='Identity,Identity_1,Identity_2,Identity_3' {f_pb} {f}"
        subprocess.run(cmd, shell=True)

        json = open(f_json).read()
@@ -365,10 +240,9 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
                       json)
            j.write(subst)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
@torch.no_grad()
@@ -377,7 +251,7 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
-        include=('torchscript', 'onnx'),  # include formats
+        include=('torchscript', 'onnx', 'coreml'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
@@ -386,10 +260,6 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        dynamic=False,  # ONNX/TF: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
-        verbose=False,  # TensorRT: verbose log
-        workspace=4,  # TensorRT: workspace size (GB)
-        nms=False,  # TF: add NMS to model
-        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
@@ -397,12 +267,9 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        ):
    t = time.time()
    include = [x.lower() for x in include]
-    tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'))  # TensorFlow exports
-    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)
-
-    # Checks
+    tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'tfjs'))  # TensorFlow exports
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
-    opset = 12 if ('openvino' in include) else opset  # OpenVINO requires opset <= 12
+    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)

    # Load PyTorch model
    device = select_device(device)
@@ -430,52 +297,40 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
    for _ in range(2):
        y = model(im)  # dry runs
-    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} ({file_size(file):.1f} MB)")
+    print(f"\n{colorstr('PyTorch:')} starting from {file} ({file_size(file):.1f} MB)")
    # Exports
    if 'torchscript' in include:
-        f = export_torchscript(model, im, file, optimize)
-    if 'engine' in include:  # TensorRT required before ONNX
-        f = export_engine(model, im, file, train, half, simplify, workspace, verbose)
-    if ('onnx' in include) or ('openvino' in include):  # OpenVINO requires ONNX
-        f = export_onnx(model, im, file, opset, train, dynamic, simplify)
-    if 'openvino' in include:
-        f = export_openvino(model, im, file)
+        export_torchscript(model, im, file, optimize)
+    if 'onnx' in include:
+        export_onnx(model, im, file, opset, train, dynamic, simplify)
    if 'coreml' in include:
-        _, f = export_coreml(model, im, file)
+        export_coreml(model, im, file)

    # TensorFlow Exports
    if any(tf_exports):
-        pb, tflite, edgetpu, tfjs = tf_exports[1:]
-        if int8 or edgetpu:  # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
-            check_requirements(('flatbuffers==1.12',))  # required before `import tensorflow`
+        pb, tflite, tfjs = tf_exports[1:]
        assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
-        model, f = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
-                                      agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
-                                      topk_all=topk_all,
-                                      conf_thres=conf_thres, iou_thres=iou_thres)  # keras model
+        model = export_saved_model(model, im, file, dynamic, tf_nms=tfjs, agnostic_nms=tfjs,
+                                   topk_per_class=topk_per_class, topk_all=topk_all, conf_thres=conf_thres,
+                                   iou_thres=iou_thres)  # keras model
        if pb or tfjs:  # pb prerequisite to tfjs
-            f = export_pb(model, im, file)
-        if tflite or edgetpu:
-            f = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, ncalib=100)
-        if edgetpu:
-            f = export_edgetpu(model, im, file)
+            export_pb(model, im, file)
+        if tflite:
+            export_tflite(model, im, file, int8=int8, data=data, ncalib=100)
        if tfjs:
-            f = export_tfjs(model, im, file)
+            export_tfjs(model, im, file)

    # Finish
-    LOGGER.info(f'\nExport complete ({time.time() - t:.2f}s)'
+    print(f'\nExport complete ({time.time() - t:.2f}s)'
          f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
-          f"\nVisualize with https://netron.app"
-          f"\nDetect with `python detect.py --weights {f}`"
-          f" or `model = torch.hub.load('ultralytics/yolov5', 'custom', '{f}')"
-          f"\nValidate with `python val.py --weights {f}`")
+          f'\nVisualize with https://netron.app')
def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
@@ -486,26 +341,22 @@ def parse_opt():
    parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
-    parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
-    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
-    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
-    parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
-    parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
+    parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
    parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
    parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
    parser.add_argument('--include', nargs='+',
                        default=['torchscript', 'onnx'],
-                        help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs')
+                        help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)')
    opt = parser.parse_args()
    print_args(FILE.stem, opt)
    return opt


def main(opt):
-    for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
-        run(**vars(opt))
+    set_logging()
+    run(**vars(opt))


if __name__ == "__main__":
...
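Aside on the export.py hunks above: after this change the script can also be driven from Python. A minimal sketch, assuming a repo checkout with this commit applied and a yolov5s.pt checkpoint alongside it (the call simply mirrors the CLI defaults shown above):

from export import run  # the patched script above

run(weights='yolov5s.pt', include=('torchscript', 'onnx'))
# writes yolov5s.torchscript.pt (note the new suffix) and yolov5s.onnx next to the checkpoint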
@@ -5,7 +5,6 @@ PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
Usage:
    import torch
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
-    model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx')  # file from branch
"""
import torch

@@ -28,35 +27,36 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
    """
    from pathlib import Path

-    from models.common import AutoShape, DetectMultiBackend
    from models.yolo import Model
+    from models.experimental import attempt_load
+    from utils.general import check_requirements, set_logging
    from utils.downloads import attempt_download
-    from utils.general import check_requirements, intersect_dicts, set_logging
    from utils.torch_utils import select_device

+    file = Path(__file__).resolve()
    check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
    set_logging(verbose=verbose)

-    name = Path(name)
-    path = name.with_suffix('.pt') if name.suffix == '' else name  # checkpoint path
+    save_dir = Path('') if str(name).endswith('.pt') else file.parent
+    path = (save_dir / name).with_suffix('.pt')  # checkpoint path
    try:
        device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)

        if pretrained and channels == 3 and classes == 80:
-            model = DetectMultiBackend(path, device=device)  # download/load FP32 model
-            # model = models.experimental.attempt_load(path, map_location=device)  # download/load FP32 model
+            model = attempt_load(path, map_location=device)  # download/load FP32 model
        else:
-            cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
+            cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
            if pretrained:
                ckpt = torch.load(attempt_download(path), map_location=device)  # load
+                msd = model.state_dict()  # model state_dict
                csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-                csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
+                csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
                model.load_state_dict(csd, strict=False)  # load
                if len(ckpt['model'].names) == classes:
                    model.names = ckpt['model'].names  # set class names attribute
        if autoshape:
-            model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
+            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
@@ -125,11 +125,10 @@ if __name__ == '__main__':
    # model = custom(path='path/to/model.pt')  # custom

    # Verify inference
-    from pathlib import Path
    import cv2
    import numpy as np
    from PIL import Image
+    from pathlib import Path

    imgs = ['data/images/zidane.jpg',  # filename
            Path('data/images/zidane.jpg'),  # Path
@@ -138,6 +137,6 @@ if __name__ == '__main__':
            Image.open('data/images/bus.jpg'),  # PIL
            np.zeros((320, 640, 3))]  # numpy

-    results = model(imgs, size=320)  # batched inference
+    results = model(imgs)  # batched inference
    results.print()
    results.save()
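The intersect_dicts call and the dict comprehension that replaces it in the hunk above do the same shape-based filtering when the two state_dicts share keys (intersect_dicts additionally drops excluded keys such as 'anchors'). A tiny self-contained illustration, with made-up tensor names:

import torch

csd = {'w1': torch.zeros(2, 2), 'w2': torch.zeros(3)}  # checkpoint state_dict
msd = {'w1': torch.zeros(2, 2), 'w2': torch.zeros(4)}  # model state_dict
kept = {k: v for k, v in csd.items() if k in msd and msd[k].shape == v.shape}
print(list(kept))  # ['w1'] -- shape-mismatched tensors are dropped before load_state_dict(strict=False)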
@@ -3,29 +3,27 @@
Common modules
"""

-import json
+import logging
import math
-import platform
import warnings
-from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path

-import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
-import yaml
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
-from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
-                           make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
-from utils.plots import Annotator, colors, save_one_box
-from utils.torch_utils import copy_attr, time_sync
+from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
+    scale_coords, xyxy2xywh
+from utils.plots import Annotator, colors
+from utils.torch_utils import time_sync
+
+LOGGER = logging.getLogger(__name__)


def autopad(k, p=None):  # kernel, padding
@@ -81,15 +79,15 @@ class TransformerBlock(nn.Module):
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
-        p = x.flatten(2).permute(2, 0, 1)
-        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
+        return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
@@ -115,8 +113,8 @@ class BottleneckCSP(nn.Module):
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
-        self.act = nn.SiLU()
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
@@ -132,7 +130,7 @@ class C3(nn.Module):
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
@@ -160,7 +158,7 @@ class C3Ghost(C3):
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
-        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
+        self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)])


class SPP(nn.Module):
@@ -275,218 +273,30 @@ class Concat(nn.Module):
        return torch.cat(x, self.d)
-class DetectMultiBackend(nn.Module):
-    # YOLOv5 MultiBackend class for python inference on various backends
-    def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
-        # Usage:
-        #   PyTorch:             weights = *.pt
-        #   TorchScript:                   *.torchscript
-        #   CoreML:                        *.mlmodel
-        #   OpenVINO:                      *.xml
-        #   TensorFlow:                    *_saved_model
-        #   TensorFlow:                    *.pb
-        #   TensorFlow Lite:               *.tflite
-        #   TensorFlow Edge TPU:           *_edgetpu.tflite
-        #   ONNX Runtime:                  *.onnx
-        #   OpenCV DNN:                    *.onnx with dnn=True
-        #   TensorRT:                      *.engine
-        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
-
-        super().__init__()
-        w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix = Path(w).suffix.lower()
-        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel', '.xml']
-        check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (suffix == x for x in suffixes)  # backends
-        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        w = attempt_download(w)  # download if not local
-        if data:  # data.yaml path (optional)
-            with open(data, errors='ignore') as f:
-                names = yaml.safe_load(f)['names']  # class names
-
-        if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
-            stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
-        elif jit:  # TorchScript
-            LOGGER.info(f'Loading {w} for TorchScript inference...')
-            extra_files = {'config.txt': ''}  # model metadata
-            model = torch.jit.load(w, _extra_files=extra_files)
-            if extra_files['config.txt']:
-                d = json.loads(extra_files['config.txt'])  # extra_files dict
-                stride, names = int(d['stride']), d['names']
-        elif dnn:  # ONNX OpenCV DNN
-            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        elif onnx:  # ONNX Runtime
-            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            cuda = torch.cuda.is_available()
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
-            import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
-            session = onnxruntime.InferenceSession(w, providers=providers)
-        elif xml:  # OpenVINO
-            LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
-            import openvino.inference_engine as ie
-            core = ie.IECore()
-            network = core.read_network(model=w, weights=Path(w).with_suffix('.bin'))  # *.xml, *.bin paths
-            executable_network = core.load_network(network, device_name='CPU', num_requests=1)
-        elif engine:  # TensorRT
-            LOGGER.info(f'Loading {w} for TensorRT inference...')
-            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
-            logger = trt.Logger(trt.Logger.INFO)
-            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
-                model = runtime.deserialize_cuda_engine(f.read())
-            bindings = OrderedDict()
-            for index in range(model.num_bindings):
-                name = model.get_binding_name(index)
-                dtype = trt.nptype(model.get_binding_dtype(index))
-                shape = tuple(model.get_binding_shape(index))
-                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
-                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            context = model.create_execution_context()
-            batch_size = bindings['images'].shape[0]
-        elif coreml:  # CoreML
-            LOGGER.info(f'Loading {w} for CoreML inference...')
-            import coremltools as ct
-            model = ct.models.MLModel(w)
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            if saved_model:  # SavedModel
-                LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
-                import tensorflow as tf
-                model = tf.keras.models.load_model(w)
-            elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-                LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
-                import tensorflow as tf
-
-                def wrap_frozen_graph(gd, inputs, outputs):
-                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
-                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-                graph_def = tf.Graph().as_graph_def()
-                graph_def.ParseFromString(open(w, 'rb').read())
-                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                if 'edgetpu' in w.lower():  # Edge TPU
-                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
-                    import tflite_runtime.interpreter as tfli  # install https://coral.ai/software/#edgetpu-runtime
-                    delegate = {'Linux': 'libedgetpu.so.1',
-                                'Darwin': 'libedgetpu.1.dylib',
-                                'Windows': 'edgetpu.dll'}[platform.system()]
-                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
-                else:  # Lite
-                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
-                    import tensorflow as tf
-                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-                interpreter.allocate_tensors()  # allocate
-                input_details = interpreter.get_input_details()  # inputs
-                output_details = interpreter.get_output_details()  # outputs
-        self.__dict__.update(locals())  # assign all variables to self
-
-    def forward(self, im, augment=False, visualize=False, val=False):
-        # YOLOv5 MultiBackend inference
-        b, ch, h, w = im.shape  # batch, channel, height, width
-        if self.pt or self.jit:  # PyTorch
-            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
-            return y if val else y[0]
-        elif self.dnn:  # ONNX OpenCV DNN
-            im = im.cpu().numpy()  # torch to numpy
-            self.net.setInput(im)
-            y = self.net.forward()
-        elif self.onnx:  # ONNX Runtime
-            im = im.cpu().numpy()  # torch to numpy
-            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
-        elif self.xml:  # OpenVINO
-            im = im.cpu().numpy()  # FP32
-            desc = self.ie.TensorDesc(precision='FP32', dims=im.shape, layout='NCHW')  # Tensor Description
-            request = self.executable_network.requests[0]  # inference request
-            request.set_blob(blob_name='images', blob=self.ie.Blob(desc, im))  # name=next(iter(request.input_blobs))
-            request.infer()
-            y = request.output_blobs['output'].buffer  # name=next(iter(request.output_blobs))
-        elif self.engine:  # TensorRT
-            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
-            self.binding_addrs['images'] = int(im.data_ptr())
-            self.context.execute_v2(list(self.binding_addrs.values()))
-            y = self.bindings['output'].data
-        elif self.coreml:  # CoreML
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
-            im = Image.fromarray((im[0] * 255).astype('uint8'))
-            # im = im.resize((192, 320), Image.ANTIALIAS)
-            y = self.model.predict({'image': im})  # coordinates are xywh normalized
-            if 'confidence' in y:
-                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
-                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
-                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
-            else:
-                y = y[list(y)[-1]]  # last output
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
-            if self.saved_model:  # SavedModel
-                y = self.model(im, training=False).numpy()
-            elif self.pb:  # GraphDef
-                y = self.frozen_func(x=self.tf.constant(im)).numpy()
-            elif self.tflite:  # Lite
-                input, output = self.input_details[0], self.output_details[0]
-                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
-                if int8:
-                    scale, zero_point = input['quantization']
-                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
-                self.interpreter.set_tensor(input['index'], im)
-                self.interpreter.invoke()
-                y = self.interpreter.get_tensor(output['index'])
-                if int8:
-                    scale, zero_point = output['quantization']
-                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= w  # x
-            y[..., 1] *= h  # y
-            y[..., 2] *= w  # w
-            y[..., 3] *= h  # h
-        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
-        return (y, []) if val else y
-
-    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
-        # Warmup model by running inference once
-        if self.pt or self.jit or self.onnx or self.engine:  # warmup types
-            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
-                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
-                self.forward(im)  # warmup
class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
-    agnostic = False  # NMS class-agnostic
+    classes = None  # (optional list) filter by class
    multi_label = False  # NMS multiple labels per box
-    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
-    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model):
        super().__init__()
-        LOGGER.info('Adding AutoShape... ')
-        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
-        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
-        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()

+    def autoshape(self):
+        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
+        return self
+
    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
-        if self.pt:
-            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
-            m.stride = fn(m.stride)
-            m.grid = list(map(fn, m.grid))
-            if isinstance(m.anchor_grid, list):
-                m.anchor_grid = list(map(fn, m.anchor_grid))
+        m = self.model.model[-1]  # Detect()
+        m.stride = fn(m.stride)
+        m.grid = list(map(fn, m.grid))
+        if isinstance(m.anchor_grid, list):
+            m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
@@ -501,10 +311,9 @@ class AutoShape(nn.Module):
        #   multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
-        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
-        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
+        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
-            with amp.autocast(enabled=autocast):
+            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
@@ -526,21 +335,21 @@ class AutoShape(nn.Module):
                g = (size / max(s))  # gain
                shape1.append([y * g for y in s])
                imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
-        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
-        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
+        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
+        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
+        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

-        with amp.autocast(enabled=autocast):
+        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
-            y = self.model(x, augment, profile)  # forward
+            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
-            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
-                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
+            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes,
+                                    multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
@@ -550,15 +359,14 @@ class AutoShape(nn.Module):

class Detections:
    # YOLOv5 detections class for inference results
-    def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
+    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
-        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
+        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
-        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
@@ -638,11 +446,10 @@ class Detections:
    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
-        r = range(self.n)  # iterable
-        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
-        # for d in x:
-        #     for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
-        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
+        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
+        for d in x:
+            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
+                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
...
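The two TransformerBlock.forward variants in the common.py hunks above are equivalent reshapes, just spelled differently; a quick self-contained check (shapes chosen arbitrarily):

import torch

x = torch.randn(2, 8, 5, 7)  # (b, c2, w, h) as in TransformerBlock.forward
a = x.flatten(2).permute(2, 0, 1)                         # newer spelling
b = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)  # older spelling
print(torch.equal(a, b))  # True -- both yield the same (w*h, b, c2) token sequence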
@@ -2,7 +2,6 @@
 """
 Experimental modules
 """
-import math
 import numpy as np
 import torch
@@ -33,7 +32,7 @@ class Sum(nn.Module):
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
         if weight:
-            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights
+            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

     def forward(self, x):
         y = x[0]  # no weight
@@ -49,27 +48,26 @@ class Sum(nn.Module):
 class MixConv2d(nn.Module):
     # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
-    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):  # ch_in, ch_out, kernel, stride, ch_strategy
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
         super().__init__()
-        n = len(k)  # number of convolutions
+        groups = len(k)
         if equal_ch:  # equal c_ per group
-            i = torch.linspace(0, n - 1E-6, c2).floor()  # c2 indices
-            c_ = [(i == g).sum() for g in range(n)]  # intermediate channels
+            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
+            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
         else:  # equal weight.numel() per group
-            b = [c2] + [0] * n
-            a = np.eye(n + 1, n, k=-1)
+            b = [c2] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
             a -= np.roll(a, 1, axis=1)
             a *= np.array(k) ** 2
             a[0] = 1
             c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

-        self.m = nn.ModuleList(
-            [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
+        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU()
+        self.act = nn.LeakyReLU(0.1, inplace=True)

     def forward(self, x):
-        return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


 class Ensemble(nn.ModuleList):
@@ -99,6 +97,7 @@ def attempt_load(weights, map_location=None, inplace=True, fuse=True):
         else:
             model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval())  # without layer fuse

     # Compatibility updates
     for m in model.modules():
         if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
...
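For reference, the equal-channel branch of MixConv2d splits the c2 output channels evenly across the kernel sizes. A standalone sketch of that arithmetic (plain PyTorch, values chosen for illustration):

import torch

c2, k = 64, (1, 3)  # output channels and kernel sizes, as in the default signature
n = len(k)
i = torch.linspace(0, n - 1E-6, c2).floor()  # assign each output channel to a kernel group
c_ = [int((i == g).sum()) for g in range(n)]
print(c_)  # [32, 32]: half the outputs use 1x1 kernels, half use 3x3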
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 BiFPN head
+# YOLOv5 BiFPN head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -37,7 +37,7 @@ head:
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
-  [[-1, 14, 6], 1, Concat, [1]],  # cat P4 <--- BiFPN change
+  [[-1, 14, 6], 1, Concat, [1]],  # cat P4
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
   [-1, 1, Conv, [512, 3, 2]],
...
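The backbone edit above recurs in every config that follows: SPPF(1024, 5) on one side, SPP(1024, [5, 9, 13]) plus an extra C3 on the other. The two poolings are numerically identical, because chaining 5x5 stride-1 max-pools reproduces 9x9 and 13x13 windows. A quick check in plain PyTorch:

import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
p5, p9, p13 = (nn.MaxPool2d(k, stride=1, padding=k // 2) for k in (5, 9, 13))
y1 = p5(x)   # one 5x5 pool
y2 = p5(y1)  # two chained 5x5 pools == one 9x9 pool
y3 = p5(y2)  # three chained 5x5 pools == one 13x13 pool
assert torch.equal(y2, p9(x)) and torch.equal(y3, p13(x))
assert torch.equal(torch.cat([x, y1, y2, y3], 1),            # SPPF concatenation
                   torch.cat([x, p5(x), p9(x), p13(x)], 1))  # SPP concatenation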
@@ -9,34 +9,34 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-  [-1, 3, C3, [128]],
+  [-1, 3, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-  [-1, 9, C3, [512]],
+  [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 6, BottleneckCSP, [1024]],  # 9
  ]

-# YOLOv5 v6.0 FPN head
+# YOLOv5 FPN head
 head:
-  [[-1, 3, C3, [1024, False]],  # 10 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-  [-1, 3, C3, [512, False]],  # 14 (P4/16-medium)
+  [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
-  [-1, 3, C3, [256, False]],  # 18 (P3/8-small)
+  [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
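The other recurring stem swap is Conv(64, 6, 2, 2) versus Focus(64, 3). A 6x6 stride-2 convolution consumes exactly the pixels that the Focus space-to-depth slicing gathers, so the two stems produce identically shaped outputs, and an exact weight mapping between them exists. A shape-level sketch:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 640, 640)
focus_in = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
                      x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)  # space-to-depth: (1, 12, 320, 320)
conv3 = nn.Conv2d(12, 64, 3, 1, 1, bias=False)  # the conv inside Focus(64, 3)
conv6 = nn.Conv2d(3, 64, 6, 2, 2, bias=False)   # the replacement stem Conv(64, 6, 2, 2)
print(conv3(focus_in).shape, conv6(x).shape)    # both torch.Size([1, 64, 320, 320])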
@@ -4,24 +4,24 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]
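A hedged smoke test for the two-output config above; the file path is an assumption about where it lives in this repo:

from models.yolo import Model

model = Model('models/hub/yolov5-p34.yaml')  # hypothetical path for the config above
detect = model.model[-1]
print(detect.nl, detect.stride.tolist())  # expect 2 output layers with strides [8.0, 16.0]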
@@ -4,26 +4,26 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [768]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 11
+  [-1, 1, SPP, [1024, [3, 5, 7]]],
+  [-1, 3, C3, [1024, False]],  # 11
  ]

-# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [768, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -50,7 +50,7 @@ head:
   [-1, 1, Conv, [768, 3, 2]],
   [[-1, 12], 1, Concat, [1]],  # cat head P6
-  [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+  [-1, 3, C3, [1024, False]],  # 32 (P5/64-xlarge)

   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
  ]
@@ -4,16 +4,16 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
@@ -21,11 +21,11 @@ backbone:
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
   [-1, 3, C3, [1024]],
   [-1, 1, Conv, [1280, 3, 2]],  # 11-P7/128
-  [-1, 3, C3, [1280]],
-  [-1, 1, SPPF, [1280, 5]],  # 13
+  [-1, 1, SPP, [1280, [3, 5]]],
+  [-1, 3, C3, [1280, False]],  # 13
  ]

-# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [1024, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -9,40 +9,40 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-  [-1, 3, C3, [128]],
+  [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-  [-1, 9, C3, [512]],
+  [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 PANet head
+# YOLOv5 PANet head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-  [-1, 3, C3, [512, False]],  # 13
+  [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-  [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+  [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
-  [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+  [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
-  [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+  [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, GhostConv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3Ghost, [128]],
   [-1, 1, GhostConv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3Ghost, [256]],
+  [-1, 9, C3Ghost, [256]],
   [-1, 1, GhostConv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3Ghost, [512]],
   [-1, 1, GhostConv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3Ghost, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3Ghost, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 head
+# YOLOv5 head
 head:
   [[-1, 1, GhostConv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3TR, [1024]],  # 9 <--- C3TR() Transformer module
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3TR, [1024, False]],  # 9 <-------- C3TR() Transformer module
  ]

-# YOLOv5 v6.0 head
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -11,6 +11,7 @@ Export:
 """

 import argparse
+import logging
 import sys
 from copy import deepcopy
 from pathlib import Path
@@ -27,17 +28,19 @@
 import torch.nn as nn
 from tensorflow import keras

-from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
+from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3
 from models.experimental import CrossConv, MixConv2d, attempt_load
 from models.yolo import Detect
+from utils.general import make_divisible, print_args, set_logging
 from utils.activations import SiLU
-from utils.general import LOGGER, make_divisible, print_args
+
+LOGGER = logging.getLogger(__name__)


 class TFBN(keras.layers.Layer):
     # TensorFlow BatchNormalization wrapper
     def __init__(self, w=None):
-        super().__init__()
+        super(TFBN, self).__init__()
         self.bn = keras.layers.BatchNormalization(
             beta_initializer=keras.initializers.Constant(w.bias.numpy()),
             gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
@@ -51,7 +54,7 @@ class TFBN(keras.layers.Layer):

 class TFPad(keras.layers.Layer):
     def __init__(self, pad):
-        super().__init__()
+        super(TFPad, self).__init__()
         self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])

     def call(self, inputs):
@@ -62,7 +65,7 @@ class TFConv(keras.layers.Layer):
     # Standard convolution
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         # ch_in, ch_out, weights, kernel, stride, padding, groups
-        super().__init__()
+        super(TFConv, self).__init__()
         assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
         assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
         # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
@@ -93,11 +96,11 @@ class TFFocus(keras.layers.Layer):
     # Focus wh information into c-space
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         # ch_in, ch_out, kernel, stride, padding, groups
-        super().__init__()
+        super(TFFocus, self).__init__()
         self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

     def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-        # inputs = inputs / 255  # normalize 0-255 to 0-1
+        # inputs = inputs / 255.  # normalize 0-255 to 0-1
         return self.conv(tf.concat([inputs[:, ::2, ::2, :],
                                     inputs[:, 1::2, ::2, :],
                                     inputs[:, ::2, 1::2, :],
@@ -107,7 +110,7 @@ class TFFocus(keras.layers.Layer):
 class TFBottleneck(keras.layers.Layer):
     # Standard bottleneck
     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
-        super().__init__()
+        super(TFBottleneck, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
@@ -120,7 +123,7 @@ class TFBottleneck(keras.layers.Layer):
 class TFConv2d(keras.layers.Layer):
     # Substitution for PyTorch nn.Conv2D
     def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
-        super().__init__()
+        super(TFConv2d, self).__init__()
         assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
         self.conv = keras.layers.Conv2D(
             c2, k, s, 'VALID', use_bias=bias,
@@ -135,7 +138,7 @@ class TFBottleneckCSP(keras.layers.Layer):
     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         # ch_in, ch_out, number, shortcut, groups, expansion
-        super().__init__()
+        super(TFBottleneckCSP, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
@@ -155,7 +158,7 @@ class TFC3(keras.layers.Layer):
     # CSP Bottleneck with 3 convolutions
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         # ch_in, ch_out, number, shortcut, groups, expansion
-        super().__init__()
+        super(TFC3, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
@@ -169,7 +172,7 @@ class TFC3(keras.layers.Layer):
 class TFSPP(keras.layers.Layer):
     # Spatial pyramid pooling layer used in YOLOv3-SPP
     def __init__(self, c1, c2, k=(5, 9, 13), w=None):
-        super().__init__()
+        super(TFSPP, self).__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
@@ -180,25 +183,9 @@ class TFSPP(keras.layers.Layer):
         return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))


-class TFSPPF(keras.layers.Layer):
-    # Spatial pyramid pooling-Fast layer
-    def __init__(self, c1, c2, k=5, w=None):
-        super().__init__()
-        c_ = c1 // 2  # hidden channels
-        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
-        self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
-        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
-
-    def call(self, inputs):
-        x = self.cv1(inputs)
-        y1 = self.m(x)
-        y2 = self.m(y1)
-        return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
-
-
 class TFDetect(keras.layers.Layer):
     def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
-        super().__init__()
+        super(TFDetect, self).__init__()
         self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
         self.nc = nc  # number of classes
         self.no = nc + 5  # number of outputs per anchor
@@ -226,13 +213,13 @@ class TFDetect(keras.layers.Layer):
             if not self.training:  # inference
                 y = tf.sigmoid(x[i])
-                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                 wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
                 # Normalize xywh to 0-1 to reduce calibration error
                 xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 y = tf.concat([xy, wh, y[..., 4:]], -1)
-                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
+                z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no]))

         return x if self.training else (tf.concat(z, 1), x)
@@ -246,7 +233,7 @@ class TFDetect(keras.layers.Layer):

 class TFUpsample(keras.layers.Layer):
     def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
-        super().__init__()
+        super(TFUpsample, self).__init__()
         assert scale_factor == 2, "scale_factor must be 2"
         self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
         # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
@@ -260,7 +247,7 @@ class TFUpsample(keras.layers.Layer):

 class TFConcat(keras.layers.Layer):
     def __init__(self, dimension=1, w=None):
-        super().__init__()
+        super(TFConcat, self).__init__()
         assert dimension == 1, "convert only NCHW to NHWC concat"
         self.d = 3
@@ -269,7 +256,7 @@ class TFConcat(keras.layers.Layer):

 def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
-    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
+    LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -285,7 +272,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
             pass

         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
-        if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
+        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
             c1, c2 = ch[f], args[0]
             c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
@@ -296,7 +283,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+            c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
         elif m is Detect:
             args.append([ch[x + 1] for x in f])
             if isinstance(args[1], int):  # number of anchors
@@ -309,11 +296,11 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
         m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
             else tf_m(*args, w=model.model[i])  # module
-        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
-        np = sum(x.numel() for x in torch_m_.parameters())  # number params
+        np = sum([x.numel() for x in torch_m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}')  # print
+        LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         ch.append(c2)
@@ -322,7 +309,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)

 class TFModel:
     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
-        super().__init__()
+        super(TFModel, self).__init__()
         if isinstance(cfg, dict):
             self.yaml = cfg  # model dict
         else:  # is *.yaml
@@ -333,7 +320,7 @@ class TFModel:
         # Define model
         if nc and nc != self.yaml['nc']:
-            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
+            print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
             self.yaml['nc'] = nc  # override yaml value
         self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
@@ -410,10 +397,10 @@ class AgnosticNMS(keras.layers.Layer):

 def representative_dataset_gen(dataset, ncalib=100):
     # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
-    for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
+    for n, (path, img, im0s, vid_cap) in enumerate(dataset):
         input = np.transpose(img, [1, 2, 0])
         input = np.expand_dims(input, axis=0).astype(np.float32)
-        input /= 255
+        input /= 255.0
         yield [input]
         if n >= ncalib:
             break
@@ -440,8 +427,6 @@ def run(weights=ROOT / 'yolov5s.pt',  # weights path
     keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
     keras_model.summary()

-    LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')

 def parse_opt():
     parser = argparse.ArgumentParser()
@@ -456,6 +441,7 @@ def parse_opt():

 def main(opt):
+    set_logging()
     run(**vars(opt))
...
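The TFConv comment above notes that TensorFlow 'SAME' padding diverges from PyTorch's symmetric padding for k=3, s=2, which is why these wrappers pad explicitly (TFPad) and then convolve with 'VALID'. A sketch demonstrating the divergence, assuming tensorflow and torch are both installed:

import numpy as np
import tensorflow as tf
import torch

x = np.random.rand(1, 8, 8, 1).astype(np.float32)  # NHWC input for TF
w = np.random.rand(3, 3, 1, 1).astype(np.float32)  # HWIO 3x3 kernel

same = tf.nn.conv2d(x, w, strides=2, padding='SAME')         # pads bottom/right only
padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])         # what TFPad(1) does
valid = tf.nn.conv2d(padded, w, strides=2, padding='VALID')  # symmetric, PyTorch-style

ref = torch.conv2d(torch.from_numpy(x.transpose(0, 3, 1, 2)),  # NCHW input for PyTorch
                   torch.from_numpy(w.transpose(3, 2, 0, 1)), stride=2, padding=1)
print(np.allclose(valid.numpy().transpose(0, 3, 1, 2), ref.numpy(), atol=1e-5))  # True
print(np.allclose(same.numpy().transpose(0, 3, 1, 2), ref.numpy(), atol=1e-5))   # False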
@@ -20,15 +20,18 @@ if str(ROOT) not in sys.path:
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
-from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
+from utils.general import check_yaml, make_divisible, print_args, set_logging
 from utils.plots import feature_visualization
-from utils.torch_utils import fuse_conv_and_bn, initialize_weights, model_info, scale_img, select_device, time_sync
+from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
+    select_device, time_sync

 try:
     import thop  # for FLOPs computation
 except ImportError:
     thop = None

+LOGGER = logging.getLogger(__name__)

 class Detect(nn.Module):
     stride = None  # strides computed during build
@@ -54,15 +57,15 @@ class Detect(nn.Module):
             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

             if not self.training:  # inference
-                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
                     self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                 y = x[i].sigmoid()
                 if self.inplace:
-                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy, wh, y[..., 4:]), -1)
                 z.append(y.view(bs, -1, self.no))
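Both decode branches above implement the same mapping: xy = (2*sigmoid - 0.5 + grid) * stride and wh = (2*sigmoid)^2 * anchor, which bounds box centers to half a cell beyond their grid cell and sizes to at most 4x the anchor. A worked one-cell example in plain arithmetic:

import torch

stride = 8.0                               # P3 feature map
anchor = torch.tensor([10.0, 13.0])        # one P3 anchor, in pixels
cx, cy = 5, 7                              # grid cell indices
raw = torch.tensor([0.2, -0.1, 0.3, 0.4])  # network outputs for x, y, w, h
s = raw.sigmoid()

xy = (s[:2] * 2 - 0.5 + torch.tensor([cx, cy])) * stride  # center in input pixels
wh = (s[2:] * 2) ** 2 * anchor                            # size, capped at 4x the anchor
print(xy.tolist(), wh.tolist())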
@@ -71,10 +74,7 @@ class Detect(nn.Module):
     def _make_grid(self, nx=20, ny=20, i=0):
         d = self.anchors[i].device
-        if check_version(torch.__version__, '1.10.0'):  # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
-            yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)], indexing='ij')
-        else:
-            yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)])
+        yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
         grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float()
         anchor_grid = (self.anchors[i].clone() * self.stride[i]) \
             .view((1, self.na, 1, 1, 2)).expand((1, self.na, ny, nx, 2)).float()
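The version guard removed above exists because torch 1.10 added the indexing keyword to meshgrid (and later versions warn without it), while older builds reject it. A self-contained sketch of the same pattern; the crude version parse stands in for the repo's check_version():

import torch

def make_grid_xy(nx=20, ny=20, device='cpu'):
    major, minor = (int(v) for v in torch.__version__.split('.')[:2])
    axes = [torch.arange(ny, device=device), torch.arange(nx, device=device)]
    if (major, minor) >= (1, 10):                      # 1.10 added the indexing kwarg
        yv, xv = torch.meshgrid(*axes, indexing='ij')  # explicit 'ij' silences the warning
    else:
        yv, xv = torch.meshgrid(*axes)                 # older default already behaves as 'ij'
    return torch.stack((xv, yv), 2)

print(make_grid_xy(3, 2).shape)  # torch.Size([2, 3, 2])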
@@ -89,7 +89,7 @@ class Model(nn.Module):
         else:  # is *.yaml
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
-            with open(cfg, encoding='ascii', errors='ignore') as f:
+            with open(cfg, errors='ignore') as f:
                 self.yaml = yaml.safe_load(f)  # model dict

         # Define model
@@ -200,7 +200,7 @@ class Model(nn.Module):
         for mi, s in zip(m.m, m.stride):  # from
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # cls
+            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

     def _print_biases(self):
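The bias initialization above seeds the detection head with priors: roughly 8 objects per 640x640 image for objectness, and about 0.6/(nc - 1) per class. Worked numbers for a stride-8 head and 80 classes:

import math

s, nc = 8, 80
obj = math.log(8 / (640 / s) ** 2)  # 8 objects spread over (640/8)^2 = 6400 cells
cls = math.log(0.6 / (nc - 0.99))   # ~0.6/79 per-class prior (0.999999 in the other variant)
print(round(obj, 3), round(cls, 3))  # -6.685 -4.88
print(round(1 / (1 + math.exp(-obj)), 5))  # sigmoid(obj) = 0.00125 = 8 / 6400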
@@ -225,6 +225,12 @@ class Model(nn.Module):
         self.info()
         return self

+    def autoshape(self):  # add AutoShape module
+        LOGGER.info('Adding AutoShape... ')
+        m = AutoShape(self)  # wrap model
+        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
+        return m
+
     def info(self, verbose=False, img_size=640):  # print model information
         model_info(self, verbose, img_size)
@@ -241,7 +247,7 @@ class Model(nn.Module):

 def parse_model(d, ch):  # model_dict, input_channels(3)
-    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
+    LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -269,7 +275,7 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum(ch[x] for x in f)
+            c2 = sum([ch[x] for x in f])
         elif m is Detect:
             args.append([ch[x] for x in f])
             if isinstance(args[1], int):  # number of anchors
@@ -281,11 +287,11 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         else:
             c2 = ch[f]

-        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
-        np = sum(x.numel() for x in m_.parameters())  # number params
+        np = sum([x.numel() for x in m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print
+        LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         if i == 0:
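parse_model() scales module repeats by depth_multiple and channels by width_multiple, rounding channels up to a multiple of 8 via make_divisible. Worked numbers for a yolov5s-style model with gd=0.33 and gw=0.50:

import math

def make_divisible(x, divisor):  # same rounding helper the repo uses
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50  # yolov5s depth_multiple, width_multiple
n, c2 = 9, 512       # a "[-1, 9, C3, [512]]" entry
n = max(round(n * gd), 1) if n > 1 else n  # 9 repeats -> 3
c2 = make_divisible(c2 * gw, 8)            # 512 channels -> 256
print(n, c2)  # 3 256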
@@ -299,10 +305,10 @@ if __name__ == '__main__':
     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--profile', action='store_true', help='profile model speed')
-    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
     opt = parser.parse_args()
     opt.cfg = check_yaml(opt.cfg)  # check YAML
     print_args(FILE.stem, opt)
+    set_logging()
     device = select_device(opt.device)

     # Create model
@@ -314,14 +320,6 @@ if __name__ == '__main__':
         img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
         y = model(img, profile=True)

-    # Test all models
-    if opt.test:
-        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
-            try:
-                _ = Model(cfg)
-            except Exception as e:
-                print(f'Error in {cfg}: {e}')
-
     # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
     # from torch.utils.tensorboard import SummaryWriter
     # tb_writer = SummaryWriter('.')
...
@@ -27,7 +27,6 @@ seaborn>=0.11.0
 # scikit-learn==0.19.2  # CoreML quantization
 # tensorflow>=2.4.1  # TFLite export
 # tensorflowjs>=3.9.0  # TF.js export
-# openvino-dev  # OpenVINO export

 # Extras --------------------------------------
 # albumentations>=1.0.3
...
# Project-wide configuration file, can be used for package metadata and other tool configurations
# Example usage: global configuration for PEP8 (via flake8) settings or default pytest arguments
[metadata]
license_file = LICENSE
description-file = README.md
[tool:pytest]
norecursedirs =
.git
dist
build
addopts =
--doctest-modules
--durations=25
--color=yes
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore =
E731 # Do not assign a lambda expression, use a def
F405
E402
F841
E741
F821
E722
F401
W504
E127
E231
E501
F403
E302
F541
[isort]
# https://pycqa.github.io/isort/docs/configuration/options.html
line_length = 120
multi_line_output = 0
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=ib0
export HSA_USERPTR_FOR_PAGED_MEM=0
module rm compiler/dtk/21.10
module load compiler/dtk/22.04.2
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE
echo $lrank
echo $comm_rank
echo $comm_size
APP="python3 `pwd`/train_multi.py --batch 128 --dist-url tcp://${1}:34567 --dist-backend nccl --world-size=${comm_size} --rank=${comm_rank} --local_rank=${lrank} --data coco.yaml --weight yolov5m.pt --project yolov5m/train --hyp data/hyps/hyp.scratch-high.yaml --cfg yolov5m.yaml --epochs 5000"
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_0:1
export UCX_IB_PCI_BW=mlx5_0:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#echo GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_1:1
export UCX_IB_PCI_BW=mlx5_1:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_2:1
export UCX_IB_PCI_BW=mlx5_2:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_3:1
export UCX_IB_PCI_BW=mlx5_3:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
;;
esac
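The launcher above expects to be started under OpenMPI, which provides the OMPI_COMM_WORLD_* variables; each of the four local ranks is pinned to one NUMA node and one InfiniBand HCA, and the first node's IP is passed as $1 for the tcp:// rendezvous. A two-node, eight-rank run would look roughly like `mpirun -np 8 -npernode 4 --hostfile hosts bash <this script> <node0_ip>` (the exact flags are an assumption about the site's MPI setup). Since train_multi.py is not part of this diff, here is a hypothetical sketch of the process-group initialization it presumably performs with the flags shown above:

import argparse
import torch
import torch.distributed as dist

parser = argparse.ArgumentParser()
parser.add_argument('--dist-url', default='tcp://127.0.0.1:34567')
parser.add_argument('--dist-backend', default='nccl')
parser.add_argument('--world-size', type=int, default=1)
parser.add_argument('--rank', type=int, default=0)
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()

dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                        world_size=args.world_size, rank=args.rank)  # rendezvous at node 0
torch.cuda.set_device(args.local_rank)  # on ROCm/DTK builds this selects a HIP device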
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
-Train a YOLOv5 model on a custom dataset.
-
-Models and datasets download automatically from the latest YOLOv5 release.
-Models: https://github.com/ultralytics/yolov5/tree/master/models
-Datasets: https://github.com/ultralytics/yolov5/tree/master/data
-Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+Train a YOLOv5 model on a custom dataset

 Usage:
-    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (RECOMMENDED)
-    $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
+    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
 """

 import argparse
+import logging
 import math
 import os
 import random
 import sys
 import time
 from copy import deepcopy
-from datetime import datetime
 from pathlib import Path

 import numpy as np
@@ -29,7 +23,7 @@
 import yaml
 from torch.cuda import amp
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.optim import SGD, Adam, AdamW, lr_scheduler
+from torch.optim import Adam, SGD, lr_scheduler
 from tqdm import tqdm

 FILE = Path(__file__).resolve()
@@ -42,21 +36,21 @@ import val  # for end-of-epoch mAP
 from models.experimental import attempt_load
 from models.yolo import Model
 from utils.autoanchor import check_anchors
-from utils.autobatch import check_train_batch_size
-from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
+from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
+    strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
+    check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
 from utils.downloads import attempt_download
-from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
-                           check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
-                           intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
-                           print_args, print_mutation, strip_optimizer)
-from utils.loggers import Loggers
-from utils.loggers.wandb.wandb_utils import check_wandb_resume
 from utils.loss import ComputeLoss
+from utils.plots import plot_labels, plot_evolve
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
+    torch_distributed_zero_first
+from utils.loggers.wandb.wandb_utils import check_wandb_resume
 from utils.metrics import fitness
-from utils.plots import plot_evolve, plot_labels
-from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
+from utils.loggers import Loggers
+from utils.callbacks import Callbacks

+LOGGER = logging.getLogger(__name__)
 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
 RANK = int(os.getenv('RANK', -1))
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
@@ -67,7 +61,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           device,
           callbacks
           ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
         opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
@@ -83,14 +77,13 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))

     # Save run settings
-    if not evolve:
-        with open(save_dir / 'hyp.yaml', 'w') as f:
-            yaml.safe_dump(hyp, f, sort_keys=False)
-        with open(save_dir / 'opt.yaml', 'w') as f:
-            yaml.safe_dump(vars(opt), f, sort_keys=False)
+    with open(save_dir / 'hyp.yaml', 'w') as f:
+        yaml.safe_dump(hyp, f, sort_keys=False)
+    with open(save_dir / 'opt.yaml', 'w') as f:
+        yaml.safe_dump(vars(opt), f, sort_keys=False)
+    data_dict = None

     # Loggers
-    data_dict = None
     if RANK in [-1, 0]:
         loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
         if loggers.wandb:
@@ -112,7 +105,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
     assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
-    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
+    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset

     # Model
     check_suffix(weights, '.pt')  # check weights
@@ -131,22 +124,13 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

     # Freeze
-    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True  # train all layers
         if any(x in k for x in freeze):
-            LOGGER.info(f'freezing {k}')
+            print(f'freezing {k}')
             v.requires_grad = False

-    # Image size
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
-
-    # Batch size
-    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
-        batch_size = check_train_batch_size(model, imgsz)
-        loggers.on_params_update({"batch_size": batch_size})
-
     # Optimizer
     nbs = 64  # nominal batch size
     accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
@@ -162,10 +146,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
             g1.append(v.weight)

-    if opt.optimizer == 'Adam':
+    if opt.adam:
         optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
-    elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
         optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
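Context for the hunk above: both sides build three parameter groups before choosing an optimizer, BatchNorm scales without weight decay (g0), convolution weights with decay (g1), and biases (g2). A condensed, plain-PyTorch sketch of that grouping:

import torch.nn as nn

def param_groups(model):
    g0, g1, g2 = [], [], []  # BN weights / weights with decay / biases
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # no weight decay on BN scales
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            g1.append(v.weight)
    return g0, g1, g2

m = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Conv2d(8, 8, 1))
print([len(g) for g in param_groups(m)])  # [1, 2, 3]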
@@ -208,10 +190,15 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     del ckpt, csd

+    # Image sizes
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+
     # DP mode
     if cuda and RANK == -1 and torch.cuda.device_count() > 1:
-        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
-                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
+        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
+                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
         model = torch.nn.DataParallel(model)

     # SyncBatchNorm
@@ -223,7 +210,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                               hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
                                               workers=workers, image_weights=opt.image_weights, quad=opt.quad,
-                                              prefix=colorstr('train: '), shuffle=True)
+                                              prefix=colorstr('train: '))
     mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
     nb = len(train_loader)  # number of batches
     assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
@@ -254,11 +241,10 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     if cuda and RANK != -1:
         model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

-    # Model attributes
-    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-    hyp['box'] *= 3 / nl  # scale to layers
-    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    # Model parameters
+    hyp['box'] *= 3. / nl  # scale to layers
+    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
...@@ -277,7 +263,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -277,7 +263,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
stopper = EarlyStopping(patience=opt.patience) stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class compute_loss = ComputeLoss(model) # init loss class
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f'Using {train_loader.num_workers} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n" f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...') f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
...@@ -299,11 +285,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -299,11 +285,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
pbar = enumerate(train_loader) pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size')) LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if RANK in [-1, 0]: if RANK in [-1, 0]:
pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad() optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start) ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup # Warmup
if ni <= nw: if ni <= nw:
...@@ -390,8 +376,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -390,8 +376,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
'ema': deepcopy(ema.ema).half(), 'ema': deepcopy(ema.ema).half(),
'updates': ema.updates, 'updates': ema.updates,
'optimizer': optimizer.state_dict(), 'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
'date': datetime.now().isoformat()}
# Save last, best and delete # Save last, best and delete
torch.save(ckpt, last) torch.save(ckpt, last)
...@@ -438,10 +423,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -438,10 +423,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
plots=True, plots=True,
callbacks=callbacks, callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots compute_loss=compute_loss) # val best model with plots
if is_coco:
callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
callbacks.run('on_train_end', last, best, plots, epoch, results) callbacks.run('on_train_end', last, best, plots, epoch)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache() torch.cuda.empty_cache()
...@@ -455,13 +438,13 @@ def parse_opt(known=False): ...@@ -455,13 +438,13 @@ def parse_opt(known=False):
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch') parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
...@@ -469,9 +452,9 @@ def parse_opt(known=False): ...@@ -469,9 +452,9 @@ def parse_opt(known=False):
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
...@@ -479,13 +462,13 @@ def parse_opt(known=False): ...@@ -479,13 +462,13 @@ def parse_opt(known=False):
parser.add_argument('--linear-lr', action='store_true', help='linear LR') parser.add_argument('--linear-lr', action='store_true', help='linear LR')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
parser.add_argument('--save-period', type=int, default=1, help='Save checkpoint every x epochs (disabled if < 1)') parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments # Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity') parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
...@@ -495,6 +478,7 @@ def parse_opt(known=False): ...@@ -495,6 +478,7 @@ def parse_opt(known=False):
def main(opt, callbacks=Callbacks()): def main(opt, callbacks=Callbacks()):
# Checks # Checks
set_logging(RANK)
if RANK in [-1, 0]: if RANK in [-1, 0]:
print_args(FILE.stem, opt) print_args(FILE.stem, opt)
check_git_status() check_git_status()
...@@ -618,9 +602,9 @@ def main(opt, callbacks=Callbacks()): ...@@ -618,9 +602,9 @@ def main(opt, callbacks=Callbacks()):
# Plot results # Plot results
plot_evolve(evolve_csv) plot_evolve(evolve_csv)
LOGGER.info(f'Hyperparameter evolution finished\n' print(f'Hyperparameter evolution finished\n'
f"Results saved to {colorstr('bold', save_dir)}\n" f"Results saved to {colorstr('bold', save_dir)}\n"
f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}') f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs): def run(**kwargs):
......
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 model on a custom dataset
Usage:
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
"""
import argparse
import logging
import math
import os
import random
import sys
import time
from copy import deepcopy
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import Adam, SGD, lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import val # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.datasets import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
from utils.downloads import attempt_download
from utils.loss import ComputeLoss
from utils.plots import plot_labels, plot_evolve
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
torch_distributed_zero_first
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.metrics import fitness
from utils.loggers import Loggers
from utils.callbacks import Callbacks
LOGGER = logging.getLogger(__name__)
# LOCAL_RANK is assigned from the --local_rank argument in the __main__ block below; RANK and
# WORLD_SIZE are likewise taken from the --rank and --world-size arguments (opt.rank, opt.world_size)
# instead of the torch.distributed.run environment variables (https://pytorch.org/docs/stable/elastic/run.html)
print('torch.cuda.device_count():', torch.cuda.device_count())
def train(hyp, # path/to/hyp.yaml or hyp dictionary
opt,
device,
callbacks
):
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
# Directories
w = save_dir / 'weights' # weights dir
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
last, best = w / 'last.pt', w / 'best.pt'
# Hyperparameters
if isinstance(hyp, str):
with open(hyp, errors='ignore') as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
# Save run settings
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(opt), f, sort_keys=False)
data_dict = None
# Loggers
if opt.rank in [-1, 0]:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
if loggers.wandb:
data_dict = loggers.wandb.data_dict
if resume:
weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp
# Register actions
for k in methods(loggers):
callbacks.register_action(k, callback=getattr(loggers, k))
# Config
plots = not evolve # create plots
cuda = device.type != 'cpu'
init_seeds(1 + opt.rank)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data) # check if None
train_path, val_path = data_dict['train'], data_dict['val']
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
# Model
check_suffix(weights, '.pt') # check weights
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=device) # load checkpoint
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
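# intersect_dicts() keeps only checkpoint tensors whose key and shape match the freshly built model,
# so pretrained weights transfer even when nc or the anchor configuration differs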
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
# Freeze
freeze = [f'model.{x}.' for x in range(freeze)] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
print(f'freezing {k}')
v.requires_grad = False
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
g0, g1, g2 = [], [], [] # optimizer parameter groups
for v in model.modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias
g2.append(v.bias)
if isinstance(v, nn.BatchNorm2d): # weight (no decay)
g0.append(v.weight)
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
g1.append(v.weight)
if opt.adam:
optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']}) # add g1 with weight_decay
optimizer.add_param_group({'params': g2}) # add g2 (biases)
LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
f"{len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias")
del g0, g1, g2
# Scheduler
if opt.linear_lr:
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
else:
lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
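# one_cycle(1, hyp['lrf'], epochs) in utils.general is a cosine ramp from 1 to lrf:
# lf(x) = ((1 - cos(x * pi / epochs)) / 2) * (lrf - 1) + 1, so lr decays from lr0 to lr0 * lrf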
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
# EMA
ema = ModelEMA(model) if opt.rank in [-1, 0] else None
# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# EMA
if ema and ckpt.get('ema'):
ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
ema.updates = ckpt['updates']
# Epochs
start_epoch = ckpt['epoch'] + 1
if resume:
assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
if epochs < start_epoch:
LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt, csd
# Image sizes
gs = max(int(model.stride.max()), 32) # grid size (max stride)
nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj'])
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# DP mode
if cuda and opt.rank == -1 and torch.cuda.device_count() > 1:
logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and opt.rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info('Using SyncBatchNorm()')
# Trainloader
train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // opt.world_size, gs, single_cls,
hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
workers=workers, image_weights=opt.image_weights, quad=opt.quad,
prefix=colorstr('train: '))
mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class
nb = len(train_loader) # number of batches
assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
# Process 0
if opt.rank in [-1, 0]:
val_loader = create_dataloader(val_path, imgsz, batch_size // opt.world_size * 2, gs, single_cls,
hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
workers=workers, pad=0.5,
prefix=colorstr('val: '))[0]
if not resume:
labels = np.concatenate(dataset.labels, 0)
# c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
if plots:
plot_labels(labels, names, save_dir)
# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
model.half().float() # pre-reduce anchor precision
callbacks.run('on_pretrain_routine_end')
# DDP mode
if cuda and opt.rank != -1:
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
# Model parameters
hyp['box'] *= 3. / nl # scale to layers
hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
hyp['label_smoothing'] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
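# images containing rare or poorly-predicted classes are sampled more often in the next epoch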
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(3, device=device) # mean losses
if opt.rank != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if opt.rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
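# 'accumulate' ramps from 1 towards nbs / batch_size during warmup, so the effective batch size
# approaches the nominal nbs = 64 regardless of the per-step --batch-size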
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
if opt.rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
if opt.quad:
loss *= 4.
# Backward
scaler.scale(loss).backward()
# Optimize
if ni - last_opt_step >= accumulate:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
# Log
if opt.rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn)
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for loggers
scheduler.step()
if opt.rank in [-1, 0]:
# mAP
callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = val.run(data_dict,
batch_size=batch_size // opt.world_size * 2,
imgsz=imgsz,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
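# fitness() in utils.metrics weights [P, R, mAP@.5, mAP@.5:.95] as [0.0, 0.0, 0.1, 0.9],
# so checkpoint selection is driven almost entirely by mAP@.5:.95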
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
# Save model
if (not nosave) or (final_epoch and not evolve): # if save
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'model': deepcopy(de_parallel(model)).half(),
'ema': deepcopy(ema.ema).half(),
'updates': ema.updates,
'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
torch.save(ckpt, w / f'epoch{epoch}.pt')
del ckpt
callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
# Stop Single-GPU
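# stopper(epoch, fitness) returns True once 'patience' epochs pass without best_fitness improving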
if opt.rank == -1 and stopper(epoch=epoch, fitness=fi):
break
# Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576
# stop = stopper(epoch=epoch, fitness=fi)
# if RANK == 0:
# dist.broadcast_object_list([stop], 0) # broadcast 'stop' to all ranks
# Stop DDP
# with torch_distributed_zero_first(RANK):
# if stop:
# break # must break all DDP ranks
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if opt.rank in [-1, 0]:
LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is best:
LOGGER.info(f'\nValidating {f}...')
results, _, _ = val.run(data_dict,
batch_size=batch_size // opt.world_size * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=True,
callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots
callbacks.run('on_train_end', last, best, plots, epoch)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--linear-lr', action='store_true', help='linear LR')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
parser.add_argument('--rank', default=-1, type=int, help='global rank of this process for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
parser.add_argument('--world-size', default=1, type=int, help='total number of distributed processes; batch_size is divided by this, so it must be >= 1')
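# NOTE (illustrative, not from this commit): with an explicit init_method/rank/world_size (see
# main() below), every process must be launched with a unique --rank in [0, world_size); under
# mpirun the per-process rank is typically derived from OMPI_COMM_WORLD_RANK and the process
# count from OMPI_COMM_WORLD_SIZE. The exact launch commands are documented in this repo's
# updated README.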
opt = parser.parse_known_args()[0] if known else parser.parse_args()
return opt
def main(opt, callbacks=Callbacks()):
# Checks
set_logging(opt.rank)
if opt.rank in [-1, 0]:
print_args(FILE.stem, opt)
check_git_status()
check_requirements(exclude=['thop'])
# Resume
if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
opt = argparse.Namespace(**yaml.safe_load(f)) # replace
opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate
LOGGER.info(f'Resuming training from {ckpt}')
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
if opt.evolve:
opt.project = str(ROOT / 'runs/evolve')
opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
assert not opt.evolve, '--evolve argument is not compatible with DDP training'
torch.cuda.set_device(LOCAL_RANK)
device = torch.device('cuda', LOCAL_RANK)
# dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo",
                        init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank)
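# init_process_group() blocks here until all world_size ranks have connected to dist_url,
# so training only begins once every participating process has been started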
# Train
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
if opt.world_size > 1 and opt.rank == 0:
LOGGER.info('Destroying process group... ')
dist.destroy_process_group()
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
'box': (1, 0.02, 0.2), # box loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
'mixup': (1, 0.0, 1.0), # image mixup (probability)
'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability)
with open(opt.hyp, errors='ignore') as f:
hyp = yaml.safe_load(f) # load hyps dict
if 'anchors' not in hyp: # anchors commented in hyp.yaml
hyp['anchors'] = 3
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
if opt.bucket:
os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}') # download evolve.csv if exists
for _ in range(opt.evolve): # generations to evolve
if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
# Select parent(s)
parent = 'single' # parent selection method: 'single' or 'weighted'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0)
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
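# each gene mutates with probability mp (masked by its gain g); the multiplicative factor is
# 1 plus gain-scaled Gaussian noise with sigma s, clipped to 0.3x-3.0x of the parent value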
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
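# the i + 7 offset skips the 7 result columns (P, R, mAP@.5, mAP@.5:.95 and val box/obj/cls
# losses) stored before the hyperparameter values in each evolve.csv row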
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device, callbacks)
# Write mutation results
print_mutation(results, hyp.copy(), save_dir, opt.bucket)
# Plot results
plot_evolve(evolve_csv)
print(f'Hyperparameter evolution finished\n'
f"Results saved to {colorstr('bold', save_dir)}\n"
f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
# Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
if __name__ == "__main__":
#print('torch.backends.cudnn.enabled=False:',torch.backends.cudnn.enabled)
opt = parse_opt()
LOCAL_RANK = opt.local_rank # module-level LOCAL_RANK comes from the --local_rank DDP argument
print('local_rank:', LOCAL_RANK)
main(opt)