yolov5-qat

7a650e36 · mashun1 · 7a650e36 · 7a650e36 · 7a650e36 · 7a650e36
Commit 7a650e36 authored Mar 21, 2024 by mashun1
20 changed files
--- a/readme_imgs/image-2.png
+++ b/readme_imgs/image-2.png
--- a/readme_imgs/trt.png
+++ b/readme_imgs/trt.png
--- a/requirements.txt
+++ b/requirements.txt
+# YOLOv5 requirements
+# Usage: pip install -r requirements.txt
+# Base ------------------------------------------------------------------------
+gitpython>=3.1.30
+matplotlib>=3.3
+numpy>=1.23.5
+opencv-python>=4.1.1
+Pillow>=9.4.0
+psutil  # system resources
+PyYAML>=5.3.1
+requests>=2.23.0
+scipy>=1.4.1
+thop>=0.1.1  # FLOPs computation
+torch>=1.8.0  # see https://pytorch.org/get-started/locally (recommended)
+torchvision>=0.9.0
+tqdm>=4.64.0
+ultralytics>=8.0.232
+# protobuf<=3.20.1  # https://github.com/ultralytics/yolov5/issues/8012
+# Logging ---------------------------------------------------------------------
+# tensorboard>=2.4.1
+# clearml>=1.2.0
+# comet
+# Plotting --------------------------------------------------------------------
+pandas>=1.1.4
+seaborn>=0.11.0
+# Export ----------------------------------------------------------------------
+# coremltools>=6.0  # CoreML export
+# onnx>=1.10.0  # ONNX export
+# onnx-simplifier>=0.4.1  # ONNX simplifier
+# nvidia-pyindex  # TensorRT export
+# nvidia-tensorrt  # TensorRT export
+# scikit-learn<=1.1.2  # CoreML quantization
+# tensorflow>=2.4.0,<=2.13.1  # TF exports (-cpu, -aarch64, -macos)
+# tensorflowjs>=3.9.0  # TF.js export
+# openvino-dev>=2023.0  # OpenVINO export
+# Deploy ----------------------------------------------------------------------
+setuptools>=65.5.1 # Snyk vulnerability fix
+# tritonclient[all]~=2.24.0
+# Extras ----------------------------------------------------------------------
+# ipython  # interactive notebook
+# mss  # screenshots
+# albumentations>=1.0.3
+# pycocotools>=2.0.6  # COCO mAP
--- a/scripts/coco2yolo.py
+++ b/scripts/coco2yolo.py
+"""
+2021/1/24
+COCO 格式的数据集转化为 YOLO 格式的数据集，源代码采取遍历方式，太慢，
+这里改进了一下时间复杂度，从O(nm)改为O(n+m)，但是牺牲了一些内存占用
+--json_path 输入的json文件路径
+--save_path 保存的文件夹名字，默认为当前目录下的labels。
+"""
+import os 
+import json
+from tqdm import tqdm
+import argparse
+# Command-line interface: the COCO annotation file to read and the directory to write labels to.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    '--json_path',
+    type=str,
+    default='./instances_val2017.json',
+    help="input: coco format(json)",
+)
+parser.add_argument(
+    '--save_path',
+    type=str,
+    default='./labels',
+    help="specify where to save the output dir of labels",
+)
+# NOTE: parsing happens at module level, so importing this file consumes sys.argv.
+arg = parser.parse_args()
+def convert(size, box):
+    """Turn a COCO ``(x_min, y_min, width, height)`` box into normalized centre form.
+
+    Args:
+        size: ``(image_width, image_height)``.
+        box: ``(x_min, y_min, box_width, box_height)`` in the same units as ``size``.
+
+    Returns:
+        ``(x_center, y_center, width, height)`` where x/width are scaled by
+        ``1 / size[0]`` and y/height by ``1 / size[1]``.
+    """
+    inv_w = 1. / (size[0])
+    inv_h = 1. / (size[1])
+    left, top, box_w, box_h = box[0], box[1], box[2], box[3]
+    # Centre of the box, then scale by the reciprocal image size.
+    cx = (left + box_w / 2.0) * inv_w
+    cy = (top + box_h / 2.0) * inv_h
+    return (cx, cy, box_w * inv_w, box_h * inv_h)
+if __name__ == '__main__':
+    # Convert every image listed in the COCO json into one YOLO label file
+    # (one "<class> <xc> <yc> <w> <h>" line per annotation).
+    json_file = arg.json_path  # COCO "Object Instance" style annotation file
+    ana_txt_save_path = arg.save_path  # directory that receives the label files
+    # Read the annotations inside a context manager so the handle is closed right away.
+    with open(json_file, 'r') as f_json:
+        data = json.load(f_json)
+    os.makedirs(ana_txt_save_path, exist_ok=True)
+    # COCO category ids are not contiguous; remap them to 0..N-1 in listing order.
+    id_map = {category['id']: i for i, category in enumerate(data['categories'])}
+    # Group annotations by image id in one pass (O(n + m) overall). A dict keeps memory
+    # proportional to the number of annotated images even when ids are sparse, and an
+    # annotation that points at an unlisted image can no longer raise IndexError.
+    img_ann_dict = {}
+    for ann in data['annotations']:
+        img_ann_dict.setdefault(ann['image_id'], []).append(ann)
+    for img in tqdm(data['images']):
+        filename = img["file_name"]
+        img_width = img["width"]
+        img_height = img["height"]
+        img_id = img["id"]
+        head, _ = os.path.splitext(filename)
+        ana_txt_name = head + ".txt"  # label file shares the image's base name
+        # The file is created even for images without annotations (empty label file).
+        with open(os.path.join(ana_txt_save_path, ana_txt_name), 'w') as f_txt:
+            for ann in img_ann_dict.get(img_id, ()):
+                box = convert((img_width, img_height), ann["bbox"])
+                f_txt.write("%s %s %s %s %s\n" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
\ No newline at end of file
--- a/scripts/qat.py
+++ b/scripts/qat.py
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+import sys
+import os
+# Add the current directory to PYTHONPATH for yolov5s
+sys.path.insert(0, os.path.abspath("."))
+pydir = os.path.dirname(__file__)
+import yaml
+import collections
+import warnings
+import argparse
+import json
+from pathlib import Path
+# PyTorch
+import torch
+import torch.nn as nn
+# yolov5s
+import val
+from models.yolo import Model
+from models.common import Conv
+from utils.dataloaders import create_dataloader
+from utils.downloads import attempt_download
+from utils.general import init_seeds, check_dataset
+import quantization.quantize as quantize
+from copy import deepcopy
+# Disable all warning
+warnings.filterwarnings("ignore")
+from models.yolo import DetectionModel
+class SummaryTool:
+    """Collect result records and rewrite them to a JSON file after every append."""
+    def __init__(self, file):
+        """Remember the output path; nothing is written until the first ``append``."""
+        self.file = file
+        self.data = []
+    def append(self, item):
+        """Add ``item`` to the in-memory list and dump the whole list to ``self.file``."""
+        self.data.append(item)
+        # Use a context manager so the handle is flushed and closed on every call.
+        with open(self.file, "w") as f:
+            json.dump(self.data, f, indent=4)
+def load_yolov5_model(weight, device) -> Model:
+    """Build a fused, eval-mode YOLOv5 detection model and load ``weight`` into it.
+
+    The model config is chosen by looking for a ``yolov5{l,m,n,s,x}`` substring in
+    ``weight`` (checked in that order).
+
+    Args:
+        weight: Path/name of the checkpoint; its ``"model"`` entry supplies the state dict.
+        device: Device the model is created on and the checkpoint is mapped to.
+
+    Returns:
+        The model after ``float()``, ``eval()`` and ``fuse()``.
+
+    Raises:
+        NotImplementedError: If ``weight`` names none of the supported variants.
+    """
+    # Ordered so the first matching substring wins, exactly like an if/elif chain.
+    known_cfgs = (
+        ('yolov5l', "models/yolov5l.yaml"),
+        ('yolov5m', "models/yolov5m.yaml"),
+        ('yolov5n', "models/yolov5n.yaml"),
+        ('yolov5s', "models/yolov5s.yaml"),
+        ("yolov5x", "models/yolov5x.yaml"),
+    )
+    cfg = next((path for tag, path in known_cfgs if tag in weight), None)
+    if cfg is None:
+        raise NotImplementedError("Only support yolov5[l, m, n, s, x]")
+    model = DetectionModel(cfg=cfg).to(device)
+    attempt_download(weight)
+    checkpoint = torch.load(weight, map_location=device)
+    state_dict = checkpoint["model"].state_dict()
+    # strict=False: keys that do not match between checkpoint and model are tolerated.
+    model.load_state_dict(state_dict, strict=False)
+    for module in model.modules():
+        module_type = type(module)
+        if module_type is nn.Upsample:
+            module.recompute_scale_factor = None  # torch 1.11.0 compatibility
+        elif module_type is Conv:
+            module._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+    model.float()
+    model.eval()
+    with torch.no_grad():
+        model.fuse()
+    return model
+# ================== 构建数据集 ==================================
+def create_coco_train_dataloader(cocodir, batch_size=10):
+    """Build the augmented training dataloader from ``{cocodir}/train2017.txt``.
+
+    Hyper-parameters are read from ``data/hyps/hyp.scratch-low.yaml`` (relative to the
+    current working directory).
+
+    Args:
+        cocodir: Directory containing ``train2017.txt``.
+        batch_size: Batch size passed to ``create_dataloader``.
+
+    Returns:
+        The first element of the ``create_dataloader`` result.
+    """
+    with open("data/hyps/hyp.scratch-low.yaml") as hyp_file:
+        hyp = yaml.load(hyp_file, Loader=yaml.SafeLoader)  # load hyps
+    created = create_dataloader(
+        f"{cocodir}/train2017.txt",
+        imgsz=640,
+        batch_size=batch_size,
+        augment=True,
+        hyp=hyp,
+        rect=False,
+        cache=False,
+        stride=32,
+        pad=0,
+        image_weights=False,
+    )
+    return created[0]
+def create_coco_val_dataloader(cocodir, batch_size=10, keep_images=None):
+    """Build the non-augmented, rectangular validation dataloader from ``{cocodir}/val2017.txt``.
+
+    Args:
+        cocodir: Directory containing ``val2017.txt``.
+        batch_size: Batch size passed to ``create_dataloader``.
+        keep_images: Value the patched ``__len__`` returns when not ``None``
+            (presumably meant to cap how many images are evaluated).
+
+    Returns:
+        The first element of the ``create_dataloader`` result.
+    """
+    loader = create_dataloader(
+        f"{cocodir}/val2017.txt", 
+        imgsz=640, 
+        batch_size=batch_size, 
+        augment=False, hyp=None, rect=True, cache=False,stride=32,pad=0.5, image_weights=False)[0]
+    def subclass_len(self):
+        if keep_images is not None:
+            return keep_images
+        return len(self.img_files)
+    # NOTE(review): this stores ``__len__`` on the dataset *instance*. ``len()`` resolves
+    # special methods on the type, so ``len(loader.dataset)`` presumably ignores this
+    # override and ``keep_images`` has no effect -- confirm, and patch the dataset's class
+    # (or wrap the dataset) if a real cap is wanted.
+    loader.dataset.__len__ = subclass_len
+    return loader
+# =======================在coco上测试模型性能=============================
+def evaluate_coco(model, dataloader, using_cocotools = False, save_dir=".", conf_thres=0.001, iou_thres=0.65):
+    """Run ``val.run`` on a deep copy of ``model`` and return one metric from its result.
+
+    Args:
+        model: Model to evaluate; it is deep-copied so the caller's instance is what
+            ``val.run`` never receives directly.
+        dataloader: Validation dataloader handed to ``val.run``.
+        using_cocotools: Forwarded as ``save_json``.
+        save_dir: Output directory for ``val.run``; its parent directory is created if named.
+        conf_thres: Confidence threshold forwarded to ``val.run``.
+        iou_thres: IoU threshold forwarded to ``val.run``.
+
+    Returns:
+        Element ``[0][3]`` of the ``val.run`` result (presumably mAP@0.5:0.95 -- confirm
+        against ``val.run``).
+    """
+    parent_dir = os.path.dirname(save_dir) if save_dir else ""
+    if parent_dir != "":
+        os.makedirs(parent_dir, exist_ok=True)
+    model = deepcopy(model)
+    results = val.run(
+        check_dataset("data/coco.yaml"),
+        save_dir=Path(save_dir),
+        dataloader=dataloader,
+        conf_thres=conf_thres,
+        iou_thres=iou_thres,
+        model=model,
+        plots=False,
+        save_json=using_cocotools,
+    )
+    metrics = results[0]
+    return metrics[3]
+# ============================= 导出onnx模型 ======================================
+def export_onnx(model : Model, file, size=640, dynamic_batch=False, noanchor=False):
+    """Export ``model`` to ONNX through ``quantize.export_onnx``.
+
+    The last module of ``model.model`` is patched for the duration of the export
+    (``concat`` set to True, ``_make_grid`` wrapped, and -- with ``noanchor`` -- its class
+    ``forward`` replaced). All patches are undone in a ``finally`` block so a failed
+    export does not leave the model, or every instance of the head class, modified.
+
+    Args:
+        model: Model to export; converted to float in place.
+        file: Destination handed to ``quantize.export_onnx``.
+        size: Height and width of the square dummy input.
+        dynamic_batch: When True, axis 0 of the input and outputs is exported as dynamic.
+        noanchor: When True, export the three per-level outputs of the head's ``m``
+            modules (named ``s8``, ``s16``, ``s32``) instead of the single ``outputs`` tensor.
+    """
+    device = next(model.parameters()).device
+    model.float()
+    dummy = torch.zeros(1, 3, size, size, device=device)
+    head = model.model[-1]
+    head_cls = head.__class__
+    grid_old_func = head._make_grid
+    forward_old_func = head_cls.forward
+    head.concat = True
+    # Round-trip the grid tensors through numpy so they are detached from the original.
+    head._make_grid = lambda *args: [torch.from_numpy(item.cpu().data.numpy()).to(item.device) for item in grid_old_func(*args)]
+    try:
+        if noanchor:
+            def hook_forward(self, x):
+                # Only apply the per-level modules; the anchor reshape/permute is skipped.
+                for i in range(self.nl):
+                    x[i] = self.m[i](x[i])
+                return x
+            head_cls.forward = hook_forward
+            quantize.export_onnx(model, dummy, file, opset_version=13,
+                input_names=["images"], output_names=["s8", "s16", "s32"],
+                dynamic_axes={"images": {0: "batch"}, "s32": {0: "batch"}, "s16": {0: "batch"}, "s8": {0: "batch"}} if dynamic_batch else None
+            )
+        else:
+            quantize.export_onnx(model, dummy, file, opset_version=13,
+                input_names=["images"], output_names=["outputs"],
+                dynamic_axes={"images": {0: "batch"}, "outputs": {0: "batch"}} if dynamic_batch else None
+            )
+    finally:
+        head.concat = False
+        head._make_grid = grid_old_func
+        if noanchor:
+            # The forward patch is class-wide; put the original back for all instances.
+            head_cls.forward = forward_old_func
+def cmd_quantize(weight, cocodir, device, ignore_policy, save_ptq, save_qat, supervision_stride, iters, eval_origin, eval_ptq, all_node_with_qdq):
+    """Calibrate a YOLOv5 model (PTQ) and, when ``save_qat`` is given, fine-tune it (QAT).
+
+    Args:
+        weight: Checkpoint passed to ``load_yolov5_model``.
+        cocodir: Directory holding ``train2017.txt`` / ``val2017.txt``.
+        device: Device string, converted with ``torch.device``.
+        ignore_policy: Forwarded to ``quantize.replace_to_quantization_module``.
+        save_ptq: Where to save the calibrated model; falsy skips saving. Its directory
+            also receives ``summary.json`` (current directory when it has none).
+        save_qat: Where to save the best fine-tuned model; ``None`` stops after PTQ.
+        supervision_stride: Step between supervised entries of ``model.model``.
+        iters: Forwarded to ``quantize.finetune`` as ``early_exit_batchs_per_epoch``.
+        eval_origin: Evaluate with quantization disabled on the whole model.
+        eval_ptq: Evaluate the calibrated model.
+        all_node_with_qdq: Forwarded to the quantize helpers; when False the custom
+            quantizer rules are applied as well.
+    """
+    quantize.initialize(all_node_with_qdq=all_node_with_qdq)
+    if save_ptq and os.path.dirname(save_ptq) != "":
+        os.makedirs(os.path.dirname(save_ptq), exist_ok=True)
+    if save_qat and os.path.dirname(save_qat) != "":
+        os.makedirs(os.path.dirname(save_qat), exist_ok=True)
+    device  = torch.device(device)
+    model = load_yolov5_model(weight, device)
+    train_dataloader = create_coco_train_dataloader(cocodir)
+    val_dataloader   = create_coco_val_dataloader(cocodir)
+    quantize.replace_bottleneck_forward(model)
+    quantize.replace_to_quantization_module(model, ignore_policy=ignore_policy, all_node_with_qdq=all_node_with_qdq)
+    if not all_node_with_qdq:
+        quantize.apply_custom_rules_to_quantizer(model, export_onnx)
+    quantize.calibrate_model(model, train_dataloader, device)
+    # save_ptq may be None/empty (saving is optional above); fall back to the current
+    # directory instead of letting os.path.dirname(None) raise.
+    ptq_dir = os.path.dirname(save_ptq) if save_ptq else ""
+    json_save_dir = "." if ptq_dir == "" else ptq_dir
+    summary_file = os.path.join(json_save_dir, "summary.json")
+    summary = SummaryTool(summary_file)
+    if eval_origin:
+        print("Evaluate Origin...")
+        with quantize.disable_quantization(model):
+            ap = evaluate_coco(model, val_dataloader, True, json_save_dir)
+            summary.append(["Origin", ap])
+    if save_ptq:
+        print(f"Save ptq model to {save_ptq}")
+        torch.save({"model": model}, save_ptq)
+    if eval_ptq:
+        print("Evaluate PTQ...")
+        # During ONNX export, the model is pruned, so quantization of that layer needs to
+        # be turned off when validating accuracy.
+        with quantize.disable_quantization(model.model[24]):
+            ap = evaluate_coco(model, val_dataloader, True, json_save_dir)
+            summary.append(["PTQ", ap])
+    if save_qat is None:
+        print("Done as save_qat is None.")
+        return
+    best_ap = 0
+    def per_epoch(model, epoch, lr):
+        # Evaluate after each epoch and keep only the best-scoring checkpoint.
+        nonlocal best_ap
+        with quantize.disable_quantization(model.model[24]):
+            ap = evaluate_coco(model, val_dataloader, True, json_save_dir)
+            summary.append([f"QAT{epoch}", ap])
+        if ap > best_ap:
+            print(f"Save qat model to {save_qat} @ {ap:.5f}")
+            best_ap = ap
+            torch.save({"model": model}, save_qat)
+    def preprocess(datas):
+        # First element of the batch is scaled from 0-255 to 0-1 on the target device.
+        return datas[0].to(device).float() / 255.0
+    def supervision_policy():
+        # Only direct children of model.model are candidates, identified by object id.
+        supervision_list = [id(item) for item in model.model]
+        keep_idx = list(range(0, len(model.model) - 1, supervision_stride))
+        keep_idx.append(len(model.model) - 2)
+        def impl(name, module):
+            if id(module) not in supervision_list: return False
+            idx = supervision_list.index(id(module))
+            if idx in keep_idx:
+                print(f"Supervision: {name} will compute loss with origin model during QAT training")
+            else:
+                print(f"Supervision: {name} no compute loss during QAT training, that is unsupervised only and doesn't mean don't learn")
+            return idx in keep_idx
+        return impl
+    quantize.finetune(
+        model, train_dataloader, per_epoch, early_exit_batchs_per_epoch=iters,
+        preprocess=preprocess, supervision_policy=supervision_policy())
+def cmd_export(weight, save, size, dynamic, noanchor, noqadd):
+    """Load a saved model checkpoint and export it to ONNX.
+
+    Args:
+        weight: Checkpoint file whose ``"model"`` entry is the model object.
+        save: Output path; ``None`` derives it from ``weight`` by swapping the extension
+            for ``.onnx`` in the same directory.
+        size: Square input size for the export.
+        dynamic: Export with a dynamic batch axis.
+        noanchor: Forwarded to ``export_onnx``.
+        noqadd: When True, skip ``quantize.replace_bottleneck_forward``.
+    """
+    quantize.initialize()
+    if save is None:
+        # splitext leaves an extension-less name intact; slicing at rfind('.') == -1
+        # would silently drop the last character of the name.
+        name = os.path.splitext(os.path.basename(weight))[0]
+        save = os.path.join(os.path.dirname(weight), name + ".onnx")
+    # NOTE: this unpickles a full model object; only load checkpoints you trust.
+    model = torch.load(weight, map_location="cpu")["model"]
+    if not noqadd:
+        quantize.replace_bottleneck_forward(model)
+    export_onnx(model, save, size, dynamic_batch=dynamic, noanchor=noanchor)
+    print(f"Save onnx to {save}")
+def cmd_sensitive_analysis(weight, device, cocodir, summary_save, num_image, model=None):
+    """Measure how much each top-level layer's quantization affects the evaluation score.
+
+    The model is quantized and calibrated, evaluated once as-is ("PTQ"), then evaluated
+    again with quantization disabled on one ``model.model[i]`` at a time. Results are
+    appended to ``summary_save`` and the ten best scores are printed.
+
+    Args:
+        weight: Checkpoint passed to ``load_yolov5_model``.
+        device: Device string, converted with ``torch.device``.
+        cocodir: Directory holding ``train2017.txt`` / ``val2017.txt``.
+        summary_save: JSON file that receives the ``[ap, name]`` records.
+        num_image: Passed as ``keep_images`` to the validation loader when >= 1.
+        model: Unused; kept (now optional) so existing positional callers keep working.
+            The command-line entry point calls this function without it.
+    """
+    quantize.initialize()
+    device  = torch.device(device)
+    # load_yolov5_model takes (weight, device); the former three-argument call raised TypeError.
+    model = load_yolov5_model(weight, device)
+    train_dataloader = create_coco_train_dataloader(cocodir)
+    val_dataloader   = create_coco_val_dataloader(cocodir, keep_images=None if num_image is None or num_image < 1 else num_image)
+    quantize.replace_to_quantization_module(model)
+    # Pass the device explicitly, matching the call made by cmd_quantize.
+    quantize.calibrate_model(model, train_dataloader, device)
+    summary = SummaryTool(summary_save)
+    print("Evaluate PTQ...")
+    ap = evaluate_coco(model, val_dataloader)
+    summary.append([ap, "PTQ"])
+    print("Sensitive analysis by each layer...")
+    for i, layer in enumerate(model.model):
+        if quantize.have_quantizer(layer):
+            print(f"Quantization disable model.{i}")
+            quantize.disable_quantization(layer).apply()
+            ap = evaluate_coco(model, val_dataloader)
+            summary.append([ap, f"model.{i}"])
+            quantize.enable_quantization(layer).apply()
+        else:
+            print(f"ignore model.{i} because it is {type(layer)}")
+    # Highest score first: those layers gain the most from staying unquantized.
+    ranked = sorted(summary.data, key=lambda x:x[0], reverse=True)
+    print("Sensitive summary:")
+    for n, (ap, name) in enumerate(ranked[:10]):
+        print(f"Top{n}: Using fp16 {name}, ap = {ap:.5f}")
+def cmd_test(weight, device, cocodir, confidence, nmsthres, model=None):
+    """Evaluate a YOLOv5 checkpoint on the validation set.
+
+    Args:
+        weight: Checkpoint passed to ``load_yolov5_model``.
+        device: Device string, converted with ``torch.device``.
+        cocodir: Directory holding ``val2017.txt``.
+        confidence: Confidence threshold forwarded to ``evaluate_coco``.
+        nmsthres: IoU threshold forwarded to ``evaluate_coco``.
+        model: Unused; kept (now optional) so existing positional callers keep working.
+            The command-line entry point calls this function without it.
+    """
+    device  = torch.device(device)
+    # load_yolov5_model takes (weight, device); the former three-argument call raised TypeError.
+    model = load_yolov5_model(weight, device)
+    val_dataloader   = create_coco_val_dataloader(cocodir)
+    evaluate_coco(model, val_dataloader, True, conf_thres=confidence, iou_thres=nmsthres)
+if __name__ == "__main__":
+    # Command-line entry point: one sub-command per workflow (export / quantize /
+    # sensitive / test). Without a sub-command the help text is printed.
+    parser = argparse.ArgumentParser(prog='qat.py')
+    subps  = parser.add_subparsers(dest="cmd")
+    exp    = subps.add_parser("export", help="Export weight to onnx file")
+    # nargs="?" makes the positional optional so the declared default actually applies,
+    # consistent with the "quantize" and "sensitive" sub-commands.
+    exp.add_argument("weight", type=str, nargs="?", default="yolov5s.pt", help="export pt file")
+    exp.add_argument("--save", type=str, required=False, help="export onnx file")
+    exp.add_argument("--size", type=int, default=640, help="export input size")
+    exp.add_argument("--dynamic", action="store_true", help="export dynamic batch")
+    exp.add_argument("--noanchor", action="store_true", help="export no anchor nodes")
+    exp.add_argument("--noqadd", action="store_true", help="export do not add QuantAdd")
+    qat = subps.add_parser("quantize", help="PTQ/QAT finetune ...")
+    qat.add_argument("weight", type=str, nargs="?", default="yolov5s.pt", help="weight file")
+    qat.add_argument("--cocodir", type=str, default="datasets/coco", help="coco directory")
+    qat.add_argument("--device", type=str, default="cuda:0", help="device")
+    qat.add_argument("--ignore-policy", type=str, default="None", help="regx")
+    # qat.add_argument("--ignore-policy", type=str, default="model\.24\.m\.(.*)", help="regx")
+    qat.add_argument("--ptq", type=str, default="ptq.pt", help="file")
+    qat.add_argument("--qat", type=str, default=None, help="file")
+    qat.add_argument("--supervision-stride", type=int, default=1, help="supervision stride")
+    qat.add_argument("--iters", type=int, default=200, help="iters per epoch")
+    qat.add_argument("--eval-origin", action="store_true", help="do eval for origin model")
+    qat.add_argument("--eval-ptq", action="store_true", help="do eval for ptq model")
+    qat.add_argument("--all-node-with-qdq", action="store_true", help="insert qdq nodes for SiLU, Concat, Add")
+    sensitive = subps.add_parser("sensitive", help="Sensitive layer analysis")
+    sensitive.add_argument("weight", type=str, nargs="?", default="yolov5s.pt", help="weight file")
+    sensitive.add_argument("--device", type=str, default="cuda:0", help="device")
+    sensitive.add_argument("--cocodir", type=str, default="datasets/coco", help="coco directory")
+    sensitive.add_argument("--summary", type=str, default="sensitive-summary.json", help="summary save file")
+    sensitive.add_argument("--num-image", type=int, default=None, help="number of image to evaluate")
+    testcmd = subps.add_parser("test", help="Do evaluate")
+    testcmd.add_argument("weight", type=str, nargs="?", default="yolov5s.pt", help="weight file")
+    testcmd.add_argument("--cocodir", type=str, default="datasets/coco", help="coco directory")
+    testcmd.add_argument("--device", type=str, default="cuda:0", help="device")
+    testcmd.add_argument("--confidence", type=float, default=0.001, help="confidence threshold")
+    testcmd.add_argument("--nmsthres", type=float, default=0.65, help="nms threshold")
+    args = parser.parse_args()
+    # Fixed seed so repeated runs are comparable.
+    init_seeds(57)
+    if args.cmd == "export":
+        cmd_export(args.weight, args.save, args.size, args.dynamic, args.noanchor, args.noqadd)
+    elif args.cmd == "quantize":
+        print(args)
+        cmd_quantize(
+            args.weight, args.cocodir, args.device, args.ignore_policy,
+            args.ptq, args.qat, args.supervision_stride, args.iters,
+            args.eval_origin, args.eval_ptq, args.all_node_with_qdq,
+        )
+    elif args.cmd == "sensitive":
+        cmd_sensitive_analysis(args.weight, args.device, args.cocodir, args.summary, args.num_image)
+    elif args.cmd == "test":
+        cmd_test(args.weight, args.device, args.cocodir, args.confidence, args.nmsthres)
+    else:
+        parser.print_help()
--- a/segment/predict.py
+++ b/segment/predict.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Run YOLOv5 segmentation inference on images, videos, directories, streams, etc.
+Usage - sources:
+    $ python segment/predict.py --weights yolov5s-seg.pt --source 0                               # webcam
+                                                                  img.jpg                         # image
+                                                                  vid.mp4                         # video
+                                                                  screen                          # screenshot
+                                                                  path/                           # directory
+                                                                  list.txt                        # list of images
+                                                                  list.streams                    # list of streams
+                                                                  'path/*.jpg'                    # glob
+                                                                  'https://youtu.be/LNwODJXcvt4'  # YouTube
+                                                                  'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+Usage - formats:
+    $ python segment/predict.py --weights yolov5s-seg.pt                 # PyTorch
+                                          yolov5s-seg.torchscript        # TorchScript
+                                          yolov5s-seg.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                          yolov5s-seg_openvino_model     # OpenVINO
+                                          yolov5s-seg.engine             # TensorRT
+                                          yolov5s-seg.mlmodel            # CoreML (macOS-only)
+                                          yolov5s-seg_saved_model        # TensorFlow SavedModel
+                                          yolov5s-seg.pb                 # TensorFlow GraphDef
+                                          yolov5s-seg.tflite             # TensorFlow Lite
+                                          yolov5s-seg_edgetpu.tflite     # TensorFlow Edge TPU
+                                          yolov5s-seg_paddle_model       # PaddlePaddle
+"""
+import argparse
+import os
+import platform
+import sys
+from pathlib import Path
+import torch
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+from ultralytics.utils.plotting import Annotator, colors, save_one_box
+from models.common import DetectMultiBackend
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
+from utils.general import (
+    LOGGER,
+    Profile,
+    check_file,
+    check_img_size,
+    check_imshow,
+    check_requirements,
+    colorstr,
+    cv2,
+    increment_path,
+    non_max_suppression,
+    print_args,
+    scale_boxes,
+    scale_segments,
+    strip_optimizer,
+)
+from utils.segment.general import masks2segments, process_mask, process_mask_native
+from utils.torch_utils import select_device, smart_inference_mode
+@smart_inference_mode()
+def run(
+    weights=ROOT / "yolov5s-seg.pt",  # model.pt path(s)
+    source=ROOT / "data/images",  # file/dir/URL/glob/screen/0(webcam)
+    data=ROOT / "data/coco128.yaml",  # dataset.yaml path
+    imgsz=(640, 640),  # inference size (height, width)
+    conf_thres=0.25,  # confidence threshold
+    iou_thres=0.45,  # NMS IOU threshold
+    max_det=1000,  # maximum detections per image
+    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+    view_img=False,  # show results
+    save_txt=False,  # save results to *.txt
+    save_conf=False,  # save confidences in --save-txt labels
+    save_crop=False,  # save cropped prediction boxes
+    nosave=False,  # do not save images/videos
+    classes=None,  # filter by class: --class 0, or --class 0 2 3
+    agnostic_nms=False,  # class-agnostic NMS
+    augment=False,  # augmented inference
+    visualize=False,  # visualize features
+    update=False,  # update all models
+    project=ROOT / "runs/predict-seg",  # save results to project/name
+    name="exp",  # save results to project/name
+    exist_ok=False,  # existing project/name ok, do not increment
+    line_thickness=3,  # bounding box thickness (pixels)
+    hide_labels=False,  # hide labels
+    hide_conf=False,  # hide confidences
+    half=False,  # use FP16 half-precision inference
+    dnn=False,  # use OpenCV DNN for ONNX inference
+    vid_stride=1,  # video frame-rate stride
+    retina_masks=False,
+):
+    """Run segmentation inference on ``source`` and optionally show or save the results.
+
+    For every input the function preprocesses the image, runs the model, applies NMS,
+    builds masks, draws masks and boxes with ``Annotator`` and, depending on the flags,
+    writes label text files, crops, annotated images or videos under an incremented
+    ``project/name`` directory. Per-stage timings are logged at the end.
+
+    The per-parameter meaning is given by the inline comments in the signature.
+    ``retina_masks`` selects ``process_mask_native`` (masks built at original-image
+    size) instead of ``process_mask`` (masks built at network-input size).
+    """
+    source = str(source)
+    save_img = not nosave and not source.endswith(".txt")  # save inference images
+    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+    is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
+    webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
+    screenshot = source.lower().startswith("screen")
+    if is_url and is_file:
+        source = check_file(source)  # download
+    # Directories
+    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+    (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+    # Load model
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+    # Dataloader
+    bs = 1  # batch_size
+    if webcam:
+        view_img = check_imshow(warn=True)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+        bs = len(dataset)
+    elif screenshot:
+        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
+    else:
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+    # One video path/writer slot per stream in the batch.
+    vid_path, vid_writer = [None] * bs, [None] * bs
+    # Run inference
+    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
+    # dt holds three profilers: pre-process, inference, NMS (see the "Speed" log below).
+    seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
+    for path, im, im0s, vid_cap, s in dataset:
+        with dt[0]:
+            im = torch.from_numpy(im).to(model.device)
+            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            if len(im.shape) == 3:
+                im = im[None]  # expand for batch dim
+        # Inference
+        with dt[1]:
+            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+            pred, proto = model(im, augment=augment, visualize=visualize)[:2]
+        # NMS
+        with dt[2]:
+            # nm=32 is hard-coded; presumably the number of mask coefficients per detection -- TODO confirm
+            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)
+        # Second-stage classifier (optional)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
+        # Process predictions
+        for i, det in enumerate(pred):  # per image
+            seen += 1
+            if webcam:  # batch_size >= 1
+                p, im0, frame = path[i], im0s[i].copy(), dataset.count
+                s += f"{i}: "
+            else:
+                p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
+            p = Path(p)  # to Path
+            save_path = str(save_dir / p.name)  # im.jpg
+            txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}")  # im.txt
+            s += "%gx%g " % im.shape[2:]  # print string
+            imc = im0.copy() if save_crop else im0  # for save_crop
+            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
+            if len(det):
+                # Columns used below: [:4] box, [5] class, [6:] passed to the mask helpers.
+                if retina_masks:
+                    # scale bbox first the crop masks
+                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()  # rescale boxes to im0 size
+                    masks = process_mask_native(proto[i], det[:, 6:], det[:, :4], im0.shape[:2])  # HWC
+                else:
+                    # Masks are built from the un-rescaled boxes, so the rescale happens afterwards.
+                    masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)  # HWC
+                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()  # rescale boxes to im0 size
+                # Segments
+                if save_txt:
+                    # reversed() here matches the reversed() detection loop under "Write results".
+                    segments = [
+                        scale_segments(im0.shape if retina_masks else im.shape[2:], x, im0.shape, normalize=True)
+                        for x in reversed(masks2segments(masks))
+                    ]
+                # Print results
+                for c in det[:, 5].unique():
+                    n = (det[:, 5] == c).sum()  # detections per class
+                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
+                # Mask plotting
+                annotator.masks(
+                    masks,
+                    colors=[colors(x, True) for x in det[:, 5]],
+                    im_gpu=torch.as_tensor(im0, dtype=torch.float16).to(device).permute(2, 0, 1).flip(0).contiguous()
+                    / 255
+                    if retina_masks
+                    else im[i],
+                )
+                # Write results
+                for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
+                    if save_txt:  # Write to file
+                        seg = segments[j].reshape(-1)  # (n,2) to (n*2)
+                        line = (cls, *seg, conf) if save_conf else (cls, *seg)  # label format
+                        with open(f"{txt_path}.txt", "a") as f:
+                            f.write(("%g " * len(line)).rstrip() % line + "\n")
+                    if save_img or save_crop or view_img:  # Add bbox to image
+                        c = int(cls)  # integer class
+                        label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
+                        annotator.box_label(xyxy, label, color=colors(c, True))
+                        # annotator.draw.polygon(segments[j], outline=colors(c, True), width=3)
+                    if save_crop:
+                        save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True)
+            # Stream results
+            im0 = annotator.result()
+            if view_img:
+                if platform.system() == "Linux" and p not in windows:
+                    windows.append(p)
+                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
+                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
+                cv2.imshow(str(p), im0)
+                if cv2.waitKey(1) == ord("q"):  # 1 millisecond
+                    exit()
+            # Save results (image with detections)
+            if save_img:
+                if dataset.mode == "image":
+                    cv2.imwrite(save_path, im0)
+                else:  # 'video' or 'stream'
+                    if vid_path[i] != save_path:  # new video
+                        vid_path[i] = save_path
+                        if isinstance(vid_writer[i], cv2.VideoWriter):
+                            vid_writer[i].release()  # release previous video writer
+                        if vid_cap:  # video
+                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
+                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                        else:  # stream
+                            fps, w, h = 30, im0.shape[1], im0.shape[0]
+                        save_path = str(Path(save_path).with_suffix(".mp4"))  # force *.mp4 suffix on results videos
+                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+                    vid_writer[i].write(im0)
+        # Print time (inference-only)
+        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
+    # Print results
+    t = tuple(x.t / seen * 1e3 for x in dt)  # speeds per image
+    LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
+    if save_txt or save_img:
+        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
+    if update:
+        # NOTE(review): indexes weights as a sequence; the signature default is a single
+        # Path, which does not support [0] -- confirm callers always pass a list here.
+        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
+def parse_opt():
+    """Parses command-line options for YOLOv5 inference including model paths, data sources, inference settings, and
+    output preferences.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
+    parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
+    parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
+    parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
+    parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
+    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
+    parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--view-img", action="store_true", help="show results")
+    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
+    parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
+    parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")
+    parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
+    parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
+    parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
+    parser.add_argument("--augment", action="store_true", help="augmented inference")
+    parser.add_argument("--visualize", action="store_true", help="visualize features")
+    parser.add_argument("--update", action="store_true", help="update all models")
+    parser.add_argument("--project", default=ROOT / "runs/predict-seg", help="save results to project/name")
+    parser.add_argument("--name", default="exp", help="save results to project/name")
+    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")
+    parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")
+    parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")
+    parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
+    parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
+    parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
+    parser.add_argument("--retina-masks", action="store_true", help="whether to plot masks in native resolution")
+    opt = parser.parse_args()
+    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
+    print_args(vars(opt))
+    return opt
+def main(opt):
+    """Executes YOLOv5 model inference with given options, checking for requirements before launching."""
+    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
+    run(**vars(opt))
+# Script entry point: parse the command-line options, then hand them to main().
+if __name__ == "__main__":
+    opt = parse_opt()
+    main(opt)
--- a/segment/train.py
+++ b/segment/train.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Train a YOLOv5 segment model on a segment dataset. Models and datasets download automatically from the latest YOLOv5
+release.
+Usage - Single-GPU training:
+    $ python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640  # from pretrained (recommended)
+    $ python segment/train.py --data coco128-seg.yaml --weights '' --cfg yolov5s-seg.yaml --img 640  # from scratch
+Usage - Multi-GPU DDP training:
+    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3
+Models:     https://github.com/ultralytics/yolov5/tree/master/models
+Datasets:   https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial:   https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
+"""
+import argparse
+import math
+import os
+import random
+import subprocess
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.optim import lr_scheduler
+from tqdm import tqdm
+# Path bootstrap: this must run before the project imports below (segment.val, models.*, utils.*),
+# which are resolved through the ROOT entry appended to sys.path here.
+FILE = Path(__file__).resolve()  # absolute path of this script
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative (to the current working directory)
+import segment.val as validate  # for end-of-epoch mAP
+from models.experimental import attempt_load
+from models.yolo import SegmentationModel
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.downloads import attempt_download, is_url
+from utils.general import (
+    LOGGER,
+    TQDM_BAR_FORMAT,
+    check_amp,
+    check_dataset,
+    check_file,
+    check_git_info,
+    check_git_status,
+    check_img_size,
+    check_requirements,
+    check_suffix,
+    check_yaml,
+    colorstr,
+    get_latest_run,
+    increment_path,
+    init_seeds,
+    intersect_dicts,
+    labels_to_class_weights,
+    labels_to_image_weights,
+    one_cycle,
+    print_args,
+    print_mutation,
+    strip_optimizer,
+    yaml_save,
+)
+from utils.loggers import GenericLogger
+from utils.plots import plot_evolve, plot_labels
+from utils.segment.dataloaders import create_dataloader
+from utils.segment.loss import ComputeLoss
+from utils.segment.metrics import KEYS, fitness
+from utils.segment.plots import plot_images_and_masks, plot_results_with_masks
+from utils.torch_utils import (
+    EarlyStopping,
+    ModelEMA,
+    de_parallel,
+    select_device,
+    smart_DDP,
+    smart_optimizer,
+    smart_resume,
+    torch_distributed_zero_first,
+)
+# Distributed-training settings read from the environment once at import time.
+# The defaults (-1, -1, 1) apply when the variables are unset; train() treats RANK == -1 as "not DDP".
+LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv("RANK", -1))
+WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
+# Result of check_git_info(), computed at import time and stored in every checkpoint under the "git" key.
+GIT_INFO = check_git_info()
+def train(hyp, opt, device, callbacks):
+    """
+    Trains a YOLOv5 segmentation model: builds the model, optimizer, scheduler and dataloaders, runs the epoch loop
+    with per-epoch validation, logging and checkpointing, then validates the best checkpoint.
+
+    Args:
+        hyp: Path to a hyperparameter YAML file (str) or a hyperparameter dict. A dict passed in is modified in place
+            (weight_decay, box, cls, obj and label_smoothing are rescaled/overwritten); `opt.hyp` keeps a copy taken
+            before that scaling.
+        opt: Options namespace (as produced by `parse_opt`); must already carry `save_dir`. `opt.hyp` is set here.
+        device: Device the model and batches are moved to; `device.type != "cpu"` enables the CUDA-only paths.
+        callbacks: Passed through to `validate.run`; the `callbacks.run(...)` hooks in this function are commented out.
+
+    Returns:
+        The `results` sequence from the most recent `validate.run` call (the final validation of best.pt when that
+        file exists), or the zero-initialised 12-tuple if no validation ran in this process.
+
+    Side effects:
+        Creates the save directory, writes hyp.yaml/opt.yaml (unless evolving) and the last.pt/best.pt/epochN.pt
+        checkpoints, and logs through `GenericLogger` on RANK -1/0.
+    """
+    # Unpack the options used throughout this function into locals
+    (
+        save_dir,
+        epochs,
+        batch_size,
+        weights,
+        single_cls,
+        evolve,
+        data,
+        cfg,
+        resume,
+        noval,
+        nosave,
+        workers,
+        freeze,
+        mask_ratio,
+    ) = (
+        Path(opt.save_dir),
+        opt.epochs,
+        opt.batch_size,
+        opt.weights,
+        opt.single_cls,
+        opt.evolve,
+        opt.data,
+        opt.cfg,
+        opt.resume,
+        opt.noval,
+        opt.nosave,
+        opt.workers,
+        opt.freeze,
+        opt.mask_ratio,
+    )
+    # callbacks.run('on_pretrain_routine_start')
+    # Directories
+    w = save_dir / "weights"  # weights dir
+    # When evolving only save_dir itself (w.parent) is created, not the weights sub-directory
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    last, best = w / "last.pt", w / "best.pt"
+    # Hyperparameters
+    if isinstance(hyp, str):
+        with open(hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+    LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
+    opt.hyp = hyp.copy()  # for saving hyps to checkpoints
+    # Save run settings
+    if not evolve:
+        yaml_save(save_dir / "hyp.yaml", hyp)
+        yaml_save(save_dir / "opt.yaml", vars(opt))
+    # Loggers
+    data_dict = None
+    # `logger` only exists on RANK -1/0; every later use of it sits behind a matching rank check
+    if RANK in {-1, 0}:
+        logger = GenericLogger(opt=opt, console_logger=LOGGER)
+    # Config
+    plots = not evolve and not opt.noplots  # create plots
+    overlap = not opt.no_overlap
+    cuda = device.type != "cpu"
+    # The seed is offset by RANK, so each process is seeded differently
+    init_seeds(opt.seed + 1 + RANK, deterministic=True)
+    with torch_distributed_zero_first(LOCAL_RANK):
+        # data_dict is still None at this point, so check_dataset(data) always runs here
+        data_dict = data_dict or check_dataset(data)  # check if None
+    train_path, val_path = data_dict["train"], data_dict["val"]
+    nc = 1 if single_cls else int(data_dict["nc"])  # number of classes
+    names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"]  # class names
+    is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt")  # COCO dataset
+    # Model
+    check_suffix(weights, ".pt")  # check weights
+    pretrained = weights.endswith(".pt")
+    if pretrained:
+        with torch_distributed_zero_first(LOCAL_RANK):
+            weights = attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location="cpu")  # load checkpoint to CPU to avoid CUDA memory leak
+        model = SegmentationModel(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)
+        # Anchor entries are left out of the transfer when a cfg or hyp anchors are given and this is not a resume
+        exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else []  # exclude keys
+        csd = ckpt["model"].float().state_dict()  # checkpoint state_dict as FP32
+        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(csd, strict=False)  # load
+        LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}")  # report
+    else:
+        model = SegmentationModel(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
+    amp = check_amp(model)  # check AMP
+    # Freeze
+    # A single --freeze value N expands to layers 0..N-1; several values are taken as explicit layer indices
+    freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
+        if any(x in k for x in freeze):
+            LOGGER.info(f"freezing {k}")
+            v.requires_grad = False
+    # Image size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+    # Batch size
+    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
+        batch_size = check_train_batch_size(model, imgsz, amp)
+        logger.update_params({"batch_size": batch_size})
+        # loggers.on_params_update({"batch_size": batch_size})
+    # Optimizer
+    nbs = 64  # nominal batch size
+    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
+    hyp["weight_decay"] *= batch_size * accumulate / nbs  # scale weight_decay
+    optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"])
+    # Scheduler
+    if opt.cos_lr:
+        lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
+    else:
+        lf = lambda x: (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"]  # linear
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)
+    # EMA
+    ema = ModelEMA(model) if RANK in {-1, 0} else None
+    # Resume
+    best_fitness, start_epoch = 0.0, 0
+    if pretrained:
+        if resume:
+            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
+        # The checkpoint and its state_dict are no longer needed once weights (and resume state) are loaded
+        del ckpt, csd
+    # DP mode
+    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
+        LOGGER.warning(
+            "WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n"
+            "See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get started."
+        )
+        model = torch.nn.DataParallel(model)
+    # SyncBatchNorm
+    if opt.sync_bn and cuda and RANK != -1:
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
+        LOGGER.info("Using SyncBatchNorm()")
+    # Trainloader
+    # batch_size is the total across processes, so each process loads batch_size // WORLD_SIZE
+    train_loader, dataset = create_dataloader(
+        train_path,
+        imgsz,
+        batch_size // WORLD_SIZE,
+        gs,
+        single_cls,
+        hyp=hyp,
+        augment=True,
+        cache=None if opt.cache == "val" else opt.cache,
+        rect=opt.rect,
+        rank=LOCAL_RANK,
+        workers=workers,
+        image_weights=opt.image_weights,
+        quad=opt.quad,
+        prefix=colorstr("train: "),
+        shuffle=True,
+        mask_downsample_ratio=mask_ratio,
+        overlap_mask=overlap,
+    )
+    labels = np.concatenate(dataset.labels, 0)
+    mlc = int(labels[:, 0].max())  # max label class
+    assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
+    # Process 0
+    if RANK in {-1, 0}:
+        # Validation loader exists only on RANK -1/0; only the loader ([0]) of the returned pair is kept
+        val_loader = create_dataloader(
+            val_path,
+            imgsz,
+            batch_size // WORLD_SIZE * 2,
+            gs,
+            single_cls,
+            hyp=hyp,
+            cache=None if noval else opt.cache,
+            rect=True,
+            rank=-1,
+            workers=workers * 2,
+            pad=0.5,
+            mask_downsample_ratio=mask_ratio,
+            overlap_mask=overlap,
+            prefix=colorstr("val: "),
+        )[0]
+        if not resume:
+            if not opt.noautoanchor:
+                check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz)  # run AutoAnchor
+            model.half().float()  # pre-reduce anchor precision
+            if plots:
+                plot_labels(labels, names, save_dir)
+        # callbacks.run('on_pretrain_routine_end', labels, names)
+    # DDP mode
+    if cuda and RANK != -1:
+        model = smart_DDP(model)
+    # Model attributes
+    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
+    hyp["box"] *= 3 / nl  # scale to layers
+    hyp["cls"] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    hyp["label_smoothing"] = opt.label_smoothing
+    model.nc = nc  # attach number of classes to model
+    model.hyp = hyp  # attach hyperparameters to model
+    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
+    model.names = names
+    # Start training
+    t0 = time.time()
+    nb = len(train_loader)  # number of batches
+    nw = max(round(hyp["warmup_epochs"] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
+    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    last_opt_step = -1
+    maps = np.zeros(nc)  # mAP per class
+    # 12 placeholder values, used until the first validation overwrites them.
+    # NOTE(review): the trailing comment names fewer than 12 entries; confirm the layout against segment/val.py
+    results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+    scheduler.last_epoch = start_epoch - 1  # do not move
+    scaler = torch.cuda.amp.GradScaler(enabled=amp)
+    stopper, stop = EarlyStopping(patience=opt.patience), False
+    compute_loss = ComputeLoss(model, overlap=overlap)  # init loss class
+    # callbacks.run('on_train_start')
+    LOGGER.info(
+        f'Image sizes {imgsz} train, {imgsz} val\n'
+        f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
+        f"Logging results to {colorstr('bold', save_dir)}\n"
+        f'Starting training for {epochs} epochs...'
+    )
+    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        # callbacks.run('on_train_epoch_start')
+        model.train()
+        # Update image weights (optional, single-GPU only)
+        if opt.image_weights:
+            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+        # Update mosaic border (optional)
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+        mloss = torch.zeros(4, device=device)  # mean losses
+        if RANK != -1:
+            train_loader.sampler.set_epoch(epoch)
+        pbar = enumerate(train_loader)
+        LOGGER.info(
+            ("\n" + "%11s" * 8)
+            % ("Epoch", "GPU_mem", "box_loss", "seg_loss", "obj_loss", "cls_loss", "Instances", "Size")
+        )
+        if RANK in {-1, 0}:
+            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
+        optimizer.zero_grad()
+        for i, (imgs, targets, paths, _, masks) in pbar:  # batch ------------------------------------------------------
+            # callbacks.run('on_train_batch_start')
+            ni = i + nb * epoch  # number integrated batches (since train start)
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
+                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
+                for j, x in enumerate(optimizer.param_groups):
+                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                    x["lr"] = np.interp(ni, xi, [hyp["warmup_bias_lr"] if j == 0 else 0.0, x["initial_lr"] * lf(epoch)])
+                    if "momentum" in x:
+                        x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
+            # Multi-scale
+            if opt.multi_scale:
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
+                sf = sz / max(imgs.shape[2:])  # scale factor
+                if sf != 1:
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
+                    imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
+            # Forward
+            with torch.cuda.amp.autocast(amp):
+                pred = model(imgs)  # forward
+                loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float())
+                if RANK != -1:
+                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
+                if opt.quad:
+                    loss *= 4.0
+            # Backward
+            scaler.scale(loss).backward()
+            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
+            # Step only once `accumulate` batches have passed since the last optimizer step (gradient accumulation)
+            if ni - last_opt_step >= accumulate:
+                scaler.unscale_(optimizer)  # unscale gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
+                scaler.step(optimizer)  # optimizer.step
+                scaler.update()
+                optimizer.zero_grad()
+                if ema:
+                    ema.update(model)
+                last_opt_step = ni
+            # Log
+            if RANK in {-1, 0}:
+                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+                mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
+                pbar.set_description(
+                    ("%11s" * 2 + "%11.4g" * 6)
+                    % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])
+                )
+                # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths)
+                # if callbacks.stop_training:
+                #    return
+                # Mosaic plots
+                if plots:
+                    # The first three batches are saved as images; they are sent to the logger at batch 10
+                    if ni < 3:
+                        plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg")
+                    if ni == 10:
+                        files = sorted(save_dir.glob("train*.jpg"))
+                        logger.log_images(files, "Mosaics", epoch)
+            # end batch ------------------------------------------------------------------------------------------------
+        # Scheduler
+        lr = [x["lr"] for x in optimizer.param_groups]  # for loggers
+        scheduler.step()
+        if RANK in {-1, 0}:
+            # mAP
+            # callbacks.run('on_train_epoch_end', epoch=epoch)
+            ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
+            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
+            # With noval set, validation runs only on the final epoch; otherwise results/maps keep their prior values
+            if not noval or final_epoch:  # Calculate mAP
+                results, maps, _ = validate.run(
+                    data_dict,
+                    batch_size=batch_size // WORLD_SIZE * 2,
+                    imgsz=imgsz,
+                    half=amp,
+                    model=ema.ema,
+                    single_cls=single_cls,
+                    dataloader=val_loader,
+                    save_dir=save_dir,
+                    plots=False,
+                    callbacks=callbacks,
+                    compute_loss=compute_loss,
+                    mask_downsample_ratio=mask_ratio,
+                    overlap=overlap,
+                )
+            # Update best mAP
+            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
+            if fi > best_fitness:
+                best_fitness = fi
+            log_vals = list(mloss) + list(results) + lr
+            # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
+            # Log val metrics and media
+            metrics_dict = dict(zip(KEYS, log_vals))
+            logger.log_metrics(metrics_dict, epoch)
+            # Save model
+            # Saved every epoch unless nosave is set; with nosave, only on the final epoch and only when not evolving
+            if (not nosave) or (final_epoch and not evolve):  # if save
+                ckpt = {
+                    "epoch": epoch,
+                    "best_fitness": best_fitness,
+                    "model": deepcopy(de_parallel(model)).half(),
+                    "ema": deepcopy(ema.ema).half(),
+                    "updates": ema.updates,
+                    "optimizer": optimizer.state_dict(),
+                    "opt": vars(opt),
+                    "git": GIT_INFO,  # {remote, branch, commit} if a git repo
+                    "date": datetime.now().isoformat(),
+                }
+                # Save last, best and delete
+                torch.save(ckpt, last)
+                # best.pt is rewritten whenever this epoch's fitness equals the best fitness so far
+                if best_fitness == fi:
+                    torch.save(ckpt, best)
+                if opt.save_period > 0 and epoch % opt.save_period == 0:
+                    torch.save(ckpt, w / f"epoch{epoch}.pt")
+                    logger.log_model(w / f"epoch{epoch}.pt")
+                del ckpt
+                # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
+        # EarlyStopping
+        if RANK != -1:  # if DDP training
+            broadcast_list = [stop if RANK == 0 else None]
+            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+            if RANK != 0:
+                stop = broadcast_list[0]
+        if stop:
+            break  # must break all DDP ranks
+        # end epoch ----------------------------------------------------------------------------------------------------
+    # end training -----------------------------------------------------------------------------------------------------
+    if RANK in {-1, 0}:
+        # NOTE(review): `epoch` is only bound if the loop above ran at least once (start_epoch < epochs)
+        LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
+        for f in last, best:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+                # Only best.pt is re-validated; its results replace `results` and become the return value
+                if f is best:
+                    LOGGER.info(f"\nValidating {f}...")
+                    results, _, _ = validate.run(
+                        data_dict,
+                        batch_size=batch_size // WORLD_SIZE * 2,
+                        imgsz=imgsz,
+                        model=attempt_load(f, device).half(),
+                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools at iou 0.65
+                        single_cls=single_cls,
+                        dataloader=val_loader,
+                        save_dir=save_dir,
+                        save_json=is_coco,
+                        verbose=True,
+                        plots=plots,
+                        callbacks=callbacks,
+                        compute_loss=compute_loss,
+                        mask_downsample_ratio=mask_ratio,
+                        overlap=overlap,
+                    )  # val best model with plots
+                    if is_coco:
+                        # callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
+                        metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr))
+                        logger.log_metrics(metrics_dict, epoch)
+        # callbacks.run('on_train_end', last, best, epoch, results)
+        # on train end callback using genericLogger
+        # KEYS[4:16] selects 12 keys, matching the 12-entry `results`; logged at step `epochs` rather than `epoch`
+        logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs)
+        if not opt.evolve:
+            logger.log_model(best, epoch)
+        if plots:
+            plot_results_with_masks(file=save_dir / "results.csv")  # save results.png
+            files = ["results.png", "confusion_matrix.png", *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R"))]
+            files = [(save_dir / f) for f in files if (save_dir / f).exists()]  # filter
+            LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
+            logger.log_images(files, "Results", epoch + 1)
+            logger.log_images(sorted(save_dir.glob("val*.jpg")), "Validation", epoch + 1)
+    torch.cuda.empty_cache()
+    return results
+def parse_opt(known=False):
+    """
+    Parses command line arguments for training configurations, returning parsed arguments.
+    Supports both known and unknown args.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--weights", type=str, default=ROOT / "yolov5s-seg.pt", help="initial weights path")
+    parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
+    parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
+    parser.add_argument("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path")
+    parser.add_argument("--epochs", type=int, default=100, help="total training epochs")
+    parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch")
+    parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)")
+    parser.add_argument("--rect", action="store_true", help="rectangular training")
+    parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training")
+    parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
+    parser.add_argument("--noval", action="store_true", help="only validate final epoch")
+    parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
+    parser.add_argument("--noplots", action="store_true", help="save no plot files")
+    parser.add_argument("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations")
+    parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
+    parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
+    parser.add_argument("--image-weights", action="store_true", help="use weighted image selection for training")
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
+    parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class")
+    parser.add_argument("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer")
+    parser.add_argument("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode")
+    parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
+    parser.add_argument("--project", default=ROOT / "runs/train-seg", help="save to project/name")
+    parser.add_argument("--name", default="exp", help="save to project/name")
+    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    parser.add_argument("--quad", action="store_true", help="quad dataloader")
+    parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler")
+    parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
+    parser.add_argument("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)")
+    parser.add_argument("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2")
+    parser.add_argument("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)")
+    parser.add_argument("--seed", type=int, default=0, help="Global training seed")
+    parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
+    # Instance Segmentation Args
+    parser.add_argument("--mask-ratio", type=int, default=4, help="Downsample the truth masks to saving memory")
+    parser.add_argument("--no-overlap", action="store_true", help="Overlap masks train faster at slightly less mAP")
+    return parser.parse_known_args()[0] if known else parser.parse_args()
+def main(opt, callbacks=Callbacks()):
+    """Initializes training or evolution of YOLOv5 models based on provided configuration and options."""
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements(ROOT / "requirements.txt")
+    # Resume
+    if opt.resume and not opt.evolve:  # resume from specified or most recent last.pt
+        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
+        opt_yaml = last.parent.parent / "opt.yaml"  # train options yaml
+        opt_data = opt.data  # original dataset
+        if opt_yaml.is_file():
+            with open(opt_yaml, errors="ignore") as f:
+                d = yaml.safe_load(f)
+        else:
+            d = torch.load(last, map_location="cpu")["opt"]
+        opt = argparse.Namespace(**d)  # replace
+        opt.cfg, opt.weights, opt.resume = "", str(last), True  # reinstate
+        if is_url(opt_data):
+            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
+            check_file(opt.data),
+            check_yaml(opt.cfg),
+            check_yaml(opt.hyp),
+            str(opt.weights),
+            str(opt.project),
+        )  # checks
+        assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
+        if opt.evolve:
+            if opt.project == str(ROOT / "runs/train-seg"):  # if default project name, rename to runs/evolve-seg
+                opt.project = str(ROOT / "runs/evolve-seg")
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == "cfg":
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
+    # DDP mode
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    if LOCAL_RANK != -1:
+        msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
+        assert not opt.image_weights, f"--image-weights {msg}"
+        assert not opt.evolve, f"--evolve {msg}"
+        assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
+        assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
+        assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
+        torch.cuda.set_device(LOCAL_RANK)
+        device = torch.device("cuda", LOCAL_RANK)
+        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
+    # Train
+    if not opt.evolve:
+        train(opt.hyp, opt, device, callbacks)
+    # Evolve hyperparameters (optional)
+    else:
+        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
+        meta = {
+            "lr0": (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            "lrf": (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            "momentum": (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
+            "weight_decay": (1, 0.0, 0.001),  # optimizer weight decay
+            "warmup_epochs": (1, 0.0, 5.0),  # warmup epochs (fractions ok)
+            "warmup_momentum": (1, 0.0, 0.95),  # warmup initial momentum
+            "warmup_bias_lr": (1, 0.0, 0.2),  # warmup initial bias lr
+            "box": (1, 0.02, 0.2),  # box loss gain
+            "cls": (1, 0.2, 4.0),  # cls loss gain
+            "cls_pw": (1, 0.5, 2.0),  # cls BCELoss positive_weight
+            "obj": (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            "obj_pw": (1, 0.5, 2.0),  # obj BCELoss positive_weight
+            "iou_t": (0, 0.1, 0.7),  # IoU training threshold
+            "anchor_t": (1, 2.0, 8.0),  # anchor-multiple threshold
+            "anchors": (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            "fl_gamma": (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            "hsv_h": (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            "hsv_s": (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            "hsv_v": (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            "degrees": (1, 0.0, 45.0),  # image rotation (+/- deg)
+            "translate": (1, 0.0, 0.9),  # image translation (+/- fraction)
+            "scale": (1, 0.0, 0.9),  # image scale (+/- gain)
+            "shear": (1, 0.0, 10.0),  # image shear (+/- deg)
+            "perspective": (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            "flipud": (1, 0.0, 1.0),  # image flip up-down (probability)
+            "fliplr": (0, 0.0, 1.0),  # image flip left-right (probability)
+            "mosaic": (1, 0.0, 1.0),  # image mixup (probability)
+            "mixup": (1, 0.0, 1.0),  # image mixup (probability)
+            "copy_paste": (1, 0.0, 1.0),
+        }  # segment copy-paste (probability)
+        with open(opt.hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+            if "anchors" not in hyp:  # anchors commented in hyp.yaml
+                hyp["anchors"] = 3
+        if opt.noautoanchor:
+            del hyp["anchors"], meta["anchors"]
+        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
+        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
+        evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv"
+        if opt.bucket:
+            # download evolve.csv if exists
+            subprocess.run(
+                [
+                    "gsutil",
+                    "cp",
+                    f"gs://{opt.bucket}/evolve.csv",
+                    str(evolve_csv),
+                ]
+            )
+        for _ in range(opt.evolve):  # generations to evolve
+            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
+                # Select parent(s)
+                parent = "single"  # parent selection method: 'single' or 'weighted'
+                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=",", skiprows=1)
+                n = min(5, len(x))  # number of previous results to consider
+                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
+                w = fitness(x) - fitness(x).min() + 1e-6  # weights (sum > 0)
+                if parent == "single" or len(x) == 1:
+                    # x = x[random.randint(0, n - 1)]  # random selection
+                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
+                elif parent == "weighted":
+                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
+                # Mutate
+                mp, s = 0.8, 0.2  # mutation probability, sigma
+                npr = np.random
+                npr.seed(int(time.time()))
+                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
+                ng = len(meta)
+                v = np.ones(ng)
+                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
+                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
+                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
+                    hyp[k] = float(x[i + 12] * v[i])  # mutate
+            # Constrain to limits
+            for k, v in meta.items():
+                hyp[k] = max(hyp[k], v[1])  # lower limit
+                hyp[k] = min(hyp[k], v[2])  # upper limit
+                hyp[k] = round(hyp[k], 5)  # significant digits
+            # Train mutation
+            results = train(hyp.copy(), opt, device, callbacks)
+            callbacks = Callbacks()
+            # Write mutation results
+            print_mutation(KEYS[4:16], results, hyp.copy(), save_dir, opt.bucket)
+        # Plot results
+        plot_evolve(evolve_csv)
+        LOGGER.info(
+            f'Hyperparameter evolution finished {opt.evolve} generations\n'
+            f"Results saved to {colorstr('bold', save_dir)}\n"
+            f'Usage example: $ python train.py --hyp {evolve_yaml}'
+        )
+def run(**kwargs):
+    """
+    Executes YOLOv5 training with given parameters, altering options programmatically; returns updated options.
+    Example: mport train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
+    """
+    opt = parse_opt(True)
+    for k, v in kwargs.items():
+        setattr(opt, k, v)
+    main(opt)
+    return opt
+if __name__ == "__main__":
+    # Script entry point: parse the command-line options and hand them to main().
+    opt = parse_opt()
+    main(opt)
--- a/segment/tutorial.ipynb
+++ b/segment/tutorial.ipynb
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "t6MPjfT5NrKQ"
+      },
+      "source": [
+        "<div align=\"center\">\n",
+        "\n",
+        "  <a href=\"https://ultralytics.com/yolov5\" target=\"_blank\">\n",
+        "    <img width=\"1024\" src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png\"></a>\n",
+        "\n",
+        "\n",
+        "<br>\n",
+        "  <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a>\n",
+        "  <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/segment/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
+        "  <a href=\"https://www.kaggle.com/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
+        "<br>\n",
+        "\n",
+        "This <a href=\"https://github.com/ultralytics/yolov5\">YOLOv5</a> 🚀 notebook by <a href=\"https://ultralytics.com\">Ultralytics</a> presents simple train, validate and predict examples to help start your AI adventure.<br>See <a href=\"https://github.com/ultralytics/yolov5/issues/new/choose\">GitHub</a> for community support or <a href=\"https://ultralytics.com/contact\">contact us</a> for professional support.\n",
+        "\n",
+        "</div>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "7mGmQbAO5pQb"
+      },
+      "source": [
+        "# Setup\n",
+        "\n",
+        "Clone GitHub [repository](https://github.com/ultralytics/yolov5), install [dependencies](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) and check PyTorch and GPU."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wbvMlHd_QwMG",
+        "outputId": "171b23f0-71b9-4cbf-b666-6fa2ecef70c8"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 22.6/78.2 GB disk)\n"
+          ]
+        }
+      ],
+      "source": [
+        "!git clone https://github.com/ultralytics/yolov5  # clone\n",
+        "%cd yolov5\n",
+        "%pip install -qr requirements.txt comet_ml  # install\n",
+        "\n",
+        "import torch\n",
+        "import utils\n",
+        "display = utils.notebook_init()  # checks"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4JnkELT0cIJg"
+      },
+      "source": [
+        "# 1. Predict\n",
+        "\n",
+        "`segment/predict.py` runs YOLOv5 instance segmentation inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/predict-seg`. Example inference sources are:\n",
+        "\n",
+        "```shell\n",
+        "python segment/predict.py --source 0  # webcam\n",
+        "                             img.jpg  # image \n",
+        "                             vid.mp4  # video\n",
+        "                             screen  # screenshot\n",
+        "                             path/  # directory\n",
+        "                             'path/*.jpg'  # glob\n",
+        "                             'https://youtu.be/LNwODJXcvt4'  # YouTube\n",
+        "                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "zR9ZbuQCH7FX",
+        "outputId": "3f67f1c7-f15e-4fa5-d251-967c3b77eaad"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[34m\u001b[1msegment/predict: \u001b[0mweights=['yolov5s-seg.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/predict-seg, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1, retina_masks=False\n",
+            "YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
+            "\n",
+            "Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-seg.pt to yolov5s-seg.pt...\n",
+            "100% 14.9M/14.9M [00:01<00:00, 12.0MB/s]\n",
+            "\n",
+            "Fusing layers... \n",
+            "YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
+            "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, 18.2ms\n",
+            "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, 13.4ms\n",
+            "Speed: 0.5ms pre-process, 15.8ms inference, 18.5ms NMS per image at shape (1, 3, 640, 640)\n",
+            "Results saved to \u001b[1mruns/predict-seg/exp\u001b[0m\n"
+          ]
+        }
+      ],
+      "source": [
+        "!python segment/predict.py --weights yolov5s-seg.pt --img 640 --conf 0.25 --source data/images\n",
+        "#display.Image(filename='runs/predict-seg/exp/zidane.jpg', width=600)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hkAzDWJ7cWTr"
+      },
+      "source": [
+        "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
+        "<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/199030123-08c72f8d-6871-4116-8ed3-c373642cf28e.jpg\" width=\"600\">"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0eq1SMWl6Sfn"
+      },
+      "source": [
+        "# 2. Validate\n",
+        "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WQPtK1QYVaD_",
+        "outputId": "9d751d8c-bee8-4339-cf30-9854ca530449"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip  ...\n",
+            "Downloading http://images.cocodataset.org/zips/val2017.zip ...\n",
+            "######################################################################## 100.0%\n",
+            "######################################################################## 100.0%\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Download COCO val\n",
+        "!bash data/scripts/get_coco.sh --val --segments  # download (780M - 5000 images)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "X58w8JLpMnjH",
+        "outputId": "a140d67a-02da-479e-9ddb-7d54bf9e407a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[34m\u001b[1msegment/val: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5s-seg.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val-seg, name=exp, exist_ok=False, half=True, dnn=False\n",
+            "YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
+            "\n",
+            "Fusing layers... \n",
+            "YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100% 5000/5000 [00:03<00:00, 1361.31it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n",
+            "                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% 157/157 [01:54<00:00,  1.37it/s]\n",
+            "                   all       5000      36335      0.673      0.517      0.566      0.373      0.672       0.49      0.532      0.319\n",
+            "Speed: 0.6ms pre-process, 4.4ms inference, 2.9ms NMS per image at shape (32, 3, 640, 640)\n",
+            "Results saved to \u001b[1mruns/val-seg/exp\u001b[0m\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Validate YOLOv5s-seg on COCO val\n",
+        "!python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 --half"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ZY2VXXXu74w5"
+      },
+      "source": [
+        "# 3. Train\n",
+        "\n",
+        "<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/im/integrations-loop.png\"/></a></p>\n",
+        "Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n",
+        "<br><br>\n",
+        "\n",
+        "Train a YOLOv5s-seg model on the [COCO128](https://www.kaggle.com/ultralytics/coco128) dataset with `--data coco128-seg.yaml`, starting from pretrained `--weights yolov5s-seg.pt`, or from randomly initialized `--weights '' --cfg yolov5s-seg.yaml`.\n",
+        "\n",
+        "- **Pretrained [Models](https://github.com/ultralytics/yolov5/tree/master/models)** are downloaded\n",
+        "automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases)\n",
+        "- **[Datasets](https://github.com/ultralytics/yolov5/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov5/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov5/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov5/blob/master/data/SKU-110K.yaml).\n",
+        "- **Training Results** are saved to `runs/train-seg/` with incrementing run directories, i.e. `runs/train-seg/exp2`, `runs/train-seg/exp3` etc.\n",
+        "<br><br>\n",
+        "\n",
+        "A **Mosaic Dataloader** is used for training which combines 4 images into 1 mosaic.\n",
+        "\n",
+        "## Train on Custom Data with Roboflow 🌟 NEW\n",
+        "\n",
+        "[Roboflow](https://roboflow.com/?ref=ultralytics) enables you to easily **organize, label, and prepare** a high quality dataset with your own custom data. Roboflow also makes it easy to establish an active learning pipeline, collaborate with your team on dataset improvement, and integrate directly into your model building workflow with the `roboflow` pip package.\n",
+        "\n",
+        "- Custom Training Example: [https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/](https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/?ref=ultralytics)\n",
+        "- Custom Training Notebook: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1JTz7kpmHsg-5qwVz2d2IH3AaenI1tv0N?usp=sharing)\n",
+        "<br>\n",
+        "\n",
+        "<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"480\" src=\"https://robflow-public-assets.s3.amazonaws.com/how-to-train-yolov5-segmentation-annotation.gif\"/></a></p>Label images lightning fast (including with model-assisted labeling)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "i3oKtE4g-aNn"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Select YOLOv5 🚀 logger {run: 'auto'}\n",
+        "logger = 'Comet' #@param ['Comet', 'ClearML', 'TensorBoard']\n",
+        "\n",
+        "if logger == 'Comet':\n",
+        "  %pip install -q comet_ml\n",
+        "  import comet_ml; comet_ml.init()\n",
+        "elif logger == 'ClearML':\n",
+        "  %pip install -q clearml\n",
+        "  import clearml; clearml.browser_login()\n",
+        "elif logger == 'TensorBoard':\n",
+        "  %load_ext tensorboard\n",
+        "  %tensorboard --logdir runs/train"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1NcFxRcFdJ_O",
+        "outputId": "3a3e0cf7-e79c-47a5-c8e7-2d26eeeab988"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[34m\u001b[1msegment/train: \u001b[0mweights=yolov5s-seg.pt, cfg=, data=coco128-seg.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train-seg, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, mask_ratio=4, no_overlap=False\n",
+            "\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
+            "YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
+            "\n",
+            "\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n",
+            "\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train-seg', view at http://localhost:6006/\n",
+            "\n",
+            "Dataset not found ⚠️, missing paths ['/content/datasets/coco128-seg/images/train2017']\n",
+            "Downloading https://ultralytics.com/assets/coco128-seg.zip to coco128-seg.zip...\n",
+            "100% 6.79M/6.79M [00:01<00:00, 6.73MB/s]\n",
+            "Dataset download success ✅ (1.9s), saved to \u001b[1m/content/datasets\u001b[0m\n",
+            "\n",
+            "                 from  n    params  module                                  arguments                     \n",
+            "  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              \n",
+            "  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                \n",
+            "  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   \n",
+            "  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               \n",
+            "  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 \n",
+            "  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              \n",
+            "  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 \n",
+            "  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              \n",
+            "  8                -1  1   1182720  models.common.C3                        [512, 512, 1]                 \n",
+            "  9                -1  1    656896  models.common.SPPF                      [512, 512, 5]                 \n",
+            " 10                -1  1    131584  models.common.Conv                      [512, 256, 1, 1]              \n",
+            " 11                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          \n",
+            " 12           [-1, 6]  1         0  models.common.Concat                    [1]                           \n",
+            " 13                -1  1    361984  models.common.C3                        [512, 256, 1, False]          \n",
+            " 14                -1  1     33024  models.common.Conv                      [256, 128, 1, 1]              \n",
+            " 15                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          \n",
+            " 16           [-1, 4]  1         0  models.common.Concat                    [1]                           \n",
+            " 17                -1  1     90880  models.common.C3                        [256, 128, 1, False]          \n",
+            " 18                -1  1    147712  models.common.Conv                      [128, 128, 3, 2]              \n",
+            " 19          [-1, 14]  1         0  models.common.Concat                    [1]                           \n",
+            " 20                -1  1    296448  models.common.C3                        [256, 256, 1, False]          \n",
+            " 21                -1  1    590336  models.common.Conv                      [256, 256, 3, 2]              \n",
+            " 22          [-1, 10]  1         0  models.common.Concat                    [1]                           \n",
+            " 23                -1  1   1182720  models.common.C3                        [512, 512, 1, False]          \n",
+            " 24      [17, 20, 23]  1    615133  models.yolo.Segment                     [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], 32, 128, [128, 256, 512]]\n",
+            "Model summary: 225 layers, 7621277 parameters, 7621277 gradients, 26.6 GFLOPs\n",
+            "\n",
+            "Transferred 367/367 items from yolov5s-seg.pt\n",
+            "\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n",
+            "\u001b[34m\u001b[1moptimizer:\u001b[0m SGD(lr=0.01) with parameter groups 60 weight(decay=0.0), 63 weight(decay=0.0005), 63 bias\n",
+            "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<00:00, 1389.59it/s]\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco128-seg/labels/train2017.cache\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:00<00:00, 238.86it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<?, ?it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:01<00:00, 98.90it/s]\n",
+            "\n",
+            "\u001b[34m\u001b[1mAutoAnchor: \u001b[0m4.27 anchors/target, 0.994 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅\n",
+            "Plotting labels to runs/train-seg/exp/labels.jpg... \n",
+            "Image sizes 640 train, 640 val\n",
+            "Using 2 dataloader workers\n",
+            "Logging results to \u001b[1mruns/train-seg/exp\u001b[0m\n",
+            "Starting training for 3 epochs...\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   seg_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        0/2      4.92G     0.0417    0.04646    0.06066    0.02126        192        640: 100% 8/8 [00:08<00:00,  1.10s/it]\n",
+            "                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% 4/4 [00:02<00:00,  1.81it/s]\n",
+            "                   all        128        929      0.737      0.649      0.715      0.492      0.719      0.617      0.658      0.408\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   seg_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        1/2      6.29G    0.04157    0.04503    0.05772    0.01777        208        640: 100% 8/8 [00:09<00:00,  1.21s/it]\n",
+            "                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% 4/4 [00:02<00:00,  1.87it/s]\n",
+            "                   all        128        929      0.756      0.674      0.738      0.506      0.725       0.64       0.68      0.422\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   seg_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        2/2      6.29G     0.0425    0.04793    0.06784    0.01863        161        640: 100% 8/8 [00:03<00:00,  2.02it/s]\n",
+            "                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% 4/4 [00:02<00:00,  1.88it/s]\n",
+            "                   all        128        929      0.736      0.694      0.747      0.522      0.769      0.622      0.683      0.427\n",
+            "\n",
+            "3 epochs completed in 0.009 hours.\n",
+            "Optimizer stripped from runs/train-seg/exp/weights/last.pt, 15.6MB\n",
+            "Optimizer stripped from runs/train-seg/exp/weights/best.pt, 15.6MB\n",
+            "\n",
+            "Validating runs/train-seg/exp/weights/best.pt...\n",
+            "Fusing layers... \n",
+            "Model summary: 165 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
+            "                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% 4/4 [00:06<00:00,  1.59s/it]\n",
+            "                   all        128        929      0.738      0.694      0.746      0.522      0.759      0.625      0.682      0.426\n",
+            "                person        128        254      0.845      0.756      0.836       0.55      0.861      0.669      0.759      0.407\n",
+            "               bicycle        128          6      0.475      0.333      0.549      0.341      0.711      0.333      0.526      0.322\n",
+            "                   car        128         46      0.612      0.565      0.539      0.257      0.555      0.435      0.477      0.171\n",
+            "            motorcycle        128          5       0.73        0.8      0.752      0.571      0.747        0.8      0.752       0.42\n",
+            "              airplane        128          6          1      0.943      0.995      0.732       0.92      0.833      0.839      0.555\n",
+            "                   bus        128          7      0.677      0.714      0.722      0.653      0.711      0.714      0.722      0.593\n",
+            "                 train        128          3          1      0.951      0.995      0.551          1      0.884      0.995      0.781\n",
+            "                 truck        128         12      0.555      0.417      0.457      0.285      0.624      0.417      0.397      0.277\n",
+            "                  boat        128          6      0.624        0.5      0.584      0.186          1      0.326      0.412      0.133\n",
+            "         traffic light        128         14      0.513      0.302      0.411      0.247      0.435      0.214      0.376      0.251\n",
+            "             stop sign        128          2      0.824          1      0.995      0.796      0.906          1      0.995      0.747\n",
+            "                 bench        128          9       0.75      0.667      0.763      0.367      0.724      0.585      0.698      0.209\n",
+            "                  bird        128         16      0.961          1      0.995      0.686      0.918      0.938       0.91      0.525\n",
+            "                   cat        128          4      0.771      0.857      0.945      0.752       0.76        0.8      0.945      0.728\n",
+            "                   dog        128          9      0.987      0.778      0.963      0.681          1      0.705       0.89      0.574\n",
+            "                 horse        128          2      0.703          1      0.995      0.697      0.759          1      0.995      0.249\n",
+            "              elephant        128         17      0.916      0.882       0.93      0.691      0.811      0.765      0.829      0.537\n",
+            "                  bear        128          1      0.664          1      0.995      0.995      0.701          1      0.995      0.895\n",
+            "                 zebra        128          4      0.864          1      0.995      0.921      0.879          1      0.995      0.804\n",
+            "               giraffe        128          9      0.883      0.889       0.94      0.683      0.845      0.778       0.78      0.463\n",
+            "              backpack        128          6          1       0.59      0.701      0.372          1      0.474       0.52      0.252\n",
+            "              umbrella        128         18      0.654      0.839      0.887       0.52      0.517      0.556      0.427      0.229\n",
+            "               handbag        128         19       0.54      0.211      0.408      0.221      0.796      0.206      0.396      0.196\n",
+            "                   tie        128          7      0.864      0.857      0.857      0.577      0.925      0.857      0.857      0.534\n",
+            "              suitcase        128          4      0.716          1      0.945      0.647      0.767          1      0.945      0.634\n",
+            "               frisbee        128          5      0.708        0.8      0.761      0.643      0.737        0.8      0.761      0.501\n",
+            "                  skis        128          1      0.691          1      0.995      0.796      0.761          1      0.995      0.199\n",
+            "             snowboard        128          7      0.918      0.857      0.904      0.604       0.32      0.286      0.235      0.137\n",
+            "           sports ball        128          6      0.902      0.667      0.701      0.466      0.727        0.5      0.497      0.471\n",
+            "                  kite        128         10      0.586        0.4      0.511      0.231      0.663      0.394      0.417      0.139\n",
+            "          baseball bat        128          4      0.359        0.5      0.401      0.169      0.631        0.5      0.526      0.133\n",
+            "        baseball glove        128          7          1      0.519       0.58      0.327      0.687      0.286      0.455      0.328\n",
+            "            skateboard        128          5      0.729        0.8      0.862      0.631      0.599        0.6      0.604      0.379\n",
+            "         tennis racket        128          7       0.57      0.714      0.645      0.448      0.608      0.714      0.645      0.412\n",
+            "                bottle        128         18      0.469      0.393      0.537      0.357      0.661      0.389      0.543      0.349\n",
+            "            wine glass        128         16      0.677      0.938      0.866      0.441       0.53      0.625       0.67      0.334\n",
+            "                   cup        128         36      0.777      0.722      0.812      0.466      0.725      0.583      0.762      0.467\n",
+            "                  fork        128          6      0.948      0.333      0.425       0.27      0.527      0.167       0.18      0.102\n",
+            "                 knife        128         16      0.757      0.587      0.669      0.458       0.79        0.5      0.552       0.34\n",
+            "                 spoon        128         22       0.74      0.364      0.559      0.269      0.925      0.364      0.513      0.213\n",
+            "                  bowl        128         28      0.766      0.714      0.725      0.559      0.803      0.584      0.665      0.353\n",
+            "                banana        128          1      0.408          1      0.995      0.398      0.539          1      0.995      0.497\n",
+            "              sandwich        128          2          1          0      0.695      0.536          1          0      0.498      0.448\n",
+            "                orange        128          4      0.467          1      0.995      0.693      0.518          1      0.995      0.663\n",
+            "              broccoli        128         11      0.462      0.455      0.383      0.259      0.548      0.455      0.384      0.256\n",
+            "                carrot        128         24      0.631      0.875       0.77      0.533      0.757      0.909      0.853      0.499\n",
+            "               hot dog        128          2      0.555          1      0.995      0.995      0.578          1      0.995      0.796\n",
+            "                 pizza        128          5       0.89        0.8      0.962      0.796          1      0.778      0.962      0.766\n",
+            "                 donut        128         14      0.695          1      0.893      0.772      0.704          1      0.893      0.696\n",
+            "                  cake        128          4      0.826          1      0.995       0.92      0.862          1      0.995      0.846\n",
+            "                 chair        128         35       0.53      0.571      0.613      0.336       0.67        0.6      0.538      0.271\n",
+            "                 couch        128          6      0.972      0.667      0.833      0.627          1       0.62      0.696      0.394\n",
+            "          potted plant        128         14        0.7      0.857      0.883      0.552      0.836      0.857      0.883      0.473\n",
+            "                   bed        128          3      0.979      0.667       0.83      0.366          1          0       0.83      0.373\n",
+            "          dining table        128         13      0.775      0.308      0.505      0.364      0.644      0.231       0.25     0.0804\n",
+            "                toilet        128          2      0.836          1      0.995      0.846      0.887          1      0.995      0.797\n",
+            "                    tv        128          2        0.6          1      0.995      0.846      0.655          1      0.995      0.896\n",
+            "                laptop        128          3      0.822      0.333      0.445      0.307          1          0      0.392       0.12\n",
+            "                 mouse        128          2          1          0          0          0          1          0          0          0\n",
+            "                remote        128          8      0.745        0.5       0.62      0.459      0.821        0.5      0.624      0.449\n",
+            "            cell phone        128          8      0.686      0.375      0.502      0.272      0.488       0.25       0.28      0.132\n",
+            "             microwave        128          3      0.831          1      0.995      0.722      0.867          1      0.995      0.592\n",
+            "                  oven        128          5      0.439        0.4      0.435      0.294      0.823        0.6      0.645      0.418\n",
+            "                  sink        128          6      0.677        0.5      0.565      0.448      0.722        0.5       0.46      0.362\n",
+            "          refrigerator        128          5      0.533        0.8      0.783      0.524      0.558        0.8      0.783      0.527\n",
+            "                  book        128         29      0.732      0.379      0.423      0.196       0.69      0.207       0.38      0.131\n",
+            "                 clock        128          9      0.889      0.778      0.917      0.677      0.908      0.778      0.875      0.604\n",
+            "                  vase        128          2      0.375          1      0.995      0.995      0.455          1      0.995      0.796\n",
+            "              scissors        128          1          1          0     0.0166    0.00166          1          0          0          0\n",
+            "            teddy bear        128         21      0.813      0.829      0.841      0.457      0.826      0.678      0.786      0.422\n",
+            "            toothbrush        128          5      0.806          1      0.995      0.733      0.991          1      0.995      0.628\n",
+            "Results saved to \u001b[1mruns/train-seg/exp\u001b[0m\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Train YOLOv5s on COCO128 for 3 epochs\n",
+        "!python segment/train.py --img 640 --batch 16 --epochs 3 --data coco128-seg.yaml --weights yolov5s-seg.pt --cache"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "15glLzbQx5u0"
+      },
+      "source": [
+        "# 4. Visualize"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nWOsI5wJR1o3"
+      },
+      "source": [
+        "## Comet Logging and Visualization 🌟 NEW\n",
+        "\n",
+        "[Comet](https://www.comet.com/site/lp/yolov5-with-comet/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab) is now fully integrated with YOLOv5. Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab)! Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!\n",
+        "\n",
+        "Getting started is easy:\n",
+        "```shell\n",
+        "pip install comet_ml  # 1. install\n",
+        "export COMET_API_KEY=<Your API Key>  # 2. paste API key\n",
+        "python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt  # 3. train\n",
+        "```\n",
+        "To learn more about all of the supported Comet features for this integration, check out the [Comet Tutorial](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration). If you'd like to learn more about Comet, head over to our [documentation](https://www.comet.com/docs/v2/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab). Get started by trying out the Comet Colab Notebook:\n",
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)\n",
+        "\n",
+        "<a href=\"https://bit.ly/yolov5-readme-comet2\">\n",
+        "<img alt=\"Comet Dashboard\" src=\"https://user-images.githubusercontent.com/26833433/202851203-164e94e1-2238-46dd-91f8-de020e9d6b41.png\" width=\"1280\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Lay2WsTjNJzP"
+      },
+      "source": [
+        "## ClearML Logging and Automation 🌟 NEW\n",
+        "\n",
+        "[ClearML](https://cutt.ly/yolov5-notebook-clearml) is completely integrated into YOLOv5 to track your experimentation, manage dataset versions and even remotely execute training runs. To enable ClearML (check cells above):\n",
+        "\n",
+        "- `pip install clearml`\n",
+        "- run `clearml-init` to connect to a ClearML server (**deploy your own [open-source server](https://github.com/allegroai/clearml-server)**, or use our [free hosted server](https://cutt.ly/yolov5-notebook-clearml))\n",
+        "\n",
+        "You'll get all the great expected features from an experiment manager: live updates, model upload, experiment comparison etc. but ClearML also tracks uncommitted changes and installed packages for example. Thanks to that ClearML Tasks (which is what we call experiments) are also reproducible on different machines! With only 1 extra line, we can schedule a YOLOv5 training task on a queue to be executed by any number of ClearML Agents (workers).\n",
+        "\n",
+        "You can use ClearML Data to version your dataset and then pass it to YOLOv5 simply using its unique ID. This will help you keep track of your data without adding extra hassle. Explore the [ClearML Tutorial](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration) for details!\n",
+        "\n",
+        "<a href=\"https://cutt.ly/yolov5-notebook-clearml\">\n",
+        "<img alt=\"ClearML Experiment Management UI\" src=\"https://github.com/thepycoder/clearml_screenshots/raw/main/scalars.jpg\" width=\"1280\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-WPvRbS5Swl6"
+      },
+      "source": [
+        "## Local Logging\n",
+        "\n",
+        "Training results are automatically logged with [Tensorboard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) loggers to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc.\n",
+        "\n",
+        "This directory contains train and val statistics, mosaics, labels, predictions and augmented mosaics, as well as metrics and charts including precision-recall (PR) curves and confusion matrices. \n",
+        "\n",
+        "<img alt=\"Local logging results\" src=\"https://user-images.githubusercontent.com/26833433/183222430-e1abd1b7-782c-4cde-b04d-ad52926bf818.jpg\" width=\"1280\"/>\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Zelyeqbyt3GD"
+      },
+      "source": [
+        "# Environments\n",
+        "\n",
+        "YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
+        "\n",
+        "- **Notebooks** with free GPU: <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a> <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
+        "- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/)\n",
+        "- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/)\n",
+        "- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) <a href=\"https://hub.docker.com/r/ultralytics/yolov5\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker\" alt=\"Docker Pulls\"></a>\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6Qu7Iesl0p54"
+      },
+      "source": [
+        "# Status\n",
+        "\n",
+        "![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)\n",
+        "\n",
+        "If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IEijrePND_2I"
+      },
+      "source": [
+        "# Appendix\n",
+        "\n",
+        "Additional content below."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GMusP4OAxFu6"
+      },
+      "outputs": [],
+      "source": [
+        "# YOLOv5 PyTorch HUB Inference (DetectionModels only)\n",
+        "import torch\n",
+        "\n",
+        "model = torch.hub.load('ultralytics/yolov5', 'yolov5s-seg', force_reload=True, trust_repo=True)  # or yolov5n - yolov5x6 or custom\n",
+        "im = 'https://ultralytics.com/images/zidane.jpg'  # file, Path, PIL.Image, OpenCV, nparray, list\n",
+        "results = model(im)  # inference\n",
+        "results.print()  # or .show(), .save(), .crop(), .pandas(), etc."
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "name": "YOLOv5 Segmentation Tutorial",
+      "provenance": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/segment/val.py
+++ b/segment/val.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Validate a trained YOLOv5 segment model on a segment dataset.
+Usage:
+    $ bash data/scripts/get_coco.sh --val --segments  # download COCO-segments val split (1G, 5000 images)
+    $ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640  # validate COCO-segments
+Usage - formats:
+    $ python segment/val.py --weights yolov5s-seg.pt                 # PyTorch
+                                      yolov5s-seg.torchscript        # TorchScript
+                                      yolov5s-seg.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                      yolov5s-seg_openvino_model     # OpenVINO
+                                      yolov5s-seg.engine             # TensorRT
+                                      yolov5s-seg.mlmodel            # CoreML (macOS-only)
+                                      yolov5s-seg_saved_model        # TensorFlow SavedModel
+                                      yolov5s-seg.pb                 # TensorFlow GraphDef
+                                      yolov5s-seg.tflite             # TensorFlow Lite
+                                      yolov5s-seg_edgetpu.tflite     # TensorFlow Edge TPU
+                                      yolov5s-seg_paddle_model       # PaddlePaddle
+"""
+import argparse
+import json
+import os
+import subprocess
+import sys
+from multiprocessing.pool import ThreadPool
+from pathlib import Path
+import numpy as np
+import torch
+from tqdm import tqdm
+# Path bootstrap: locate the repository root from this file's resolved location and make it importable
+# (presumably so the `models.*` / `utils.*` imports that follow resolve when the script is run directly).
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+# ROOT is rebound to a path relative to the current working directory only after the absolute path
+# has been added to sys.path above.
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+import torch.nn.functional as F
+from models.common import DetectMultiBackend
+from models.yolo import SegmentationModel
+from utils.callbacks import Callbacks
+from utils.general import (
+    LOGGER,
+    NUM_THREADS,
+    TQDM_BAR_FORMAT,
+    Profile,
+    check_dataset,
+    check_img_size,
+    check_requirements,
+    check_yaml,
+    coco80_to_coco91_class,
+    colorstr,
+    increment_path,
+    non_max_suppression,
+    print_args,
+    scale_boxes,
+    xywh2xyxy,
+    xyxy2xywh,
+)
+from utils.metrics import ConfusionMatrix, box_iou
+from utils.plots import output_to_target, plot_val_study
+from utils.segment.dataloaders import create_dataloader
+from utils.segment.general import mask_iou, process_mask, process_mask_native, scale_image
+from utils.segment.metrics import Metrics, ap_per_class_box_and_mask
+from utils.segment.plots import plot_images_and_masks
+from utils.torch_utils import de_parallel, select_device, smart_inference_mode
+def save_one_txt(predn, save_conf, shape, file):
+    """Saves detection results in txt format; includes class, xywh (normalized), optionally confidence if `save_conf` is
+    True.
+    """
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, "a") as f:
+            f.write(("%g " * len(line)).rstrip() % line + "\n")
+def save_one_json(predn, jdict, path, class_map, pred_masks):
+    """
+    Saves a JSON file with detection results including bounding boxes, category IDs, scores, and segmentation masks.
+    Example JSON result: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}.
+    """
+    from pycocotools.mask import encode
+    def single_encode(x):
+        rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
+        rle["counts"] = rle["counts"].decode("utf-8")
+        return rle
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+    pred_masks = np.transpose(pred_masks, (2, 0, 1))
+    with ThreadPool(NUM_THREADS) as pool:
+        rles = pool.map(single_encode, pred_masks)
+    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        jdict.append(
+            {
+                "image_id": image_id,
+                "category_id": class_map[int(p[5])],
+                "bbox": [round(x, 3) for x in b],
+                "score": round(p[4], 5),
+                "segmentation": rles[i],
+            }
+        )
+def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
+    """
+    Return the correct-prediction matrix for one image, using either box IoU or mask IoU.
+
+    Arguments:
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
+        iouv: 1-D tensor of IoU thresholds; one output column is produced per threshold
+        pred_masks: predicted masks, used only when `masks` is True
+        gt_masks: ground-truth masks, used only when `masks` is True
+        overlap (bool): when True, `gt_masks` is repeated per label and compared against the 1-based label index
+            to obtain one binary mask per label
+        masks (bool): compute IoU with `mask_iou` on flattened masks instead of `box_iou` on boxes
+    Returns:
+        correct (bool tensor[N, len(iouv)]) on `iouv.device`; entry [n, i] is True when detection n was matched
+        to a label of the same class at threshold iouv[i]
+    """
+    if masks:
+        if overlap:
+            nl = len(labels)
+            # per-label ids 1..nl, shaped to broadcast against the repeated mask stack
+            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
+            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+        if gt_masks.shape[1:] != pred_masks.shape[1:]:
+            # resize ground truth to the prediction resolution, then binarize in place at 0.5
+            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
+            gt_masks = gt_masks.gt_(0.5)
+        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
+    else:  # boxes
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+    # (M, 1) label classes compared against (N,) detection classes
+    correct_class = labels[:, 0:1] == detections[:, 5]
+    for i in range(len(iouv)):
+        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU >= threshold and classes match
+        if x[0].shape[0]:
+            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
+            if x[0].shape[0] > 1:
+                # sort candidates by IoU, highest first, then keep one row per detection and one row per label
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # NOTE(review): the IoU re-sort below is disabled, so the per-label de-duplication runs on the
+                # order left by the previous np.unique call rather than on IoU order - confirm this is intended.
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            correct[matches[:, 1].astype(int), i] = True
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+@smart_inference_mode()
+def run(
+    data,
+    weights=None,  # model.pt path(s)
+    batch_size=32,  # batch size
+    imgsz=640,  # inference size (pixels)
+    conf_thres=0.001,  # confidence threshold
+    iou_thres=0.6,  # NMS IoU threshold
+    max_det=300,  # maximum detections per image
+    task="val",  # train, val, test, speed or study
+    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+    workers=8,  # max dataloader workers (per RANK in DDP mode)
+    single_cls=False,  # treat as single-class dataset
+    augment=False,  # augmented inference
+    verbose=False,  # verbose output
+    save_txt=False,  # save results to *.txt
+    save_hybrid=False,  # save label+prediction hybrid results to *.txt
+    save_conf=False,  # save confidences in --save-txt labels
+    save_json=False,  # save a COCO-JSON results file
+    project=ROOT / "runs/val-seg",  # save to project/name
+    name="exp",  # save to project/name
+    exist_ok=False,  # existing project/name ok, do not increment
+    half=True,  # use FP16 half-precision inference
+    dnn=False,  # use OpenCV DNN for ONNX inference
+    model=None,
+    dataloader=None,
+    save_dir=Path(""),
+    plots=True,
+    overlap=False,
+    mask_downsample_ratio=1,
+    compute_loss=None,
+    callbacks=Callbacks(),
+):
+    if save_json:
+        check_requirements("pycocotools>=2.0.6")
+        process = process_mask_native  # more accurate
+    else:
+        process = process_mask  # faster
+    # Initialize/load model and set device
+    training = model is not None
+    if training:  # called by train.py
+        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
+        half &= device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if half else model.float()
+        nm = de_parallel(model).model[-1].nm  # number of masks
+    else:  # called directly
+        device = select_device(device, batch_size=batch_size)
+        # Directories
+        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+        # Load model
+        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
+        half = model.fp16  # FP16 supported on limited backends with CUDA
+        nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32  # number of masks
+        if engine:
+            batch_size = model.batch_size
+        else:
+            device = model.device
+            if not (pt or jit):
+                batch_size = 1  # export.py models default to batch-size 1
+                LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
+        # Data
+        data = check_dataset(data)  # check
+    # Configure
+    model.eval()
+    cuda = device.type != "cpu"
+    is_coco = isinstance(data.get("val"), str) and data["val"].endswith(f"coco{os.sep}val2017.txt")  # COCO dataset
+    nc = 1 if single_cls else int(data["nc"])  # number of classes
+    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # iou vector for mAP@0.5:0.95
+    niou = iouv.numel()
+    # Dataloader
+    if not training:
+        if pt and not single_cls:  # check --weights are trained on --data
+            ncm = model.model.nc
+            assert ncm == nc, (
+                f"{weights} ({ncm} classes) trained on different --data than what you passed ({nc} "
+                f"classes). Pass correct combination of --weights and --data that are trained together."
+            )
+        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
+        pad, rect = (0.0, False) if task == "speed" else (0.5, pt)  # square inference for benchmarks
+        task = task if task in ("train", "val", "test") else "val"  # path to train/val/test images
+        dataloader = create_dataloader(
+            data[task],
+            imgsz,
+            batch_size,
+            stride,
+            single_cls,
+            pad=pad,
+            rect=rect,
+            workers=workers,
+            prefix=colorstr(f"{task}: "),
+            overlap_mask=overlap,
+            mask_downsample_ratio=mask_downsample_ratio,
+        )[0]
+    seen = 0
+    confusion_matrix = ConfusionMatrix(nc=nc)
+    names = model.names if hasattr(model, "names") else model.module.names  # get class names
+    if isinstance(names, (list, tuple)):  # old format
+        names = dict(enumerate(names))
+    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
+    s = ("%22s" + "%11s" * 10) % (
+        "Class",
+        "Images",
+        "Instances",
+        "Box(P",
+        "R",
+        "mAP50",
+        "mAP50-95)",
+        "Mask(P",
+        "R",
+        "mAP50",
+        "mAP50-95)",
+    )
+    dt = Profile(device=device), Profile(device=device), Profile(device=device)
+    metrics = Metrics()
+    loss = torch.zeros(4, device=device)
+    jdict, stats = [], []
+    # callbacks.run('on_val_start')
+    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)  # progress bar
+    for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
+        # callbacks.run('on_val_batch_start')
+        with dt[0]:
+            if cuda:
+                im = im.to(device, non_blocking=True)
+                targets = targets.to(device)
+                masks = masks.to(device)
+            masks = masks.float()
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            nb, _, height, width = im.shape  # batch size, channels, height, width
+        # Inference
+        with dt[1]:
+            preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None)
+        # Loss
+        if compute_loss:
+            loss += compute_loss((train_out, protos), targets, masks)[1]  # box, obj, cls
+        # NMS
+        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # to pixels
+        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
+        with dt[2]:
+            preds = non_max_suppression(
+                preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm
+            )
+        # Metrics
+        plot_masks = []  # masks for plotting
+        for si, (pred, proto) in enumerate(zip(preds, protos)):
+            labels = targets[targets[:, 0] == si, 1:]
+            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
+            path, shape = Path(paths[si]), shapes[si][0]
+            correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            seen += 1
+            if npr == 0:
+                if nl:
+                    stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
+                    if plots:
+                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
+                continue
+            # Masks
+            midx = [si] if overlap else targets[:, 0] == si
+            gt_masks = masks[midx]
+            pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:])
+            # Predictions
+            if single_cls:
+                pred[:, 5] = 0
+            predn = pred.clone()
+            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+            # Evaluate
+            if nl:
+                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+                correct_bboxes = process_batch(predn, labelsn, iouv)
+                correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
+                if plots:
+                    confusion_matrix.process_batch(predn, labelsn)
+            stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))  # (conf, pcls, tcls)
+            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
+            if plots and batch_i < 3:
+                plot_masks.append(pred_masks[:15])  # filter top 15 to plot
+            # Save/log
+            if save_txt:
+                save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / f"{path.stem}.txt")
+            if save_json:
+                pred_masks = scale_image(
+                    im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]
+                )
+                save_one_json(predn, jdict, path, class_map, pred_masks)  # append to COCO-JSON dictionary
+            # callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
+        # Plot images
+        if plots and batch_i < 3:
+            if len(plot_masks):
+                plot_masks = torch.cat(plot_masks, dim=0)
+            plot_images_and_masks(im, targets, masks, paths, save_dir / f"val_batch{batch_i}_labels.jpg", names)
+            plot_images_and_masks(
+                im,
+                output_to_target(preds, max_det=15),
+                plot_masks,
+                paths,
+                save_dir / f"val_batch{batch_i}_pred.jpg",
+                names,
+            )  # pred
+        # callbacks.run('on_val_batch_end')
+    # Compute metrics
+    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
+    if len(stats) and stats[0].any():
+        results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
+        metrics.update(results)
+    nt = np.bincount(stats[4].astype(int), minlength=nc)  # number of targets per class
+    # Print results
+    pf = "%22s" + "%11i" * 2 + "%11.3g" * 8  # print format
+    LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
+    if nt.sum() == 0:
+        LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")
+    # Print results per class
+    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
+        for i, c in enumerate(metrics.ap_class_index):
+            LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))
+    # Print speeds
+    t = tuple(x.t / seen * 1e3 for x in dt)  # speeds per image
+    if not training:
+        shape = (batch_size, 3, imgsz, imgsz)
+        LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
+    # Plots
+    if plots:
+        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
+    # callbacks.run('on_val_end')
+    mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results()
+    # Save JSON
+    if save_json and len(jdict):
+        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ""  # weights
+        anno_json = str(Path("../datasets/coco/annotations/instances_val2017.json"))  # annotations
+        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions
+        LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
+        with open(pred_json, "w") as f:
+            json.dump(jdict, f)
+        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
+            from pycocotools.coco import COCO
+            from pycocotools.cocoeval import COCOeval
+            anno = COCO(anno_json)  # init annotations api
+            pred = anno.loadRes(pred_json)  # init predictions api
+            results = []
+            for eval in COCOeval(anno, pred, "bbox"), COCOeval(anno, pred, "segm"):
+                if is_coco:
+                    eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]  # img ID to evaluate
+                eval.evaluate()
+                eval.accumulate()
+                eval.summarize()
+                results.extend(eval.stats[:2])  # update results (mAP@0.5:0.95, mAP@0.5)
+            map_bbox, map50_bbox, map_mask, map50_mask = results
+        except Exception as e:
+            LOGGER.info(f"pycocotools unable to run: {e}")
+    # Return results
+    model.float()  # for training
+    if not training:
+        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
+        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
+    final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask
+    return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t
+def parse_opt():
+    """Parses command line arguments for configuring YOLOv5 options like dataset path, weights, batch size, and
+    inference settings.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
+    parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
+    parser.add_argument("--batch-size", type=int, default=32, help="batch size")
+    parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
+    parser.add_argument("--conf-thres", type=float, default=0.001, help="confidence threshold")
+    parser.add_argument("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
+    parser.add_argument("--max-det", type=int, default=300, help="maximum detections per image")
+    parser.add_argument("--task", default="val", help="train, val, test, speed or study")
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
+    parser.add_argument("--single-cls", action="store_true", help="treat as single-class dataset")
+    parser.add_argument("--augment", action="store_true", help="augmented inference")
+    parser.add_argument("--verbose", action="store_true", help="report mAP by class")
+    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
+    parser.add_argument("--save-hybrid", action="store_true", help="save label+prediction hybrid results to *.txt")
+    parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
+    parser.add_argument("--save-json", action="store_true", help="save a COCO-JSON results file")
+    parser.add_argument("--project", default=ROOT / "runs/val-seg", help="save results to project/name")
+    parser.add_argument("--name", default="exp", help="save to project/name")
+    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
+    parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
+    opt = parser.parse_args()
+    opt.data = check_yaml(opt.data)  # check YAML
+    # opt.save_json |= opt.data.endswith('coco.yaml')
+    opt.save_txt |= opt.save_hybrid
+    print_args(vars(opt))
+    return opt
+def main(opt):
+    """Executes YOLOv5 tasks including training, validation, testing, speed, and study with configurable options."""
+    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
+    if opt.task in ("train", "val", "test"):  # run normally
+        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
+            LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
+        if opt.save_hybrid:
+            LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
+        run(**vars(opt))
+    else:
+        weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
+        opt.half = torch.cuda.is_available() and opt.device != "cpu"  # FP16 for fastest results
+        if opt.task == "speed":  # speed benchmarks
+            # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
+            opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
+            for opt.weights in weights:
+                run(**vars(opt), plots=False)
+        elif opt.task == "study":  # speed vs mAP benchmarks
+            # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
+            for opt.weights in weights:
+                f = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt"  # filename to save to
+                x, y = list(range(256, 1536 + 128, 128)), []  # x axis (image sizes), y axis
+                for opt.imgsz in x:  # img-size
+                    LOGGER.info(f"\nRunning {f} --imgsz {opt.imgsz}...")
+                    r, _, t = run(**vars(opt), plots=False)
+                    y.append(r + t)  # results and times
+                np.savetxt(f, y, fmt="%10.4g")  # save
+            subprocess.run(["zip", "-r", "study.zip", "study_*.txt"])
+            plot_val_study(x=x)  # plot
+        else:
+            raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    opt = parse_opt()  # parse and normalise the command-line options
+    main(opt)  # dispatch on opt.task
--- a/train.py
+++ b/train.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""
+Train a YOLOv5 model on a custom dataset. Models and datasets download automatically from the latest YOLOv5 release.
+Usage - Single-GPU training:
+    $ python train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (recommended)
+    $ python train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
+Usage - Multi-GPU DDP training:
+    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 train.py --data coco128.yaml --weights yolov5s.pt --img 640 --device 0,1,2,3
+Models:     https://github.com/ultralytics/yolov5/tree/master/models
+Datasets:   https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial:   https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
+"""
+import argparse
+import math
+import os
+import random
+import subprocess
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime, timedelta
+from pathlib import Path
+try:
+    import comet_ml  # must be imported before torch (if installed)
+except ImportError:
+    comet_ml = None
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.optim import lr_scheduler
+from tqdm import tqdm
+FILE = Path(__file__).resolve()  # absolute path of this file
+ROOT = FILE.parents[0]  # YOLOv5 root directory (the directory containing this file)
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to the module search path
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # re-express ROOT relative to the current working directory
+import val as validate  # for end-of-epoch mAP
+from models.experimental import attempt_load
+from models.yolo import Model
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.dataloaders import create_dataloader
+from utils.downloads import attempt_download, is_url
+from utils.general import (
+    LOGGER,
+    TQDM_BAR_FORMAT,
+    check_amp,
+    check_dataset,
+    check_file,
+    check_git_info,
+    check_git_status,
+    check_img_size,
+    check_requirements,
+    check_suffix,
+    check_yaml,
+    colorstr,
+    get_latest_run,
+    increment_path,
+    init_seeds,
+    intersect_dicts,
+    labels_to_class_weights,
+    labels_to_image_weights,
+    methods,
+    one_cycle,
+    print_args,
+    print_mutation,
+    strip_optimizer,
+    yaml_save,
+)
+from utils.loggers import LOGGERS, Loggers
+from utils.loggers.comet.comet_utils import check_comet_resume
+from utils.loss import ComputeLoss
+from utils.metrics import fitness
+from utils.plots import plot_evolve
+from utils.torch_utils import (
+    EarlyStopping,
+    ModelEMA,
+    de_parallel,
+    select_device,
+    smart_DDP,
+    smart_optimizer,
+    smart_resume,
+    torch_distributed_zero_first,
+)
+LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # -1 if unset; https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv("RANK", -1))  # -1 if unset; train() treats RANK != -1 as DDP training
+WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))  # 1 if unset
+GIT_INFO = check_git_info()  # stored under the "git" key of saved checkpoints
+def train(hyp, opt, device, callbacks):
+    """
+    Trains YOLOv5 model with given hyperparameters, options, and device, managing datasets, model architecture, loss
+    computation, and optimizer steps.
+    `hyp` argument is path/to/hyp.yaml or hyp dictionary.
+    """
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = (
+        Path(opt.save_dir),
+        opt.epochs,
+        opt.batch_size,
+        opt.weights,
+        opt.single_cls,
+        opt.evolve,
+        opt.data,
+        opt.cfg,
+        opt.resume,
+        opt.noval,
+        opt.nosave,
+        opt.workers,
+        opt.freeze,
+    )
+    callbacks.run("on_pretrain_routine_start")
+    # Directories
+    w = save_dir / "weights"  # weights dir
+    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
+    last, best = w / "last.pt", w / "best.pt"
+    # Hyperparameters
+    if isinstance(hyp, str):
+        with open(hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+    LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
+    opt.hyp = hyp.copy()  # for saving hyps to checkpoints
+    # Save run settings
+    if not evolve:
+        yaml_save(save_dir / "hyp.yaml", hyp)
+        yaml_save(save_dir / "opt.yaml", vars(opt))
+    # Loggers
+    data_dict = None
+    if RANK in {-1, 0}:
+        include_loggers = list(LOGGERS)
+        if getattr(opt, "ndjson_console", False):
+            include_loggers.append("ndjson_console")
+        if getattr(opt, "ndjson_file", False):
+            include_loggers.append("ndjson_file")
+        loggers = Loggers(
+            save_dir=save_dir,
+            weights=weights,
+            opt=opt,
+            hyp=hyp,
+            logger=LOGGER,
+            include=tuple(include_loggers),
+        )
+        # Register actions
+        for k in methods(loggers):
+            callbacks.register_action(k, callback=getattr(loggers, k))
+        # Process custom dataset artifact link
+        data_dict = loggers.remote_dataset
+        if resume:  # If resuming runs from remote artifact
+            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
+    # Config
+    plots = not evolve and not opt.noplots  # create plots
+    cuda = device.type != "cpu"
+    init_seeds(opt.seed + 1 + RANK, deterministic=True)
+    with torch_distributed_zero_first(LOCAL_RANK):
+        data_dict = data_dict or check_dataset(data)  # check if None
+    train_path, val_path = data_dict["train"], data_dict["val"]
+    nc = 1 if single_cls else int(data_dict["nc"])  # number of classes
+    names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"]  # class names
+    is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt")  # COCO dataset
+    # Model
+    check_suffix(weights, ".pt")  # check weights
+    pretrained = weights.endswith(".pt")
+    if pretrained:
+        with torch_distributed_zero_first(LOCAL_RANK):
+            weights = attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location="cpu")  # load checkpoint to CPU to avoid CUDA memory leak
+        model = Model(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
+        exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else []  # exclude keys
+        csd = ckpt["model"].float().state_dict()  # checkpoint state_dict as FP32
+        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(csd, strict=False)  # load
+        LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}")  # report
+    else:
+        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
+    amp = check_amp(model)  # check AMP
+    # Freeze
+    freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
+        if any(x in k for x in freeze):
+            LOGGER.info(f"freezing {k}")
+            v.requires_grad = False
+    # Image size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+    # Batch size
+    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
+        batch_size = check_train_batch_size(model, imgsz, amp)
+        loggers.on_params_update({"batch_size": batch_size})
+    # Optimizer
+    nbs = 64  # nominal batch size
+    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
+    hyp["weight_decay"] *= batch_size * accumulate / nbs  # scale weight_decay
+    optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"])
+    # Scheduler
+    if opt.cos_lr:
+        lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
+    else:
+        lf = lambda x: (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"]  # linear
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)
+    # EMA
+    ema = ModelEMA(model) if RANK in {-1, 0} else None
+    # Resume
+    best_fitness, start_epoch = 0.0, 0
+    if pretrained:
+        if resume:
+            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
+        del ckpt, csd
+    # DP mode
+    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
+        LOGGER.warning(
+            "WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n"
+            "See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get started."
+        )
+        model = torch.nn.DataParallel(model)
+    # SyncBatchNorm
+    if opt.sync_bn and cuda and RANK != -1:
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
+        LOGGER.info("Using SyncBatchNorm()")
+    # Trainloader
+    train_loader, dataset = create_dataloader(
+        train_path,
+        imgsz,
+        batch_size // WORLD_SIZE,
+        gs,
+        single_cls,
+        hyp=hyp,
+        augment=True,
+        cache=None if opt.cache == "val" else opt.cache,
+        rect=opt.rect,
+        rank=LOCAL_RANK,
+        workers=workers,
+        image_weights=opt.image_weights,
+        quad=opt.quad,
+        prefix=colorstr("train: "),
+        shuffle=True,
+        seed=opt.seed,
+    )
+    labels = np.concatenate(dataset.labels, 0)
+    mlc = int(labels[:, 0].max())  # max label class
+    assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
+    # Process 0
+    if RANK in {-1, 0}:
+        val_loader = create_dataloader(
+            val_path,
+            imgsz,
+            batch_size // WORLD_SIZE * 2,
+            gs,
+            single_cls,
+            hyp=hyp,
+            cache=None if noval else opt.cache,
+            rect=True,
+            rank=-1,
+            workers=workers * 2,
+            pad=0.5,
+            prefix=colorstr("val: "),
+        )[0]
+        if not resume:
+            if not opt.noautoanchor:
+                check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz)  # run AutoAnchor
+            model.half().float()  # pre-reduce anchor precision
+        callbacks.run("on_pretrain_routine_end", labels, names)
+    # DDP mode
+    if cuda and RANK != -1:
+        model = smart_DDP(model)
+    # Model attributes
+    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
+    hyp["box"] *= 3 / nl  # scale to layers
+    hyp["cls"] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    hyp["label_smoothing"] = opt.label_smoothing
+    model.nc = nc  # attach number of classes to model
+    model.hyp = hyp  # attach hyperparameters to model
+    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
+    model.names = names
+    # Start training
+    t0 = time.time()
+    nb = len(train_loader)  # number of batches
+    nw = max(round(hyp["warmup_epochs"] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
+    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    last_opt_step = -1
+    maps = np.zeros(nc)  # mAP per class
+    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+    scheduler.last_epoch = start_epoch - 1  # do not move
+    scaler = torch.cuda.amp.GradScaler(enabled=amp)
+    stopper, stop = EarlyStopping(patience=opt.patience), False
+    compute_loss = ComputeLoss(model)  # init loss class
+    callbacks.run("on_train_start")
+    LOGGER.info(
+        f'Image sizes {imgsz} train, {imgsz} val\n'
+        f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
+        f"Logging results to {colorstr('bold', save_dir)}\n"
+        f'Starting training for {epochs} epochs...'
+    )
+    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        callbacks.run("on_train_epoch_start")
+        model.train()
+        # Update image weights (optional, single-GPU only)
+        if opt.image_weights:
+            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+        # Update mosaic border (optional)
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+        mloss = torch.zeros(3, device=device)  # mean losses
+        if RANK != -1:
+            train_loader.sampler.set_epoch(epoch)
+        pbar = enumerate(train_loader)
+        LOGGER.info(("\n" + "%11s" * 7) % ("Epoch", "GPU_mem", "box_loss", "obj_loss", "cls_loss", "Instances", "Size"))
+        if RANK in {-1, 0}:
+            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
+        optimizer.zero_grad()
+        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
+            callbacks.run("on_train_batch_start")
+            ni = i + nb * epoch  # number integrated batches (since train start)
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
+                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
+                for j, x in enumerate(optimizer.param_groups):
+                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                    x["lr"] = np.interp(ni, xi, [hyp["warmup_bias_lr"] if j == 0 else 0.0, x["initial_lr"] * lf(epoch)])
+                    if "momentum" in x:
+                        x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
+            # Multi-scale
+            if opt.multi_scale:
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
+                sf = sz / max(imgs.shape[2:])  # scale factor
+                if sf != 1:
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
+                    imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
+            # Forward
+            with torch.cuda.amp.autocast(amp):
+                pred = model(imgs)  # forward
+                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                if RANK != -1:
+                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
+                if opt.quad:
+                    loss *= 4.0
+            # Backward
+            scaler.scale(loss).backward()
+            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
+            if ni - last_opt_step >= accumulate:
+                scaler.unscale_(optimizer)  # unscale gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
+                scaler.step(optimizer)  # optimizer.step
+                scaler.update()
+                optimizer.zero_grad()
+                if ema:
+                    ema.update(model)
+                last_opt_step = ni
+            # Log
+            if RANK in {-1, 0}:
+                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+                mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
+                pbar.set_description(
+                    ("%11s" * 2 + "%11.4g" * 5)
+                    % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])
+                )
+                callbacks.run("on_train_batch_end", model, ni, imgs, targets, paths, list(mloss))
+                if callbacks.stop_training:
+                    return
+            # end batch ------------------------------------------------------------------------------------------------
+        # Scheduler
+        lr = [x["lr"] for x in optimizer.param_groups]  # for loggers
+        scheduler.step()
+        if RANK in {-1, 0}:
+            # mAP
+            callbacks.run("on_train_epoch_end", epoch=epoch)
+            ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
+            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
+            if not noval or final_epoch:  # Calculate mAP
+                results, maps, _ = validate.run(
+                    data_dict,
+                    batch_size=batch_size // WORLD_SIZE * 2,
+                    imgsz=imgsz,
+                    half=amp,
+                    model=ema.ema,
+                    single_cls=single_cls,
+                    dataloader=val_loader,
+                    save_dir=save_dir,
+                    plots=False,
+                    callbacks=callbacks,
+                    compute_loss=compute_loss,
+                )
+            # Update best mAP
+            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
+            if fi > best_fitness:
+                best_fitness = fi
+            log_vals = list(mloss) + list(results) + lr
+            callbacks.run("on_fit_epoch_end", log_vals, epoch, best_fitness, fi)
+            # Save model
+            if (not nosave) or (final_epoch and not evolve):  # if save
+                ckpt = {
+                    "epoch": epoch,
+                    "best_fitness": best_fitness,
+                    "model": deepcopy(de_parallel(model)).half(),
+                    "ema": deepcopy(ema.ema).half(),
+                    "updates": ema.updates,
+                    "optimizer": optimizer.state_dict(),
+                    "opt": vars(opt),
+                    "git": GIT_INFO,  # {remote, branch, commit} if a git repo
+                    "date": datetime.now().isoformat(),
+                }
+                # Save last, best and delete
+                torch.save(ckpt, last)
+                if best_fitness == fi:
+                    torch.save(ckpt, best)
+                if opt.save_period > 0 and epoch % opt.save_period == 0:
+                    torch.save(ckpt, w / f"epoch{epoch}.pt")
+                del ckpt
+                callbacks.run("on_model_save", last, epoch, final_epoch, best_fitness, fi)
+        # EarlyStopping
+        if RANK != -1:  # if DDP training
+            broadcast_list = [stop if RANK == 0 else None]
+            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+            if RANK != 0:
+                stop = broadcast_list[0]
+        if stop:
+            break  # must break all DDP ranks
+        # end epoch ----------------------------------------------------------------------------------------------------
+    # end training -----------------------------------------------------------------------------------------------------
+    if RANK in {-1, 0}:
+        LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
+        for f in last, best:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+                if f is best:
+                    LOGGER.info(f"\nValidating {f}...")
+                    results, _, _ = validate.run(
+                        data_dict,
+                        batch_size=batch_size // WORLD_SIZE * 2,
+                        imgsz=imgsz,
+                        model=attempt_load(f, device).half(),
+                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools at iou 0.65
+                        single_cls=single_cls,
+                        dataloader=val_loader,
+                        save_dir=save_dir,
+                        save_json=is_coco,
+                        verbose=True,
+                        plots=plots,
+                        callbacks=callbacks,
+                        compute_loss=compute_loss,
+                    )  # val best model with plots
+                    if is_coco:
+                        callbacks.run("on_fit_epoch_end", list(mloss) + list(results) + lr, epoch, best_fitness, fi)
+        callbacks.run("on_train_end", last, best, epoch, results)
+    torch.cuda.empty_cache()
+    return results
+def parse_opt(known=False):
+    """Parses command-line arguments for YOLOv5 training, validation, and testing."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path")
+    parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
+    parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
+    parser.add_argument("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path")
+    parser.add_argument("--epochs", type=int, default=100, help="total training epochs")
+    parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch")
+    parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)")
+    parser.add_argument("--rect", action="store_true", help="rectangular training")
+    parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training")
+    parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
+    parser.add_argument("--noval", action="store_true", help="only validate final epoch")
+    parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
+    parser.add_argument("--noplots", action="store_true", help="save no plot files")
+    parser.add_argument("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations")
+    parser.add_argument(
+        "--evolve_population", type=str, default=ROOT / "data/hyps", help="location for loading population"
+    )
+    parser.add_argument("--resume_evolve", type=str, default=None, help="resume evolve from last generation")
+    parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
+    parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
+    parser.add_argument("--image-weights", action="store_true", help="use weighted image selection for training")
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
+    parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class")
+    parser.add_argument("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer")
+    parser.add_argument("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode")
+    parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
+    parser.add_argument("--project", default=ROOT / "runs/train", help="save to project/name")
+    parser.add_argument("--name", default="exp", help="save to project/name")
+    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
+    parser.add_argument("--quad", action="store_true", help="quad dataloader")
+    parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler")
+    parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
+    parser.add_argument("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)")
+    parser.add_argument("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2")
+    parser.add_argument("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)")
+    parser.add_argument("--seed", type=int, default=0, help="Global training seed")
+    parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
+    # Logger arguments
+    parser.add_argument("--entity", default=None, help="Entity")
+    parser.add_argument("--upload_dataset", nargs="?", const=True, default=False, help='Upload data, "val" option')
+    parser.add_argument("--bbox_interval", type=int, default=-1, help="Set bounding-box image logging interval")
+    parser.add_argument("--artifact_alias", type=str, default="latest", help="Version of dataset artifact to use")
+    # NDJSON logging
+    parser.add_argument("--ndjson-console", action="store_true", help="Log ndjson to console")
+    parser.add_argument("--ndjson-file", action="store_true", help="Log ndjson to file")
+    return parser.parse_known_args()[0] if known else parser.parse_args()
+def main(opt, callbacks=Callbacks()):
+    """Runs training or hyperparameter evolution with specified options and optional callbacks."""
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements(ROOT / "requirements.txt")
+    # Resume (from specified or most recent last.pt)
+    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
+        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
+        opt_yaml = last.parent.parent / "opt.yaml"  # train options yaml
+        opt_data = opt.data  # original dataset
+        if opt_yaml.is_file():
+            with open(opt_yaml, errors="ignore") as f:
+                d = yaml.safe_load(f)
+        else:
+            d = torch.load(last, map_location="cpu")["opt"]
+        opt = argparse.Namespace(**d)  # replace
+        opt.cfg, opt.weights, opt.resume = "", str(last), True  # reinstate
+        if is_url(opt_data):
+            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
+            check_file(opt.data),
+            check_yaml(opt.cfg),
+            check_yaml(opt.hyp),
+            str(opt.weights),
+            str(opt.project),
+        )  # checks
+        assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
+        if opt.evolve:
+            if opt.project == str(ROOT / "runs/train"):  # if default project name, rename to runs/evolve
+                opt.project = str(ROOT / "runs/evolve")
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == "cfg":
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
+    # DDP mode
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    if LOCAL_RANK != -1:
+        msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
+        assert not opt.image_weights, f"--image-weights {msg}"
+        assert not opt.evolve, f"--evolve {msg}"
+        assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
+        assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
+        assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
+        torch.cuda.set_device(LOCAL_RANK)
+        device = torch.device("cuda", LOCAL_RANK)
+        dist.init_process_group(
+            backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=10800)
+        )
+    # Train
+    if not opt.evolve:
+        train(opt.hyp, opt, device, callbacks)
+    # Evolve hyperparameters (optional)
+    else:
+        # Hyperparameter evolution metadata (including this hyperparameter True-False, lower_limit, upper_limit)
+        meta = {
+            "lr0": (False, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            "lrf": (False, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            "momentum": (False, 0.6, 0.98),  # SGD momentum/Adam beta1
+            "weight_decay": (False, 0.0, 0.001),  # optimizer weight decay
+            "warmup_epochs": (False, 0.0, 5.0),  # warmup epochs (fractions ok)
+            "warmup_momentum": (False, 0.0, 0.95),  # warmup initial momentum
+            "warmup_bias_lr": (False, 0.0, 0.2),  # warmup initial bias lr
+            "box": (False, 0.02, 0.2),  # box loss gain
+            "cls": (False, 0.2, 4.0),  # cls loss gain
+            "cls_pw": (False, 0.5, 2.0),  # cls BCELoss positive_weight
+            "obj": (False, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            "obj_pw": (False, 0.5, 2.0),  # obj BCELoss positive_weight
+            "iou_t": (False, 0.1, 0.7),  # IoU training threshold
+            "anchor_t": (False, 2.0, 8.0),  # anchor-multiple threshold
+            "anchors": (False, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            "fl_gamma": (False, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            "hsv_h": (True, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            "hsv_s": (True, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            "hsv_v": (True, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            "degrees": (True, 0.0, 45.0),  # image rotation (+/- deg)
+            "translate": (True, 0.0, 0.9),  # image translation (+/- fraction)
+            "scale": (True, 0.0, 0.9),  # image scale (+/- gain)
+            "shear": (True, 0.0, 10.0),  # image shear (+/- deg)
+            "perspective": (True, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            "flipud": (True, 0.0, 1.0),  # image flip up-down (probability)
+            "fliplr": (True, 0.0, 1.0),  # image flip left-right (probability)
+            "mosaic": (True, 0.0, 1.0),  # image mixup (probability)
+            "mixup": (True, 0.0, 1.0),  # image mixup (probability)
+            "copy_paste": (True, 0.0, 1.0),
+        }  # segment copy-paste (probability)
+        # GA configs
+        pop_size = 50
+        mutation_rate_min = 0.01
+        mutation_rate_max = 0.5
+        crossover_rate_min = 0.5
+        crossover_rate_max = 1
+        min_elite_size = 2
+        max_elite_size = 5
+        tournament_size_min = 2
+        tournament_size_max = 10
+        with open(opt.hyp, errors="ignore") as f:
+            hyp = yaml.safe_load(f)  # load hyps dict
+            if "anchors" not in hyp:  # anchors commented in hyp.yaml
+                hyp["anchors"] = 3
+        if opt.noautoanchor:
+            del hyp["anchors"], meta["anchors"]
+        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
+        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
+        evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv"
+        if opt.bucket:
+            # download evolve.csv if exists
+            subprocess.run(
+                [
+                    "gsutil",
+                    "cp",
+                    f"gs://{opt.bucket}/evolve.csv",
+                    str(evolve_csv),
+                ]
+            )
+        # Delete the items in meta dictionary whose first value is False
+        del_ = [item for item, value_ in meta.items() if value_[0] is False]
+        hyp_GA = hyp.copy()  # Make a copy of hyp dictionary
+        for item in del_:
+            del meta[item]  # Remove the item from meta dictionary
+            del hyp_GA[item]  # Remove the item from hyp_GA dictionary
+        # Set lower_limit and upper_limit arrays to hold the search space boundaries
+        lower_limit = np.array([meta[k][1] for k in hyp_GA.keys()])
+        upper_limit = np.array([meta[k][2] for k in hyp_GA.keys()])
+        # Create gene_ranges list to hold the range of values for each gene in the population
+        gene_ranges = [(lower_limit[i], upper_limit[i]) for i in range(len(upper_limit))]
+        # Initialize the population with initial_values or random values
+        initial_values = []
+        # If resuming evolution from a previous checkpoint
+        if opt.resume_evolve is not None:
+            assert os.path.isfile(ROOT / opt.resume_evolve), "evolve population path is wrong!"
+            with open(ROOT / opt.resume_evolve, errors="ignore") as f:
+                evolve_population = yaml.safe_load(f)
+                for value in evolve_population.values():
+                    value = np.array([value[k] for k in hyp_GA.keys()])
+                    initial_values.append(list(value))
+        # If not resuming from a previous checkpoint, generate initial values from .yaml files in opt.evolve_population
+        else:
+            yaml_files = [f for f in os.listdir(opt.evolve_population) if f.endswith(".yaml")]
+            for file_name in yaml_files:
+                with open(os.path.join(opt.evolve_population, file_name)) as yaml_file:
+                    value = yaml.safe_load(yaml_file)
+                    value = np.array([value[k] for k in hyp_GA.keys()])
+                    initial_values.append(list(value))
+        # Generate random values within the search space for the rest of the population
+        if initial_values is None:
+            population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size)]
+        elif pop_size > 1:
+            population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size - len(initial_values))]
+            for initial_value in initial_values:
+                population = [initial_value] + population
+        # Run the genetic algorithm for a fixed number of generations
+        list_keys = list(hyp_GA.keys())
+        for generation in range(opt.evolve):
+            if generation >= 1:
+                save_dict = {}
+                for i in range(len(population)):
+                    little_dict = {list_keys[j]: float(population[i][j]) for j in range(len(population[i]))}
+                    save_dict[f"gen{str(generation)}number{str(i)}"] = little_dict
+                with open(save_dir / "evolve_population.yaml", "w") as outfile:
+                    yaml.dump(save_dict, outfile, default_flow_style=False)
+            # Adaptive elite size
+            elite_size = min_elite_size + int((max_elite_size - min_elite_size) * (generation / opt.evolve))
+            # Evaluate the fitness of each individual in the population
+            fitness_scores = []
+            for individual in population:
+                for key, value in zip(hyp_GA.keys(), individual):
+                    hyp_GA[key] = value
+                hyp.update(hyp_GA)
+                results = train(hyp.copy(), opt, device, callbacks)
+                callbacks = Callbacks()
+                # Write mutation results
+                keys = (
+                    "metrics/precision",
+                    "metrics/recall",
+                    "metrics/mAP_0.5",
+                    "metrics/mAP_0.5:0.95",
+                    "val/box_loss",
+                    "val/obj_loss",
+                    "val/cls_loss",
+                )
+                print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)
+                fitness_scores.append(results[2])
+            # Select the fittest individuals for reproduction using adaptive tournament selection
+            selected_indices = []
+            for _ in range(pop_size - elite_size):
+                # Adaptive tournament size
+                tournament_size = max(
+                    max(2, tournament_size_min),
+                    int(min(tournament_size_max, pop_size) - (generation / (opt.evolve / 10))),
+                )
+                # Perform tournament selection to choose the best individual
+                tournament_indices = random.sample(range(pop_size), tournament_size)
+                tournament_fitness = [fitness_scores[j] for j in tournament_indices]
+                winner_index = tournament_indices[tournament_fitness.index(max(tournament_fitness))]
+                selected_indices.append(winner_index)
+            # Add the elite individuals to the selected indices
+            elite_indices = [i for i in range(pop_size) if fitness_scores[i] in sorted(fitness_scores)[-elite_size:]]
+            selected_indices.extend(elite_indices)
+            # Create the next generation through crossover and mutation
+            next_generation = []
+            for _ in range(pop_size):
+                parent1_index = selected_indices[random.randint(0, pop_size - 1)]
+                parent2_index = selected_indices[random.randint(0, pop_size - 1)]
+                # Adaptive crossover rate
+                crossover_rate = max(
+                    crossover_rate_min, min(crossover_rate_max, crossover_rate_max - (generation / opt.evolve))
+                )
+                if random.uniform(0, 1) < crossover_rate:
+                    crossover_point = random.randint(1, len(hyp_GA) - 1)
+                    child = population[parent1_index][:crossover_point] + population[parent2_index][crossover_point:]
+                else:
+                    child = population[parent1_index]
+                # Adaptive mutation rate
+                mutation_rate = max(
+                    mutation_rate_min, min(mutation_rate_max, mutation_rate_max - (generation / opt.evolve))
+                )
+                for j in range(len(hyp_GA)):
+                    if random.uniform(0, 1) < mutation_rate:
+                        child[j] += random.uniform(-0.1, 0.1)
+                        child[j] = min(max(child[j], gene_ranges[j][0]), gene_ranges[j][1])
+                next_generation.append(child)
+            # Replace the old population with the new generation
+            population = next_generation
+        # Print the best solution found
+        best_index = fitness_scores.index(max(fitness_scores))
+        best_individual = population[best_index]
+        print("Best solution found:", best_individual)
+        # Plot results
+        plot_evolve(evolve_csv)
+        LOGGER.info(
+            f'Hyperparameter evolution finished {opt.evolve} generations\n'
+            f"Results saved to {colorstr('bold', save_dir)}\n"
+            f'Usage example: $ python train.py --hyp {evolve_yaml}'
+        )
+def generate_individual(input_ranges, individual_length):
+    """Generates a list of random values within specified input ranges for each gene in the individual."""
+    individual = []
+    for i in range(individual_length):
+        lower_bound, upper_bound = input_ranges[i]
+        individual.append(random.uniform(lower_bound, upper_bound))
+    return individual
+def run(**kwargs):
+    """
+    Executes YOLOv5 training with given options, overriding with any kwargs provided.
+    Example: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
+    """
+    opt = parse_opt(True)
+    for k, v in kwargs.items():
+        setattr(opt, k, v)
+    main(opt)
+    return opt
+if __name__ == "__main__":
+    # Script entry point: parse the full command line (unknown arguments are an error here, unlike run())
+    # and hand the resulting options to main(). `opt` is bound at module level.
+    opt = parse_opt()
+    main(opt)
--- a/trt_utils/__init__.py
+++ b/trt_utils/__init__.py
--- a/trt_utils/trt.py
+++ b/trt_utils/trt.py
+import pycuda.driver as cuda
+import pycuda.autoinit
+import tensorrt as trt
+import numpy as np
+class HostDeviceMem(object):
+    def __init__(self, host_mem, device_mem):
+        self.host = host_mem
+        self.device = device_mem
+    def __str__(self):
+        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
+    def __repr__(self):
+        return self.__str__()
+class TrtModel:
+    def __init__(self,
+                 engine_path,
+                 max_batch_size=1,
+                 dtype=np.float32):
+        self.engine_path = engine_path
+        self.dtype = dtype
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        self.runtime = trt.Runtime(self.logger)
+        self.engine = self.load_engine(self.runtime, self.engine_path)
+        self.max_batch_size = max_batch_size
+        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
+        self.context = self.engine.create_execution_context()
+    @staticmethod
+    def load_engine(trt_runtime, engine_path):
+        trt.init_libnvinfer_plugins(None, "")             
+        with open(engine_path, 'rb') as f:
+            engine_data = f.read()
+        engine = trt_runtime.deserialize_cuda_engine(engine_data)
+        return engine
+    def allocate_buffers(self):
+        inputs = []
+        outputs = []
+        bindings = []
+        stream = cuda.Stream()
+        for binding in self.engine:
+            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
+            host_mem = cuda.pagelocked_empty(size, self.dtype)
+            device_mem = cuda.mem_alloc(host_mem.nbytes)
+            bindings.append(int(device_mem))
+            if self.engine.binding_is_input(binding):
+                inputs.append(HostDeviceMem(host_mem, device_mem))
+            else:
+                outputs.append(HostDeviceMem(host_mem, device_mem))
+        return inputs, outputs, bindings, stream
+    def __call__(self,
+                 x,
+                 batch_size=1):
+        x = x.astype(self.dtype)
+        np.copyto(self.inputs[0].host, x.ravel())
+        for inp in self.inputs:
+            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)
+        self.context.execute_async(batch_size=batch_size, bindings=self.bindings, stream_handle=self.stream.handle)
+        for out in self.outputs:
+            cuda.memcpy_dtoh_async(out.host, out.device, self.stream) 
+        self.stream.synchronize()
+        return [out.host.reshape(batch_size,-1) for out in self.outputs]
\ No newline at end of file
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "YOLOv5 Tutorial",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "t6MPjfT5NrKQ"
+      },
+      "source": [
+        "<div align=\"center\">\n",
+        "\n",
+        "  <a href=\"https://ultralytics.com/yolov5\" target=\"_blank\">\n",
+        "    <img width=\"1024\" src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png\"></a>\n",
+        "\n",
+        "[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
+        "\n",
+        "  <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a>\n",
+        "  <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
+        "  <a href=\"https://www.kaggle.com/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
+        "\n",
+        "This <a href=\"https://github.com/ultralytics/yolov5\">YOLOv5</a> 🚀 notebook by <a href=\"https://ultralytics.com\">Ultralytics</a> presents simple train, validate and predict examples to help start your AI adventure.<br>We hope that the resources in this notebook will help you get the most out of YOLOv5. Please browse the YOLOv5 <a href=\"https://docs.ultralytics.com/yolov5\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/yolov5\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
+        "\n",
+        "</div>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "7mGmQbAO5pQb"
+      },
+      "source": [
+        "# Setup\n",
+        "\n",
+        "Clone GitHub [repository](https://github.com/ultralytics/yolov5), install [dependencies](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) and check PyTorch and GPU."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "wbvMlHd_QwMG",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e8225db4-e61d-4640-8b1f-8bfce3331cea"
+      },
+      "source": [
+        "!git clone https://github.com/ultralytics/yolov5  # clone\n",
+        "%cd yolov5\n",
+        "%pip install -qr requirements.txt comet_ml  # install\n",
+        "\n",
+        "import torch\n",
+        "import utils\n",
+        "display = utils.notebook_init()  # checks"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "YOLOv5 🚀 v7.0-136-g71244ae Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 23.3/166.8 GB disk)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4JnkELT0cIJg"
+      },
+      "source": [
+        "# 1. Detect\n",
+        "\n",
+        "`detect.py` runs YOLOv5 inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/detect`. Example inference sources are:\n",
+        "\n",
+        "```shell\n",
+        "python detect.py --source 0  # webcam\n",
+        "                          img.jpg  # image\n",
+        "                          vid.mp4  # video\n",
+        "                          screen  # screenshot\n",
+        "                          path/  # directory\n",
+        "                         'path/*.jpg'  # glob\n",
+        "                         'https://youtu.be/LNwODJXcvt4'  # YouTube\n",
+        "                         'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zR9ZbuQCH7FX",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "284ef04b-1596-412f-88f6-948828dd2b49"
+      },
+      "source": [
+        "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n",
+        "# display.Image(filename='runs/detect/exp/zidane.jpg', width=600)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov5s.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1\n",
+            "YOLOv5 🚀 v7.0-136-g71244ae Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n",
+            "\n",
+            "Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...\n",
+            "100% 14.1M/14.1M [00:00<00:00, 24.5MB/s]\n",
+            "\n",
+            "Fusing layers... \n",
+            "YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients\n",
+            "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, 41.5ms\n",
+            "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, 60.0ms\n",
+            "Speed: 0.5ms pre-process, 50.8ms inference, 37.7ms NMS per image at shape (1, 3, 640, 640)\n",
+            "Results saved to \u001b[1mruns/detect/exp\u001b[0m\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hkAzDWJ7cWTr"
+      },
+      "source": [
+        "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
+        "<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/127574988-6a558aa1-d268-44b9-bf6b-62d4c605cc72.jpg\" width=\"600\">"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0eq1SMWl6Sfn"
+      },
+      "source": [
+        "# 2. Validate\n",
+        "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "WQPtK1QYVaD_",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "cf7d52f0-281c-4c96-a488-79f5908f8426"
+      },
+      "source": [
+        "# Download COCO val\n",
+        "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')  # download (780M - 5000 images)\n",
+        "!unzip -q tmp.zip -d ../datasets && rm tmp.zip  # unzip"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 780M/780M [00:12<00:00, 66.6MB/s]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "X58w8JLpMnjH",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "3e234e05-ee8b-4ad1-b1a4-f6a55d5e4f3d"
+      },
+      "source": [
+        "# Validate YOLOv5s on COCO val\n",
+        "!python val.py --weights yolov5s.pt --data coco.yaml --img 640 --half"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[34m\u001b[1mval: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5s.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False\n",
+            "YOLOv5 🚀 v7.0-136-g71244ae Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n",
+            "\n",
+            "Fusing layers... \n",
+            "YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100% 5000/5000 [00:02<00:00, 2024.59it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n",
+            "                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 157/157 [01:25<00:00,  1.84it/s]\n",
+            "                   all       5000      36335      0.671      0.519      0.566      0.371\n",
+            "Speed: 0.1ms pre-process, 3.1ms inference, 2.3ms NMS per image at shape (32, 3, 640, 640)\n",
+            "\n",
+            "Evaluating pycocotools mAP... saving runs/val/exp/yolov5s_predictions.json...\n",
+            "loading annotations into memory...\n",
+            "Done (t=0.43s)\n",
+            "creating index...\n",
+            "index created!\n",
+            "Loading and preparing results...\n",
+            "DONE (t=5.32s)\n",
+            "creating index...\n",
+            "index created!\n",
+            "Running per image evaluation...\n",
+            "Evaluate annotation type *bbox*\n",
+            "DONE (t=78.89s).\n",
+            "Accumulating evaluation results...\n",
+            "DONE (t=14.51s).\n",
+            " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374\n",
+            " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.572\n",
+            " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.402\n",
+            " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211\n",
+            " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.423\n",
+            " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.489\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.311\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.516\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.566\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.378\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.625\n",
+            " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.722\n",
+            "Results saved to \u001b[1mruns/val/exp\u001b[0m\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ZY2VXXXu74w5"
+      },
+      "source": [
+        "# 3. Train\n",
+        "\n",
+        "<p align=\"\"><a href=\"https://bit.ly/ultralytics_hub\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/im/integrations-loop.png\"/></a></p>\n",
+        "Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package.\n",
+        "<br><br>\n",
+        "\n",
+        "Train a YOLOv5s model on the [COCO128](https://www.kaggle.com/ultralytics/coco128) dataset with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`.\n",
+        "\n",
+        "- **Pretrained [Models](https://github.com/ultralytics/yolov5/tree/master/models)** are downloaded\n",
+        "automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases)\n",
+        "- **[Datasets](https://github.com/ultralytics/yolov5/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov5/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov5/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov5/blob/master/data/SKU-110K.yaml).\n",
+        "- **Training Results** are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc.\n",
+        "<br>\n",
+        "\n",
+        "A **Mosaic Dataloader** is used for training which combines 4 images into 1 mosaic.\n",
+        "\n",
+        "## Label a dataset on Roboflow (optional)\n",
+        "\n",
+        "[Roboflow](https://roboflow.com/?ref=ultralytics) enables you to easily **organize, label, and prepare** a high quality dataset with your own custom data. Roboflow also makes it easy to establish an active learning pipeline, collaborate with your team on dataset improvement, and integrate directly into your model building workflow with the `roboflow` pip package."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title Select YOLOv5 🚀 logger {run: 'auto'}\n",
+        "logger = 'Comet' #@param ['Comet', 'ClearML', 'TensorBoard']\n",
+        "\n",
+        "if logger == 'Comet':\n",
+        "  %pip install -q comet_ml\n",
+        "  import comet_ml; comet_ml.init()\n",
+        "elif logger == 'ClearML':\n",
+        "  %pip install -q clearml\n",
+        "  import clearml; clearml.browser_login()\n",
+        "elif logger == 'TensorBoard':\n",
+        "  %load_ext tensorboard\n",
+        "  %tensorboard --logdir runs/train"
+      ],
+      "metadata": {
+        "id": "i3oKtE4g-aNn"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "1NcFxRcFdJ_O",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "bbeeea2b-04fc-4185-aa64-258690495b5a"
+      },
+      "source": [
+        "# Train YOLOv5s on COCO128 for 3 epochs\n",
+        "!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "2023-04-09 14:11:38.063605: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+            "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+            "2023-04-09 14:11:39.026661: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest\n",
+            "\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
+            "YOLOv5 🚀 v7.0-136-g71244ae Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n",
+            "\n",
+            "\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n",
+            "\u001b[34m\u001b[1mClearML: \u001b[0mrun 'pip install clearml' to automatically track, visualize and remotely train YOLOv5 🚀 in ClearML\n",
+            "\u001b[34m\u001b[1mComet: \u001b[0mrun 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet\n",
+            "\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/\n",
+            "\n",
+            "Dataset not found ⚠️, missing paths ['/content/datasets/coco128/images/train2017']\n",
+            "Downloading https://ultralytics.com/assets/coco128.zip to coco128.zip...\n",
+            "100% 6.66M/6.66M [00:00<00:00, 75.6MB/s]\n",
+            "Dataset download success ✅ (0.6s), saved to \u001b[1m/content/datasets\u001b[0m\n",
+            "\n",
+            "                 from  n    params  module                                  arguments                     \n",
+            "  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              \n",
+            "  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                \n",
+            "  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   \n",
+            "  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               \n",
+            "  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 \n",
+            "  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              \n",
+            "  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 \n",
+            "  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              \n",
+            "  8                -1  1   1182720  models.common.C3                        [512, 512, 1]                 \n",
+            "  9                -1  1    656896  models.common.SPPF                      [512, 512, 5]                 \n",
+            " 10                -1  1    131584  models.common.Conv                      [512, 256, 1, 1]              \n",
+            " 11                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          \n",
+            " 12           [-1, 6]  1         0  models.common.Concat                    [1]                           \n",
+            " 13                -1  1    361984  models.common.C3                        [512, 256, 1, False]          \n",
+            " 14                -1  1     33024  models.common.Conv                      [256, 128, 1, 1]              \n",
+            " 15                -1  1         0  torch.nn.modules.upsampling.Upsample    [None, 2, 'nearest']          \n",
+            " 16           [-1, 4]  1         0  models.common.Concat                    [1]                           \n",
+            " 17                -1  1     90880  models.common.C3                        [256, 128, 1, False]          \n",
+            " 18                -1  1    147712  models.common.Conv                      [128, 128, 3, 2]              \n",
+            " 19          [-1, 14]  1         0  models.common.Concat                    [1]                           \n",
+            " 20                -1  1    296448  models.common.C3                        [256, 256, 1, False]          \n",
+            " 21                -1  1    590336  models.common.Conv                      [256, 256, 3, 2]              \n",
+            " 22          [-1, 10]  1         0  models.common.Concat                    [1]                           \n",
+            " 23                -1  1   1182720  models.common.C3                        [512, 512, 1, False]          \n",
+            " 24      [17, 20, 23]  1    229245  models.yolo.Detect                      [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]\n",
+            "Model summary: 214 layers, 7235389 parameters, 7235389 gradients, 16.6 GFLOPs\n",
+            "\n",
+            "Transferred 349/349 items from yolov5s.pt\n",
+            "\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n",
+            "\u001b[34m\u001b[1moptimizer:\u001b[0m SGD(lr=0.01) with parameter groups 57 weight(decay=0.0), 60 weight(decay=0.0005), 60 bias\n",
+            "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco128/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<00:00, 1709.36it/s]\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco128/labels/train2017.cache\n",
+            "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:00<00:00, 264.35it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<?, ?it/s]\n",
+            "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:01<00:00, 107.05it/s]\n",
+            "\n",
+            "\u001b[34m\u001b[1mAutoAnchor: \u001b[0m4.27 anchors/target, 0.994 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅\n",
+            "Plotting labels to runs/train/exp/labels.jpg... \n",
+            "Image sizes 640 train, 640 val\n",
+            "Using 2 dataloader workers\n",
+            "Logging results to \u001b[1mruns/train/exp\u001b[0m\n",
+            "Starting training for 3 epochs...\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        0/2      3.91G    0.04618    0.07209    0.01703        232        640: 100% 8/8 [00:09<00:00,  1.17s/it]\n",
+            "                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 4/4 [00:01<00:00,  2.01it/s]\n",
+            "                   all        128        929      0.667      0.602       0.68       0.45\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        1/2      4.76G    0.04622    0.06891    0.01817        201        640: 100% 8/8 [00:02<00:00,  3.78it/s]\n",
+            "                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 4/4 [00:01<00:00,  2.16it/s]\n",
+            "                   all        128        929      0.709      0.645      0.722      0.478\n",
+            "\n",
+            "      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size\n",
+            "        2/2      4.76G     0.0436     0.0647    0.01698        227        640: 100% 8/8 [00:01<00:00,  4.19it/s]\n",
+            "                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 4/4 [00:01<00:00,  2.95it/s]\n",
+            "                   all        128        929      0.761      0.647      0.735       0.49\n",
+            "\n",
+            "3 epochs completed in 0.006 hours.\n",
+            "Optimizer stripped from runs/train/exp/weights/last.pt, 14.8MB\n",
+            "Optimizer stripped from runs/train/exp/weights/best.pt, 14.8MB\n",
+            "\n",
+            "Validating runs/train/exp/weights/best.pt...\n",
+            "Fusing layers... \n",
+            "Model summary: 157 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs\n",
+            "                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 4/4 [00:06<00:00,  1.56s/it]\n",
+            "                   all        128        929      0.759      0.646      0.734       0.49\n",
+            "                person        128        254      0.857      0.706      0.805      0.525\n",
+            "               bicycle        128          6      0.773      0.577      0.725      0.414\n",
+            "                   car        128         46      0.664      0.435      0.551       0.24\n",
+            "            motorcycle        128          5      0.587        0.8      0.837      0.635\n",
+            "              airplane        128          6          1      0.989      0.995      0.715\n",
+            "                   bus        128          7      0.635      0.714      0.753      0.651\n",
+            "                 train        128          3      0.686      0.333       0.72      0.504\n",
+            "                 truck        128         12      0.604      0.333      0.472      0.259\n",
+            "                  boat        128          6      0.938      0.333      0.449      0.177\n",
+            "         traffic light        128         14      0.778      0.255      0.401      0.217\n",
+            "             stop sign        128          2      0.826          1      0.995      0.895\n",
+            "                 bench        128          9      0.711      0.556      0.661      0.313\n",
+            "                  bird        128         16      0.962          1      0.995      0.642\n",
+            "                   cat        128          4      0.868          1      0.995      0.754\n",
+            "                   dog        128          9          1      0.652      0.899      0.651\n",
+            "                 horse        128          2      0.853          1      0.995      0.622\n",
+            "              elephant        128         17      0.909      0.882      0.934      0.698\n",
+            "                  bear        128          1      0.696          1      0.995      0.995\n",
+            "                 zebra        128          4      0.855          1      0.995      0.905\n",
+            "               giraffe        128          9      0.788      0.828      0.912      0.701\n",
+            "              backpack        128          6      0.835        0.5      0.738      0.311\n",
+            "              umbrella        128         18      0.785      0.814      0.859       0.48\n",
+            "               handbag        128         19      0.759      0.263      0.366      0.205\n",
+            "                   tie        128          7      0.983      0.714       0.77      0.492\n",
+            "              suitcase        128          4      0.656          1      0.945      0.631\n",
+            "               frisbee        128          5      0.721        0.8      0.759      0.724\n",
+            "                  skis        128          1      0.737          1      0.995        0.3\n",
+            "             snowboard        128          7      0.829      0.696       0.83      0.537\n",
+            "           sports ball        128          6      0.637      0.667      0.602      0.311\n",
+            "                  kite        128         10      0.636        0.6      0.599      0.226\n",
+            "          baseball bat        128          4      0.501       0.25      0.468      0.205\n",
+            "        baseball glove        128          7      0.483      0.429      0.465      0.292\n",
+            "            skateboard        128          5      0.932        0.6      0.687      0.493\n",
+            "         tennis racket        128          7       0.77      0.429      0.547      0.332\n",
+            "                bottle        128         18      0.577      0.379      0.554      0.276\n",
+            "            wine glass        128         16      0.704      0.875       0.89       0.51\n",
+            "                   cup        128         36      0.841      0.667      0.837      0.533\n",
+            "                  fork        128          6      0.992      0.333       0.45      0.315\n",
+            "                 knife        128         16      0.768      0.688      0.695      0.403\n",
+            "                 spoon        128         22      0.838       0.47      0.639      0.384\n",
+            "                  bowl        128         28      0.764       0.58      0.716      0.513\n",
+            "                banana        128          1      0.902          1      0.995      0.301\n",
+            "              sandwich        128          2          1          0      0.359      0.326\n",
+            "                orange        128          4      0.722       0.75      0.912      0.581\n",
+            "              broccoli        128         11      0.547      0.364      0.432      0.317\n",
+            "                carrot        128         24      0.619      0.625      0.724      0.495\n",
+            "               hot dog        128          2      0.409          1      0.828      0.762\n",
+            "                 pizza        128          5      0.833      0.995      0.962      0.727\n",
+            "                 donut        128         14      0.631          1       0.96      0.839\n",
+            "                  cake        128          4       0.87          1      0.995       0.83\n",
+            "                 chair        128         35      0.583        0.6      0.608      0.317\n",
+            "                 couch        128          6      0.907      0.667      0.815      0.544\n",
+            "          potted plant        128         14      0.739      0.786      0.823       0.48\n",
+            "                   bed        128          3      0.985      0.333       0.83      0.441\n",
+            "          dining table        128         13      0.821      0.357      0.578      0.342\n",
+            "                toilet        128          2          1      0.988      0.995      0.846\n",
+            "                    tv        128          2       0.57          1      0.995      0.796\n",
+            "                laptop        128          3          1          0      0.593      0.312\n",
+            "                 mouse        128          2          1          0      0.089     0.0445\n",
+            "                remote        128          8          1      0.624      0.634      0.538\n",
+            "            cell phone        128          8      0.622      0.417      0.421      0.187\n",
+            "             microwave        128          3      0.711          1      0.995      0.766\n",
+            "                  oven        128          5      0.329        0.4       0.43      0.282\n",
+            "                  sink        128          6      0.437      0.333      0.338      0.265\n",
+            "          refrigerator        128          5      0.567        0.8      0.799      0.536\n",
+            "                  book        128         29      0.597      0.257      0.349      0.154\n",
+            "                 clock        128          9      0.765      0.889      0.932      0.736\n",
+            "                  vase        128          2       0.33          1      0.995      0.895\n",
+            "              scissors        128          1          1          0      0.497     0.0498\n",
+            "            teddy bear        128         21      0.856      0.569      0.841      0.547\n",
+            "            toothbrush        128          5        0.8          1      0.928      0.574\n",
+            "Results saved to \u001b[1mruns/train/exp\u001b[0m\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "15glLzbQx5u0"
+      },
+      "source": [
+        "# 4. Visualize"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Comet Logging and Visualization 🌟 NEW\n",
+        "\n",
+        "[Comet](https://www.comet.com/site/lp/yolov5-with-comet/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab) is now fully integrated with YOLOv5. Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab)! Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!\n",
+        "\n",
+        "Getting started is easy:\n",
+        "```shell\n",
+        "pip install comet_ml  # 1. install\n",
+        "export COMET_API_KEY=<Your API Key>  # 2. paste API key\n",
+        "python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt  # 3. train\n",
+        "```\n",
+        "To learn more about all of the supported Comet features for this integration, check out the [Comet Tutorial](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration). If you'd like to learn more about Comet, head over to our [documentation](https://www.comet.com/docs/v2/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab). Get started by trying out the Comet Colab Notebook:\n",
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)\n",
+        "\n",
+        "<a href=\"https://bit.ly/yolov5-readme-comet2\">\n",
+        "<img alt=\"Comet Dashboard\" src=\"https://user-images.githubusercontent.com/26833433/202851203-164e94e1-2238-46dd-91f8-de020e9d6b41.png\" width=\"1280\"/></a>"
+      ],
+      "metadata": {
+        "id": "nWOsI5wJR1o3"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## ClearML Logging and Automation 🌟 NEW\n",
+        "\n",
+        "[ClearML](https://cutt.ly/yolov5-notebook-clearml) is completely integrated into YOLOv5 to track your experimentation, manage dataset versions and even remotely execute training runs. To enable ClearML (check cells above):\n",
+        "\n",
+        "- `pip install clearml`\n",
+        "- run `clearml-init` to connect to a ClearML server (**deploy your own [open-source server](https://github.com/allegroai/clearml-server)**, or use our [free hosted server](https://cutt.ly/yolov5-notebook-clearml))\n",
+        "\n",
+        "You'll get all the great expected features from an experiment manager: live updates, model upload, experiment comparison, etc. ClearML also tracks uncommitted changes and installed packages, for example. Thanks to that, ClearML Tasks (which is what we call experiments) are also reproducible on different machines! With only 1 extra line, we can schedule a YOLOv5 training task on a queue to be executed by any number of ClearML Agents (workers).\n",
+        "\n",
+        "You can use ClearML Data to version your dataset and then pass it to YOLOv5 simply using its unique ID. This will help you keep track of your data without adding extra hassle. Explore the [ClearML Tutorial](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration) for details!\n",
+        "\n",
+        "<a href=\"https://cutt.ly/yolov5-notebook-clearml\">\n",
+        "<img alt=\"ClearML Experiment Management UI\" src=\"https://github.com/thepycoder/clearml_screenshots/raw/main/scalars.jpg\" width=\"1280\"/></a>"
+      ],
+      "metadata": {
+        "id": "Lay2WsTjNJzP"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-WPvRbS5Swl6"
+      },
+      "source": [
+        "## Local Logging\n",
+        "\n",
+        "Training results are automatically logged with [TensorBoard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) loggers to `runs/train`, with a new experiment directory created for each new training run as `runs/train/exp2`, `runs/train/exp3`, etc.\n",
+        "\n",
+        "This directory contains train and val statistics, mosaics, labels, predictions and augmented mosaics, as well as metrics and charts including precision-recall (PR) curves and confusion matrices.\n",
+        "\n",
+        "<img alt=\"Local logging results\" src=\"https://user-images.githubusercontent.com/26833433/183222430-e1abd1b7-782c-4cde-b04d-ad52926bf818.jpg\" width=\"1280\"/>\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Zelyeqbyt3GD"
+      },
+      "source": [
+        "# Environments\n",
+        "\n",
+        "YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[cuDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
+        "\n",
+        "- **Notebooks** with free GPU: <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a> <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
+        "- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/)\n",
+        "- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/)\n",
+        "- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) <a href=\"https://hub.docker.com/r/ultralytics/yolov5\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker\" alt=\"Docker Pulls\"></a>\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6Qu7Iesl0p54"
+      },
+      "source": [
+        "# Status\n",
+        "\n",
+        "![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)\n",
+        "\n",
+        "If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IEijrePND_2I"
+      },
+      "source": [
+        "# Appendix\n",
+        "\n",
+        "Additional content below."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GMusP4OAxFu6"
+      },
+      "source": [
+        "# YOLOv5 PyTorch HUB Inference (DetectionModels only)\n",
+        "import torch\n",
+        "\n",
+        "model = torch.hub.load('ultralytics/yolov5', 'yolov5s', force_reload=True, trust_repo=True)  # or yolov5n - yolov5x6 or custom\n",
+        "im = 'https://ultralytics.com/images/zidane.jpg'  # file, Path, PIL.Image, OpenCV, nparray, list\n",
+        "results = model(im)  # inference\n",
+        "results.print()  # or .show(), .save(), .crop(), .pandas(), etc."
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
--- a/utils/__init__.py
+++ b/utils/__init__.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""utils/initialization."""
+import contextlib
+import platform
+import threading
+def emojis(str=""):
+    """Returns an emoji-safe version of a string, stripped of emojis on Windows platforms."""
+    return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str
+class TryExcept(contextlib.ContextDecorator):
+    # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
+    def __init__(self, msg=""):
+        """Initializes TryExcept with an optional message, used as a decorator or context manager for error handling."""
+        self.msg = msg
+    def __enter__(self):
+        """Enter the runtime context related to this object for error handling with an optional message."""
+        pass
+    def __exit__(self, exc_type, value, traceback):
+        """Context manager exit method that prints an error message with emojis if an exception occurred, always returns
+        True.
+        """
+        if value:
+            print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
+        return True
+def threaded(func):
+    """Decorator @threaded to run a function in a separate thread, returning the thread instance."""
+    def wrapper(*args, **kwargs):
+        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
+        thread.start()
+        return thread
+    return wrapper
+def join_threads(verbose=False):
+    """
+    Joins all daemon threads, optionally printing their names if verbose is True.
+    Example: atexit.register(lambda: join_threads())
+    """
+    main_thread = threading.current_thread()
+    for t in threading.enumerate():
+        if t is not main_thread:
+            if verbose:
+                print(f"Joining thread {t.name}")
+            t.join()
+def notebook_init(verbose=True):
+    """
+    Initializes notebook environment by checking requirements, cleaning up, and displaying system info.
+    Args:
+        verbose: if True, collect CPU/RAM/disk figures, try to clear the IPython cell output and append the figures
+            to the final "Setup complete" line; if False, that line carries no statistics.
+    Returns:
+        The `IPython.display` module when `verbose` is True and the import succeeded, otherwise None.
+    """
+    print("Checking setup...")
+    # Imports are local so that importing this package does not pull in these dependencies
+    import os
+    import shutil
+    from ultralytics.utils.checks import check_requirements
+    from utils.general import check_font, is_colab
+    from utils.torch_utils import select_device  # imports
+    check_font()
+    import psutil
+    if check_requirements("wandb", install=False):
+        os.system("pip uninstall -y wandb")  # eliminate unexpected account creation prompt with infinite hang
+    if is_colab():
+        shutil.rmtree("/content/sample_data", ignore_errors=True)  # remove colab /sample_data directory
+    # System info
+    display = None  # rebound to the IPython.display module below if that import works
+    if verbose:
+        gb = 1 << 30  # bytes to GiB (1024 ** 3)
+        ram = psutil.virtual_memory().total
+        total, used, free = shutil.disk_usage("/")  # `used` is unpacked but the message reports total - free
+        with contextlib.suppress(Exception):  # clear display if ipython is installed
+            from IPython import display
+            display.clear_output()
+        s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
+    else:
+        s = ""
+    select_device(newline=False)
+    print(emojis(f"Setup complete ✅ {s}"))
+    return display
--- a/utils/activations.py
+++ b/utils/activations.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""Activation functions."""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class SiLU(nn.Module):
+    @staticmethod
+    def forward(x):
+        """
+        Applies the Sigmoid-weighted Linear Unit (SiLU) activation function.
+        https://arxiv.org/pdf/1606.08415.pdf.
+        """
+        return x * torch.sigmoid(x)
+class Hardswish(nn.Module):
+    @staticmethod
+    def forward(x):
+        """
+        Applies the Hardswish activation function, compatible with TorchScript, CoreML, and ONNX.
+        Equivalent to x * F.hardsigmoid(x)
+        """
+        return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for TorchScript, CoreML and ONNX
+class Mish(nn.Module):
+    """Mish activation https://github.com/digantamisra98/Mish."""
+    @staticmethod
+    def forward(x):
+        """Applies the Mish activation function, a smooth alternative to ReLU."""
+        return x * F.softplus(x).tanh()
+class MemoryEfficientMish(nn.Module):
+    class F(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            """Applies the Mish activation function, a smooth ReLU alternative, to the input tensor `x`."""
+            ctx.save_for_backward(x)
+            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
+        @staticmethod
+        def backward(ctx, grad_output):
+            """Computes the gradient of the Mish activation function with respect to input `x`."""
+            x = ctx.saved_tensors[0]
+            sx = torch.sigmoid(x)
+            fx = F.softplus(x).tanh()
+            return grad_output * (fx + x * sx * (1 - fx * fx))
+    def forward(self, x):
+        """Applies the Mish activation function to the input tensor `x`."""
+        return self.F.apply(x)
+class FReLU(nn.Module):
+    """FReLU activation https://arxiv.org/abs/2007.11824."""
+    def __init__(self, c1, k=3):  # ch_in, kernel
+        """Initializes FReLU activation with channel `c1` and kernel size `k`."""
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
+        self.bn = nn.BatchNorm2d(c1)
+    def forward(self, x):
+        """
+        Applies FReLU activation with max operation between input and BN-convolved input.
+        https://arxiv.org/abs/2007.11824
+        """
+        return torch.max(x, self.bn(self.conv(x)))
+class AconC(nn.Module):
+    """
+    ACON activation (activate or not) function.
+    AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
+    See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
+    """
+    def __init__(self, c1):
+        """Initializes AconC with learnable parameters p1, p2, and beta for channel-wise activation control."""
+        super().__init__()
+        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
+        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
+        self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
+    def forward(self, x):
+        """Applies AconC activation function with learnable parameters for channel-wise control on input tensor x."""
+        dpx = (self.p1 - self.p2) * x
+        return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
+class MetaAconC(nn.Module):
+    """
+    ACON activation (activate or not) function.
+    AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
+    See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
+    """
+    def __init__(self, c1, k=1, s=1, r=16):
+        """Initializes MetaAconC with params: channel_in (c1), kernel size (k=1), stride (s=1), reduction (r=16)."""
+        super().__init__()
+        c2 = max(r, c1 // r)
+        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
+        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
+        self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
+        self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
+        # self.bn1 = nn.BatchNorm2d(c2)
+        # self.bn2 = nn.BatchNorm2d(c1)
+    def forward(self, x):
+        """Applies a forward pass transforming input `x` using learnable parameters and sigmoid activation."""
+        y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
+        # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
+        # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y)))))  # bug/unstable
+        beta = torch.sigmoid(self.fc2(self.fc1(y)))  # bug patch BN layers removed
+        dpx = (self.p1 - self.p2) * x
+        return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""Image augmentation functions."""
+import math
+import random
+import cv2
+import numpy as np
+import torch
+import torchvision.transforms as T
+import torchvision.transforms.functional as TF
+from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
+from utils.metrics import bbox_ioa
+# Per-channel statistics used as default arguments by the normalization helpers and classification transforms below
+IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
+IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation
+class Albumentations:
+    # YOLOv5 Albumentations class (optional, only used if package is installed)
+    def __init__(self, size=640):
+        """Initializes Albumentations class for optional data augmentation in YOLOv5 with specified input size."""
+        self.transform = None
+        prefix = colorstr("albumentations: ")
+        try:
+            import albumentations as A
+            check_version(A.__version__, "1.0.3", hard=True)  # version requirement
+            T = [
+                A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
+                A.Blur(p=0.01),
+                A.MedianBlur(p=0.01),
+                A.ToGray(p=0.01),
+                A.CLAHE(p=0.01),
+                A.RandomBrightnessContrast(p=0.0),
+                A.RandomGamma(p=0.0),
+                A.ImageCompression(quality_lower=75, p=0.0),
+            ]  # transforms
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
+            LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
+        except ImportError:  # package not installed, skip
+            pass
+        except Exception as e:
+            LOGGER.info(f"{prefix}{e}")
+    def __call__(self, im, labels, p=1.0):
+        """Applies transformations to an image and labels with probability `p`, returning updated image and labels."""
+        if self.transform and random.random() < p:
+            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
+            im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])])
+        return im, labels
+def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
+    """
+    Applies ImageNet normalization to RGB images in BCHW format, modifying them in-place if specified.
+    Example: y = (x - mean) / std
+    """
+    return TF.normalize(x, mean, std, inplace=inplace)
+def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
+    """Reverses ImageNet normalization for BCHW format RGB images by applying `x = x * std + mean`."""
+    for i in range(3):
+        x[:, i] = x[:, i] * std[i] + mean[i]
+    return x
+def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
+    """Applies HSV color-space augmentation to an image with random gains for hue, saturation, and value."""
+    if hgain or sgain or vgain:
+        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
+        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
+        dtype = im.dtype  # uint8
+        x = np.arange(0, 256, dtype=r.dtype)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed
+def hist_equalize(im, clahe=True, bgr=False):
+    """Equalizes image histogram, with optional CLAHE, for BGR or RGB image with shape (n,m,3) and range 0-255."""
+    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
+    if clahe:
+        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
+    else:
+        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
+    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB
+def replicate(im, labels):
+    """
+    Replicates half of the smallest object labels in an image for data augmentation.
+    Returns augmented image and labels.
+    """
+    h, w = im.shape[:2]
+    boxes = labels[:, 1:].astype(int)
+    x1, y1, x2, y2 = boxes.T
+    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
+    for i in s.argsort()[: round(s.size * 0.5)]:  # smallest indices
+        x1b, y1b, x2b, y2b = boxes[i]
+        bh, bw = y2b - y1b, x2b - x1b
+        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
+        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
+        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # im4[ymin:ymax, xmin:xmax]
+        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
+    return im, labels
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    """Resizes and pads image to new_shape with stride-multiple constraints, returns resized image, ratio, padding."""
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return im, ratio, (dw, dh)
+def random_perspective(
+    im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, border=(0, 0)
+):
+    """
+    Applies a random perspective/affine warp (rotation, scale, shear, translation) to an image and its labels.
+    Args:
+        im: image array whose first two shape entries are read as height and width.
+        targets: label array with rows [cls, x1, y1, x2, y2]; columns 1:5 are replaced by the warped boxes.
+        segments: per-target polygon arrays; used instead of the boxes only when at least one is non-empty and
+            there is exactly one per target.
+        degrees: rotation angle is drawn uniformly from [-degrees, degrees].
+        translate: translation is drawn from [0.5 - translate, 0.5 + translate] times the output width/height.
+        scale: scale factor is drawn uniformly from [1 - scale, 1 + scale].
+        shear: x and y shear angles are each drawn uniformly from [-shear, shear] degrees.
+        perspective: both perspective coefficients are drawn from [-perspective, perspective]; a non-zero value
+            selects `cv2.warpPerspective` instead of `cv2.warpAffine`.
+        border: border[0] is added twice to the output height and border[1] twice to the output width.
+    Returns:
+        (im, targets): the warped image and the targets that pass `box_candidates`, with updated boxes.
+    """
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
+    # targets = [cls, xyxy]
+    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
+    width = im.shape[1] + border[1] * 2
+    # Center
+    C = np.eye(3)
+    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
+    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)
+    # Perspective
+    P = np.eye(3)
+    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+    # Rotation and Scale
+    R = np.eye(3)
+    a = random.uniform(-degrees, degrees)
+    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
+    s = random.uniform(1 - scale, 1 + scale)
+    # s = 2 ** random.uniform(-scale, scale)
+    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+    # Shear
+    S = np.eye(3)
+    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+    # Translation
+    T = np.eye(3)
+    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
+    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
+    # Combined rotation matrix
+    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+        if perspective:
+            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
+        else:  # affine
+            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+    # Visualize
+    # import matplotlib.pyplot as plt
+    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
+    # ax[0].imshow(im[:, :, ::-1])  # base
+    # ax[1].imshow(im2[:, :, ::-1])  # warped
+    # Transform label coordinates
+    n = len(targets)
+    if n:
+        use_segments = any(x.any() for x in segments) and len(segments) == n
+        new = np.zeros((n, 4))
+        if use_segments:  # warp segments
+            segments = resample_segments(segments)  # upsample
+            for i, segment in enumerate(segments):
+                # Homogeneous coordinates (x, y, 1) so that the 3x3 matrix M can be applied directly
+                xy = np.ones((len(segment), 3))
+                xy[:, :2] = segment
+                xy = xy @ M.T  # transform
+                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+                # clip
+                new[i] = segment2box(xy, width, height)
+        else:  # warp boxes
+            # All four corners of every box are transformed, then an axis-aligned box is fitted around them
+            xy = np.ones((n * 4, 3))
+            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+            xy = xy @ M.T  # transform
+            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+            # create new boxes
+            x = xy[:, [0, 2, 4, 6]]
+            y = xy[:, [1, 3, 5, 7]]
+            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+            # clip
+            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
+            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
+        # filter candidates
+        # The original boxes are multiplied by the sampled scale `s` before being compared with the warped ones
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
+        targets = targets[i]
+        targets[:, 1:5] = new[i]
+    return im, targets
+def copy_paste(im, labels, segments, p=0.5):
+    """
+    Applies Copy-Paste augmentation by flipping and merging segments and labels on an image.
+    Details at https://arxiv.org/abs/2012.07177.
+    """
+    n = len(segments)
+    if p and n:
+        h, w, c = im.shape  # height, width, channels
+        im_new = np.zeros(im.shape, np.uint8)
+        for j in random.sample(range(n), k=round(p * n)):
+            l, s = labels[j], segments[j]
+            box = w - l[3], l[2], w - l[1], l[4]
+            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                labels = np.concatenate((labels, [[l[0], *box]]), 0)
+                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
+        result = cv2.flip(im, 1)  # augment segments (flip left-right)
+        i = cv2.flip(im_new, 1).astype(bool)
+        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug
+    return im, labels, segments
+def cutout(im, labels, p=0.5):
+    """
+    Applies cutout augmentation to an image with optional label adjustment, using random masks of varying sizes.
+    Details at https://arxiv.org/abs/1708.04552.
+    """
+    if random.random() < p:
+        h, w = im.shape[:2]
+        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
+        for s in scales:
+            mask_h = random.randint(1, int(h * s))  # create random masks
+            mask_w = random.randint(1, int(w * s))
+            # box
+            xmin = max(0, random.randint(0, w) - mask_w // 2)
+            ymin = max(0, random.randint(0, h) - mask_h // 2)
+            xmax = min(w, xmin + mask_w)
+            ymax = min(h, ymin + mask_h)
+            # apply random color mask
+            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
+            # return unobscured labels
+            if len(labels) and s > 0.03:
+                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))  # intersection over area
+                labels = labels[ioa < 0.60]  # remove >60% obscured labels
+    return labels
+def mixup(im, labels, im2, labels2):
+    """
+    Applies MixUp augmentation by blending images and labels.
+    See https://arxiv.org/pdf/1710.09412.pdf for details.
+    """
+    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
+    im = (im * r + im2 * (1 - r)).astype(np.uint8)
+    labels = np.concatenate((labels, labels2), 0)
+    return im, labels
+def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+    """
+    Filters bounding box candidates by minimum width-height threshold `wh_thr` (pixels), aspect ratio threshold
+    `ar_thr`, and area ratio threshold `area_thr`.
+    box1(4,n) is before augmentation, box2(4,n) is after augmentation.
+    """
+    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
+    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
+    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
+    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
+def classify_albumentations(
+    augment=True,
+    size=224,
+    scale=(0.08, 1.0),
+    ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
+    hflip=0.5,
+    vflip=0.0,
+    jitter=0.4,
+    mean=IMAGENET_MEAN,
+    std=IMAGENET_STD,
+    auto_aug=False,
+):
+    # YOLOv5 classification Albumentations (optional, only used if package is installed)
+    prefix = colorstr("albumentations: ")
+    try:
+        import albumentations as A
+        from albumentations.pytorch import ToTensorV2
+        check_version(A.__version__, "1.0.3", hard=True)  # version requirement
+        if augment:  # Resize and crop
+            T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
+            if auto_aug:
+                # TODO: implement AugMix, AutoAug & RandAug in albumentation
+                LOGGER.info(f"{prefix}auto augmentations are currently not supported")
+            else:
+                if hflip > 0:
+                    T += [A.HorizontalFlip(p=hflip)]
+                if vflip > 0:
+                    T += [A.VerticalFlip(p=vflip)]
+                if jitter > 0:
+                    color_jitter = (float(jitter),) * 3  # repeat value for brightness, contrast, satuaration, 0 hue
+                    T += [A.ColorJitter(*color_jitter, 0)]
+        else:  # Use fixed crop for eval set (reproducibility)
+            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
+        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
+        LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
+        return A.Compose(T)
+    except ImportError:  # package not installed, skip
+        LOGGER.warning(f"{prefix}⚠️ not found, install with `pip install albumentations` (recommended)")
+    except Exception as e:
+        LOGGER.info(f"{prefix}{e}")
+def classify_transforms(size=224):
+    """Applies a series of transformations including center crop, ToTensor, and normalization for classification."""
+    assert isinstance(size, int), f"ERROR: classify_transforms size {size} must be integer, not (list, tuple)"
+    # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+    return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+class LetterBox:
+    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    def __init__(self, size=(640, 640), auto=False, stride=32):
+        """Initializes a LetterBox object for YOLOv5 image preprocessing with optional auto sizing and stride
+        adjustment.
+        """
+        super().__init__()
+        self.h, self.w = (size, size) if isinstance(size, int) else size
+        self.auto = auto  # pass max size integer, automatically solve for short side using stride
+        self.stride = stride  # used with auto
+    def __call__(self, im):
+        """
+        Resizes and pads input image `im` (HWC format) to specified dimensions, maintaining aspect ratio.
+        im = np.array HWC
+        """
+        imh, imw = im.shape[:2]
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old
+        h, w = round(imh * r), round(imw * r)  # resized image
+        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w
+        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+        im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
+        im_out[top : top + h, left : left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
+        return im_out
+class CenterCrop:
+    # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
+    def __init__(self, size=640):
+        """Initializes CenterCrop for image preprocessing, accepting single int or tuple for size, defaults to 640."""
+        super().__init__()
+        self.h, self.w = (size, size) if isinstance(size, int) else size
+    def __call__(self, im):
+        """
+        Applies center crop to the input image and resizes it to a specified size, maintaining aspect ratio.
+        im = np.array HWC
+        """
+        imh, imw = im.shape[:2]
+        m = min(imh, imw)  # min dimension
+        top, left = (imh - m) // 2, (imw - m) // 2
+        return cv2.resize(im[top : top + m, left : left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
+class ToTensor:
+    # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    def __init__(self, half=False):
+        """Initializes ToTensor for YOLOv5 image preprocessing, with optional half precision (half=True for FP16)."""
+        super().__init__()
+        self.half = half
+    def __call__(self, im):
+        """
+        Converts BGR np.array image from HWC to RGB CHW format, and normalizes to [0, 1], with support for FP16 if
+        `half=True`.
+        im = np.array HWC in BGR order
+        """
+        im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
+        im = torch.from_numpy(im)  # to torch
+        im = im.half() if self.half else im.float()  # uint8 to fp16/32
+        im /= 255.0  # 0-255 to 0.0-1.0
+        return im
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""AutoAnchor utils."""
+import random
+import numpy as np
+import torch
+import yaml
+from tqdm import tqdm
+from utils import TryExcept
+from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr
+# Prefix prepended to the AutoAnchor log messages emitted by this module
+PREFIX = colorstr("AutoAnchor: ")
+def check_anchor_order(m):
+    """Checks and corrects anchor order against stride in YOLOv5 Detect() module if necessary."""
+    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
+    da = a[-1] - a[0]  # delta a
+    ds = m.stride[-1] - m.stride[0]  # delta s
+    if da and (da.sign() != ds.sign()):  # same order
+        LOGGER.info(f"{PREFIX}Reversing anchor order")
+        m.anchors[:] = m.anchors.flip(0)
+@TryExcept(f"{PREFIX}ERROR")
+def check_anchors(dataset, model, thr=4.0, imgsz=640):
+    """
+    Evaluates anchor fit to dataset and adjusts if necessary, supporting customizable threshold and image size.
+    Args:
+        dataset: object providing `shapes` and `labels`; columns 3:5 of each label array are used as box
+            width and height.
+        model: model whose last layer (`model.model[-1]`, or `model.module.model[-1]` for wrapped models) is
+            the Detect() module holding `anchors` and `stride`.
+        thr: maximum width/height ratio between a label and an anchor for the anchor to count as a match.
+        imgsz: image size the dataset shapes are rescaled to (longest side).
+    The Detect() anchors are overwritten in place when k-means anchors reach a higher best possible recall.
+    Exceptions raised here are printed and suppressed by the `TryExcept` decorator.
+    """
+    m = model.module.model[-1] if hasattr(model, "module") else model.model[-1]  # Detect()
+    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
+    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
+    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # wh
+    def metric(k):  # compute metric
+        """Returns (best possible recall, mean number of anchors above threshold per label) for anchors `k`."""
+        r = wh[:, None] / k[None]
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
+        best = x.max(1)[0]  # best_x
+        aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
+        bpr = (best > 1 / thr).float().mean()  # best possible recall
+        return bpr, aat
+    stride = m.stride.to(m.anchors.device).view(-1, 1, 1)  # model strides
+    anchors = m.anchors.clone() * stride  # current anchors
+    bpr, aat = metric(anchors.cpu().view(-1, 2))
+    s = f"\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). "
+    if bpr > 0.98:  # threshold to recompute
+        LOGGER.info(f"{s}Current anchors are a good fit to dataset ✅")
+    else:
+        LOGGER.info(f"{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...")
+        na = m.anchors.numel() // 2  # number of anchors
+        anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
+        new_bpr = metric(anchors)[0]
+        if new_bpr > bpr:  # replace anchors
+            anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
+            m.anchors[:] = anchors.clone().view_as(m.anchors)
+            check_anchor_order(m)  # must be in pixel-space (not grid-space)
+            m.anchors /= stride  # back to grid-space after the order check
+            s = f"{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)"
+        else:
+            s = f"{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)"
+        LOGGER.info(s)
+def kmean_anchors(dataset="./data/coco128.yaml", n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
+    """
+    Creates kmeans-evolved anchors from training dataset.
+    Arguments:
+        dataset: path to data.yaml, or a loaded dataset
+        n: number of anchors
+        img_size: image size used for training
+        thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
+        gen: generations to evolve anchors using genetic algorithm
+        verbose: print all results
+    Return:
+        k: kmeans evolved anchors
+    Usage:
+        from utils.autoanchor import *; _ = kmean_anchors()
+    """
+    from scipy.cluster.vq import kmeans
+    npr = np.random
+    thr = 1 / thr
+    def metric(k, wh):  # compute metrics
+        r = wh[:, None] / k[None]
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
+        # x = wh_iou(wh, torch.tensor(k))  # iou metric
+        return x, x.max(1)[0]  # x, best_x
+    def anchor_fitness(k):  # mutation fitness
+        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
+        return (best * (best > thr).float()).mean()  # fitness
+    def print_results(k, verbose=True):
+        k = k[np.argsort(k.prod(1))]  # sort small to large
+        x, best = metric(k, wh0)
+        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
+        s = (
+            f"{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n"
+            f"{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, "
+            f"past_thr={x[x > thr].mean():.3f}-mean: "
+        )
+        for x in k:
+            s += "%i,%i, " % (round(x[0]), round(x[1]))
+        if verbose:
+            LOGGER.info(s[:-2])
+        return k
+    if isinstance(dataset, str):  # *.yaml file
+        with open(dataset, errors="ignore") as f:
+            data_dict = yaml.safe_load(f)  # model dict
+        from utils.dataloaders import LoadImagesAndLabels
+        dataset = LoadImagesAndLabels(data_dict["train"], augment=True, rect=True)
+    # Get label wh
+    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
+    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh
+    # Filter
+    i = (wh0 < 3.0).any(1).sum()
+    if i:
+        LOGGER.info(f"{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size")
+    wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # filter > 2 pixels
+    # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1
+    # Kmeans init
+    try:
+        LOGGER.info(f"{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...")
+        assert n <= len(wh)  # apply overdetermined constraint
+        s = wh.std(0)  # sigmas for whitening
+        k = kmeans(wh / s, n, iter=30)[0] * s  # points
+        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
+    except Exception:
+        LOGGER.warning(f"{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init")
+        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
+    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
+    k = print_results(k, verbose=False)
+    # Plot
+    # k, d = [None] * 20, [None] * 20
+    # for i in tqdm(range(1, 21)):
+    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
+    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
+    # ax = ax.ravel()
+    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
+    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
+    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
+    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
+    # fig.savefig('wh.png', dpi=200)
+    # Evolve
+    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
+    pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT)  # progress bar
+    for _ in pbar:
+        v = np.ones(sh)
+        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
+            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
+        kg = (k.copy() * v).clip(min=2.0)
+        fg = anchor_fitness(kg)
+        if fg > f:
+            f, k = fg, kg.copy()
+            pbar.desc = f"{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}"
+            if verbose:
+                print_results(k, verbose)
+    return print_results(k).astype(np.float32)
--- a/utils/autobatch.py
+++ b/utils/autobatch.py
+# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
+"""Auto-batch utils."""
+from copy import deepcopy
+import numpy as np
+import torch
+from utils.general import LOGGER, colorstr
+from utils.torch_utils import profile
+def check_train_batch_size(model, imgsz=640, amp=True):
+    """Checks and computes optimal training batch size for YOLOv5 model, given image size and AMP setting."""
+    with torch.cuda.amp.autocast(amp):
+        return autobatch(deepcopy(model).train(), imgsz)  # compute optimal batch size
+def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
+    """Estimates optimal YOLOv5 batch size using `fraction` of CUDA memory."""
+    # Usage:
+    #     import torch
+    #     from utils.autobatch import autobatch
+    #     model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
+    #     print(autobatch(model))
+    # Check device
+    prefix = colorstr("AutoBatch: ")
+    LOGGER.info(f"{prefix}Computing optimal batch size for --imgsz {imgsz}")
+    device = next(model.parameters()).device  # get model device
+    if device.type == "cpu":
+        LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}")
+        return batch_size
+    if torch.backends.cudnn.benchmark:
+        LOGGER.info(f"{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}")
+        return batch_size
+    # Inspect CUDA memory
+    gb = 1 << 30  # bytes to GiB (1024 ** 3)
+    d = str(device).upper()  # 'CUDA:0'
+    properties = torch.cuda.get_device_properties(device)  # device properties
+    t = properties.total_memory / gb  # GiB total
+    r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
+    a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
+    f = t - (r + a)  # GiB free
+    LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free")
+    # Profile batch sizes
+    batch_sizes = [1, 2, 4, 8, 16]
+    try:
+        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
+        results = profile(img, model, n=3, device=device)
+    except Exception as e:
+        LOGGER.warning(f"{prefix}{e}")
+    # Fit a solution
+    y = [x[2] for x in results if x]  # memory [2]
+    p = np.polyfit(batch_sizes[: len(y)], y, deg=1)  # first degree polynomial fit
+    b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
+    if None in results:  # some sizes failed
+        i = results.index(None)  # first fail index
+        if b >= batch_sizes[i]:  # y intercept above failure point
+            b = batch_sizes[max(i - 1, 0)]  # select prior safe point
+    if b < 1 or b > 1024:  # b outside of safe range
+        b = batch_size
+        LOGGER.warning(f"{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.")
+    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
+    LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅")
+    return b
--- a/utils/aws/__init__.py
+++ b/utils/aws/__init__.py
--- a/utils/aws/mime.sh
+++ b/utils/aws/mime.sh
+# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
+# This script will run on every instance restart, not only on first start
+# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
+Content-Type: multipart/mixed; boundary="//"
+MIME-Version: 1.0
+--//
+Content-Type: text/cloud-config; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment; filename="cloud-config.txt"
+#cloud-config
+cloud_final_modules:
+- [scripts-user, always]
+--//
+Content-Type: text/x-shellscript; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment; filename="userdata.txt"
+#!/bin/bash
+# --- paste contents of userdata.sh here ---
+--//