Commit 5a567950 authored by lidc

yolov5: added MPI single-node multi-GPU and multi-node multi-GPU launch modes and updated its README accordingly; removed the debug log output from maskrcnn and updated that model's README.

parent a30b77fe
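The MPI launch modes mentioned in the commit message live in the updated yolov5 README, which is not part of the diff below. As a rough sketch only (none of these names are taken from this commit), a training script launched via mpirun typically bootstraps torch.distributed from the OpenMPI environment variables:

import os
import torch.distributed as dist

def init_from_mpi(backend='nccl'):
    # mpirun exports these; MASTER_ADDR/MASTER_PORT must also be set in the environment
    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])              # global rank across all nodes
    world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])        # total number of processes
    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])  # GPU index on this node
    dist.init_process_group(backend, rank=rank, world_size=world_size)
    return local_rank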
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
-Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
-
-Format                | `export.py --include` | Model
----                   | ---                   | ---
-PyTorch               | -                     | yolov5s.pt
-TorchScript           | `torchscript`         | yolov5s.torchscript
-ONNX                  | `onnx`                | yolov5s.onnx
-OpenVINO              | `openvino`            | yolov5s_openvino_model/
-TensorRT              | `engine`              | yolov5s.engine
-CoreML                | `coreml`              | yolov5s.mlmodel
-TensorFlow SavedModel | `saved_model`         | yolov5s_saved_model/
-TensorFlow GraphDef   | `pb`                  | yolov5s.pb
-TensorFlow Lite       | `tflite`              | yolov5s.tflite
-TensorFlow Edge TPU   | `edgetpu`             | yolov5s_edgetpu.tflite
-TensorFlow.js         | `tfjs`                | yolov5s_web_model/
+Export a YOLOv5 PyTorch model to TorchScript, ONNX, CoreML, TensorFlow (saved_model, pb, TFLite, TF.js,) formats
+TensorFlow exports authored by https://github.com/zldrobit

Usage:
-    $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
+    $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs

Inference:
-    $ python path/to/detect.py --weights yolov5s.pt              # PyTorch
-                                         yolov5s.torchscript     # TorchScript
-                                         yolov5s.onnx            # ONNX Runtime or OpenCV DNN with --dnn
-                                         yolov5s.xml             # OpenVINO
-                                         yolov5s.engine          # TensorRT
-                                         yolov5s.mlmodel         # CoreML (MacOS-only)
-                                         yolov5s_saved_model     # TensorFlow SavedModel
-                                         yolov5s.pb              # TensorFlow GraphDef
-                                         yolov5s.tflite          # TensorFlow Lite
-                                         yolov5s_edgetpu.tflite  # TensorFlow Edge TPU
+    $ python path/to/detect.py --weights yolov5s.pt
+                                         yolov5s.onnx (must export with --dynamic)
+                                         yolov5s_saved_model
+                                         yolov5s.pb
+                                         yolov5s.tflite

TensorFlow.js:
    $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example

@@ -39,9 +21,7 @@ TensorFlow.js:
"""
import argparse
-import json
import os
-import platform
import subprocess
import sys
import time

@@ -62,29 +42,23 @@ from models.experimental import attempt_load
from models.yolo import Detect
from utils.activations import SiLU
from utils.datasets import LoadImages
-from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_version, colorstr,
-                           file_size, print_args, url2file)
+from utils.general import colorstr, check_dataset, check_img_size, check_requirements, file_size, print_args, \
+    set_logging, url2file
from utils.torch_utils import select_device


def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    # YOLOv5 TorchScript model export
    try:
-        LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
-        f = file.with_suffix('.torchscript')
+        print(f'\n{prefix} starting export with torch {torch.__version__}...')
+        f = file.with_suffix('.torchscript.pt')

        ts = torch.jit.trace(model, im, strict=False)
-        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
-        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
-        if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
-            optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
-        else:
-            ts.save(str(f), _extra_files=extra_files)
+        (optimize_for_mobile(ts) if optimize else ts).save(f)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
+        print(f'{prefix} export failure: {e}')
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
@@ -93,7 +67,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
        check_requirements(('onnx',))
        import onnx

-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+        print(f'\n{prefix} starting export with onnx {onnx.__version__}...')
        f = file.with_suffix('.onnx')

        torch.onnx.export(model, im, f, verbose=False, opset_version=opset,
@@ -108,7 +82,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
-        # LOGGER.info(onnx.helper.printable_graph(model_onnx.graph))  # print
+        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if simplify:
@@ -116,7 +90,7 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
                check_requirements(('onnx-simplifier',))
                import onnxsim

-                LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
+                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(
                    model_onnx,
                    dynamic_input_shape=dynamic,
@@ -124,115 +98,46 @@ def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorst
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
-                LOGGER.info(f'{prefix} simplifier failure: {e}')
+                print(f'{prefix} simplifier failure: {e}')

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
-
-
-def export_openvino(model, im, file, prefix=colorstr('OpenVINO:')):
-    # YOLOv5 OpenVINO export
-    try:
-        check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
-        import openvino.inference_engine as ie
-
-        LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
-        f = str(file).replace('.pt', '_openvino_model' + os.sep)
-
-        cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f}"
-        subprocess.check_output(cmd, shell=True)
-
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
+        print(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'")
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'{prefix} export failure: {e}')
def export_coreml(model, im, file, prefix=colorstr('CoreML:')):
    # YOLOv5 CoreML export
+    ct_model = None
    try:
        check_requirements(('coremltools',))
        import coremltools as ct

-        LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
+        print(f'\n{prefix} starting export with coremltools {ct.__version__}...')
        f = file.with_suffix('.mlmodel')

+        model.train()  # CoreML exports should be placed in model.train() mode
        ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
-        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
+        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        ct_model.save(f)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return ct_model, f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-        return None, None
+        print(f'\n{prefix} export failure: {e}')
+
+    return ct_model
-def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
-    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
-    try:
-        check_requirements(('tensorrt',))
-        import tensorrt as trt
-
-        if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
-            grid = model.model[-1].anchor_grid
-            model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
-            export_onnx(model, im, file, 12, train, False, simplify)  # opset 12
-            model.model[-1].anchor_grid = grid
-        else:  # TensorRT >= 8
-            check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
-            export_onnx(model, im, file, 13, train, False, simplify)  # opset 13
-        onnx = file.with_suffix('.onnx')
-        assert onnx.exists(), f'failed to export ONNX file: {onnx}'
-
-        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
-        f = file.with_suffix('.engine')  # TensorRT engine file
-        logger = trt.Logger(trt.Logger.INFO)
-        if verbose:
-            logger.min_severity = trt.Logger.Severity.VERBOSE
-
-        builder = trt.Builder(logger)
-        config = builder.create_builder_config()
-        config.max_workspace_size = workspace * 1 << 30
-
-        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
-        network = builder.create_network(flag)
-        parser = trt.OnnxParser(network, logger)
-        if not parser.parse_from_file(str(onnx)):
-            raise RuntimeError(f'failed to load ONNX file: {onnx}')
-
-        inputs = [network.get_input(i) for i in range(network.num_inputs)]
-        outputs = [network.get_output(i) for i in range(network.num_outputs)]
-        LOGGER.info(f'{prefix} Network Description:')
-        for inp in inputs:
-            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
-        for out in outputs:
-            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
-
-        half &= builder.platform_has_fast_fp16
-        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
-        if half:
-            config.set_flag(trt.BuilderFlag.FP16)
-        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
-            t.write(engine.serialize())
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
def export_saved_model(model, im, file, dynamic,
                       tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
-                       conf_thres=0.25, prefix=colorstr('TensorFlow SavedModel:')):
-    # YOLOv5 TensorFlow SavedModel export
+                       conf_thres=0.25, prefix=colorstr('TensorFlow saved_model:')):
+    # YOLOv5 TensorFlow saved_model export
+    keras_model = None
    try:
        import tensorflow as tf
        from tensorflow import keras
-        from models.tf import TFDetect, TFModel
+        from models.tf import TFModel, TFDetect

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        f = str(file).replace('.pt', '_saved_model')
        batch_size, ch, *imgsz = list(im.shape)  # BCHW
@@ -246,11 +151,11 @@ def export_saved_model(model, im, file, dynamic,
        keras_model.summary()
        keras_model.save(f, save_format='tf')

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return keras_model, f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-        return None, None
+        print(f'\n{prefix} export failure: {e}')
+
+    return keras_model
def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
@@ -259,7 +164,7 @@ def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
        import tensorflow as tf
        from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        f = file.with_suffix('.pb')

        m = tf.function(lambda x: keras_model(x))  # full model
@@ -268,18 +173,18 @@ def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')):
        frozen_func.graph.as_graph_def()
        tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('TensorFlow Lite:')):
    # YOLOv5 TensorFlow Lite export
    try:
        import tensorflow as tf
+        from models.tf import representative_dataset_gen

-        LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
+        print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
        batch_size, ch, *imgsz = list(im.shape)  # BCHW
        f = str(file).replace('.pt', '-fp16.tflite')
@@ -288,7 +193,6 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
        converter.target_spec.supported_types = [tf.float16]
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if int8:
-            from models.tf import representative_dataset_gen
            dataset = LoadImages(check_dataset(data)['train'], img_size=imgsz, auto=False)  # representative data
            converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib)
            converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
@@ -300,38 +204,10 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
        tflite_model = converter.convert()
        open(f, "wb").write(tflite_model)
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
-
-
-def export_edgetpu(keras_model, im, file, prefix=colorstr('Edge TPU:')):
-    # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
-    try:
-        cmd = 'edgetpu_compiler --version'
-        help_url = 'https://coral.ai/docs/edgetpu/compiler/'
-        assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
-        if subprocess.run(cmd, shell=True).returncode != 0:
-            LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
-            for c in ['curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
-                      'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
-                      'sudo apt-get update',
-                      'sudo apt-get install edgetpu-compiler']:
-                subprocess.run(c, shell=True, check=True)
-        ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
-
-        LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
-        f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
-        f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model
-
-        cmd = f"edgetpu_compiler -s {f_tfl}"
-        subprocess.run(cmd, shell=True, check=True)
-
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
@@ -339,16 +215,15 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
    try:
        check_requirements(('tensorflowjs',))
        import re

        import tensorflowjs as tfjs

-        LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
+        print(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
        f = str(file).replace('.pt', '_web_model')  # js dir
        f_pb = file.with_suffix('.pb')  # *.pb path
        f_json = f + '/model.json'  # *.json path

-        cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \
-              f'--output_node_names="Identity,Identity_1,Identity_2,Identity_3" {f_pb} {f}'
+        cmd = f"tensorflowjs_converter --input_format=tf_frozen_model " \
+              f"--output_node_names='Identity,Identity_1,Identity_2,Identity_3' {f_pb} {f}"
        subprocess.run(cmd, shell=True)

        json = open(f_json).read()
@@ -365,10 +240,9 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
                       json)
            j.write(subst)

-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
+        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
-        LOGGER.info(f'\n{prefix} export failure: {e}')
+        print(f'\n{prefix} export failure: {e}')
@torch.no_grad()
@@ -377,7 +251,7 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
-        include=('torchscript', 'onnx'),  # include formats
+        include=('torchscript', 'onnx', 'coreml'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
@@ -386,10 +260,6 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        dynamic=False,  # ONNX/TF: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
-        verbose=False,  # TensorRT: verbose log
-        workspace=4,  # TensorRT: workspace size (GB)
-        nms=False,  # TF: add NMS to model
-        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
@@ -397,12 +267,9 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        ):
    t = time.time()
    include = [x.lower() for x in include]
-    tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'))  # TensorFlow exports
-    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)
-
-    # Checks
+    tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'tfjs'))  # TensorFlow exports
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
-    opset = 12 if ('openvino' in include) else opset  # OpenVINO requires opset <= 12
+    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)

    # Load PyTorch model
    device = select_device(device)
@@ -430,52 +297,40 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
    for _ in range(2):
        y = model(im)  # dry runs
-    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} ({file_size(file):.1f} MB)")
+    print(f"\n{colorstr('PyTorch:')} starting from {file} ({file_size(file):.1f} MB)")
    # Exports
    if 'torchscript' in include:
-        f = export_torchscript(model, im, file, optimize)
-    if 'engine' in include:  # TensorRT required before ONNX
-        f = export_engine(model, im, file, train, half, simplify, workspace, verbose)
-    if ('onnx' in include) or ('openvino' in include):  # OpenVINO requires ONNX
-        f = export_onnx(model, im, file, opset, train, dynamic, simplify)
-    if 'openvino' in include:
-        f = export_openvino(model, im, file)
+        export_torchscript(model, im, file, optimize)
+    if 'onnx' in include:
+        export_onnx(model, im, file, opset, train, dynamic, simplify)
    if 'coreml' in include:
-        _, f = export_coreml(model, im, file)
+        export_coreml(model, im, file)

    # TensorFlow Exports
    if any(tf_exports):
-        pb, tflite, edgetpu, tfjs = tf_exports[1:]
-        if int8 or edgetpu:  # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
-            check_requirements(('flatbuffers==1.12',))  # required before `import tensorflow`
+        pb, tflite, tfjs = tf_exports[1:]
        assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
-        model, f = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
-                                      agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
-                                      topk_all=topk_all,
-                                      conf_thres=conf_thres, iou_thres=iou_thres)  # keras model
+        model = export_saved_model(model, im, file, dynamic, tf_nms=tfjs, agnostic_nms=tfjs,
+                                   topk_per_class=topk_per_class, topk_all=topk_all, conf_thres=conf_thres,
+                                   iou_thres=iou_thres)  # keras model
        if pb or tfjs:  # pb prerequisite to tfjs
-            f = export_pb(model, im, file)
-        if tflite or edgetpu:
-            f = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, ncalib=100)
-        if edgetpu:
-            f = export_edgetpu(model, im, file)
+            export_pb(model, im, file)
+        if tflite:
+            export_tflite(model, im, file, int8=int8, data=data, ncalib=100)
        if tfjs:
-            f = export_tfjs(model, im, file)
+            export_tfjs(model, im, file)

    # Finish
-    LOGGER.info(f'\nExport complete ({time.time() - t:.2f}s)'
+    print(f'\nExport complete ({time.time() - t:.2f}s)'
          f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
-          f"\nVisualize with https://netron.app"
-          f"\nDetect with `python detect.py --weights {f}`"
-          f" or `model = torch.hub.load('ultralytics/yolov5', 'custom', '{f}')"
-          f"\nValidate with `python val.py --weights {f}`")
+          f'\nVisualize with https://netron.app')
def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
@@ -486,26 +341,22 @@ def parse_opt():
    parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
-    parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
-    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
-    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
-    parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
-    parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
+    parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
    parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
    parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
    parser.add_argument('--include', nargs='+',
                        default=['torchscript', 'onnx'],
-                        help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs')
+                        help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)')
    opt = parser.parse_args()
    print_args(FILE.stem, opt)
    return opt


def main(opt):
-    for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
-        run(**vars(opt))
+    set_logging()
+    run(**vars(opt))


if __name__ == "__main__":
...
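Aside on the export.py hunks above: after this change the script can also be driven from Python. A minimal sketch, assuming a repo checkout with this commit applied and a yolov5s.pt checkpoint alongside it (the call simply mirrors the CLI defaults shown above):

from export import run  # the patched script above

run(weights='yolov5s.pt', include=('torchscript', 'onnx'))
# writes yolov5s.torchscript.pt (note the new suffix) and yolov5s.onnx next to the checkpoint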
@@ -5,7 +5,6 @@ PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
Usage:
    import torch
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
-    model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx')  # file from branch
"""
import torch

@@ -28,35 +27,36 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
    """
    from pathlib import Path

-    from models.common import AutoShape, DetectMultiBackend
    from models.yolo import Model
+    from models.experimental import attempt_load
+    from utils.general import check_requirements, set_logging
    from utils.downloads import attempt_download
-    from utils.general import check_requirements, intersect_dicts, set_logging
    from utils.torch_utils import select_device

+    file = Path(__file__).resolve()
    check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
    set_logging(verbose=verbose)

-    name = Path(name)
-    path = name.with_suffix('.pt') if name.suffix == '' else name  # checkpoint path
+    save_dir = Path('') if str(name).endswith('.pt') else file.parent
+    path = (save_dir / name).with_suffix('.pt')  # checkpoint path
    try:
        device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)

        if pretrained and channels == 3 and classes == 80:
-            model = DetectMultiBackend(path, device=device)  # download/load FP32 model
-            # model = models.experimental.attempt_load(path, map_location=device)  # download/load FP32 model
+            model = attempt_load(path, map_location=device)  # download/load FP32 model
        else:
-            cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
+            cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
            if pretrained:
                ckpt = torch.load(attempt_download(path), map_location=device)  # load
+                msd = model.state_dict()  # model state_dict
                csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-                csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
+                csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
                model.load_state_dict(csd, strict=False)  # load
                if len(ckpt['model'].names) == classes:
                    model.names = ckpt['model'].names  # set class names attribute
        if autoshape:
-            model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
+            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
@@ -125,11 +125,10 @@ if __name__ == '__main__':
    # model = custom(path='path/to/model.pt')  # custom

    # Verify inference
-    from pathlib import Path
    import cv2
    import numpy as np
    from PIL import Image
+    from pathlib import Path

    imgs = ['data/images/zidane.jpg',  # filename
            Path('data/images/zidane.jpg'),  # Path
@@ -138,6 +137,6 @@ if __name__ == '__main__':
            Image.open('data/images/bus.jpg'),  # PIL
            np.zeros((320, 640, 3))]  # numpy

-    results = model(imgs, size=320)  # batched inference
+    results = model(imgs)  # batched inference
    results.print()
    results.save()
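The intersect_dicts call and the dict comprehension that replaces it in the hunk above do the same shape-based filtering when the two state_dicts share keys (intersect_dicts additionally drops excluded keys such as 'anchors'). A tiny self-contained illustration, with made-up tensor names:

import torch

csd = {'w1': torch.zeros(2, 2), 'w2': torch.zeros(3)}  # checkpoint state_dict
msd = {'w1': torch.zeros(2, 2), 'w2': torch.zeros(4)}  # model state_dict
kept = {k: v for k, v in csd.items() if k in msd and msd[k].shape == v.shape}
print(list(kept))  # ['w1'] -- shape-mismatched tensors are dropped before load_state_dict(strict=False)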
@@ -3,29 +3,27 @@
Common modules
"""

-import json
+import logging
import math
-import platform
import warnings
-from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path

-import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
-import yaml
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
-from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
-                           make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
-from utils.plots import Annotator, colors, save_one_box
-from utils.torch_utils import copy_attr, time_sync
+from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
+    scale_coords, xyxy2xywh
+from utils.plots import Annotator, colors
+from utils.torch_utils import time_sync
+
+LOGGER = logging.getLogger(__name__)


def autopad(k, p=None):  # kernel, padding
@@ -81,15 +79,15 @@ class TransformerBlock(nn.Module):
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
-        p = x.flatten(2).permute(2, 0, 1)
-        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
+        return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
@@ -115,8 +113,8 @@ class BottleneckCSP(nn.Module):
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
-        self.act = nn.SiLU()
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
@@ -132,7 +130,7 @@ class C3(nn.Module):
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
@@ -160,7 +158,7 @@ class C3Ghost(C3):
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
-        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
+        self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)])


class SPP(nn.Module):
@@ -275,218 +273,30 @@ class Concat(nn.Module):
        return torch.cat(x, self.d)
-class DetectMultiBackend(nn.Module):
-    # YOLOv5 MultiBackend class for python inference on various backends
-    def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
-        # Usage:
-        #   PyTorch:             weights = *.pt
-        #   TorchScript:                   *.torchscript
-        #   CoreML:                        *.mlmodel
-        #   OpenVINO:                      *.xml
-        #   TensorFlow:                    *_saved_model
-        #   TensorFlow:                    *.pb
-        #   TensorFlow Lite:               *.tflite
-        #   TensorFlow Edge TPU:           *_edgetpu.tflite
-        #   ONNX Runtime:                  *.onnx
-        #   OpenCV DNN:                    *.onnx with dnn=True
-        #   TensorRT:                      *.engine
-        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
-
-        super().__init__()
-        w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix = Path(w).suffix.lower()
-        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel', '.xml']
-        check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (suffix == x for x in suffixes)  # backends
-        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        w = attempt_download(w)  # download if not local
-        if data:  # data.yaml path (optional)
-            with open(data, errors='ignore') as f:
-                names = yaml.safe_load(f)['names']  # class names
-
-        if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
-            stride = max(int(model.stride.max()), 32)  # model stride
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
-        elif jit:  # TorchScript
-            LOGGER.info(f'Loading {w} for TorchScript inference...')
-            extra_files = {'config.txt': ''}  # model metadata
-            model = torch.jit.load(w, _extra_files=extra_files)
-            if extra_files['config.txt']:
-                d = json.loads(extra_files['config.txt'])  # extra_files dict
-                stride, names = int(d['stride']), d['names']
-        elif dnn:  # ONNX OpenCV DNN
-            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        elif onnx:  # ONNX Runtime
-            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            cuda = torch.cuda.is_available()
-            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
-            import onnxruntime
-            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
-            session = onnxruntime.InferenceSession(w, providers=providers)
-        elif xml:  # OpenVINO
-            LOGGER.info(f'Loading {w} for OpenVINO inference...')
-            check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
-            import openvino.inference_engine as ie
-            core = ie.IECore()
-            network = core.read_network(model=w, weights=Path(w).with_suffix('.bin'))  # *.xml, *.bin paths
-            executable_network = core.load_network(network, device_name='CPU', num_requests=1)
-        elif engine:  # TensorRT
-            LOGGER.info(f'Loading {w} for TensorRT inference...')
-            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
-            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
-            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
-            logger = trt.Logger(trt.Logger.INFO)
-            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
-                model = runtime.deserialize_cuda_engine(f.read())
-            bindings = OrderedDict()
-            for index in range(model.num_bindings):
-                name = model.get_binding_name(index)
-                dtype = trt.nptype(model.get_binding_dtype(index))
-                shape = tuple(model.get_binding_shape(index))
-                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
-                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
-            context = model.create_execution_context()
-            batch_size = bindings['images'].shape[0]
-        elif coreml:  # CoreML
-            LOGGER.info(f'Loading {w} for CoreML inference...')
-            import coremltools as ct
-            model = ct.models.MLModel(w)
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            if saved_model:  # SavedModel
-                LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
-                import tensorflow as tf
-                model = tf.keras.models.load_model(w)
-            elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-                LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
-                import tensorflow as tf
-
-                def wrap_frozen_graph(gd, inputs, outputs):
-                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
-                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-                graph_def = tf.Graph().as_graph_def()
-                graph_def.ParseFromString(open(w, 'rb').read())
-                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                if 'edgetpu' in w.lower():  # Edge TPU
-                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
-                    import tflite_runtime.interpreter as tfli  # install https://coral.ai/software/#edgetpu-runtime
-                    delegate = {'Linux': 'libedgetpu.so.1',
-                                'Darwin': 'libedgetpu.1.dylib',
-                                'Windows': 'edgetpu.dll'}[platform.system()]
-                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
-                else:  # Lite
-                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
-                    import tensorflow as tf
-                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-                interpreter.allocate_tensors()  # allocate
-                input_details = interpreter.get_input_details()  # inputs
-                output_details = interpreter.get_output_details()  # outputs
-        self.__dict__.update(locals())  # assign all variables to self
-
-    def forward(self, im, augment=False, visualize=False, val=False):
-        # YOLOv5 MultiBackend inference
-        b, ch, h, w = im.shape  # batch, channel, height, width
-        if self.pt or self.jit:  # PyTorch
-            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
-            return y if val else y[0]
-        elif self.dnn:  # ONNX OpenCV DNN
-            im = im.cpu().numpy()  # torch to numpy
-            self.net.setInput(im)
-            y = self.net.forward()
-        elif self.onnx:  # ONNX Runtime
-            im = im.cpu().numpy()  # torch to numpy
-            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
-        elif self.xml:  # OpenVINO
-            im = im.cpu().numpy()  # FP32
-            desc = self.ie.TensorDesc(precision='FP32', dims=im.shape, layout='NCHW')  # Tensor Description
-            request = self.executable_network.requests[0]  # inference request
-            request.set_blob(blob_name='images', blob=self.ie.Blob(desc, im))  # name=next(iter(request.input_blobs))
-            request.infer()
-            y = request.output_blobs['output'].buffer  # name=next(iter(request.output_blobs))
-        elif self.engine:  # TensorRT
-            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
-            self.binding_addrs['images'] = int(im.data_ptr())
-            self.context.execute_v2(list(self.binding_addrs.values()))
-            y = self.bindings['output'].data
-        elif self.coreml:  # CoreML
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
-            im = Image.fromarray((im[0] * 255).astype('uint8'))
-            # im = im.resize((192, 320), Image.ANTIALIAS)
-            y = self.model.predict({'image': im})  # coordinates are xywh normalized
-            if 'confidence' in y:
-                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
-                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
-                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
-            else:
-                y = y[list(y)[-1]]  # last output
-        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
-            if self.saved_model:  # SavedModel
-                y = self.model(im, training=False).numpy()
-            elif self.pb:  # GraphDef
-                y = self.frozen_func(x=self.tf.constant(im)).numpy()
-            elif self.tflite:  # Lite
-                input, output = self.input_details[0], self.output_details[0]
-                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
-                if int8:
-                    scale, zero_point = input['quantization']
-                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
-                self.interpreter.set_tensor(input['index'], im)
-                self.interpreter.invoke()
-                y = self.interpreter.get_tensor(output['index'])
-                if int8:
-                    scale, zero_point = output['quantization']
-                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= w  # x
-            y[..., 1] *= h  # y
-            y[..., 2] *= w  # w
-            y[..., 3] *= h  # h
-        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
-        return (y, []) if val else y
-
-    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
-        # Warmup model by running inference once
-        if self.pt or self.jit or self.onnx or self.engine:  # warmup types
-            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
-                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
-                self.forward(im)  # warmup
class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
-    agnostic = False  # NMS class-agnostic
+    classes = None  # (optional list) filter by class
    multi_label = False  # NMS multiple labels per box
-    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
-    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model):
        super().__init__()
-        LOGGER.info('Adding AutoShape... ')
-        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
-        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
-        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()

+    def autoshape(self):
+        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
+        return self
+
    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
-        if self.pt:
-            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
-            m.stride = fn(m.stride)
-            m.grid = list(map(fn, m.grid))
-            if isinstance(m.anchor_grid, list):
-                m.anchor_grid = list(map(fn, m.anchor_grid))
+        m = self.model.model[-1]  # Detect()
+        m.stride = fn(m.stride)
+        m.grid = list(map(fn, m.grid))
+        if isinstance(m.anchor_grid, list):
+            m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
@@ -501,10 +311,9 @@ class AutoShape(nn.Module):
        #   multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
-        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
-        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
+        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
-            with amp.autocast(enabled=autocast):
+            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
@@ -526,21 +335,21 @@ class AutoShape(nn.Module):
                g = (size / max(s))  # gain
                shape1.append([y * g for y in s])
                imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
-        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
-        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
+        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
+        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
+        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

-        with amp.autocast(enabled=autocast):
+        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
-            y = self.model(x, augment, profile)  # forward
+            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
-            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
-                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
+            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes,
+                                    multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])
@@ -550,15 +359,14 @@ class AutoShape(nn.Module):

class Detections:
    # YOLOv5 detections class for inference results
-    def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
+    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
-        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
+        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
-        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
@@ -638,11 +446,10 @@ class Detections:
    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
-        r = range(self.n)  # iterable
-        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
-        # for d in x:
-        #     for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
-        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
+        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
+        for d in x:
+            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
+                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
...
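The two TransformerBlock.forward variants in the common.py hunks above are equivalent reshapes, just spelled differently; a quick self-contained check (shapes chosen arbitrarily):

import torch

x = torch.randn(2, 8, 5, 7)  # (b, c2, w, h) as in TransformerBlock.forward
a = x.flatten(2).permute(2, 0, 1)                         # newer spelling
b = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)  # older spelling
print(torch.equal(a, b))  # True -- both yield the same (w*h, b, c2) token sequence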
@@ -2,7 +2,6 @@
 """
 Experimental modules
 """
-import math
 import numpy as np
 import torch
@@ -33,7 +32,7 @@ class Sum(nn.Module):
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
         if weight:
-            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights
+            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

     def forward(self, x):
         y = x[0]  # no weight
@@ -49,27 +48,26 @@ class Sum(nn.Module):
 class MixConv2d(nn.Module):
     # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
-    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):  # ch_in, ch_out, kernel, stride, ch_strategy
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
         super().__init__()
-        n = len(k)  # number of convolutions
+        groups = len(k)
         if equal_ch:  # equal c_ per group
-            i = torch.linspace(0, n - 1E-6, c2).floor()  # c2 indices
-            c_ = [(i == g).sum() for g in range(n)]  # intermediate channels
+            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
+            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
         else:  # equal weight.numel() per group
-            b = [c2] + [0] * n
-            a = np.eye(n + 1, n, k=-1)
+            b = [c2] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
             a -= np.roll(a, 1, axis=1)
             a *= np.array(k) ** 2
             a[0] = 1
             c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

-        self.m = nn.ModuleList(
-            [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
+        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU()
+        self.act = nn.LeakyReLU(0.1, inplace=True)

     def forward(self, x):
-        return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


 class Ensemble(nn.ModuleList):
@@ -99,6 +97,7 @@ def attempt_load(weights, map_location=None, inplace=True, fuse=True):
         else:
             model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval())  # without layer fuse

     # Compatibility updates
     for m in model.modules():
         if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
...
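For reference, the equal-channel branch of MixConv2d splits the c2 output channels evenly across the kernel sizes. A standalone sketch of that arithmetic (plain PyTorch, values chosen for illustration):

import torch

c2, k = 64, (1, 3)  # output channels and kernel sizes, as in the default signature
n = len(k)
i = torch.linspace(0, n - 1E-6, c2).floor()  # assign each output channel to a kernel group
c_ = [int((i == g).sum()) for g in range(n)]
print(c_)  # [32, 32]: half the outputs use 1x1 kernels, half use 3x3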
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 BiFPN head
+# YOLOv5 BiFPN head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -37,7 +37,7 @@ head:
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
-  [[-1, 14, 6], 1, Concat, [1]],  # cat P4 <--- BiFPN change
+  [[-1, 14, 6], 1, Concat, [1]],  # cat P4
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
   [-1, 1, Conv, [512, 3, 2]],
...
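The backbone edit above recurs in every config that follows: SPPF(1024, 5) on one side, SPP(1024, [5, 9, 13]) plus an extra C3 on the other. The two poolings are numerically identical, because chaining 5x5 stride-1 max-pools reproduces 9x9 and 13x13 windows. A quick check in plain PyTorch:

import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
p5, p9, p13 = (nn.MaxPool2d(k, stride=1, padding=k // 2) for k in (5, 9, 13))
y1 = p5(x)   # one 5x5 pool
y2 = p5(y1)  # two chained 5x5 pools == one 9x9 pool
y3 = p5(y2)  # three chained 5x5 pools == one 13x13 pool
assert torch.equal(y2, p9(x)) and torch.equal(y3, p13(x))
assert torch.equal(torch.cat([x, y1, y2, y3], 1),            # SPPF concatenation
                   torch.cat([x, p5(x), p9(x), p13(x)], 1))  # SPP concatenation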
@@ -9,34 +9,34 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-  [-1, 3, C3, [128]],
+  [-1, 3, Bottleneck, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-  [-1, 9, C3, [512]],
+  [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 6, BottleneckCSP, [1024]],  # 9
  ]

-# YOLOv5 v6.0 FPN head
+# YOLOv5 FPN head
 head:
-  [[-1, 3, C3, [1024, False]],  # 10 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-  [-1, 3, C3, [512, False]],  # 14 (P4/16-medium)
+  [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
-  [-1, 3, C3, [256, False]],  # 18 (P3/8-small)
+  [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
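The other recurring stem swap is Conv(64, 6, 2, 2) versus Focus(64, 3). A 6x6 stride-2 convolution consumes exactly the pixels that the Focus space-to-depth slicing gathers, so the two stems produce identically shaped outputs, and an exact weight mapping between them exists. A shape-level sketch:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 640, 640)
focus_in = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
                      x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)  # space-to-depth: (1, 12, 320, 320)
conv3 = nn.Conv2d(12, 64, 3, 1, 1, bias=False)  # the conv inside Focus(64, 3)
conv6 = nn.Conv2d(3, 64, 6, 2, 2, bias=False)   # the replacement stem Conv(64, 6, 2, 2)
print(conv3(focus_in).shape, conv6(x).shape)    # both torch.Size([1, 64, 320, 320])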
@@ -4,24 +4,24 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]
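A hedged smoke test for the two-output config above; the file path is an assumption about where it lives in this repo:

from models.yolo import Model

model = Model('models/hub/yolov5-p34.yaml')  # hypothetical path for the config above
detect = model.model[-1]
print(detect.nl, detect.stride.tolist())  # expect 2 output layers with strides [8.0, 16.0]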
@@ -4,26 +4,26 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [768]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 11
+  [-1, 1, SPP, [1024, [3, 5, 7]]],
+  [-1, 3, C3, [1024, False]],  # 11
  ]

-# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [768, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -50,7 +50,7 @@ head:
   [-1, 1, Conv, [768, 3, 2]],
   [[-1, 12], 1, Concat, [1]],  # cat head P6
-  [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+  [-1, 3, C3, [1024, False]],  # 32 (P5/64-xlarge)

   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
  ]
@@ -4,16 +4,16 @@
 nc: 80  # number of classes
 depth_multiple: 1.0  # model depth multiple
 width_multiple: 1.0  # layer channel multiple
-anchors: 3  # AutoAnchor evolves 3 anchors per P output layer
+anchors: 3

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
@@ -21,11 +21,11 @@ backbone:
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
   [-1, 3, C3, [1024]],
   [-1, 1, Conv, [1280, 3, 2]],  # 11-P7/128
-  [-1, 3, C3, [1280]],
-  [-1, 1, SPPF, [1280, 5]],  # 13
+  [-1, 1, SPP, [1280, [3, 5]]],
+  [-1, 3, C3, [1280, False]],  # 13
  ]

-# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [1024, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -9,40 +9,40 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-  [-1, 3, C3, [128]],
+  [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-  [-1, 9, C3, [512]],
+  [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 PANet head
+# YOLOv5 PANet head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-  [-1, 3, C3, [512, False]],  # 13
+  [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-  [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+  [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
-  [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+  [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
-  [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+  [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, GhostConv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3Ghost, [128]],
   [-1, 1, GhostConv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3Ghost, [256]],
+  [-1, 9, C3Ghost, [256]],
   [-1, 1, GhostConv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3Ghost, [512]],
   [-1, 1, GhostConv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3Ghost, [1024]],
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3Ghost, [1024, False]],  # 9
  ]

-# YOLOv5 v6.0 head
+# YOLOv5 head
 head:
   [[-1, 1, GhostConv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -9,22 +9,22 @@ anchors:
   - [30,61, 62,45, 59,119]  # P4/16
   - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 v6.0 backbone
+# YOLOv5 backbone
 backbone:
   # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-  [-1, 6, C3, [256]],
+  [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-  [-1, 3, C3TR, [1024]],  # 9 <--- C3TR() Transformer module
-  [-1, 1, SPPF, [1024, 5]],  # 9
+  [-1, 1, SPP, [1024, [5, 9, 13]]],
+  [-1, 3, C3TR, [1024, False]],  # 9 <-------- C3TR() Transformer module
  ]

-# YOLOv5 v6.0 head
+# YOLOv5 head
 head:
   [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
...
@@ -11,6 +11,7 @@ Export:
 """

 import argparse
+import logging
 import sys
 from copy import deepcopy
 from pathlib import Path
@@ -27,17 +28,19 @@
 import torch.nn as nn
 from tensorflow import keras

-from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
+from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3
 from models.experimental import CrossConv, MixConv2d, attempt_load
 from models.yolo import Detect
+from utils.general import make_divisible, print_args, set_logging
 from utils.activations import SiLU
-from utils.general import LOGGER, make_divisible, print_args
+
+LOGGER = logging.getLogger(__name__)


 class TFBN(keras.layers.Layer):
     # TensorFlow BatchNormalization wrapper
     def __init__(self, w=None):
-        super().__init__()
+        super(TFBN, self).__init__()
         self.bn = keras.layers.BatchNormalization(
             beta_initializer=keras.initializers.Constant(w.bias.numpy()),
             gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
@@ -51,7 +54,7 @@ class TFBN(keras.layers.Layer):

 class TFPad(keras.layers.Layer):
     def __init__(self, pad):
-        super().__init__()
+        super(TFPad, self).__init__()
         self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])

     def call(self, inputs):
@@ -62,7 +65,7 @@ class TFConv(keras.layers.Layer):
     # Standard convolution
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         # ch_in, ch_out, weights, kernel, stride, padding, groups
-        super().__init__()
+        super(TFConv, self).__init__()
         assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
         assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
         # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
@@ -93,11 +96,11 @@ class TFFocus(keras.layers.Layer):
     # Focus wh information into c-space
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         # ch_in, ch_out, kernel, stride, padding, groups
-        super().__init__()
+        super(TFFocus, self).__init__()
         self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

     def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-        # inputs = inputs / 255  # normalize 0-255 to 0-1
+        # inputs = inputs / 255.  # normalize 0-255 to 0-1
         return self.conv(tf.concat([inputs[:, ::2, ::2, :],
                                     inputs[:, 1::2, ::2, :],
                                     inputs[:, ::2, 1::2, :],
@@ -107,7 +110,7 @@ class TFFocus(keras.layers.Layer):
 class TFBottleneck(keras.layers.Layer):
     # Standard bottleneck
     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
-        super().__init__()
+        super(TFBottleneck, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
@@ -120,7 +123,7 @@ class TFBottleneck(keras.layers.Layer):
 class TFConv2d(keras.layers.Layer):
     # Substitution for PyTorch nn.Conv2D
     def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
-        super().__init__()
+        super(TFConv2d, self).__init__()
         assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
         self.conv = keras.layers.Conv2D(
             c2, k, s, 'VALID', use_bias=bias,
@@ -135,7 +138,7 @@ class TFBottleneckCSP(keras.layers.Layer):
     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         # ch_in, ch_out, number, shortcut, groups, expansion
-        super().__init__()
+        super(TFBottleneckCSP, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
@@ -155,7 +158,7 @@ class TFC3(keras.layers.Layer):
     # CSP Bottleneck with 3 convolutions
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         # ch_in, ch_out, number, shortcut, groups, expansion
-        super().__init__()
+        super(TFC3, self).__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
@@ -169,7 +172,7 @@ class TFC3(keras.layers.Layer):
 class TFSPP(keras.layers.Layer):
     # Spatial pyramid pooling layer used in YOLOv3-SPP
     def __init__(self, c1, c2, k=(5, 9, 13), w=None):
-        super().__init__()
+        super(TFSPP, self).__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
         self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
@@ -180,25 +183,9 @@ class TFSPP(keras.layers.Layer):
         return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))


-class TFSPPF(keras.layers.Layer):
-    # Spatial pyramid pooling-Fast layer
-    def __init__(self, c1, c2, k=5, w=None):
-        super().__init__()
-        c_ = c1 // 2  # hidden channels
-        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
-        self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
-        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
-
-    def call(self, inputs):
-        x = self.cv1(inputs)
-        y1 = self.m(x)
-        y2 = self.m(y1)
-        return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
-
-
 class TFDetect(keras.layers.Layer):
     def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
-        super().__init__()
+        super(TFDetect, self).__init__()
         self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
         self.nc = nc  # number of classes
         self.no = nc + 5  # number of outputs per anchor
@@ -226,13 +213,13 @@ class TFDetect(keras.layers.Layer):
             if not self.training:  # inference
                 y = tf.sigmoid(x[i])
-                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                 wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
                 # Normalize xywh to 0-1 to reduce calibration error
                 xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                 y = tf.concat([xy, wh, y[..., 4:]], -1)
-                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
+                z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no]))

         return x if self.training else (tf.concat(z, 1), x)
@@ -246,7 +233,7 @@ class TFDetect(keras.layers.Layer):

 class TFUpsample(keras.layers.Layer):
     def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
-        super().__init__()
+        super(TFUpsample, self).__init__()
         assert scale_factor == 2, "scale_factor must be 2"
         self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode)
         # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
@@ -260,7 +247,7 @@ class TFUpsample(keras.layers.Layer):

 class TFConcat(keras.layers.Layer):
     def __init__(self, dimension=1, w=None):
-        super().__init__()
+        super(TFConcat, self).__init__()
         assert dimension == 1, "convert only NCHW to NHWC concat"
         self.d = 3
@@ -269,7 +256,7 @@ class TFConcat(keras.layers.Layer):

 def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
-    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
+    LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -285,7 +272,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
             pass

         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
-        if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
+        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
             c1, c2 = ch[f], args[0]
             c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
@@ -296,7 +283,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+            c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
         elif m is Detect:
             args.append([ch[x + 1] for x in f])
             if isinstance(args[1], int):  # number of anchors
@@ -309,11 +296,11 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
         m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
             else tf_m(*args, w=model.model[i])  # module
-        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
-        np = sum(x.numel() for x in torch_m_.parameters())  # number params
+        np = sum([x.numel() for x in torch_m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}')  # print
+        LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         ch.append(c2)
@@ -322,7 +309,7 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)

 class TFModel:
     def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
-        super().__init__()
+        super(TFModel, self).__init__()
         if isinstance(cfg, dict):
             self.yaml = cfg  # model dict
         else:  # is *.yaml
@@ -333,7 +320,7 @@ class TFModel:
         # Define model
         if nc and nc != self.yaml['nc']:
-            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
+            print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
             self.yaml['nc'] = nc  # override yaml value
         self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
@@ -410,10 +397,10 @@ class AgnosticNMS(keras.layers.Layer):

 def representative_dataset_gen(dataset, ncalib=100):
     # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
-    for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
+    for n, (path, img, im0s, vid_cap) in enumerate(dataset):
         input = np.transpose(img, [1, 2, 0])
         input = np.expand_dims(input, axis=0).astype(np.float32)
-        input /= 255
+        input /= 255.0
         yield [input]
         if n >= ncalib:
             break
@@ -440,8 +427,6 @@ def run(weights=ROOT / 'yolov5s.pt',  # weights path
     keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
     keras_model.summary()

-    LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')

 def parse_opt():
     parser = argparse.ArgumentParser()
@@ -456,6 +441,7 @@ def parse_opt():

 def main(opt):
+    set_logging()
     run(**vars(opt))
...
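The TFConv comment above notes that TensorFlow 'SAME' padding diverges from PyTorch's symmetric padding for k=3, s=2, which is why these wrappers pad explicitly (TFPad) and then convolve with 'VALID'. A sketch demonstrating the divergence, assuming tensorflow and torch are both installed:

import numpy as np
import tensorflow as tf
import torch

x = np.random.rand(1, 8, 8, 1).astype(np.float32)  # NHWC input for TF
w = np.random.rand(3, 3, 1, 1).astype(np.float32)  # HWIO 3x3 kernel

same = tf.nn.conv2d(x, w, strides=2, padding='SAME')         # pads bottom/right only
padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])         # what TFPad(1) does
valid = tf.nn.conv2d(padded, w, strides=2, padding='VALID')  # symmetric, PyTorch-style

ref = torch.conv2d(torch.from_numpy(x.transpose(0, 3, 1, 2)),  # NCHW input for PyTorch
                   torch.from_numpy(w.transpose(3, 2, 0, 1)), stride=2, padding=1)
print(np.allclose(valid.numpy().transpose(0, 3, 1, 2), ref.numpy(), atol=1e-5))  # True
print(np.allclose(same.numpy().transpose(0, 3, 1, 2), ref.numpy(), atol=1e-5))   # False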
@@ -20,15 +20,18 @@ if str(ROOT) not in sys.path:
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
-from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
+from utils.general import check_yaml, make_divisible, print_args, set_logging
 from utils.plots import feature_visualization
-from utils.torch_utils import fuse_conv_and_bn, initialize_weights, model_info, scale_img, select_device, time_sync
+from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
+    select_device, time_sync

 try:
     import thop  # for FLOPs computation
 except ImportError:
     thop = None

+LOGGER = logging.getLogger(__name__)

 class Detect(nn.Module):
     stride = None  # strides computed during build
@@ -54,15 +57,15 @@ class Detect(nn.Module):
             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

             if not self.training:  # inference
-                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
                     self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                 y = x[i].sigmoid()
                 if self.inplace:
-                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy, wh, y[..., 4:]), -1)
                 z.append(y.view(bs, -1, self.no))
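Both decode branches above implement the same mapping: xy = (2*sigmoid - 0.5 + grid) * stride and wh = (2*sigmoid)^2 * anchor, which bounds box centers to half a cell beyond their grid cell and sizes to at most 4x the anchor. A worked one-cell example in plain arithmetic:

import torch

stride = 8.0                               # P3 feature map
anchor = torch.tensor([10.0, 13.0])        # one P3 anchor, in pixels
cx, cy = 5, 7                              # grid cell indices
raw = torch.tensor([0.2, -0.1, 0.3, 0.4])  # network outputs for x, y, w, h
s = raw.sigmoid()

xy = (s[:2] * 2 - 0.5 + torch.tensor([cx, cy])) * stride  # center in input pixels
wh = (s[2:] * 2) ** 2 * anchor                            # size, capped at 4x the anchor
print(xy.tolist(), wh.tolist())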
@@ -71,10 +74,7 @@ class Detect(nn.Module):
     def _make_grid(self, nx=20, ny=20, i=0):
         d = self.anchors[i].device
-        if check_version(torch.__version__, '1.10.0'):  # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
-            yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)], indexing='ij')
-        else:
-            yv, xv = torch.meshgrid([torch.arange(ny, device=d), torch.arange(nx, device=d)])
+        yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
         grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float()
         anchor_grid = (self.anchors[i].clone() * self.stride[i]) \
             .view((1, self.na, 1, 1, 2)).expand((1, self.na, ny, nx, 2)).float()
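The version guard removed above exists because torch 1.10 added the indexing keyword to meshgrid (and later versions warn without it), while older builds reject it. A self-contained sketch of the same pattern; the crude version parse stands in for the repo's check_version():

import torch

def make_grid_xy(nx=20, ny=20, device='cpu'):
    major, minor = (int(v) for v in torch.__version__.split('.')[:2])
    axes = [torch.arange(ny, device=device), torch.arange(nx, device=device)]
    if (major, minor) >= (1, 10):                      # 1.10 added the indexing kwarg
        yv, xv = torch.meshgrid(*axes, indexing='ij')  # explicit 'ij' silences the warning
    else:
        yv, xv = torch.meshgrid(*axes)                 # older default already behaves as 'ij'
    return torch.stack((xv, yv), 2)

print(make_grid_xy(3, 2).shape)  # torch.Size([2, 3, 2])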
@@ -89,7 +89,7 @@ class Model(nn.Module):
         else:  # is *.yaml
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
-            with open(cfg, encoding='ascii', errors='ignore') as f:
+            with open(cfg, errors='ignore') as f:
                 self.yaml = yaml.safe_load(f)  # model dict

         # Define model
@@ -200,7 +200,7 @@ class Model(nn.Module):
         for mi, s in zip(m.m, m.stride):  # from
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # cls
+            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

     def _print_biases(self):
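The bias initialization above seeds the detection head with priors: roughly 8 objects per 640x640 image for objectness, and about 0.6/(nc - 1) per class. Worked numbers for a stride-8 head and 80 classes:

import math

s, nc = 8, 80
obj = math.log(8 / (640 / s) ** 2)  # 8 objects spread over (640/8)^2 = 6400 cells
cls = math.log(0.6 / (nc - 0.99))   # ~0.6/79 per-class prior (0.999999 in the other variant)
print(round(obj, 3), round(cls, 3))  # -6.685 -4.88
print(round(1 / (1 + math.exp(-obj)), 5))  # sigmoid(obj) = 0.00125 = 8 / 6400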
@@ -225,6 +225,12 @@ class Model(nn.Module):
         self.info()
         return self

+    def autoshape(self):  # add AutoShape module
+        LOGGER.info('Adding AutoShape... ')
+        m = AutoShape(self)  # wrap model
+        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
+        return m
+
     def info(self, verbose=False, img_size=640):  # print model information
         model_info(self, verbose, img_size)
@@ -241,7 +247,7 @@ class Model(nn.Module):

 def parse_model(d, ch):  # model_dict, input_channels(3)
-    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
+    LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -269,7 +275,7 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum(ch[x] for x in f)
+            c2 = sum([ch[x] for x in f])
         elif m is Detect:
             args.append([ch[x] for x in f])
             if isinstance(args[1], int):  # number of anchors
@@ -281,11 +287,11 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         else:
             c2 = ch[f]

-        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
-        np = sum(x.numel() for x in m_.parameters())  # number params
+        np = sum([x.numel() for x in m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print
+        LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         if i == 0:
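parse_model() scales module repeats by depth_multiple and channels by width_multiple, rounding channels up to a multiple of 8 via make_divisible. Worked numbers for a yolov5s-style model with gd=0.33 and gw=0.50:

import math

def make_divisible(x, divisor):  # same rounding helper the repo uses
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50  # yolov5s depth_multiple, width_multiple
n, c2 = 9, 512       # a "[-1, 9, C3, [512]]" entry
n = max(round(n * gd), 1) if n > 1 else n  # 9 repeats -> 3
c2 = make_divisible(c2 * gw, 8)            # 512 channels -> 256
print(n, c2)  # 3 256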
@@ -299,10 +305,10 @@ if __name__ == '__main__':
     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--profile', action='store_true', help='profile model speed')
-    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
     opt = parser.parse_args()
     opt.cfg = check_yaml(opt.cfg)  # check YAML
     print_args(FILE.stem, opt)
+    set_logging()
     device = select_device(opt.device)

     # Create model
@@ -314,14 +320,6 @@ if __name__ == '__main__':
         img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
         y = model(img, profile=True)

-    # Test all models
-    if opt.test:
-        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
-            try:
-                _ = Model(cfg)
-            except Exception as e:
-                print(f'Error in {cfg}: {e}')
-
     # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
     # from torch.utils.tensorboard import SummaryWriter
     # tb_writer = SummaryWriter('.')
...
@@ -27,7 +27,6 @@ seaborn>=0.11.0
 # scikit-learn==0.19.2  # CoreML quantization
 # tensorflow>=2.4.1  # TFLite export
 # tensorflowjs>=3.9.0  # TF.js export
-# openvino-dev  # OpenVINO export

 # Extras --------------------------------------
 # albumentations>=1.0.3
...
# Project-wide configuration file, can be used for package metadata and other tool configurations
# Example usage: global configuration for PEP8 (via flake8) settings or default pytest arguments
[metadata]
license_file = LICENSE
description-file = README.md
[tool:pytest]
norecursedirs =
.git
dist
build
addopts =
--doctest-modules
--durations=25
--color=yes
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore =
E731 # Do not assign a lambda expression, use a def
F405
E402
F841
E741
F821
E722
F401
W504
E127
E231
E501
F403
E302
F541
[isort]
# https://pycqa.github.io/isort/docs/configuration/options.html
line_length = 120
multi_line_output = 0
#!/bin/bash
export MIOPEN_DEBUG_DISABLE_FIND_DB=1
export NCCL_SOCKET_IFNAME=ib0
export HSA_USERPTR_FOR_PAGED_MEM=0
module rm compiler/dtk/21.10
module load compiler/dtk/22.04.2
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE
echo $lrank
echo $comm_rank
echo $comm_size
APP="python3 `pwd`/train_multi.py --batch 128 --dist-url tcp://${1}:34567 --dist-backend nccl --world-size=${comm_size} --rank=${comm_rank} --local_rank=${lrank} --data coco.yaml --weight yolov5m.pt --project yolov5m/train --hyp data/hyps/hyp.scratch-high.yaml --cfg yolov5m.yaml --epochs 5000"
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_0:1
export UCX_IB_PCI_BW=mlx5_0:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#echo GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
#GLOO_SOCKET_IFNAME=ib0 numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_1:1
export UCX_IB_PCI_BW=mlx5_1:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=1 --membind=1 ${APP}
;;
[2])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_2:1
export UCX_IB_PCI_BW=mlx5_2:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=2 --membind=2 ${APP}
;;
[3])
export HIP_VISIBLE_DEVICES=0,1,2,3
export UCX_NET_DEVICES=mlx5_3:1
export UCX_IB_PCI_BW=mlx5_3:50Gbs
echo NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
NCCL_SOCKET_IFNAME=ib0 numactl --cpunodebind=3 --membind=3 ${APP}
;;
esac
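The launcher above expects to be started under OpenMPI, which provides the OMPI_COMM_WORLD_* variables; each of the four local ranks is pinned to one NUMA node and one InfiniBand HCA, and the first node's IP is passed as $1 for the tcp:// rendezvous. A two-node, eight-rank run would look roughly like `mpirun -np 8 -npernode 4 --hostfile hosts bash <this script> <node0_ip>` (the exact flags are an assumption about the site's MPI setup). Since train_multi.py is not part of this diff, here is a hypothetical sketch of the process-group initialization it presumably performs with the flags shown above:

import argparse
import torch
import torch.distributed as dist

parser = argparse.ArgumentParser()
parser.add_argument('--dist-url', default='tcp://127.0.0.1:34567')
parser.add_argument('--dist-backend', default='nccl')
parser.add_argument('--world-size', type=int, default=1)
parser.add_argument('--rank', type=int, default=0)
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()

dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                        world_size=args.world_size, rank=args.rank)  # rendezvous at node 0
torch.cuda.set_device(args.local_rank)  # on ROCm/DTK builds this selects a HIP device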
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
-Train a YOLOv5 model on a custom dataset.
-
-Models and datasets download automatically from the latest YOLOv5 release.
-Models: https://github.com/ultralytics/yolov5/tree/master/models
-Datasets: https://github.com/ultralytics/yolov5/tree/master/data
-Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+Train a YOLOv5 model on a custom dataset

 Usage:
-    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (RECOMMENDED)
-    $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
+    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
 """

 import argparse
+import logging
 import math
 import os
 import random
 import sys
 import time
 from copy import deepcopy
-from datetime import datetime
 from pathlib import Path

 import numpy as np
@@ -29,7 +23,7 @@
 import yaml
 from torch.cuda import amp
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.optim import SGD, Adam, AdamW, lr_scheduler
+from torch.optim import Adam, SGD, lr_scheduler
 from tqdm import tqdm

 FILE = Path(__file__).resolve()
@@ -42,21 +36,21 @@ import val  # for end-of-epoch mAP
 from models.experimental import attempt_load
 from models.yolo import Model
 from utils.autoanchor import check_anchors
-from utils.autobatch import check_train_batch_size
-from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
+from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
+    strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
+    check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
 from utils.downloads import attempt_download
-from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
-                           check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
-                           intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
-                           print_args, print_mutation, strip_optimizer)
-from utils.loggers import Loggers
-from utils.loggers.wandb.wandb_utils import check_wandb_resume
 from utils.loss import ComputeLoss
+from utils.plots import plot_labels, plot_evolve
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
+    torch_distributed_zero_first
+from utils.loggers.wandb.wandb_utils import check_wandb_resume
 from utils.metrics import fitness
-from utils.plots import plot_evolve, plot_labels
-from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
+from utils.loggers import Loggers
+from utils.callbacks import Callbacks

+LOGGER = logging.getLogger(__name__)
 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
 RANK = int(os.getenv('RANK', -1))
 WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
@@ -67,7 +61,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
           device,
           callbacks
           ):
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
+    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
         opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
@@ -83,14 +77,13 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))

     # Save run settings
-    if not evolve:
-        with open(save_dir / 'hyp.yaml', 'w') as f:
-            yaml.safe_dump(hyp, f, sort_keys=False)
-        with open(save_dir / 'opt.yaml', 'w') as f:
-            yaml.safe_dump(vars(opt), f, sort_keys=False)
+    with open(save_dir / 'hyp.yaml', 'w') as f:
+        yaml.safe_dump(hyp, f, sort_keys=False)
+    with open(save_dir / 'opt.yaml', 'w') as f:
+        yaml.safe_dump(vars(opt), f, sort_keys=False)
+    data_dict = None

     # Loggers
-    data_dict = None
     if RANK in [-1, 0]:
         loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
         if loggers.wandb:
@@ -112,7 +105,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
     assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
-    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
+    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset

     # Model
     check_suffix(weights, '.pt')  # check weights
@@ -131,22 +124,13 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

     # Freeze
-    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
+    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True  # train all layers
         if any(x in k for x in freeze):
-            LOGGER.info(f'freezing {k}')
+            print(f'freezing {k}')
             v.requires_grad = False

-    # Image size
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
-
-    # Batch size
-    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
-        batch_size = check_train_batch_size(model, imgsz)
-        loggers.on_params_update({"batch_size": batch_size})
-
     # Optimizer
     nbs = 64  # nominal batch size
     accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
@@ -162,10 +146,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
             g1.append(v.weight)

-    if opt.optimizer == 'Adam':
+    if opt.adam:
         optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
-    elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
         optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
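Context for the hunk above: both sides build three parameter groups before choosing an optimizer, BatchNorm scales without weight decay (g0), convolution weights with decay (g1), and biases (g2). A condensed, plain-PyTorch sketch of that grouping:

import torch.nn as nn

def param_groups(model):
    g0, g1, g2 = [], [], []  # BN weights / weights with decay / biases
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # no weight decay on BN scales
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            g1.append(v.weight)
    return g0, g1, g2

m = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Conv2d(8, 8, 1))
print([len(g) for g in param_groups(m)])  # [1, 2, 3]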
@@ -208,10 +190,15 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     del ckpt, csd

+    # Image sizes
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
+    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
+
     # DP mode
     if cuda and RANK == -1 and torch.cuda.device_count() > 1:
-        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
-                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
+        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
+                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
         model = torch.nn.DataParallel(model)

     # SyncBatchNorm
@@ -223,7 +210,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                               hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
                                               workers=workers, image_weights=opt.image_weights, quad=opt.quad,
-                                              prefix=colorstr('train: '), shuffle=True)
+                                              prefix=colorstr('train: '))
     mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
     nb = len(train_loader)  # number of batches
     assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
@@ -254,11 +241,10 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     if cuda and RANK != -1:
         model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

-    # Model attributes
-    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-    hyp['box'] *= 3 / nl  # scale to layers
-    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
+    # Model parameters
+    hyp['box'] *= 3. / nl  # scale to layers
+    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
...@@ -277,7 +263,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -277,7 +263,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
stopper = EarlyStopping(patience=opt.patience) stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class compute_loss = ComputeLoss(model) # init loss class
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f'Using {train_loader.num_workers} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n" f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...') f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
...@@ -299,11 +285,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -299,11 +285,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
pbar = enumerate(train_loader) pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size')) LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if RANK in [-1, 0]: if RANK in [-1, 0]:
pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad() optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start) ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup # Warmup
if ni <= nw: if ni <= nw:
...@@ -390,8 +376,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -390,8 +376,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
'ema': deepcopy(ema.ema).half(), 'ema': deepcopy(ema.ema).half(),
'updates': ema.updates, 'updates': ema.updates,
'optimizer': optimizer.state_dict(), 'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
'date': datetime.now().isoformat()}
# Save last, best and delete # Save last, best and delete
torch.save(ckpt, last) torch.save(ckpt, last)
...@@ -438,10 +423,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -438,10 +423,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
plots=True, plots=True,
callbacks=callbacks, callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots compute_loss=compute_loss) # val best model with plots
if is_coco:
callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
callbacks.run('on_train_end', last, best, plots, epoch, results) callbacks.run('on_train_end', last, best, plots, epoch)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache() torch.cuda.empty_cache()
...@@ -455,13 +438,13 @@ def parse_opt(known=False): ...@@ -455,13 +438,13 @@ def parse_opt(known=False):
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch') parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
...@@ -469,9 +452,9 @@ def parse_opt(known=False): ...@@ -469,9 +452,9 @@ def parse_opt(known=False):
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
...@@ -479,13 +462,13 @@ def parse_opt(known=False): ...@@ -479,13 +462,13 @@ def parse_opt(known=False):
parser.add_argument('--linear-lr', action='store_true', help='linear LR') parser.add_argument('--linear-lr', action='store_true', help='linear LR')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
parser.add_argument('--save-period', type=int, default=1, help='Save checkpoint every x epochs (disabled if < 1)') parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments # Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity') parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
...@@ -495,6 +478,7 @@ def parse_opt(known=False): ...@@ -495,6 +478,7 @@ def parse_opt(known=False):
def main(opt, callbacks=Callbacks()): def main(opt, callbacks=Callbacks()):
# Checks # Checks
set_logging(RANK)
if RANK in [-1, 0]: if RANK in [-1, 0]:
print_args(FILE.stem, opt) print_args(FILE.stem, opt)
check_git_status() check_git_status()
...@@ -618,9 +602,9 @@ def main(opt, callbacks=Callbacks()): ...@@ -618,9 +602,9 @@ def main(opt, callbacks=Callbacks()):
# Plot results # Plot results
plot_evolve(evolve_csv) plot_evolve(evolve_csv)
LOGGER.info(f'Hyperparameter evolution finished\n' print(f'Hyperparameter evolution finished\n'
f"Results saved to {colorstr('bold', save_dir)}\n" f"Results saved to {colorstr('bold', save_dir)}\n"
f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}') f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs): def run(**kwargs):
......
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 model on a custom dataset
Usage:
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
"""
import argparse
import logging
import math
import os
import random
import sys
import time
from copy import deepcopy
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import Adam, SGD, lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import val # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.datasets import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
from utils.downloads import attempt_download
from utils.loss import ComputeLoss
from utils.plots import plot_labels, plot_evolve
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
torch_distributed_zero_first
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.metrics import fitness
from utils.loggers import Loggers
from utils.callbacks import Callbacks
LOGGER = logging.getLogger(__name__)
# LOCAL_RANK is assigned from the --local_rank argument in the __main__ block below; RANK and
# WORLD_SIZE are likewise taken from the --rank and --world-size arguments (opt.rank, opt.world_size)
# instead of the torch.distributed.run environment variables (https://pytorch.org/docs/stable/elastic/run.html)
print('torch.cuda.device_count():', torch.cuda.device_count())
def train(hyp, # path/to/hyp.yaml or hyp dictionary
opt,
device,
callbacks
):
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
# Directories
w = save_dir / 'weights' # weights dir
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
last, best = w / 'last.pt', w / 'best.pt'
# Hyperparameters
if isinstance(hyp, str):
with open(hyp, errors='ignore') as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
# Save run settings
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(opt), f, sort_keys=False)
data_dict = None
# Loggers
if opt.rank in [-1, 0]:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance
if loggers.wandb:
data_dict = loggers.wandb.data_dict
if resume:
weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp
# Register actions
for k in methods(loggers):
callbacks.register_action(k, callback=getattr(loggers, k))
# Config
plots = not evolve # create plots
cuda = device.type != 'cpu'
init_seeds(1 + opt.rank)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data) # check if None
train_path, val_path = data_dict['train'], data_dict['val']
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
# Model
check_suffix(weights, '.pt') # check weights
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=device) # load checkpoint
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys
csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
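# intersect_dicts() keeps only checkpoint tensors whose key and shape match the freshly built model,
# so pretrained weights transfer even when nc or the anchor configuration differs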
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
# Freeze
freeze = [f'model.{x}.' for x in range(freeze)] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
print(f'freezing {k}')
v.requires_grad = False
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
g0, g1, g2 = [], [], [] # optimizer parameter groups
for v in model.modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias
g2.append(v.bias)
if isinstance(v, nn.BatchNorm2d): # weight (no decay)
g0.append(v.weight)
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
g1.append(v.weight)
if opt.adam:
optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']}) # add g1 with weight_decay
optimizer.add_param_group({'params': g2}) # add g2 (biases)
LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
f"{len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias")
del g0, g1, g2
# Scheduler
if opt.linear_lr:
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
else:
lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
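# one_cycle(1, hyp['lrf'], epochs) in utils.general is a cosine ramp from 1 to lrf:
# lf(x) = ((1 - cos(x * pi / epochs)) / 2) * (lrf - 1) + 1, so lr decays from lr0 to lr0 * lrf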
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
# EMA
ema = ModelEMA(model) if opt.rank in [-1, 0] else None
# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# EMA
if ema and ckpt.get('ema'):
ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
ema.updates = ckpt['updates']
# Epochs
start_epoch = ckpt['epoch'] + 1
if resume:
assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
if epochs < start_epoch:
LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt, csd
# Image sizes
gs = max(int(model.stride.max()), 32) # grid size (max stride)
nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj'])
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# DP mode
if cuda and opt.rank == -1 and torch.cuda.device_count() > 1:
logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and opt.rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info('Using SyncBatchNorm()')
# Trainloader
train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // opt.world_size, gs, single_cls,
hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
workers=workers, image_weights=opt.image_weights, quad=opt.quad,
prefix=colorstr('train: '))
mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class
nb = len(train_loader) # number of batches
assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
# Process 0
if opt.rank in [-1, 0]:
val_loader = create_dataloader(val_path, imgsz, batch_size // opt.world_size * 2, gs, single_cls,
hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
workers=workers, pad=0.5,
prefix=colorstr('val: '))[0]
if not resume:
labels = np.concatenate(dataset.labels, 0)
# c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
if plots:
plot_labels(labels, names, save_dir)
# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
model.half().float() # pre-reduce anchor precision
callbacks.run('on_pretrain_routine_end')
# DDP mode
if cuda and opt.rank != -1:
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
# Model parameters
hyp['box'] *= 3. / nl # scale to layers
hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
hyp['label_smoothing'] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model) # init loss class
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
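# images containing rare or poorly-predicted classes are sampled more often in the next epoch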
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(3, device=device) # mean losses
if opt.rank != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if opt.rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
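# 'accumulate' ramps from 1 towards nbs / batch_size during warmup, so the effective batch size
# approaches the nominal nbs = 64 regardless of the per-step --batch-size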
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
if opt.rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
if opt.quad:
loss *= 4.
# Backward
scaler.scale(loss).backward()
# Optimize
if ni - last_opt_step >= accumulate:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
# Log
if opt.rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn)
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for loggers
scheduler.step()
if opt.rank in [-1, 0]:
# mAP
callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = val.run(data_dict,
batch_size=batch_size // opt.world_size * 2,
imgsz=imgsz,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
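# fitness() in utils.metrics weights [P, R, mAP@.5, mAP@.5:.95] as [0.0, 0.0, 0.1, 0.9],
# so checkpoint selection is driven almost entirely by mAP@.5:.95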
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
# Save model
if (not nosave) or (final_epoch and not evolve): # if save
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'model': deepcopy(de_parallel(model)).half(),
'ema': deepcopy(ema.ema).half(),
'updates': ema.updates,
'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
torch.save(ckpt, w / f'epoch{epoch}.pt')
del ckpt
callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
# Stop Single-GPU
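# stopper(epoch, fitness) returns True once 'patience' epochs pass without best_fitness improving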
if opt.rank == -1 and stopper(epoch=epoch, fitness=fi):
break
# Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576
# stop = stopper(epoch=epoch, fitness=fi)
# if RANK == 0:
# dist.broadcast_object_list([stop], 0) # broadcast 'stop' to all ranks
# Stop DDP
# with torch_distributed_zero_first(RANK):
# if stop:
# break # must break all DDP ranks
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if opt.rank in [-1, 0]:
LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is best:
LOGGER.info(f'\nValidating {f}...')
results, _, _ = val.run(data_dict,
batch_size=batch_size // opt.world_size * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=True,
callbacks=callbacks,
compute_loss=compute_loss) # val best model with plots
callbacks.run('on_train_end', last, best, plots, epoch)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--linear-lr', action='store_true', help='linear LR')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. backbone=10, all=24')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
# Weights & Biases arguments
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
parser.add_argument('--rank', default=-1, type=int, help='global rank of this process for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
parser.add_argument('--world-size', default=1, type=int, help='total number of distributed processes; batch_size is divided by this, so it must be >= 1')
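# NOTE (illustrative, not from this commit): with an explicit init_method/rank/world_size (see
# main() below), every process must be launched with a unique --rank in [0, world_size); under
# mpirun the per-process rank is typically derived from OMPI_COMM_WORLD_RANK and the process
# count from OMPI_COMM_WORLD_SIZE. The exact launch commands are documented in this repo's
# updated README.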
opt = parser.parse_known_args()[0] if known else parser.parse_args()
return opt
def main(opt, callbacks=Callbacks()):
# Checks
set_logging(opt.rank)
if opt.rank in [-1, 0]:
print_args(FILE.stem, opt)
check_git_status()
check_requirements(exclude=['thop'])
# Resume
if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
opt = argparse.Namespace(**yaml.safe_load(f)) # replace
opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate
LOGGER.info(f'Resuming training from {ckpt}')
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
if opt.evolve:
opt.project = str(ROOT / 'runs/evolve')
opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
assert not opt.evolve, '--evolve argument is not compatible with DDP training'
torch.cuda.set_device(LOCAL_RANK)
device = torch.device('cuda', LOCAL_RANK)
# dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo",
                        init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank)
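# init_process_group() blocks here until all world_size ranks have connected to dist_url,
# so training only begins once every participating process has been started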
# Train
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
if opt.world_size > 1 and opt.rank == 0:
LOGGER.info('Destroying process group... ')
dist.destroy_process_group()
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
'box': (1, 0.02, 0.2), # box loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
'mixup': (1, 0.0, 1.0), # image mixup (probability)
'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability)
with open(opt.hyp, errors='ignore') as f:
hyp = yaml.safe_load(f) # load hyps dict
if 'anchors' not in hyp: # anchors commented in hyp.yaml
hyp['anchors'] = 3
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
if opt.bucket:
os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}') # download evolve.csv if exists
for _ in range(opt.evolve): # generations to evolve
if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
# Select parent(s)
parent = 'single' # parent selection method: 'single' or 'weighted'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0)
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
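# each gene mutates with probability mp (masked by its gain g); the multiplicative factor is
# 1 plus gain-scaled Gaussian noise with sigma s, clipped to 0.3x-3.0x of the parent value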
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
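# the i + 7 offset skips the 7 result columns (P, R, mAP@.5, mAP@.5:.95 and val box/obj/cls
# losses) stored before the hyperparameter values in each evolve.csv row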
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device, callbacks)
# Write mutation results
print_mutation(results, hyp.copy(), save_dir, opt.bucket)
# Plot results
plot_evolve(evolve_csv)
print(f'Hyperparameter evolution finished\n'
f"Results saved to {colorstr('bold', save_dir)}\n"
f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
# Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
if __name__ == "__main__":
#print('torch.backends.cudnn.enabled=False:',torch.backends.cudnn.enabled)
opt = parse_opt()
LOCAL_RANK = opt.local_rank # module-level LOCAL_RANK comes from the --local_rank DDP argument
print('local_rank:', LOCAL_RANK)
main(opt)