"research/syntaxnet/third_party/utf/LICENSE" did not exist on "32ab5a58dd3714d0747da6993a01315dadbf0e0f"
Commit a53a851b authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #1184 failed with stages
in 0 seconds
# Ultralytics YOLO 🚀, AGPL-3.0 license
import time
from threading import Thread
import pandas as pd
from ultralytics import Explorer
from ultralytics.utils import ROOT, SETTINGS
from ultralytics.utils.checks import check_requirements
check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3"))
import streamlit as st
from streamlit_select import image_select
def _get_explorer():
"""Initializes and returns an instance of the Explorer class."""
exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
thread = Thread(
target=exp.create_embeddings_table, kwargs={"force": st.session_state.get("force_recreate_embeddings")}
)
thread.start()
progress_bar = st.progress(0, text="Creating embeddings table...")
while exp.progress < 1:
time.sleep(0.1)
progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%")
thread.join()
st.session_state["explorer"] = exp
progress_bar.empty()
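# Illustrative (non-Streamlit) sketch of the same Explorer workflow used above, based on the
# Explorer API calls shown in this file; dataset, model and image paths are placeholders:
#
#   from ultralytics import Explorer
#   exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
#   exp.create_embeddings_table(force=False)  # blocking call; the dashboard runs it in a Thread
#   similar = exp.get_similar(img=["path/to/image.jpg"], limit=25, return_type="arrow")
#   print(similar.to_pydict()["im_file"])     # file paths of the most similar images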
def init_explorer_form():
"""Initializes an Explorer instance and creates embeddings table with progress tracking."""
datasets = ROOT / "cfg" / "datasets"
ds = [d.name for d in datasets.glob("*.yaml")]
models = [
"yolov8n.pt",
"yolov8s.pt",
"yolov8m.pt",
"yolov8l.pt",
"yolov8x.pt",
"yolov8n-seg.pt",
"yolov8s-seg.pt",
"yolov8m-seg.pt",
"yolov8l-seg.pt",
"yolov8x-seg.pt",
"yolov8n-pose.pt",
"yolov8s-pose.pt",
"yolov8m-pose.pt",
"yolov8l-pose.pt",
"yolov8x-pose.pt",
]
with st.form(key="explorer_init_form"):
col1, col2 = st.columns(2)
with col1:
st.selectbox("Select dataset", ds, key="dataset", index=ds.index("coco128.yaml"))
with col2:
st.selectbox("Select model", models, key="model")
st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")
st.form_submit_button("Explore", on_click=_get_explorer)
def query_form():
"""Sets up a form in Streamlit to initialize Explorer with dataset and model selection."""
with st.form("query_form"):
col1, col2 = st.columns([0.8, 0.2])
with col1:
st.text_input(
"Query",
"WHERE labels LIKE '%person%' AND labels LIKE '%dog%'",
label_visibility="collapsed",
key="query",
)
with col2:
st.form_submit_button("Query", on_click=run_sql_query)
def ai_query_form():
"""Sets up a Streamlit form for user input to initialize Explorer with dataset and model selection."""
with st.form("ai_query_form"):
col1, col2 = st.columns([0.8, 0.2])
with col1:
st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query")
with col2:
st.form_submit_button("Ask AI", on_click=run_ai_query)
def find_similar_imgs(imgs):
"""Initializes a Streamlit form for AI-based image querying with custom input."""
exp = st.session_state["explorer"]
similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow")
paths = similar.to_pydict()["im_file"]
st.session_state["imgs"] = paths
st.session_state["res"] = similar
def similarity_form(selected_imgs):
"""Initializes a form for AI-based image querying with custom input in Streamlit."""
st.write("Similarity Search")
with st.form("similarity_form"):
subcol1, subcol2 = st.columns([1, 1])
with subcol1:
st.number_input(
"limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit"
)
with subcol2:
disabled = not len(selected_imgs)
st.write("Selected: ", len(selected_imgs))
st.form_submit_button(
"Search",
disabled=disabled,
on_click=find_similar_imgs,
args=(selected_imgs,),
)
if disabled:
st.error("Select at least one image to search.")
# def persist_reset_form():
# with st.form("persist_reset"):
# col1, col2 = st.columns([1, 1])
# with col1:
# st.form_submit_button("Reset", on_click=reset)
#
# with col2:
# st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True))
def run_sql_query():
"""Executes an SQL query and returns the results."""
st.session_state["error"] = None
query = st.session_state.get("query")
    if query.strip():
exp = st.session_state["explorer"]
res = exp.sql_query(query, return_type="arrow")
st.session_state["imgs"] = res.to_pydict()["im_file"]
st.session_state["res"] = res
def run_ai_query():
"""Execute SQL query and update session state with query results."""
if not SETTINGS["openai_api_key"]:
st.session_state["error"] = (
'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
)
return
st.session_state["error"] = None
query = st.session_state.get("ai_query")
    if query.strip():
exp = st.session_state["explorer"]
res = exp.ask_ai(query)
if not isinstance(res, pd.DataFrame) or res.empty:
st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
return
st.session_state["imgs"] = res["im_file"].to_list()
st.session_state["res"] = res
def reset_explorer():
"""Resets the explorer to its initial state by clearing session variables."""
st.session_state["explorer"] = None
st.session_state["imgs"] = None
st.session_state["error"] = None
def ultralytics_explorer_docs_callback():
    """Displays a container with the Ultralytics logo and a link to the Explorer API documentation."""
with st.container(border=True):
st.image(
"https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg",
width=100,
)
st.markdown(
"<p>This demo is built using Ultralytics Explorer API. Visit <a href='https://docs.ultralytics.com/datasets/explorer/'>API docs</a> to try examples & learn more</p>",
unsafe_allow_html=True,
help=None,
)
st.link_button("Ultrlaytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")
def layout():
"""Resets explorer session variables and provides documentation with a link to API docs."""
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)
if st.session_state.get("explorer") is None:
init_explorer_form()
return
st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
exp = st.session_state.get("explorer")
col1, col2 = st.columns([0.75, 0.25], gap="small")
imgs = []
if st.session_state.get("error"):
st.error(st.session_state["error"])
else:
if st.session_state.get("imgs"):
imgs = st.session_state.get("imgs")
else:
imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"]
st.session_state["res"] = exp.table.to_arrow()
total_imgs, selected_imgs = len(imgs), []
with col1:
subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5)
with subcol1:
st.write("Max Images Displayed:")
with subcol2:
num = st.number_input(
"Max Images Displayed",
min_value=0,
max_value=total_imgs,
value=min(500, total_imgs),
key="num_imgs_displayed",
label_visibility="collapsed",
)
with subcol3:
st.write("Start Index:")
with subcol4:
start_idx = st.number_input(
"Start Index",
min_value=0,
max_value=total_imgs,
value=0,
key="start_index",
label_visibility="collapsed",
)
with subcol5:
reset = st.button("Reset", use_container_width=False, key="reset")
if reset:
st.session_state["imgs"] = None
st.experimental_rerun()
query_form()
ai_query_form()
if total_imgs:
labels, boxes, masks, kpts, classes = None, None, None, None, None
task = exp.model.task
if st.session_state.get("display_labels"):
labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num]
boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num]
masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num]
kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num]
classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num]
imgs_displayed = imgs[start_idx : start_idx + num]
selected_imgs = image_select(
f"Total samples: {total_imgs}",
images=imgs_displayed,
use_container_width=False,
# indices=[i for i in range(num)] if select_all else None,
labels=labels,
classes=classes,
bboxes=boxes,
masks=masks if task == "segment" else None,
kpts=kpts if task == "pose" else None,
)
with col2:
similarity_form(selected_imgs)
display_labels = st.checkbox("Labels", value=False, key="display_labels")
            ultralytics_explorer_docs_callback()
if __name__ == "__main__":
layout()
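# To try the dashboard above locally, one would typically launch it with the Streamlit CLI,
# e.g. (the script path is illustrative):
#
#   streamlit run ultralytics/data/explorer/gui/dash.py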
# Ultralytics YOLO 🚀, AGPL-3.0 license
import getpass
from typing import List
import cv2
import numpy as np
import pandas as pd
from ultralytics.data.augment import LetterBox
from ultralytics.utils import LOGGER as logger
from ultralytics.utils import SETTINGS
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.ops import xyxy2xywh
from ultralytics.utils.plotting import plot_images
def get_table_schema(vector_size):
"""Extracts and returns the schema of a database table."""
from lancedb.pydantic import LanceModel, Vector
class Schema(LanceModel):
im_file: str
labels: List[str]
cls: List[int]
bboxes: List[List[float]]
masks: List[List[List[int]]]
keypoints: List[List[List[float]]]
vector: Vector(vector_size)
return Schema
def get_sim_index_schema():
"""Returns a LanceModel schema for a database table with specified vector size."""
from lancedb.pydantic import LanceModel
class Schema(LanceModel):
idx: int
im_file: str
count: int
sim_im_files: List[str]
return Schema
def sanitize_batch(batch, dataset_info):
"""Sanitizes input batch for inference, ensuring correct format and dimensions."""
batch["cls"] = batch["cls"].flatten().int().tolist()
box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1])
batch["bboxes"] = [box for box, _ in box_cls_pair]
batch["cls"] = [cls for _, cls in box_cls_pair]
batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]]
batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]]
batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]]
return batch
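# Worked example of what sanitize_batch() does to a toy batch (values are illustrative):
# given cls tensor [[2], [0]] and bboxes [[10, 10, 20, 20], [5, 5, 8, 8]], the box/class pairs
# are sorted by class id, so the output has cls=[0, 2], bboxes=[[5, 5, 8, 8], [10, 10, 20, 20]]
# and labels=[dataset_info["names"][0], dataset_info["names"][2]]; missing masks/keypoints fall
# back to the [[[]]] placeholder.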
def plot_query_result(similar_set, plot_labels=True):
"""
Plot images from the similar set.
Args:
        similar_set (pyarrow.Table | pandas.DataFrame): Object containing the similar data points.
plot_labels (bool): Whether to plot labels or not
"""
similar_set = (
similar_set.to_dict(orient="list") if isinstance(similar_set, pd.DataFrame) else similar_set.to_pydict()
)
empty_masks = [[[]]]
empty_boxes = [[]]
images = similar_set.get("im_file", [])
bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else []
masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else []
kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else []
cls = similar_set.get("cls", [])
plot_size = 640
imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], []
for i, imf in enumerate(images):
im = cv2.imread(imf)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
h, w = im.shape[:2]
r = min(plot_size / h, plot_size / w)
imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1))
if plot_labels:
if len(bboxes) > i and len(bboxes[i]) > 0:
box = np.array(bboxes[i], dtype=np.float32)
box[:, [0, 2]] *= r
box[:, [1, 3]] *= r
plot_boxes.append(box)
if len(masks) > i and len(masks[i]) > 0:
mask = np.array(masks[i], dtype=np.uint8)[0]
plot_masks.append(LetterBox(plot_size, center=False)(image=mask))
if len(kpts) > i and kpts[i] is not None:
kpt = np.array(kpts[i], dtype=np.float32)
kpt[:, :, :2] *= r
plot_kpts.append(kpt)
batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i)
imgs = np.stack(imgs, axis=0)
masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8)
kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32)
boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32)
batch_idx = np.concatenate(batch_idx, axis=0)
cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0)
return plot_images(
imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False
)
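# Minimal usage sketch for plot_query_result(), assuming an Explorer table has already been
# built (see the dashboard code above); the image path is a placeholder:
#
#   from ultralytics import Explorer
#   exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
#   exp.create_embeddings_table()
#   similar = exp.get_similar(img=["path/to/image.jpg"], limit=8, return_type="arrow")
#   plot_query_result(similar, plot_labels=True)  # builds a mosaic of the returned samples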
def prompt_sql_query(query):
"""Plots images with optional labels from a similar data set."""
check_requirements("openai>=1.6.1")
from openai import OpenAI
if not SETTINGS["openai_api_key"]:
logger.warning("OpenAI API key not found in settings. Please enter your API key below.")
openai_api_key = getpass.getpass("OpenAI API key: ")
SETTINGS.update({"openai_api_key": openai_api_key})
openai = OpenAI(api_key=SETTINGS["openai_api_key"])
messages = [
{
"role": "system",
"content": """
You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on
the following schema and a user request. You only need to output the format with fixed selection
statement that selects everything from "'table'", like `SELECT * from 'table'`
Schema:
im_file: string not null
labels: list<item: string> not null
child 0, item: string
cls: list<item: int64> not null
child 0, item: int64
bboxes: list<item: list<item: double>> not null
child 0, item: list<item: double>
child 0, item: double
masks: list<item: list<item: list<item: int64>>> not null
child 0, item: list<item: list<item: int64>>
child 0, item: list<item: int64>
child 0, item: int64
keypoints: list<item: list<item: list<item: double>>> not null
child 0, item: list<item: list<item: double>>
child 0, item: list<item: double>
child 0, item: double
vector: fixed_size_list<item: float>[256] not null
child 0, item: float
Some details about the schema:
- the "labels" column contains the string values like 'person' and 'dog' for the respective objects
in each image
- the "cls" column contains the integer values on these classes that map them the labels
Example of a correct query:
request - Get all data points that contain 2 or more people and at least one dog
correct query-
SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1;
""",
},
{"role": "user", "content": f"{query}"},
]
response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
return response.choices[0].message.content
# Ultralytics YOLO 🚀, AGPL-3.0 license
import glob
import math
import os
import time
from dataclasses import dataclass
from pathlib import Path
from threading import Thread
from urllib.parse import urlparse
import cv2
import numpy as np
import requests
import torch
from PIL import Image
from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import LOGGER, is_colab, is_kaggle, ops
from ultralytics.utils.checks import check_requirements
@dataclass
class SourceTypes:
"""Class to represent various types of input sources for predictions."""
stream: bool = False
screenshot: bool = False
from_img: bool = False
tensor: bool = False
class LoadStreams:
"""
    Stream Loader for various types of video streams, supporting RTSP, RTMP, HTTP, and TCP streams.
Attributes:
sources (str): The source input paths or URLs for the video streams.
vid_stride (int): Video frame-rate stride, defaults to 1.
buffer (bool): Whether to buffer input streams, defaults to False.
running (bool): Flag to indicate if the streaming thread is running.
mode (str): Set to 'stream' indicating real-time capture.
imgs (list): List of image frames for each stream.
fps (list): List of FPS for each stream.
frames (list): List of total frames for each stream.
threads (list): List of threads for each stream.
shape (list): List of shapes for each stream.
caps (list): List of cv2.VideoCapture objects for each stream.
bs (int): Batch size for processing.
Methods:
__init__: Initialize the stream loader.
update: Read stream frames in daemon thread.
close: Close stream loader and release resources.
__iter__: Returns an iterator object for the class.
__next__: Returns source paths, transformed, and original images for processing.
__len__: Return the length of the sources object.
Example:
```bash
yolo predict source='rtsp://example.com/media.mp4'
```
"""
def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
"""Initialize instance variables and check for consistent input stream shapes."""
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
self.buffer = buffer # buffer input streams
self.running = True # running flag for Thread
self.mode = "stream"
self.vid_stride = vid_stride # video frame-rate stride
sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
n = len(sources)
self.bs = n
self.fps = [0] * n # frames per second
self.frames = [0] * n
self.threads = [None] * n
self.caps = [None] * n # video capture objects
self.imgs = [[] for _ in range(n)] # images
self.shape = [[] for _ in range(n)] # image shapes
self.sources = [ops.clean_str(x) for x in sources] # clean source names for later
for i, s in enumerate(sources): # index, source
# Start thread to read frames from video stream
st = f"{i + 1}/{n}: {s}... "
if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"): # if source is YouTube video
# YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
s = get_best_youtube_url(s)
s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
if s == 0 and (is_colab() or is_kaggle()):
raise NotImplementedError(
"'source=0' webcam not supported in Colab and Kaggle notebooks. "
"Try running 'source=0' in a local environment."
)
self.caps[i] = cv2.VideoCapture(s) # store video capture object
if not self.caps[i].isOpened():
raise ConnectionError(f"{st}Failed to open {s}")
w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = self.caps[i].get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
"inf"
) # infinite stream fallback
self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback
success, im = self.caps[i].read() # guarantee first frame
if not success or im is None:
raise ConnectionError(f"{st}Failed to read images from {s}")
self.imgs[i].append(im)
self.shape[i] = im.shape
self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
LOGGER.info(f"{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
self.threads[i].start()
LOGGER.info("") # newline
def update(self, i, cap, stream):
"""Read stream `i` frames in daemon thread."""
n, f = 0, self.frames[i] # frame number, frame array
while self.running and cap.isOpened() and n < (f - 1):
if len(self.imgs[i]) < 30: # keep a <=30-image buffer
n += 1
cap.grab() # .read() = .grab() followed by .retrieve()
if n % self.vid_stride == 0:
success, im = cap.retrieve()
if not success:
im = np.zeros(self.shape[i], dtype=np.uint8)
LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
cap.open(stream) # re-open stream if signal was lost
if self.buffer:
self.imgs[i].append(im)
else:
self.imgs[i] = [im]
else:
time.sleep(0.01) # wait until the buffer is empty
def close(self):
"""Close stream loader and release resources."""
self.running = False # stop flag for Thread
for thread in self.threads:
if thread.is_alive():
thread.join(timeout=5) # Add timeout
for cap in self.caps: # Iterate through the stored VideoCapture objects
try:
cap.release() # release video capture
except Exception as e:
LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
cv2.destroyAllWindows()
def __iter__(self):
"""Iterates through YOLO image feed and re-opens unresponsive streams."""
self.count = -1
return self
def __next__(self):
"""Returns source paths, transformed and original images for processing."""
self.count += 1
images = []
for i, x in enumerate(self.imgs):
# Wait until a frame is available in each buffer
while not x:
if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"): # q to quit
self.close()
raise StopIteration
time.sleep(1 / min(self.fps))
x = self.imgs[i]
if not x:
LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")
# Get and remove the first frame from imgs buffer
if self.buffer:
images.append(x.pop(0))
# Get the last frame, and clear the rest from the imgs buffer
else:
images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
x.clear()
return self.sources, images, [""] * self.bs
def __len__(self):
"""Return the length of the sources object."""
return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years
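# Illustrative use of LoadStreams outside the predictor, assuming a reachable stream
# (the RTSP address is a placeholder):
#
#   loader = LoadStreams("rtsp://example.com/media.mp4", vid_stride=1, buffer=False)
#   for sources, images, info in loader:  # images is a list with one BGR frame per stream
#       print(sources[0], images[0].shape)
#       break
#   loader.close()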
class LoadScreenshots:
"""
YOLOv8 screenshot dataloader.
This class manages the loading of screenshot images for processing with YOLOv8.
Suitable for use with `yolo predict source=screen`.
Attributes:
source (str): The source input indicating which screen to capture.
screen (int): The screen number to capture.
left (int): The left coordinate for screen capture area.
top (int): The top coordinate for screen capture area.
width (int): The width of the screen capture area.
height (int): The height of the screen capture area.
mode (str): Set to 'stream' indicating real-time capture.
frame (int): Counter for captured frames.
sct (mss.mss): Screen capture object from `mss` library.
bs (int): Batch size, set to 1.
monitor (dict): Monitor configuration details.
Methods:
__iter__: Returns an iterator object.
__next__: Captures the next screenshot and returns it.
"""
def __init__(self, source):
"""Source = [screen_number left top width height] (pixels)."""
check_requirements("mss")
import mss # noqa
source, *params = source.split()
self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
if len(params) == 1:
self.screen = int(params[0])
elif len(params) == 4:
left, top, width, height = (int(x) for x in params)
elif len(params) == 5:
self.screen, left, top, width, height = (int(x) for x in params)
self.mode = "stream"
self.frame = 0
self.sct = mss.mss()
self.bs = 1
self.fps = 30
# Parse monitor shape
monitor = self.sct.monitors[self.screen]
self.top = monitor["top"] if top is None else (monitor["top"] + top)
self.left = monitor["left"] if left is None else (monitor["left"] + left)
self.width = width or monitor["width"]
self.height = height or monitor["height"]
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
def __iter__(self):
"""Returns an iterator of the object."""
return self
def __next__(self):
"""mss screen capture: get raw pixels from the screen as np array."""
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
self.frame += 1
return [str(self.screen)], [im0], [s] # screen, img, string
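# Illustrative use of LoadScreenshots, capturing a 640x480 region of screen 0 starting at
# left=100, top=100 (requires the optional `mss` package):
#
#   loader = LoadScreenshots("0 100 100 640 480")  # "screen left top width height"
#   screens, ims, infos = next(iter(loader))       # ims[0] is a BGR numpy array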
class LoadImagesAndVideos:
"""
YOLOv8 image/video dataloader.
This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
various formats, including single image files, video files, and lists of image and video paths.
Attributes:
files (list): List of image and video file paths.
nf (int): Total number of files (images and videos).
video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
mode (str): Current mode, 'image' or 'video'.
vid_stride (int): Stride for video frame-rate, defaults to 1.
bs (int): Batch size, set to 1 for this class.
cap (cv2.VideoCapture): Video capture object for OpenCV.
frame (int): Frame counter for video.
frames (int): Total number of frames in the video.
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
Methods:
_new_video(path): Create a new cv2.VideoCapture object for a given video path.
"""
def __init__(self, path, batch=1, vid_stride=1):
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
parent = None
if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
parent = Path(path).parent
path = Path(path).read_text().splitlines() # list of sources
files = []
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
if "*" in a:
files.extend(sorted(glob.glob(a, recursive=True))) # glob
elif os.path.isdir(a):
files.extend(sorted(glob.glob(os.path.join(a, "*.*")))) # dir
elif os.path.isfile(a):
files.append(a) # files (absolute or relative to CWD)
elif parent and (parent / p).is_file():
files.append(str((parent / p).absolute())) # files (relative to *.txt file parent)
else:
raise FileNotFoundError(f"{p} does not exist")
images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
ni, nv = len(images), len(videos)
self.files = images + videos
self.nf = ni + nv # number of files
self.ni = ni # number of images
self.video_flag = [False] * ni + [True] * nv
self.mode = "image"
self.vid_stride = vid_stride # video frame-rate stride
self.bs = batch
if any(videos):
self._new_video(videos[0]) # new video
else:
self.cap = None
if self.nf == 0:
raise FileNotFoundError(
f"No images or videos found in {p}. "
f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
)
def __iter__(self):
"""Returns an iterator object for VideoStream or ImageFolder."""
self.count = 0
return self
def __next__(self):
"""Returns the next batch of images or video frames along with their paths and metadata."""
paths, imgs, info = [], [], []
while len(imgs) < self.bs:
if self.count >= self.nf: # end of file list
if len(imgs) > 0:
return paths, imgs, info # return last partial batch
else:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
self.mode = "video"
if not self.cap or not self.cap.isOpened():
self._new_video(path)
for _ in range(self.vid_stride):
success = self.cap.grab()
if not success:
break # end of video or failure
if success:
success, im0 = self.cap.retrieve()
if success:
self.frame += 1
paths.append(path)
imgs.append(im0)
info.append(f"video {self.count + 1}/{self.nf} (frame {self.frame}/{self.frames}) {path}: ")
if self.frame == self.frames: # end of video
self.count += 1
self.cap.release()
else:
# Move to the next file if the current video ended or failed to open
self.count += 1
if self.cap:
self.cap.release()
if self.count < self.nf:
self._new_video(self.files[self.count])
else:
self.mode = "image"
im0 = cv2.imread(path) # BGR
if im0 is None:
raise FileNotFoundError(f"Image Not Found {path}")
paths.append(path)
imgs.append(im0)
info.append(f"image {self.count + 1}/{self.nf} {path}: ")
self.count += 1 # move to the next file
if self.count >= self.ni: # end of image list
break
return paths, imgs, info
def _new_video(self, path):
"""Creates a new video capture object for the given path."""
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
if not self.cap.isOpened():
raise FileNotFoundError(f"Failed to open video {path}")
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
def __len__(self):
"""Returns the number of batches in the object."""
return math.ceil(self.nf / self.bs) # number of files
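# Illustrative use of LoadImagesAndVideos on a directory of images (the path is a placeholder):
#
#   loader = LoadImagesAndVideos("path/to/images", batch=4, vid_stride=1)
#   for paths, ims, info in loader:  # each iteration yields up to `batch` BGR frames
#       print(len(ims), info[0])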
class LoadPilAndNumpy:
"""
Load images from PIL and Numpy arrays for batch processing.
This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
It performs basic validation and format conversion to ensure that the images are in the required format for
downstream processing.
Attributes:
paths (list): List of image paths or autogenerated filenames.
im0 (list): List of images stored as Numpy arrays.
mode (str): Type of data being processed, defaults to 'image'.
bs (int): Batch size, equivalent to the length of `im0`.
Methods:
_single_check(im): Validate and format a single image to a Numpy array.
"""
def __init__(self, im0):
"""Initialize PIL and Numpy Dataloader."""
if not isinstance(im0, list):
im0 = [im0]
self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
self.im0 = [self._single_check(im) for im in im0]
self.mode = "image"
self.bs = len(self.im0)
@staticmethod
def _single_check(im):
"""Validate and format an image to numpy array."""
assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
if isinstance(im, Image.Image):
if im.mode != "RGB":
im = im.convert("RGB")
im = np.asarray(im)[:, :, ::-1]
im = np.ascontiguousarray(im) # contiguous
return im
def __len__(self):
"""Returns the length of the 'im0' attribute."""
return len(self.im0)
def __next__(self):
"""Returns batch paths, images, processed images, None, ''."""
if self.count == 1: # loop only once as it's batch inference
raise StopIteration
self.count += 1
return self.paths, self.im0, [""] * self.bs
def __iter__(self):
"""Enables iteration for class LoadPilAndNumpy."""
self.count = 0
return self
class LoadTensor:
"""
Load images from torch.Tensor data.
This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
Attributes:
im0 (torch.Tensor): The input tensor containing the image(s).
bs (int): Batch size, inferred from the shape of `im0`.
mode (str): Current mode, set to 'image'.
paths (list): List of image paths or filenames.
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
Methods:
_single_check(im, stride): Validate and possibly modify the input tensor.
"""
def __init__(self, im0) -> None:
"""Initialize Tensor Dataloader."""
self.im0 = self._single_check(im0)
self.bs = self.im0.shape[0]
self.mode = "image"
self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
@staticmethod
def _single_check(im, stride=32):
"""Validate and format an image to torch.Tensor."""
s = (
f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
)
if len(im.shape) != 4:
if len(im.shape) != 3:
raise ValueError(s)
LOGGER.warning(s)
im = im.unsqueeze(0)
if im.shape[2] % stride or im.shape[3] % stride:
raise ValueError(s)
if im.max() > 1.0 + torch.finfo(im.dtype).eps: # torch.float32 eps is 1.2e-07
LOGGER.warning(
f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
f"Dividing input by 255."
)
im = im.float() / 255.0
return im
def __iter__(self):
"""Returns an iterator object."""
self.count = 0
return self
def __next__(self):
"""Return next item in the iterator."""
if self.count == 1:
raise StopIteration
self.count += 1
return self.paths, self.im0, [""] * self.bs
def __len__(self):
"""Returns the batch size."""
return self.bs
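# Illustrative use of the in-memory loaders above; the array and tensor are synthetic placeholders:
#
#   import numpy as np, torch
#   pil_np_loader = LoadPilAndNumpy([np.zeros((640, 640, 3), dtype=np.uint8)])
#   paths, ims, _ = next(iter(pil_np_loader))
#   tensor_loader = LoadTensor(torch.zeros(1, 3, 640, 640))  # BCHW, values in 0.0-1.0
#   paths, im0, _ = next(iter(tensor_loader))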
def autocast_list(source):
"""Merges a list of source of different types into a list of numpy arrays or PIL images."""
files = []
for im in source:
if isinstance(im, (str, Path)): # filename or uri
files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im))
elif isinstance(im, (Image.Image, np.ndarray)): # PIL or np Image
files.append(im)
else:
raise TypeError(
f"type {type(im).__name__} is not a supported Ultralytics prediction source type. \n"
f"See https://docs.ultralytics.com/modes/predict for supported source types."
)
return files
def get_best_youtube_url(url, use_pafy=True):
"""
Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
quality MP4 format that has video codec but no audio codec, and returns the URL of this video stream.
Args:
url (str): The URL of the YouTube video.
use_pafy (bool): Use the pafy package, default=True, otherwise use yt_dlp package.
Returns:
(str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
"""
if use_pafy:
check_requirements(("pafy", "youtube_dl==2020.12.2"))
import pafy # noqa
return pafy.new(url).getbestvideo(preftype="mp4").url
else:
check_requirements("yt-dlp")
import yt_dlp
with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
info_dict = ydl.extract_info(url, download=False) # extract info
for f in reversed(info_dict.get("formats", [])): # reversed because best is usually last
# Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size
good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
return f.get("url")
# Define constants
LOADERS = (LoadStreams, LoadPilAndNumpy, LoadImagesAndVideos, LoadScreenshots)
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download latest models from https://github.com/ultralytics/assets/releases
# Example usage: bash ultralytics/data/scripts/download_weights.sh
# parent
# └── weights
# ├── yolov8n.pt ← downloads here
# ├── yolov8s.pt
# └── ...
python - <<EOF
from ultralytics.utils.downloads import attempt_download_asset
assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '-cls', '-seg', '-pose')]
for x in assets:
attempt_download_asset(f'weights/{x}')
EOF
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO 2017 dataset https://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── ultralytics
# └── datasets
# └── coco ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
--test) test=true ;;
--segments) segments=true ;;
--sama) sama=true ;;
esac
done
else
train=true
val=true
test=false
segments=false
sama=false
fi
# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
if [ "$segments" == "true" ]; then
f='coco2017labels-segments.zip' # 169 MB
elif [ "$sama" == "true" ]; then
f='coco2017labels-segments-sama.zip' # 199 MB https://www.sama.com/sama-coco-dataset/
else
f='coco2017labels.zip' # 46 MB
fi
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
if [ "$train" == "true" ]; then
f='train2017.zip' # 19G, 118k images
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$val" == "true" ]; then
f='val2017.zip' # 1G, 5k images
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$test" == "true" ]; then
f='test2017.zip' # 7G, 41k images (optional)
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
wait # finish background tasks
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── ultralytics
# └── datasets
# └── coco128 ← downloads here
# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
wait # finish background tasks
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download ILSVRC2012 ImageNet dataset https://image-net.org
# Example usage: bash data/scripts/get_imagenet.sh
# parent
# ├── ultralytics
# └── datasets
# └── imagenet ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
esac
done
else
train=true
val=true
fi
# Make dir
d='../datasets/imagenet' # unzip directory
mkdir -p $d && cd $d
# Download/unzip train
if [ "$train" == "true" ]; then
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
find . -name "*.tar" | while read NAME; do
mkdir -p "${NAME%.tar}"
tar -xf "${NAME}" -C "${NAME%.tar}"
rm -f "${NAME}"
done
cd ..
fi
# Download/unzip val
if [ "$val" == "true" ]; then
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
fi
# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
# rm train/n04266014/n04266014_10835.JPEG
# TFRecords (optional)
# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt
# Ultralytics YOLO 🚀, AGPL-3.0 license
import itertools
from glob import glob
from math import ceil
from pathlib import Path
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
from ultralytics.data.utils import exif_size, img2label_paths
from ultralytics.utils.checks import check_requirements
check_requirements("shapely")
from shapely.geometry import Polygon
def bbox_iof(polygon1, bbox2, eps=1e-6):
"""
    Calculate the intersection-over-foreground (IoF) between polygons and bounding boxes.
    Args:
        polygon1 (np.ndarray): Polygon coordinates, shape (n, 8).
        bbox2 (np.ndarray): Bounding boxes, shape (n, 4).
"""
polygon1 = polygon1.reshape(-1, 4, 2)
lt_point = np.min(polygon1, axis=-2)
rb_point = np.max(polygon1, axis=-2)
bbox1 = np.concatenate([lt_point, rb_point], axis=-1)
lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
wh = np.clip(rb - lt, 0, np.inf)
h_overlaps = wh[..., 0] * wh[..., 1]
l, t, r, b = (bbox2[..., i] for i in range(4))
polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)
sg_polys1 = [Polygon(p) for p in polygon1]
sg_polys2 = [Polygon(p) for p in polygon2]
overlaps = np.zeros(h_overlaps.shape)
for p in zip(*np.nonzero(h_overlaps)):
overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
unions = unions[..., None]
unions = np.clip(unions, eps, np.inf)
outputs = overlaps / unions
if outputs.ndim == 1:
outputs = outputs[..., None]
return outputs
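# Worked example for bbox_iof(): a unit-square polygon [0,0, 1,0, 1,1, 0,1] against the box
# [0.5, 0.5, 1.5, 1.5] overlaps on a 0.5x0.5 region, so IoF = 0.25 / 1.0 = 0.25:
#
#   poly = np.array([[0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]])
#   box = np.array([[0.5, 0.5, 1.5, 1.5]])
#   bbox_iof(poly, box)  # -> array([[0.25]])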
def load_yolo_dota(data_root, split="train"):
"""
Load DOTA dataset.
Args:
data_root (str): Data root.
        split (str): The dataset split, either 'train' or 'val'.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
- train
- val
- labels
- train
- val
"""
assert split in ["train", "val"]
im_dir = Path(data_root) / "images" / split
assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
im_files = glob(str(Path(data_root) / "images" / split / "*"))
lb_files = img2label_paths(im_files)
annos = []
for im_file, lb_file in zip(im_files, lb_files):
w, h = exif_size(Image.open(im_file))
with open(lb_file) as f:
lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
lb = np.array(lb, dtype=np.float32)
annos.append(dict(ori_size=(h, w), label=lb, filepath=im_file))
return annos
def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
"""
Get the coordinates of windows.
Args:
im_size (tuple): Original image size, (h, w).
        crop_sizes (List[int]): Crop sizes of the windows.
        gaps (List[int]): Gaps between adjacent crops.
        im_rate_thr (float): Threshold on the window area inside the image divided by the total window area.
"""
h, w = im_size
windows = []
for crop_size, gap in zip(crop_sizes, gaps):
assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
step = crop_size - gap
xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
xs = [step * i for i in range(xn)]
if len(xs) > 1 and xs[-1] + crop_size > w:
xs[-1] = w - crop_size
yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
ys = [step * i for i in range(yn)]
if len(ys) > 1 and ys[-1] + crop_size > h:
ys[-1] = h - crop_size
start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
stop = start + crop_size
windows.append(np.concatenate([start, stop], axis=1))
windows = np.concatenate(windows, axis=0)
im_in_wins = windows.copy()
im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
im_rates = im_areas / win_areas
if not (im_rates > im_rate_thr).any():
max_rate = im_rates.max()
im_rates[abs(im_rates - max_rate) < eps] = 1
return windows[im_rates > im_rate_thr]
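# Worked example for get_windows(): with im_size=(1500, 1500) and the default crop_sizes=[1024],
# gaps=[200], the step is 824, giving two window origins per axis ([0, 476] after clamping the
# last window to the image border) and therefore 4 fully-contained 1024x1024 windows:
#
#   get_windows((1500, 1500))  # -> array of shape (4, 4) with [x_start, y_start, x_stop, y_stop] rows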
def get_window_obj(anno, windows, iof_thr=0.7):
"""Get objects for each window."""
h, w = anno["ori_size"]
label = anno["label"]
if len(label):
label[:, 1::2] *= w
label[:, 2::2] *= h
iofs = bbox_iof(label[:, 1:], windows)
# Unnormalized and misaligned coordinates
return [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))] # window_anns
else:
return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))] # window_anns
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
"""
Crop images and save new labels.
Args:
anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
windows (list): A list of windows coordinates.
window_objs (list): A list of labels inside each window.
im_dir (str): The output directory path of images.
lb_dir (str): The output directory path of labels.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
- train
- val
- labels
- train
- val
"""
im = cv2.imread(anno["filepath"])
name = Path(anno["filepath"]).stem
for i, window in enumerate(windows):
x_start, y_start, x_stop, y_stop = window.tolist()
new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
patch_im = im[y_start:y_stop, x_start:x_stop]
ph, pw = patch_im.shape[:2]
cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
label = window_objs[i]
if len(label) == 0:
continue
label[:, 1::2] -= x_start
label[:, 2::2] -= y_start
label[:, 1::2] /= pw
label[:, 2::2] /= ph
with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
for lb in label:
formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
"""
Split both images and labels.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
- split
- labels
- split
and the output directory structure is:
- save_dir
- images
- split
- labels
- split
"""
im_dir = Path(save_dir) / "images" / split
im_dir.mkdir(parents=True, exist_ok=True)
lb_dir = Path(save_dir) / "labels" / split
lb_dir.mkdir(parents=True, exist_ok=True)
annos = load_yolo_dota(data_root, split=split)
for anno in tqdm(annos, total=len(annos), desc=split):
windows = get_windows(anno["ori_size"], crop_sizes, gaps)
window_objs = get_window_obj(anno, windows)
crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))
def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
"""
Split train and val set of DOTA.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
- train
- val
- labels
- train
- val
and the output directory structure is:
- save_dir
- images
- train
- val
- labels
- train
- val
"""
crop_sizes, gaps = [], []
for r in rates:
crop_sizes.append(int(crop_size / r))
gaps.append(int(gap / r))
for split in ["train", "val"]:
split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
"""
    Split the test set of DOTA; labels are not included in this set.
Notes:
The directory structure assumed for the DOTA dataset:
- data_root
- images
- test
and the output directory structure is:
- save_dir
- images
- test
"""
crop_sizes, gaps = [], []
for r in rates:
crop_sizes.append(int(crop_size / r))
gaps.append(int(gap / r))
save_dir = Path(save_dir) / "images" / "test"
save_dir.mkdir(parents=True, exist_ok=True)
im_dir = Path(data_root) / "images" / "test"
assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
im_files = glob(str(im_dir / "*"))
for im_file in tqdm(im_files, total=len(im_files), desc="test"):
w, h = exif_size(Image.open(im_file))
windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
im = cv2.imread(im_file)
name = Path(im_file).stem
for window in windows:
x_start, y_start, x_stop, y_stop = window.tolist()
new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
patch_im = im[y_start:y_stop, x_start:x_stop]
cv2.imwrite(str(save_dir / f"{new_name}.jpg"), patch_im)
if __name__ == "__main__":
split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split")
split_test(data_root="DOTAv2", save_dir="DOTAv2-split")
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import hashlib
import json
import os
import random
import subprocess
import time
import zipfile
from multiprocessing.pool import ThreadPool
from pathlib import Path
from tarfile import is_tarfile
import cv2
import numpy as np
from PIL import Image, ImageOps
from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (
DATASETS_DIR,
LOGGER,
NUM_THREADS,
ROOT,
SETTINGS_YAML,
TQDM,
clean_url,
colorstr,
emojis,
yaml_load,
yaml_save,
)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes
HELP_URL = "See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance."
IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"} # image suffixes
VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # video suffixes
PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true" # global pin_memory for dataloaders
def img2label_paths(img_paths):
"""Define label paths as a function of image paths."""
sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}" # /images/, /labels/ substrings
return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]
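# Worked example (POSIX paths): img2label_paths(["coco/images/train2017/0001.jpg"])
# returns ["coco/labels/train2017/0001.txt"].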
def get_hash(paths):
"""Returns a single hash value of a list of paths (files or dirs)."""
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
h = hashlib.sha256(str(size).encode()) # hash sizes
h.update("".join(paths).encode()) # hash paths
return h.hexdigest() # return hash
def exif_size(img: Image.Image):
"""Returns exif-corrected PIL size."""
s = img.size # (width, height)
if img.format == "JPEG": # only support JPEG images
with contextlib.suppress(Exception):
exif = img.getexif()
if exif:
rotation = exif.get(274, None) # the EXIF key for the orientation tag is 274
if rotation in [6, 8]: # rotation 270 or 90
s = s[1], s[0]
return s
def verify_image(args):
"""Verify one image."""
(im_file, cls), prefix = args
# Number (found, corrupt), message
nf, nc, msg = 0, 0, ""
try:
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
shape = (shape[1], shape[0]) # hw
assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
if im.format.lower() in ("jpg", "jpeg"):
with open(im_file, "rb") as f:
f.seek(-2, 2)
if f.read() != b"\xff\xd9": # corrupt JPEG
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"
nf = 1
except Exception as e:
nc = 1
msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
return (im_file, cls), nf, nc, msg
def verify_image_label(args):
"""Verify one image-label pair."""
im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
# Number (missing, found, empty, corrupt), message, segments, keypoints
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None
try:
# Verify images
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
shape = (shape[1], shape[0]) # hw
assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
if im.format.lower() in ("jpg", "jpeg"):
with open(im_file, "rb") as f:
f.seek(-2, 2)
if f.read() != b"\xff\xd9": # corrupt JPEG
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"
# Verify labels
if os.path.isfile(lb_file):
nf = 1 # label found
with open(lb_file) as f:
lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
if any(len(x) > 6 for x in lb) and (not keypoint): # is segment
classes = np.array([x[0] for x in lb], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...)
lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
lb = np.array(lb, dtype=np.float32)
nl = len(lb)
if nl:
if keypoint:
assert lb.shape[1] == (5 + nkpt * ndim), f"labels require {(5 + nkpt * ndim)} columns each"
points = lb[:, 5:].reshape(-1, ndim)[:, :2]
else:
assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
points = lb[:, 1:]
assert points.max() <= 1, f"non-normalized or out of bounds coordinates {points[points > 1]}"
assert lb.min() >= 0, f"negative label values {lb[lb < 0]}"
# All labels
max_cls = lb[:, 0].max() # max label count
assert max_cls <= num_cls, (
f"Label class {int(max_cls)} exceeds dataset class count {num_cls}. "
f"Possible class labels are 0-{num_cls - 1}"
)
_, i = np.unique(lb, axis=0, return_index=True)
if len(i) < nl: # duplicate row check
lb = lb[i] # remove duplicates
if segments:
segments = [segments[x] for x in i]
msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed"
else:
ne = 1 # label empty
lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
else:
nm = 1 # label missing
            lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
if keypoint:
keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
if ndim == 2:
kpt_mask = np.where((keypoints[..., 0] < 0) | (keypoints[..., 1] < 0), 0.0, 1.0).astype(np.float32)
keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1) # (nl, nkpt, 3)
lb = lb[:, :5]
return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
except Exception as e:
nc = 1
msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
return [None, None, None, None, None, nm, nf, ne, nc, msg]
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
"""
Convert a list of polygons to a binary mask of the specified image size.
Args:
imgsz (tuple): The size of the image as (height, width).
polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
N is the number of polygons, and M is the number of points such that M % 2 = 0.
color (int, optional): The color value to fill in the polygons on the mask. Defaults to 1.
downsample_ratio (int, optional): Factor by which to downsample the mask. Defaults to 1.
Returns:
(np.ndarray): A binary mask of the specified image size with the polygons filled in.
"""
mask = np.zeros(imgsz, dtype=np.uint8)
polygons = np.asarray(polygons, dtype=np.int32)
polygons = polygons.reshape((polygons.shape[0], -1, 2))
cv2.fillPoly(mask, polygons, color=color)
nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio)
# Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1
return cv2.resize(mask, (nw, nh))
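# Illustrative call for polygon2mask(): fill an 8x8 mask with a square polygon from (2, 2) to
# (6, 6), given as flattened xy pixel coordinates:
#
#   square = np.array([[2, 2, 6, 2, 6, 6, 2, 6]], dtype=np.float32)
#   mask = polygon2mask((8, 8), square, color=1, downsample_ratio=1)
#   mask.sum()  # -> 25, the 5x5 block with the polygon boundary included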
def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
"""
Convert a list of polygons to a set of binary masks of the specified image size.
Args:
imgsz (tuple): The size of the image as (height, width).
polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
N is the number of polygons, and M is the number of points such that M % 2 = 0.
color (int): The color value to fill in the polygons on the masks.
downsample_ratio (int, optional): Factor by which to downsample each mask. Defaults to 1.
Returns:
(np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
"""
return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons])
def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
"""Return a (640, 640) overlap mask."""
masks = np.zeros(
(imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio),
dtype=np.int32 if len(segments) > 255 else np.uint8,
)
areas = []
ms = []
for si in range(len(segments)):
mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1)
ms.append(mask)
areas.append(mask.sum())
areas = np.asarray(areas)
index = np.argsort(-areas)
ms = np.array(ms)[index]
for i in range(len(segments)):
mask = ms[i] * (i + 1)
masks = masks + mask
masks = np.clip(masks, a_min=0, a_max=i + 1)
return masks, index
def find_dataset_yaml(path: Path) -> Path:
"""
Find and return the YAML file associated with a Detect, Segment or Pose dataset.
This function searches for a YAML file at the root level of the provided directory first, and if not found, it
performs a recursive search. It prefers YAML files that have the same stem as the provided path. An AssertionError
is raised if no YAML file is found or if multiple YAML files are found.
Args:
path (Path): The directory path to search for the YAML file.
Returns:
(Path): The path of the found YAML file.
"""
files = list(path.glob("*.yaml")) or list(path.rglob("*.yaml")) # try root level first and then recursive
assert files, f"No YAML file found in '{path.resolve()}'"
if len(files) > 1:
files = [f for f in files if f.stem == path.stem] # prefer *.yaml files that match
assert len(files) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(files)}.\n{files}"
return files[0]
def check_det_dataset(dataset, autodownload=True):
"""
Download, verify, and/or unzip a dataset if not found locally.
This function checks the availability of a specified dataset, and if not found, it has the option to download and
unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
resolves paths related to the dataset.
Args:
dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.
Returns:
(dict): Parsed dataset information and paths.
"""
file = check_file(dataset)
# Download (optional)
extract_dir = ""
if zipfile.is_zipfile(file) or is_tarfile(file):
new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
file = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = file.parent, False
# Read YAML
data = yaml_load(file, append_filename=True) # dictionary
# Checks
for k in "train", "val":
if k not in data:
if k != "val" or "validation" not in data:
raise SyntaxError(
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")
)
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data["val"] = data.pop("validation") # replace 'validation' key with 'val' key
if "names" not in data and "nc" not in data:
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
if "names" in data and "nc" in data and len(data["names"]) != data["nc"]:
raise SyntaxError(emojis(f"{dataset} 'names' length {len(data['names'])} and 'nc: {data['nc']}' must match."))
if "names" not in data:
data["names"] = [f"class_{i}" for i in range(data["nc"])]
else:
data["nc"] = len(data["names"])
data["names"] = check_class_names(data["names"])
# Resolve paths
path = Path(extract_dir or data.get("path") or Path(data.get("yaml_file", "")).parent) # dataset root
if not path.is_absolute():
path = (DATASETS_DIR / path).resolve()
# Set paths
data["path"] = path # download scripts
for k in "train", "val", "test":
if data.get(k): # prepend path
if isinstance(data[k], str):
x = (path / data[k]).resolve()
if not x.exists() and data[k].startswith("../"):
x = (path / data[k][3:]).resolve()
data[k] = str(x)
else:
data[k] = [str((path / x).resolve()) for x in data[k]]
# Parse YAML
val, s = (data.get(x) for x in ("val", "download"))
if val:
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
name = clean_url(dataset) # dataset name with URL auth stripped
m = f"\nDataset '{name}' images not found ⚠️, missing path '{[x for x in val if not x.exists()][0]}'"
if s and autodownload:
LOGGER.warning(m)
else:
m += f"\nNote dataset download directory is '{DATASETS_DIR}'. You can update this in '{SETTINGS_YAML}'"
raise FileNotFoundError(m)
t = time.time()
r = None # success
if s.startswith("http") and s.endswith(".zip"): # URL
safe_download(url=s, dir=DATASETS_DIR, delete=True)
elif s.startswith("bash "): # bash script
LOGGER.info(f"Running {s} ...")
r = os.system(s)
else: # python script
exec(s, {"yaml": data})
dt = f"({round(time.time() - t, 1)}s)"
s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
LOGGER.info(f"Dataset download {s}\n")
check_font("Arial.ttf" if is_ascii(data["names"]) else "Arial.Unicode.ttf") # download fonts
return data # dictionary
def check_cls_dataset(dataset, split=""):
"""
Checks a classification dataset such as Imagenet.
This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.
Args:
dataset (str | Path): The name of the dataset.
split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.
Returns:
(dict): A dictionary containing the following keys:
- 'train' (Path): The directory path containing the training set of the dataset.
- 'val' (Path): The directory path containing the validation set of the dataset.
- 'test' (Path): The directory path containing the test set of the dataset.
- 'nc' (int): The number of classes in the dataset.
- 'names' (dict): A dictionary of class names in the dataset.
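    Example:
        A minimal sketch, assuming a local classification folder laid out as dataset/train/<class>/*.jpg with
        optional val/ (or validation/) and test/ splits:
        ```python
        from ultralytics.data.utils import check_cls_dataset
        data = check_cls_dataset('path/to/dataset')
        print(data['nc'], data['names'])  # number of classes and class-name mapping
        ```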
"""
# Download (optional if dataset=https://file.zip is passed directly)
if str(dataset).startswith(("http:/", "https:/")):
dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False)
elif Path(dataset).suffix in (".zip", ".tar", ".gz"):
file = check_file(dataset)
dataset = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
dataset = Path(dataset)
data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
if not data_dir.is_dir():
LOGGER.warning(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
t = time.time()
if str(dataset) == "imagenet":
subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
else:
url = f"https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip"
download(url, dir=data_dir.parent)
s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
LOGGER.info(s)
train_set = data_dir / "train"
val_set = (
data_dir / "val"
if (data_dir / "val").exists()
else data_dir / "validation"
if (data_dir / "validation").exists()
else None
    ) # data/val or data/validation
    test_set = data_dir / "test" if (data_dir / "test").exists() else None # data/test, if present
if split == "val" and not val_set:
LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.")
elif split == "test" and not test_set:
LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.")
nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()]) # number of classes
names = [x.name for x in (data_dir / "train").iterdir() if x.is_dir()] # class names list
names = dict(enumerate(sorted(names)))
# Print to console
for k, v in {"train": train_set, "val": val_set, "test": test_set}.items():
prefix = f'{colorstr(f"{k}:")} {v}...'
if v is None:
LOGGER.info(prefix)
else:
files = [path for path in v.rglob("*.*") if path.suffix[1:].lower() in IMG_FORMATS]
nf = len(files) # number of files
nd = len({file.parent for file in files}) # number of directories
if nf == 0:
if k == "train":
raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ "))
else:
LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found")
elif nd != nc:
LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}")
else:
LOGGER.info(f"{prefix} found {nf} images in {nd} classes ✅ ")
return {"train": train_set, "val": val_set, "test": test_set, "nc": nc, "names": names}
class HUBDatasetStats:
"""
A class for generating HUB dataset JSON and `-hub` dataset directory.
Args:
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
Example:
Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
```python
from ultralytics.data.utils import HUBDatasetStats
stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset
stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify') # classification dataset
stats.get_json(save=True)
stats.process_images()
```
"""
def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
"""Initialize class."""
path = Path(path).resolve()
LOGGER.info(f"Starting HUB dataset checks for {path}....")
self.task = task # detect, segment, pose, classify
if self.task == "classify":
unzip_dir = unzip_file(path)
data = check_cls_dataset(unzip_dir)
data["path"] = unzip_dir
else: # detect, segment, pose
_, data_dir, yaml_path = self._unzip(Path(path))
try:
# Load YAML with checks
data = yaml_load(yaml_path)
data["path"] = "" # strip path since YAML should be in dataset root for all HUB datasets
yaml_save(yaml_path, data)
data = check_det_dataset(yaml_path, autodownload) # dict
data["path"] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e:
raise Exception("error/HUB/dataset_stats/init") from e
self.hub_dir = Path(f'{data["path"]}-hub')
self.im_dir = self.hub_dir / "images"
self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())} # statistics dictionary
self.data = data
@staticmethod
def _unzip(path):
"""Unzip data.zip."""
if not str(path).endswith(".zip"): # path is data.yaml
return False, None, path
unzip_dir = unzip_file(path, path=path.parent)
assert unzip_dir.is_dir(), (
f"Error unzipping {path}, {unzip_dir} not found. " f"path/to/abc.zip MUST unzip to path/to/abc/"
)
return True, str(unzip_dir), find_dataset_yaml(unzip_dir) # zipped, data_dir, yaml_path
def _hub_ops(self, f):
"""Saves a compressed image for HUB previews."""
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
def get_json(self, save=False, verbose=False):
"""Return dataset JSON for Ultralytics HUB."""
def _round(labels):
"""Update labels to integer class and 4 decimal place floats."""
if self.task == "detect":
coordinates = labels["bboxes"]
elif self.task == "segment":
coordinates = [x.flatten() for x in labels["segments"]]
elif self.task == "pose":
n = labels["keypoints"].shape[0]
coordinates = np.concatenate((labels["bboxes"], labels["keypoints"].reshape(n, -1)), 1)
else:
raise ValueError("Undefined dataset task.")
zipped = zip(labels["cls"], coordinates)
return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped]
for split in "train", "val", "test":
self.stats[split] = None # predefine
path = self.data.get(split)
# Check split
if path is None: # no split
continue
files = [f for f in Path(path).rglob("*.*") if f.suffix[1:].lower() in IMG_FORMATS] # image files in split
if not files: # no images
continue
# Get dataset statistics
if self.task == "classify":
from torchvision.datasets import ImageFolder
dataset = ImageFolder(self.data[split])
x = np.zeros(len(dataset.classes)).astype(int)
for im in dataset.imgs:
x[im[1]] += 1
self.stats[split] = {
"instance_stats": {"total": len(dataset), "per_class": x.tolist()},
"image_stats": {"total": len(dataset), "unlabelled": 0, "per_class": x.tolist()},
"labels": [{Path(k).name: v} for k, v in dataset.imgs],
}
else:
from ultralytics.data import YOLODataset
dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
x = np.array(
[
np.bincount(label["cls"].astype(int).flatten(), minlength=self.data["nc"])
for label in TQDM(dataset.labels, total=len(dataset), desc="Statistics")
]
) # shape(128x80)
self.stats[split] = {
"instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()},
"image_stats": {
"total": len(dataset),
"unlabelled": int(np.all(x == 0, 1).sum()),
"per_class": (x > 0).sum(0).tolist(),
},
"labels": [{Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)],
}
# Save, print and return
if save:
self.hub_dir.mkdir(parents=True, exist_ok=True) # makes dataset-hub/
stats_path = self.hub_dir / "stats.json"
LOGGER.info(f"Saving {stats_path.resolve()}...")
with open(stats_path, "w") as f:
json.dump(self.stats, f) # save stats.json
if verbose:
LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False))
return self.stats
def process_images(self):
"""Compress images for Ultralytics HUB."""
from ultralytics.data import YOLODataset # ClassificationDataset
self.im_dir.mkdir(parents=True, exist_ok=True) # makes dataset-hub/images/
for split in "train", "val", "test":
if self.data.get(split) is None:
continue
dataset = YOLODataset(img_path=self.data[split], data=self.data)
with ThreadPool(NUM_THREADS) as pool:
for _ in TQDM(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f"{split} images"):
pass
LOGGER.info(f"Done. All images saved to {self.im_dir}")
return self.im_dir
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
"""
    Compresses a single image file to a reduced size while preserving its aspect ratio and quality, using either the
    Python Imaging Library (PIL) or the OpenCV library. If the input image is smaller than the maximum dimension, it
    will not be resized.
Args:
f (str): The path to the input image file.
f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
quality (int, optional): The image compression quality as a percentage. Default is 50%.
Example:
```python
from pathlib import Path
from ultralytics.data.utils import compress_one_image
for f in Path('path/to/dataset').rglob('*.jpg'):
compress_one_image(f)
```
"""
try: # use PIL
im = Image.open(f)
r = max_dim / max(im.height, im.width) # ratio
if r < 1.0: # image too large
im = im.resize((int(im.width * r), int(im.height * r)))
im.save(f_new or f, "JPEG", quality=quality, optimize=True) # save
except Exception as e: # use OpenCV
LOGGER.info(f"WARNING ⚠️ HUB ops PIL failure {f}: {e}")
im = cv2.imread(f)
im_height, im_width = im.shape[:2]
r = max_dim / max(im_height, im_width) # ratio
if r < 1.0: # image too large
im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
cv2.imwrite(str(f_new or f), im)
def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
"""
Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
Args:
path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
Example:
```python
from ultralytics.data.utils import autosplit
autosplit()
```
"""
path = Path(path) # images dir
files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
n = len(files) # number of files
random.seed(0) # for reproducibility
indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt"] # 3 txt files
for x in txt:
if (path.parent / x).exists():
(path.parent / x).unlink() # remove existing
LOGGER.info(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only)
for i, img in TQDM(zip(indices, files), total=n):
if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
with open(path.parent / txt[i], "a") as f:
f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n") # add image to txt file
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
Format | `format=argument` | Model
--- | --- | ---
PyTorch | - | yolov8n.pt
TorchScript | `torchscript` | yolov8n.torchscript
ONNX | `onnx` | yolov8n.onnx
OpenVINO | `openvino` | yolov8n_openvino_model/
TensorRT | `engine` | yolov8n.engine
CoreML | `coreml` | yolov8n.mlpackage
TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/
TensorFlow GraphDef | `pb` | yolov8n.pb
TensorFlow Lite | `tflite` | yolov8n.tflite
TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov8n_web_model/
PaddlePaddle | `paddle` | yolov8n_paddle_model/
NCNN | `ncnn` | yolov8n_ncnn_model/
Requirements:
$ pip install "ultralytics[export]"
Python:
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model.export(format='onnx')
CLI:
$ yolo mode=export model=yolov8n.pt format=onnx
Inference:
$ yolo predict model=yolov8n.pt # PyTorch
yolov8n.torchscript # TorchScript
yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
yolov8n_openvino_model # OpenVINO
yolov8n.engine # TensorRT
yolov8n.mlpackage # CoreML (macOS-only)
yolov8n_saved_model # TensorFlow SavedModel
yolov8n.pb # TensorFlow GraphDef
yolov8n.tflite # TensorFlow Lite
yolov8n_edgetpu.tflite # TensorFlow Edge TPU
yolov8n_paddle_model # PaddlePaddle
yolov8n_ncnn_model # NCNN
TensorFlow.js:
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
$ npm install
$ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model
$ npm start
"""
import json
import os
import shutil
import subprocess
import time
import warnings
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
from ultralytics.cfg import get_cfg
from ultralytics.data.dataset import YOLODataset
from ultralytics.data.utils import check_det_dataset
from ultralytics.nn.autobackend import check_class_names, default_class_names
from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder, v10Detect
from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel
from ultralytics.utils import (
ARM64,
DEFAULT_CFG,
LINUX,
LOGGER,
MACOS,
ROOT,
WINDOWS,
__version__,
callbacks,
colorstr,
get_default_args,
yaml_save,
)
from ultralytics.utils.checks import PYTHON_VERSION, check_imgsz, check_is_path_safe, check_requirements, check_version
from ultralytics.utils.downloads import attempt_download_asset, get_github_assets
from ultralytics.utils.files import file_size, spaces_in_path
from ultralytics.utils.ops import Profile
from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, smart_inference_mode
def export_formats():
"""YOLOv8 export formats."""
import pandas
x = [
["PyTorch", "-", ".pt", True, True],
["TorchScript", "torchscript", ".torchscript", True, True],
["ONNX", "onnx", ".onnx", True, True],
["OpenVINO", "openvino", "_openvino_model", True, False],
["TensorRT", "engine", ".engine", False, True],
["CoreML", "coreml", ".mlpackage", True, False],
["TensorFlow SavedModel", "saved_model", "_saved_model", True, True],
["TensorFlow GraphDef", "pb", ".pb", True, True],
["TensorFlow Lite", "tflite", ".tflite", True, False],
["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False],
["TensorFlow.js", "tfjs", "_web_model", True, False],
["PaddlePaddle", "paddle", "_paddle_model", True, True],
["NCNN", "ncnn", "_ncnn_model", True, True],
]
return pandas.DataFrame(x, columns=["Format", "Argument", "Suffix", "CPU", "GPU"])
def gd_outputs(gd):
"""TensorFlow GraphDef model output node names."""
name_list, input_list = [], []
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
name_list.append(node.name)
input_list.extend(node.input)
return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))
def try_export(inner_func):
"""YOLOv8 export decorator, i..e @try_export."""
inner_args = get_default_args(inner_func)
def outer_func(*args, **kwargs):
"""Export a model."""
prefix = inner_args["prefix"]
try:
with Profile() as dt:
f, model = inner_func(*args, **kwargs)
LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)")
return f, model
except Exception as e:
LOGGER.info(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}")
raise e
return outer_func
class Exporter:
"""
A class for exporting a model.
Attributes:
args (SimpleNamespace): Configuration for the exporter.
callbacks (list, optional): List of callback functions. Defaults to None.
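    Example:
        A minimal sketch; most users export through the model API rather than instantiating Exporter directly:
        ```python
        from ultralytics import YOLO
        model = YOLO('yolov8n.pt')
        model.export(format='onnx')  # builds an Exporter internally and returns the exported file path
        ```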
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""
Initializes the Exporter class.
Args:
cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
overrides (dict, optional): Configuration overrides. Defaults to None.
_callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
"""
self.args = get_cfg(cfg, overrides)
if self.args.format.lower() in ("coreml", "mlmodel"): # fix attempt for protobuf<3.20.x errors
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" # must run before TensorBoard callback
self.callbacks = _callbacks or callbacks.get_default_callbacks()
callbacks.add_integration_callbacks(self)
@smart_inference_mode()
def __call__(self, model=None):
"""Returns list of exported files/dirs after running callbacks."""
self.run_callbacks("on_export_start")
t = time.time()
fmt = self.args.format.lower() # to lowercase
if fmt in ("tensorrt", "trt"): # 'engine' aliases
fmt = "engine"
if fmt in ("mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"): # 'coreml' aliases
fmt = "coreml"
fmts = tuple(export_formats()["Argument"][1:]) # available export formats
flags = [x == fmt for x in fmts]
if sum(flags) != 1:
raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags # export booleans
# Device
if fmt == "engine" and self.args.device is None:
LOGGER.warning("WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0")
self.args.device = "0"
self.device = select_device("cpu" if self.args.device is None else self.args.device)
# Checks
if not hasattr(model, "names"):
model.names = default_class_names()
model.names = check_class_names(model.names)
if self.args.half and onnx and self.device.type == "cpu":
LOGGER.warning("WARNING ⚠️ half=True only compatible with GPU export, i.e. use device=0")
self.args.half = False
assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one."
self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size
if self.args.optimize:
assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False"
assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'"
if edgetpu and not LINUX:
raise SystemError("Edge TPU export only supported on Linux. See https://coral.ai/docs/edgetpu/compiler/")
if isinstance(model, WorldModel):
LOGGER.warning(
"WARNING ⚠️ YOLOWorld (original version) export is not supported to any format.\n"
"WARNING ⚠️ YOLOWorldv2 models (i.e. 'yolov8s-worldv2.pt') only support export to "
"(torchscript, onnx, openvino, engine, coreml) formats. "
"See https://docs.ultralytics.com/models/yolo-world for details."
)
# Input
im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
file = Path(
getattr(model, "pt_path", None) or getattr(model, "yaml_file", None) or model.yaml.get("yaml_file", "")
)
if file.suffix in {".yaml", ".yml"}:
file = Path(file.name)
# Update model
model = deepcopy(model).to(self.device)
for p in model.parameters():
p.requires_grad = False
model.eval()
model.float()
model = model.fuse()
for m in model.modules():
if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB
m.dynamic = self.args.dynamic
m.export = True
m.format = self.args.format
if isinstance(m, v10Detect):
m.max_det = self.args.max_det
elif isinstance(m, C2f) and not any((saved_model, pb, tflite, edgetpu, tfjs)):
# EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
m.forward = m.forward_split
y = None
for _ in range(2):
y = model(im) # dry runs
if self.args.half and onnx and self.device.type != "cpu":
im, model = im.half(), model.half() # to FP16
# Filter warnings
warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning
warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning
warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning
# Assign
self.im = im
self.model = model
self.file = file
self.output_shape = (
tuple(y.shape)
if isinstance(y, torch.Tensor)
else tuple(tuple(x.shape if isinstance(x, torch.Tensor) else []) for x in y)
)
self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO")
data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else ""
description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}'
self.metadata = {
"description": description,
"author": "Ultralytics",
"date": datetime.now().isoformat(),
"version": __version__,
"license": "AGPL-3.0 License (https://ultralytics.com/license)",
"docs": "https://docs.ultralytics.com",
"stride": int(max(model.stride)),
"task": model.task,
"batch": self.args.batch,
"imgsz": self.imgsz,
"names": model.names,
} # model metadata
if model.task == "pose":
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
LOGGER.info(
f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)'
)
# Exports
f = [""] * len(fmts) # exported filenames
if jit or ncnn: # TorchScript
f[0], _ = self.export_torchscript()
if engine: # TensorRT required before ONNX
f[1], _ = self.export_engine()
if onnx: # ONNX
f[2], _ = self.export_onnx()
if xml: # OpenVINO
f[3], _ = self.export_openvino()
if coreml: # CoreML
f[4], _ = self.export_coreml()
if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
self.args.int8 |= edgetpu
f[5], keras_model = self.export_saved_model()
if pb or tfjs: # pb prerequisite to tfjs
f[6], _ = self.export_pb(keras_model=keras_model)
if tflite:
f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms)
if edgetpu:
f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
if tfjs:
f[9], _ = self.export_tfjs()
if paddle: # PaddlePaddle
f[10], _ = self.export_paddle()
if ncnn: # NCNN
f[11], _ = self.export_ncnn()
# Finish
f = [str(x) for x in f if x] # filter out '' and None
if any(f):
f = str(Path(f[-1]))
square = self.imgsz[0] == self.imgsz[1]
s = (
""
if square
else f"WARNING ⚠️ non-PyTorch val requires square images, 'imgsz={self.imgsz}' will not "
f"work. Use export 'imgsz={max(self.imgsz)}' if val is required."
)
imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "")
predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else ""
q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization
LOGGER.info(
f'\nExport complete ({time.time() - t:.1f}s)'
f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}'
f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}'
f'\nVisualize: https://netron.app'
)
self.run_callbacks("on_export_end")
return f # return list of exported files/dirs
@try_export
def export_torchscript(self, prefix=colorstr("TorchScript:")):
"""YOLOv8 TorchScript model export."""
LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...")
f = self.file.with_suffix(".torchscript")
ts = torch.jit.trace(self.model, self.im, strict=False)
extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap()
if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
LOGGER.info(f"{prefix} optimizing for mobile...")
from torch.utils.mobile_optimizer import optimize_for_mobile
optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
else:
ts.save(str(f), _extra_files=extra_files)
return f, None
@try_export
def export_onnx(self, prefix=colorstr("ONNX:")):
"""YOLOv8 ONNX export."""
requirements = ["onnx>=1.12.0"]
if self.args.simplify:
requirements += ["onnxsim>=0.4.33", "onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime"]
if ARM64:
check_requirements("cmake") # 'cmake' is needed to build onnxsim on aarch64
check_requirements(requirements)
import onnx # noqa
opset_version = self.args.opset or get_latest_opset()
LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...")
f = str(self.file.with_suffix(".onnx"))
output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"]
dynamic = self.args.dynamic
if dynamic:
dynamic = {"images": {0: "batch", 2: "height", 3: "width"}} # shape(1,3,640,640)
if isinstance(self.model, SegmentationModel):
dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 116, 8400)
dynamic["output1"] = {0: "batch", 2: "mask_height", 3: "mask_width"} # shape(1,32,160,160)
elif isinstance(self.model, DetectionModel):
dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 84, 8400)
torch.onnx.export(
self.model.cpu() if dynamic else self.model, # dynamic=True only compatible with cpu
self.im.cpu() if dynamic else self.im,
f,
verbose=False,
opset_version=opset_version,
do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
input_names=["images"],
output_names=output_names,
dynamic_axes=dynamic or None,
)
# Checks
model_onnx = onnx.load(f) # load onnx model
# onnx.checker.check_model(model_onnx) # check onnx model
# Simplify
if self.args.simplify:
try:
import onnxsim
LOGGER.info(f"{prefix} simplifying with onnxsim {onnxsim.__version__}...")
# subprocess.run(f'onnxsim "{f}" "{f}"', shell=True)
model_onnx, check = onnxsim.simplify(model_onnx)
assert check, "Simplified ONNX model could not be validated"
except Exception as e:
LOGGER.info(f"{prefix} simplifier failure: {e}")
# Metadata
for k, v in self.metadata.items():
meta = model_onnx.metadata_props.add()
meta.key, meta.value = k, str(v)
onnx.save(model_onnx, f)
return f, model_onnx
@try_export
def export_openvino(self, prefix=colorstr("OpenVINO:")):
"""YOLOv8 OpenVINO export."""
check_requirements("openvino>=2024.0.0") # requires openvino: https://pypi.org/project/openvino/
import openvino as ov
LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
assert TORCH_1_13, f"OpenVINO export requires torch>=1.13.0 but torch=={torch.__version__} is installed"
ov_model = ov.convert_model(
self.model.cpu(),
input=None if self.args.dynamic else [self.im.shape],
example_input=self.im,
)
def serialize(ov_model, file):
"""Set RT info, serialize and save metadata YAML."""
ov_model.set_rt_info("YOLOv8", ["model_info", "model_type"])
ov_model.set_rt_info(True, ["model_info", "reverse_input_channels"])
ov_model.set_rt_info(114, ["model_info", "pad_value"])
ov_model.set_rt_info([255.0], ["model_info", "scale_values"])
ov_model.set_rt_info(self.args.iou, ["model_info", "iou_threshold"])
ov_model.set_rt_info([v.replace(" ", "_") for v in self.model.names.values()], ["model_info", "labels"])
if self.model.task != "classify":
ov_model.set_rt_info("fit_to_window_letterbox", ["model_info", "resize_type"])
ov.runtime.save_model(ov_model, file, compress_to_fp16=self.args.half)
yaml_save(Path(file).parent / "metadata.yaml", self.metadata) # add metadata.yaml
if self.args.int8:
fq = str(self.file).replace(self.file.suffix, f"_int8_openvino_model{os.sep}")
fq_ov = str(Path(fq) / self.file.with_suffix(".xml").name)
if not self.args.data:
self.args.data = DEFAULT_CFG.data or "coco128.yaml"
LOGGER.warning(
f"{prefix} WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. "
f"Using default 'data={self.args.data}'."
)
check_requirements("nncf>=2.8.0")
import nncf
def transform_fn(data_item):
"""Quantization transform function."""
assert (
data_item["img"].dtype == torch.uint8
), "Input image must be uint8 for the quantization preprocessing"
im = data_item["img"].numpy().astype(np.float32) / 255.0 # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0
return np.expand_dims(im, 0) if im.ndim == 3 else im
# Generate calibration data for integer quantization
LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
data = check_det_dataset(self.args.data)
dataset = YOLODataset(data["val"], data=data, imgsz=self.imgsz[0], augment=False)
n = len(dataset)
if n < 300:
LOGGER.warning(f"{prefix} WARNING ⚠️ >300 images recommended for INT8 calibration, found {n} images.")
quantization_dataset = nncf.Dataset(dataset, transform_fn)
ignored_scope = None
if isinstance(self.model.model[-1], Detect):
# Includes all Detect subclasses like Segment, Pose, OBB, WorldDetect
head_module_name = ".".join(list(self.model.named_modules())[-1][0].split(".")[:2])
ignored_scope = nncf.IgnoredScope( # ignore operations
patterns=[
f".*{head_module_name}/.*/Add",
f".*{head_module_name}/.*/Sub*",
f".*{head_module_name}/.*/Mul*",
f".*{head_module_name}/.*/Div*",
f".*{head_module_name}\\.dfl.*",
],
types=["Sigmoid"],
)
quantized_ov_model = nncf.quantize(
ov_model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED, ignored_scope=ignored_scope
)
serialize(quantized_ov_model, fq_ov)
return fq, None
f = str(self.file).replace(self.file.suffix, f"_openvino_model{os.sep}")
f_ov = str(Path(f) / self.file.with_suffix(".xml").name)
serialize(ov_model, f_ov)
return f, None
@try_export
def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
"""YOLOv8 Paddle export."""
check_requirements(("paddlepaddle", "x2paddle"))
import x2paddle # noqa
from x2paddle.convert import pytorch2paddle # noqa
LOGGER.info(f"\n{prefix} starting export with X2Paddle {x2paddle.__version__}...")
f = str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}")
pytorch2paddle(module=self.model, save_dir=f, jit_type="trace", input_examples=[self.im]) # export
yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f, None
@try_export
def export_ncnn(self, prefix=colorstr("NCNN:")):
"""
YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx.
"""
check_requirements("ncnn")
import ncnn # noqa
LOGGER.info(f"\n{prefix} starting export with NCNN {ncnn.__version__}...")
f = Path(str(self.file).replace(self.file.suffix, f"_ncnn_model{os.sep}"))
f_ts = self.file.with_suffix(".torchscript")
name = Path("pnnx.exe" if WINDOWS else "pnnx") # PNNX filename
pnnx = name if name.is_file() else ROOT / name
if not pnnx.is_file():
LOGGER.warning(
f"{prefix} WARNING ⚠️ PNNX not found. Attempting to download binary file from "
"https://github.com/pnnx/pnnx/.\nNote PNNX Binary file must be placed in current working directory "
f"or in {ROOT}. See PNNX repo for full installation instructions."
)
system = "macos" if MACOS else "windows" if WINDOWS else "linux-aarch64" if ARM64 else "linux"
_, assets = get_github_assets(repo="pnnx/pnnx", retry=True)
if assets:
url = [x for x in assets if f"{system}.zip" in x][0]
else:
url = f"https://github.com/pnnx/pnnx/releases/download/20240226/pnnx-20240226-{system}.zip"
LOGGER.warning(f"{prefix} WARNING ⚠️ PNNX GitHub assets not found, using default {url}")
asset = attempt_download_asset(url, repo="pnnx/pnnx", release="latest")
if check_is_path_safe(Path.cwd(), asset): # avoid path traversal security vulnerability
unzip_dir = Path(asset).with_suffix("")
(unzip_dir / name).rename(pnnx) # move binary to ROOT
shutil.rmtree(unzip_dir) # delete unzip dir
Path(asset).unlink() # delete zip
pnnx.chmod(0o777) # set read, write, and execute permissions for everyone
ncnn_args = [
f'ncnnparam={f / "model.ncnn.param"}',
f'ncnnbin={f / "model.ncnn.bin"}',
f'ncnnpy={f / "model_ncnn.py"}',
]
pnnx_args = [
f'pnnxparam={f / "model.pnnx.param"}',
f'pnnxbin={f / "model.pnnx.bin"}',
f'pnnxpy={f / "model_pnnx.py"}',
f'pnnxonnx={f / "model.pnnx.onnx"}',
]
cmd = [
str(pnnx),
str(f_ts),
*ncnn_args,
*pnnx_args,
f"fp16={int(self.args.half)}",
f"device={self.device.type}",
f'inputshape="{[self.args.batch, 3, *self.imgsz]}"',
]
f.mkdir(exist_ok=True) # make ncnn_model directory
LOGGER.info(f"{prefix} running '{' '.join(cmd)}'")
subprocess.run(cmd, check=True)
# Remove debug files
pnnx_files = [x.split("=")[-1] for x in pnnx_args]
for f_debug in ("debug.bin", "debug.param", "debug2.bin", "debug2.param", *pnnx_files):
Path(f_debug).unlink(missing_ok=True)
yaml_save(f / "metadata.yaml", self.metadata) # add metadata.yaml
return str(f), None
@try_export
def export_coreml(self, prefix=colorstr("CoreML:")):
"""YOLOv8 CoreML export."""
mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested
check_requirements("coremltools>=6.0,<=6.2" if mlmodel else "coremltools>=7.0")
import coremltools as ct # noqa
LOGGER.info(f"\n{prefix} starting export with coremltools {ct.__version__}...")
assert not WINDOWS, "CoreML export is not supported on Windows, please run on macOS or Linux."
f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
if f.is_dir():
shutil.rmtree(f)
bias = [0.0, 0.0, 0.0]
scale = 1 / 255
classifier_config = None
if self.model.task == "classify":
classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None
model = self.model
elif self.model.task == "detect":
model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model
else:
if self.args.nms:
LOGGER.warning(f"{prefix} WARNING ⚠️ 'nms=True' is only available for Detect models like 'yolov8n.pt'.")
# TODO CoreML Segment and Pose model pipelining
model = self.model
ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model
ct_model = ct.convert(
ts,
inputs=[ct.ImageType("image", shape=self.im.shape, scale=scale, bias=bias)],
classifier_config=classifier_config,
convert_to="neuralnetwork" if mlmodel else "mlprogram",
)
bits, mode = (8, "kmeans") if self.args.int8 else (16, "linear") if self.args.half else (32, None)
if bits < 32:
if "kmeans" in mode:
check_requirements("scikit-learn") # scikit-learn package required for k-means quantization
if mlmodel:
ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
elif bits == 8: # mlprogram already quantized to FP16
import coremltools.optimize.coreml as cto
op_config = cto.OpPalettizerConfig(mode="kmeans", nbits=bits, weight_threshold=512)
config = cto.OptimizationConfig(global_config=op_config)
ct_model = cto.palettize_weights(ct_model, config=config)
if self.args.nms and self.model.task == "detect":
if mlmodel:
# coremltools<=6.2 NMS export requires Python<3.11
check_version(PYTHON_VERSION, "<3.11", name="Python ", hard=True)
weights_dir = None
else:
ct_model.save(str(f)) # save otherwise weights_dir does not exist
weights_dir = str(f / "Data/com.apple.CoreML/weights")
ct_model = self._pipeline_coreml(ct_model, weights_dir=weights_dir)
m = self.metadata # metadata dict
ct_model.short_description = m.pop("description")
ct_model.author = m.pop("author")
ct_model.license = m.pop("license")
ct_model.version = m.pop("version")
ct_model.user_defined_metadata.update({k: str(v) for k, v in m.items()})
try:
ct_model.save(str(f)) # save *.mlpackage
except Exception as e:
LOGGER.warning(
f"{prefix} WARNING ⚠️ CoreML export to *.mlpackage failed ({e}), reverting to *.mlmodel export. "
f"Known coremltools Python 3.11 and Windows bugs https://github.com/apple/coremltools/issues/1928."
)
f = f.with_suffix(".mlmodel")
ct_model.save(str(f))
return f, ct_model
@try_export
def export_engine(self, prefix=colorstr("TensorRT:")):
"""YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
f_onnx, _ = self.export_onnx() # run before trt import https://github.com/ultralytics/ultralytics/issues/7016
try:
import tensorrt as trt # noqa
except ImportError:
if LINUX:
check_requirements("nvidia-tensorrt", cmds="-U --index-url https://pypi.ngc.nvidia.com")
import tensorrt as trt # noqa
check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0
self.args.simplify = True
LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}"
f = self.file.with_suffix(".engine") # TensorRT engine file
logger = trt.Logger(trt.Logger.INFO)
if self.args.verbose:
logger.min_severity = trt.Logger.Severity.VERBOSE
builder = trt.Builder(logger)
config = builder.create_builder_config()
config.max_workspace_size = self.args.workspace * 1 << 30
# config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice
flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flag)
parser = trt.OnnxParser(network, logger)
if not parser.parse_from_file(f_onnx):
raise RuntimeError(f"failed to load ONNX file: {f_onnx}")
inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]
for inp in inputs:
LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
for out in outputs:
LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
if self.args.dynamic:
shape = self.im.shape
if shape[0] <= 1:
LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
profile = builder.create_optimization_profile()
for inp in inputs:
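                # set_shape(name, min_shape, opt_shape, max_shape): min batch 1, opt batch//2, max batch = export batch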
profile.set_shape(inp.name, (1, *shape[1:]), (max(1, shape[0] // 2), *shape[1:]), shape)
config.add_optimization_profile(profile)
LOGGER.info(
f"{prefix} building FP{16 if builder.platform_has_fast_fp16 and self.args.half else 32} engine as {f}"
)
if builder.platform_has_fast_fp16 and self.args.half:
config.set_flag(trt.BuilderFlag.FP16)
del self.model
torch.cuda.empty_cache()
# Write file
with builder.build_engine(network, config) as engine, open(f, "wb") as t:
# Metadata
meta = json.dumps(self.metadata)
t.write(len(meta).to_bytes(4, byteorder="little", signed=True))
t.write(meta.encode())
# Model
t.write(engine.serialize())
return f, None
@try_export
def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
"""YOLOv8 TensorFlow SavedModel export."""
cuda = torch.cuda.is_available()
try:
import tensorflow as tf # noqa
except ImportError:
suffix = "-macos" if MACOS else "-aarch64" if ARM64 else "" if cuda else "-cpu"
version = "" if ARM64 else "<=2.13.1"
check_requirements(f"tensorflow{suffix}{version}")
import tensorflow as tf # noqa
if ARM64:
check_requirements("cmake") # 'cmake' is needed to build onnxsim on aarch64
check_requirements(
(
"onnx>=1.12.0",
"onnx2tf>=1.15.4,<=1.17.5",
"sng4onnx>=1.0.1",
"onnxsim>=0.4.33",
"onnx_graphsurgeon>=0.3.26",
"tflite_support",
"flatbuffers>=23.5.26,<100", # update old 'flatbuffers' included inside tensorflow package
"onnxruntime-gpu" if cuda else "onnxruntime",
),
cmds="--extra-index-url https://pypi.ngc.nvidia.com",
) # onnx_graphsurgeon only on NVIDIA
LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
check_version(
tf.__version__,
"<=2.13.1",
name="tensorflow",
verbose=True,
msg="https://github.com/ultralytics/ultralytics/issues/5161",
)
import onnx2tf
f = Path(str(self.file).replace(self.file.suffix, "_saved_model"))
if f.is_dir():
shutil.rmtree(f) # delete output folder
# Pre-download calibration file to fix https://github.com/PINTO0309/onnx2tf/issues/545
onnx2tf_file = Path("calibration_image_sample_data_20x128x128x3_float32.npy")
if not onnx2tf_file.exists():
attempt_download_asset(f"{onnx2tf_file}.zip", unzip=True, delete=True)
# Export to ONNX
self.args.simplify = True
f_onnx, _ = self.export_onnx()
# Export to TF
tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file
np_data = None
if self.args.int8:
verbosity = "info"
if self.args.data:
# Generate calibration data for integer quantization
LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
data = check_det_dataset(self.args.data)
dataset = YOLODataset(data["val"], data=data, imgsz=self.imgsz[0], augment=False)
images = []
for i, batch in enumerate(dataset):
if i >= 100: # maximum number of calibration images
break
im = batch["img"].permute(1, 2, 0)[None] # list to nparray, CHW to BHWC
images.append(im)
f.mkdir()
images = torch.cat(images, 0).float()
# mean = images.view(-1, 3).mean(0) # imagenet mean [123.675, 116.28, 103.53]
# std = images.view(-1, 3).std(0) # imagenet std [58.395, 57.12, 57.375]
np.save(str(tmp_file), images.numpy()) # BHWC
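                # Assumed onnx2tf calibration entry format: [input_op_name, npy_path, mean, std]; mean 0, std 255 rescales inputs to 0-1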
np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]]
else:
verbosity = "error"
LOGGER.info(f"{prefix} starting TFLite export with onnx2tf {onnx2tf.__version__}...")
onnx2tf.convert(
input_onnx_file_path=f_onnx,
output_folder_path=str(f),
not_use_onnxsim=True,
verbosity=verbosity,
output_integer_quantized_tflite=self.args.int8,
quant_type="per-tensor", # "per-tensor" (faster) or "per-channel" (slower but more accurate)
custom_input_op_name_np_data_path=np_data,
)
yaml_save(f / "metadata.yaml", self.metadata) # add metadata.yaml
# Remove/rename TFLite models
if self.args.int8:
tmp_file.unlink(missing_ok=True)
for file in f.rglob("*_dynamic_range_quant.tflite"):
file.rename(file.with_name(file.stem.replace("_dynamic_range_quant", "_int8") + file.suffix))
for file in f.rglob("*_integer_quant_with_int16_act.tflite"):
file.unlink() # delete extra fp16 activation TFLite files
# Add TFLite metadata
for file in f.rglob("*.tflite"):
            file.unlink() if "quant_with_int16_act.tflite" in str(file) else self._add_tflite_metadata(file)
return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model
@try_export
def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")):
"""YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
import tensorflow as tf # noqa
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
f = self.file.with_suffix(".pb")
m = tf.function(lambda x: keras_model(x)) # full model
m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
frozen_func = convert_variables_to_constants_v2(m)
frozen_func.graph.as_graph_def()
tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
return f, None
@try_export
def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")):
"""YOLOv8 TensorFlow Lite export."""
import tensorflow as tf # noqa
LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
saved_model = Path(str(self.file).replace(self.file.suffix, "_saved_model"))
if self.args.int8:
f = saved_model / f"{self.file.stem}_int8.tflite" # fp32 in/out
elif self.args.half:
f = saved_model / f"{self.file.stem}_float16.tflite" # fp32 in/out
else:
f = saved_model / f"{self.file.stem}_float32.tflite"
return str(f), None
@try_export
def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
"""YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
LOGGER.warning(f"{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")
cmd = "edgetpu_compiler --version"
help_url = "https://coral.ai/docs/edgetpu/compiler/"
assert LINUX, f"export only supported on Linux. See {help_url}"
if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0:
LOGGER.info(f"\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}")
sudo = subprocess.run("sudo --version >/dev/null", shell=True).returncode == 0 # sudo installed on system
for c in (
"curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -",
'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | '
"sudo tee /etc/apt/sources.list.d/coral-edgetpu.list",
"sudo apt-get update",
"sudo apt-get install edgetpu-compiler",
):
subprocess.run(c if sudo else c.replace("sudo ", ""), shell=True, check=True)
ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model
cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"'
LOGGER.info(f"{prefix} running '{cmd}'")
subprocess.run(cmd, shell=True)
self._add_tflite_metadata(f)
return f, None
@try_export
def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
"""YOLOv8 TensorFlow.js export."""
check_requirements("tensorflowjs")
if ARM64:
# Fix error: `np.object` was a deprecated alias for the builtin `object` when exporting to TF.js on ARM64
check_requirements("numpy==1.23.5")
import tensorflow as tf
import tensorflowjs as tfjs # noqa
LOGGER.info(f"\n{prefix} starting export with tensorflowjs {tfjs.__version__}...")
f = str(self.file).replace(self.file.suffix, "_web_model") # js dir
f_pb = str(self.file.with_suffix(".pb")) # *.pb path
gd = tf.Graph().as_graph_def() # TF GraphDef
with open(f_pb, "rb") as file:
gd.ParseFromString(file.read())
outputs = ",".join(gd_outputs(gd))
LOGGER.info(f"\n{prefix} output node names: {outputs}")
quantization = "--quantize_float16" if self.args.half else "--quantize_uint8" if self.args.int8 else ""
with spaces_in_path(f_pb) as fpb_, spaces_in_path(f) as f_: # exporter can not handle spaces in path
cmd = (
"tensorflowjs_converter "
f'--input_format=tf_frozen_model {quantization} --output_node_names={outputs} "{fpb_}" "{f_}"'
)
LOGGER.info(f"{prefix} running '{cmd}'")
subprocess.run(cmd, shell=True)
if " " in f:
LOGGER.warning(f"{prefix} WARNING ⚠️ your model may not work correctly with spaces in path '{f}'.")
# f_json = Path(f) / 'model.json' # *.json path
# with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order
# subst = re.sub(
# r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
# r'"Identity.?.?": {"name": "Identity.?.?"}, '
# r'"Identity.?.?": {"name": "Identity.?.?"}, '
# r'"Identity.?.?": {"name": "Identity.?.?"}}}',
# r'{"outputs": {"Identity": {"name": "Identity"}, '
# r'"Identity_1": {"name": "Identity_1"}, '
# r'"Identity_2": {"name": "Identity_2"}, '
# r'"Identity_3": {"name": "Identity_3"}}}',
# f_json.read_text(),
# )
# j.write(subst)
yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f, None
def _add_tflite_metadata(self, file):
"""Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
from tflite_support import flatbuffers # noqa
from tflite_support import metadata as _metadata # noqa
from tflite_support import metadata_schema_py_generated as _metadata_fb # noqa
# Create model info
model_meta = _metadata_fb.ModelMetadataT()
model_meta.name = self.metadata["description"]
model_meta.version = self.metadata["version"]
model_meta.author = self.metadata["author"]
model_meta.license = self.metadata["license"]
# Label file
tmp_file = Path(file).parent / "temp_meta.txt"
with open(tmp_file, "w") as f:
f.write(str(self.metadata))
label_file = _metadata_fb.AssociatedFileT()
label_file.name = tmp_file.name
label_file.type = _metadata_fb.AssociatedFileType.TENSOR_AXIS_LABELS
# Create input info
input_meta = _metadata_fb.TensorMetadataT()
input_meta.name = "image"
input_meta.description = "Input image to be detected."
input_meta.content = _metadata_fb.ContentT()
input_meta.content.contentProperties = _metadata_fb.ImagePropertiesT()
input_meta.content.contentProperties.colorSpace = _metadata_fb.ColorSpaceType.RGB
input_meta.content.contentPropertiesType = _metadata_fb.ContentProperties.ImageProperties
# Create output info
output1 = _metadata_fb.TensorMetadataT()
output1.name = "output"
output1.description = "Coordinates of detected objects, class labels, and confidence score"
output1.associatedFiles = [label_file]
if self.model.task == "segment":
output2 = _metadata_fb.TensorMetadataT()
output2.name = "output"
output2.description = "Mask protos"
output2.associatedFiles = [label_file]
# Create subgraph info
subgraph = _metadata_fb.SubGraphMetadataT()
subgraph.inputTensorMetadata = [input_meta]
subgraph.outputTensorMetadata = [output1, output2] if self.model.task == "segment" else [output1]
model_meta.subgraphMetadata = [subgraph]
b = flatbuffers.Builder(0)
b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
metadata_buf = b.Output()
populator = _metadata.MetadataPopulator.with_model_file(str(file))
populator.load_metadata_buffer(metadata_buf)
populator.load_associated_files([str(tmp_file)])
populator.populate()
tmp_file.unlink()
def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
"""YOLOv8 CoreML pipeline."""
import coremltools as ct # noqa
LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...")
_, _, h, w = list(self.im.shape) # BCHW
# Output shapes
spec = model.get_spec()
out0, out1 = iter(spec.description.output)
if MACOS:
from PIL import Image
img = Image.new("RGB", (w, h)) # w=192, h=320
out = model.predict({"image": img})
out0_shape = out[out0.name].shape # (3780, 80)
out1_shape = out[out1.name].shape # (3780, 4)
else: # linux and windows can not run model.predict(), get sizes from PyTorch model output y
out0_shape = self.output_shape[2], self.output_shape[1] - 4 # (3780, 80)
out1_shape = self.output_shape[2], 4 # (3780, 4)
# Checks
names = self.metadata["names"]
nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
_, nc = out0_shape # number of anchors, number of classes
# _, nc = out0.type.multiArrayType.shape
assert len(names) == nc, f"{len(names)} names found for nc={nc}" # check
# Define output shapes (missing)
out0.type.multiArrayType.shape[:] = out0_shape # (3780, 80)
out1.type.multiArrayType.shape[:] = out1_shape # (3780, 4)
# spec.neuralNetwork.preprocessing[0].featureName = '0'
# Flexible input shapes
# from coremltools.models.neural_network import flexible_shape_utils
# s = [] # shapes
# s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192))
# s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384)) # (height, width)
# flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s)
# r = flexible_shape_utils.NeuralNetworkImageSizeRange() # shape ranges
# r.add_height_range((192, 640))
# r.add_width_range((192, 640))
# flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r)
# Print
# print(spec.description)
# Model from spec
model = ct.models.MLModel(spec, weights_dir=weights_dir)
# 3. Create NMS protobuf
nms_spec = ct.proto.Model_pb2.Model()
nms_spec.specificationVersion = 5
for i in range(2):
decoder_output = model._spec.description.output[i].SerializeToString()
nms_spec.description.input.add()
nms_spec.description.input[i].ParseFromString(decoder_output)
nms_spec.description.output.add()
nms_spec.description.output[i].ParseFromString(decoder_output)
nms_spec.description.output[0].name = "confidence"
nms_spec.description.output[1].name = "coordinates"
output_sizes = [nc, 4]
for i in range(2):
ma_type = nms_spec.description.output[i].type.multiArrayType
ma_type.shapeRange.sizeRanges.add()
ma_type.shapeRange.sizeRanges[0].lowerBound = 0
ma_type.shapeRange.sizeRanges[0].upperBound = -1
ma_type.shapeRange.sizeRanges.add()
ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
del ma_type.shape[:]
nms = nms_spec.nonMaximumSuppression
nms.confidenceInputFeatureName = out0.name # 1x507x80
nms.coordinatesInputFeatureName = out1.name # 1x507x4
nms.confidenceOutputFeatureName = "confidence"
nms.coordinatesOutputFeatureName = "coordinates"
nms.iouThresholdInputFeatureName = "iouThreshold"
nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
nms.iouThreshold = 0.45
nms.confidenceThreshold = 0.25
nms.pickTop.perClass = True
nms.stringClassLabels.vector.extend(names.values())
nms_model = ct.models.MLModel(nms_spec)
# 4. Pipeline models together
pipeline = ct.models.pipeline.Pipeline(
input_features=[
("image", ct.models.datatypes.Array(3, ny, nx)),
("iouThreshold", ct.models.datatypes.Double()),
("confidenceThreshold", ct.models.datatypes.Double()),
],
output_features=["confidence", "coordinates"],
)
pipeline.add_model(model)
pipeline.add_model(nms_model)
# Correct datatypes
pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString())
pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())
# Update metadata
pipeline.spec.specificationVersion = 5
pipeline.spec.description.metadata.userDefined.update(
{"IoU threshold": str(nms.iouThreshold), "Confidence threshold": str(nms.confidenceThreshold)}
)
# Save the model
model = ct.models.MLModel(pipeline.spec, weights_dir=weights_dir)
model.input_description["image"] = "Input image"
model.input_description["iouThreshold"] = f"(optional) IoU threshold override (default: {nms.iouThreshold})"
model.input_description["confidenceThreshold"] = (
f"(optional) Confidence threshold override (default: {nms.confidenceThreshold})"
)
model.output_description["confidence"] = 'Boxes × Class confidence (see user-defined metadata "classes")'
model.output_description["coordinates"] = "Boxes × [x, y, width, height] (relative to image size)"
LOGGER.info(f"{prefix} pipeline success")
return model
def add_callback(self, event: str, callback):
"""Appends the given callback."""
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):
"""Execute all callbacks for a given event."""
for callback in self.callbacks.get(event, []):
callback(self)
class IOSDetectModel(torch.nn.Module):
"""Wrap an Ultralytics YOLO model for Apple iOS CoreML export."""
def __init__(self, model, im):
"""Initialize the IOSDetectModel class with a YOLO model and example image."""
super().__init__()
_, _, h, w = im.shape # batch, channel, height, width
self.model = model
self.nc = len(model.names) # number of classes
if w == h:
self.normalize = 1.0 / w # scalar
else:
self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h]) # broadcast (slower, smaller)
def forward(self, x):
"""Normalize predictions of object detection model with input size-dependent factors."""
xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
# Ultralytics YOLO 🚀, AGPL-3.0 license
import inspect
import sys
from pathlib import Path
from typing import Union
import numpy as np
import torch
from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
from ultralytics.hub.utils import HUB_WEB_ROOT
from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load
from ultralytics.utils import ASSETS, DEFAULT_CFG_DICT, LOGGER, RANK, SETTINGS, callbacks, checks, emojis, yaml_load
from huggingface_hub import PyTorchModelHubMixin
class Model(nn.Module, PyTorchModelHubMixin):
"""
A base class for implementing YOLO models, unifying APIs across different model types.
This class provides a common interface for various operations related to YOLO models, such as training,
validation, prediction, exporting, and benchmarking. It handles different types of models, including those
loaded from local files, Ultralytics HUB, or Triton Server. The class is designed to be flexible and
extendable for different tasks and model configurations.
Args:
model (Union[str, Path], optional): Path or name of the model to load or create. This can be a local file
path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
task (Any, optional): The task type associated with the YOLO model. This can be used to specify the model's
application domain, such as object detection, segmentation, etc. Defaults to None.
verbose (bool, optional): If True, enables verbose output during the model's operations. Defaults to False.
Attributes:
callbacks (dict): A dictionary of callback functions for various events during model operations.
predictor (BasePredictor): The predictor object used for making predictions.
model (nn.Module): The underlying PyTorch model.
trainer (BaseTrainer): The trainer object used for training the model.
ckpt (dict): The checkpoint data if the model is loaded from a *.pt file.
cfg (str): The configuration of the model if loaded from a *.yaml file.
ckpt_path (str): The path to the checkpoint file.
overrides (dict): A dictionary of overrides for model configuration.
metrics (dict): The latest training/validation metrics.
session (HUBTrainingSession): The Ultralytics HUB session, if applicable.
task (str): The type of task the model is intended for.
model_name (str): The name of the model.
Methods:
__call__: Alias for the predict method, enabling the model instance to be callable.
_new: Initializes a new model based on a configuration file.
_load: Loads a model from a checkpoint file.
_check_is_pytorch_model: Ensures that the model is a PyTorch model.
reset_weights: Resets the model's weights to their initial state.
load: Loads model weights from a specified file.
save: Saves the current state of the model to a file.
info: Logs or returns information about the model.
fuse: Fuses Conv2d and BatchNorm2d layers for optimized inference.
predict: Performs object detection predictions.
track: Performs object tracking.
val: Validates the model on a dataset.
benchmark: Benchmarks the model on various export formats.
export: Exports the model to different formats.
train: Trains the model on a dataset.
tune: Performs hyperparameter tuning.
_apply: Applies a function to the model's tensors.
add_callback: Adds a callback function for an event.
clear_callback: Clears all callbacks for an event.
reset_callbacks: Resets all callbacks to their default functions.
_get_hub_session: Retrieves or creates an Ultralytics HUB session.
is_triton_model: Checks if a model is a Triton Server model.
is_hub_model: Checks if a model is an Ultralytics HUB model.
_reset_ckpt_args: Resets checkpoint arguments when loading a PyTorch model.
_smart_load: Loads the appropriate module based on the model task.
task_map: Provides a mapping from model tasks to corresponding classes.
Raises:
FileNotFoundError: If the specified model file does not exist or is inaccessible.
ValueError: If the model file or configuration is invalid or unsupported.
ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
TypeError: If the model is not a PyTorch model when required.
AttributeError: If required attributes or methods are not implemented or available.
NotImplementedError: If a specific model task or mode is not supported.
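Example:
A minimal usage sketch, assuming the pretrained 'yolov8n.pt' weights and a local 'bus.jpg' image are available:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # load a pretrained detection model
results = model('bus.jpg')  # run inference (alias for model.predict)
model.info()  # log a summary of the model architecture
```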
"""
def __init__(
self,
model: Union[str, Path] = "yolov8n.pt",
task: str = None,
verbose: bool = False,
) -> None:
"""
Initializes a new instance of the YOLO model class.
This constructor sets up the model based on the provided model path or name. It handles various types of model
sources, including local files, Ultralytics HUB models, and Triton Server models. The method initializes several
important attributes of the model and prepares it for operations like training, prediction, or export.
Args:
model (Union[str, Path], optional): The path or model file to load or create. This can be a local
file path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
task (Any, optional): The task type associated with the YOLO model, specifying its application domain.
Defaults to None.
verbose (bool, optional): If True, enables verbose output during the model's initialization and subsequent
operations. Defaults to False.
Raises:
FileNotFoundError: If the specified model file does not exist or is inaccessible.
ValueError: If the model file or configuration is invalid or unsupported.
ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
"""
super().__init__()
self.callbacks = callbacks.get_default_callbacks()
self.predictor = None # reuse predictor
self.model = None # model object
self.trainer = None # trainer object
self.ckpt = None # if loaded from *.pt
self.cfg = None # if loaded from *.yaml
self.ckpt_path = None
self.overrides = {} # overrides for trainer object
self.metrics = None # validation/training metrics
self.session = None # HUB session
self.task = task # task type
model = str(model).strip()
# Check if Ultralytics HUB model from https://hub.ultralytics.com
if self.is_hub_model(model):
# Fetch model from HUB
checks.check_requirements("hub-sdk>=0.0.6")
self.session = self._get_hub_session(model)
model = self.session.model_file
# Check if Triton Server model
elif self.is_triton_model(model):
self.model_name = self.model = model
self.task = task
return
# Load or create new YOLO model
if Path(model).suffix in (".yaml", ".yml"):
self._new(model, task=task, verbose=verbose)
else:
self._load(model, task=task)
def __call__(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
**kwargs,
) -> list:
"""
An alias for the predict method, enabling the model instance to be callable.
This method simplifies the process of making predictions by allowing the model instance to be called directly
with the required arguments for prediction.
Args:
source (str | Path | int | PIL.Image | np.ndarray, optional): The source of the image for making
predictions. Accepts various types, including file paths, URLs, PIL images, and numpy arrays.
Defaults to None.
stream (bool, optional): If True, treats the input source as a continuous stream for predictions.
Defaults to False.
**kwargs (any): Additional keyword arguments for configuring the prediction process.
Returns:
(List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
"""
return self.predict(source, stream, **kwargs)
@staticmethod
def _get_hub_session(model: str):
"""Creates a session for Hub Training."""
from ultralytics.hub.session import HUBTrainingSession
session = HUBTrainingSession(model)
return session if session.client.authenticated else None
@staticmethod
def is_triton_model(model: str) -> bool:
"""Is model a Triton Server URL string, i.e. <scheme>://<netloc>/<endpoint>/<task_name>"""
from urllib.parse import urlsplit
url = urlsplit(model)
return url.netloc and url.path and url.scheme in {"http", "grpc"}
@staticmethod
def is_hub_model(model: str) -> bool:
"""Check if the provided model is a HUB model."""
return any(
(
model.startswith(f"{HUB_WEB_ROOT}/models/"), # i.e. https://hub.ultralytics.com/models/MODEL_ID
[len(x) for x in model.split("_")] == [42, 20], # APIKEY_MODEL
len(model) == 20 and not Path(model).exists() and all(x not in model for x in "./\\"), # MODEL
)
)
def _new(self, cfg: str, task=None, model=None, verbose=False) -> None:
"""
Initializes a new model and infers the task type from the model definitions.
Args:
cfg (str): model configuration file
task (str | None): model task
model (BaseModel): Customized model.
verbose (bool): display model info on load
"""
cfg_dict = yaml_model_load(cfg)
self.cfg = cfg
self.task = task or guess_model_task(cfg_dict)
self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1) # build model
self.overrides["model"] = self.cfg
self.overrides["task"] = self.task
# Below added to allow export from YAMLs
self.model.args = {**DEFAULT_CFG_DICT, **self.overrides} # combine default and model args (prefer model args)
self.model.task = self.task
self.model_name = cfg
def _load(self, weights: str, task=None) -> None:
"""
Initializes a new model and infers the task type from the model head.
Args:
weights (str): model checkpoint to be loaded
task (str | None): model task
"""
if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
weights = checks.check_file(weights) # automatically download and return local filename
weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolov8n -> yolov8n.pt
if Path(weights).suffix == ".pt":
self.model, self.ckpt = attempt_load_one_weight(weights)
self.task = self.model.args["task"]
self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
self.ckpt_path = self.model.pt_path
else:
weights = checks.check_file(weights) # runs in all cases, not redundant with above call
self.model, self.ckpt = weights, None
self.task = task or guess_model_task(weights)
self.ckpt_path = weights
self.overrides["model"] = weights
self.overrides["task"] = self.task
self.model_name = weights
def _check_is_pytorch_model(self) -> None:
"""Raises TypeError is model is not a PyTorch model."""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt"
pt_module = isinstance(self.model, nn.Module)
if not (pt_module or pt_str):
raise TypeError(
f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. "
f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'"
)
def reset_weights(self) -> "Model":
"""
Resets the model parameters to randomly initialized values, effectively discarding all training information.
This method iterates through all modules in the model and resets their parameters if they have a
'reset_parameters' method. It also ensures that all parameters have 'requires_grad' set to True, enabling them
to be updated during training.
Returns:
self (ultralytics.engine.model.Model): The instance of the class with reset weights.
Raises:
AssertionError: If the model is not a PyTorch model.
"""
self._check_is_pytorch_model()
for m in self.model.modules():
if hasattr(m, "reset_parameters"):
m.reset_parameters()
for p in self.model.parameters():
p.requires_grad = True
return self
def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model":
"""
Loads parameters from the specified weights file into the model.
This method supports loading weights from a file or directly from a weights object. It matches parameters by
name and shape and transfers them to the model.
Args:
weights (str | Path): Path to the weights file or a weights object. Defaults to 'yolov8n.pt'.
Returns:
self (ultralytics.engine.model.Model): The instance of the class with loaded weights.
Raises:
AssertionError: If the model is not a PyTorch model.
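Example:
A minimal sketch, assuming a 'yolov8n.yaml' config and pretrained 'yolov8n.pt' weights are available:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.yaml')  # build a new, randomly initialized model from a YAML config
model.load('yolov8n.pt')  # transfer matching pretrained weights into the new model
```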
"""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
weights, self.ckpt = attempt_load_one_weight(weights)
self.model.load(weights)
return self
def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> None:
"""
Saves the current model state to a file.
This method exports the model's checkpoint (ckpt) to the specified filename.
Args:
filename (str | Path): The name of the file to save the model to. Defaults to 'saved_model.pt'.
use_dill (bool): Whether to try using dill for serialization if available. Defaults to True.
Raises:
AssertionError: If the model is not a PyTorch model.
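Example:
A minimal sketch, assuming 'yolov8n.pt' weights are available; 'my_model.pt' is an illustrative output filename:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.save('my_model.pt')  # write the checkpoint with updated date/version metadata
```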
"""
self._check_is_pytorch_model()
from ultralytics import __version__
from datetime import datetime
updates = {
"date": datetime.now().isoformat(),
"version": __version__,
"license": "AGPL-3.0 License (https://ultralytics.com/license)",
"docs": "https://docs.ultralytics.com",
}
torch.save({**self.ckpt, **updates}, filename, use_dill=use_dill)
def info(self, detailed: bool = False, verbose: bool = True):
"""
Logs or returns model information.
This method provides an overview or detailed information about the model, depending on the arguments passed.
It can control the verbosity of the output.
Args:
detailed (bool): If True, shows detailed information about the model. Defaults to False.
verbose (bool): If True, prints the information. If False, returns the information. Defaults to True.
Returns:
(list): Various types of information about the model, depending on the 'detailed' and 'verbose' parameters.
Raises:
AssertionError: If the model is not a PyTorch model.
"""
self._check_is_pytorch_model()
return self.model.info(detailed=detailed, verbose=verbose)
def fuse(self):
"""
Fuses Conv2d and BatchNorm2d layers in the model.
This method optimizes the model by fusing Conv2d and BatchNorm2d layers, which can improve inference speed.
Raises:
AssertionError: If the model is not a PyTorch model.
"""
self._check_is_pytorch_model()
self.model.fuse()
def embed(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
**kwargs,
) -> list:
"""
Generates image embeddings based on the provided source.
This method is a wrapper around the 'predict()' method, focusing on generating embeddings from an image source.
It allows customization of the embedding process through various keyword arguments.
Args:
source (str | int | PIL.Image | np.ndarray): The source of the image for generating embeddings.
The source can be a file path, URL, PIL image, numpy array, etc. Defaults to None.
stream (bool): If True, predictions are streamed. Defaults to False.
**kwargs (any): Additional keyword arguments for configuring the embedding process.
Returns:
(List[torch.Tensor]): A list containing the image embeddings.
Raises:
AssertionError: If the model is not a PyTorch model.
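Example:
A minimal sketch, assuming 'yolov8n.pt' weights and a local 'bus.jpg' image:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
embeddings = model.embed('bus.jpg')  # list of embedding tensors from the second-to-last layer
print(embeddings[0].shape)
```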
"""
if not kwargs.get("embed"):
kwargs["embed"] = [len(self.model.model) - 2] # embed second-to-last layer if no indices passed
return self.predict(source, stream, **kwargs)
def predict(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
predictor=None,
**kwargs,
) -> list:
"""
Performs predictions on the given image source using the YOLO model.
This method facilitates the prediction process, allowing various configurations through keyword arguments.
It supports predictions with custom predictors or the default predictor method. The method handles different
types of image sources and can operate in a streaming mode. It also provides support for SAM-type models
through 'prompts'.
The method sets up a new predictor if not already present and updates its arguments with each call.
It also issues a warning and uses default assets if the 'source' is not provided. The method determines if it
is being called from the command line interface and adjusts its behavior accordingly, including setting defaults
for confidence threshold and saving behavior.
Args:
source (str | int | PIL.Image | np.ndarray, optional): The source of the image for making predictions.
Accepts various types, including file paths, URLs, PIL images, and numpy arrays. Defaults to ASSETS.
stream (bool, optional): Treats the input source as a continuous stream for predictions. Defaults to False.
predictor (BasePredictor, optional): An instance of a custom predictor class for making predictions.
If None, the method uses a default predictor. Defaults to None.
**kwargs (any): Additional keyword arguments for configuring the prediction process. These arguments allow
for further customization of the prediction behavior.
Returns:
(List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
Raises:
AttributeError: If the predictor is not properly set up.
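Example:
A minimal sketch, assuming 'yolov8n.pt' weights and a local 'bus.jpg' image:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model.predict(source='bus.jpg', conf=0.5)  # list of Results objects
print(results[0].boxes)  # detection boxes for the first image
```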
"""
if source is None:
source = ASSETS
LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using 'source={source}'.")
is_cli = (sys.argv[0].endswith("yolo") or sys.argv[0].endswith("ultralytics")) and any(
x in sys.argv for x in ("predict", "track", "mode=predict", "mode=track")
)
custom = {"conf": 0.25, "batch": 1, "save": is_cli, "mode": "predict"} # method defaults
args = {**self.overrides, **custom, **kwargs} # highest priority args on the right
prompts = args.pop("prompts", None) # for SAM-type models
if not self.predictor:
self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks)
self.predictor.setup_model(model=self.model, verbose=is_cli)
else: # only update args if predictor is already setup
self.predictor.args = get_cfg(self.predictor.args, args)
if "project" in args or "name" in args:
self.predictor.save_dir = get_save_dir(self.predictor.args)
if prompts and hasattr(self.predictor, "set_prompts"): # for SAM-type models
self.predictor.set_prompts(prompts)
return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)
def track(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
persist: bool = False,
**kwargs,
) -> list:
"""
Conducts object tracking on the specified input source using the registered trackers.
This method performs object tracking using the model's predictors and optionally registered trackers. It is
capable of handling different types of input sources such as file paths or video streams. The method supports
customization of the tracking process through various keyword arguments. It registers trackers if they are not
already present and optionally persists them based on the 'persist' flag.
The method sets a default confidence threshold specifically for ByteTrack-based tracking, which requires low
confidence predictions as input. The tracking mode is explicitly set in the keyword arguments.
Args:
source (str, optional): The input source for object tracking. It can be a file path, URL, or video stream.
stream (bool, optional): Treats the input source as a continuous video stream. Defaults to False.
persist (bool, optional): Persists the trackers between different calls to this method. Defaults to False.
**kwargs (any): Additional keyword arguments for configuring the tracking process. These arguments allow
for further customization of the tracking behavior.
Returns:
(List[ultralytics.engine.results.Results]): A list of tracking results, encapsulated in the Results class.
Raises:
AttributeError: If the predictor does not have registered trackers.
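Example:
A minimal sketch, assuming 'yolov8n.pt' weights; 'video.mp4' is an illustrative local video path:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model.track(source='video.mp4', persist=True)  # list of per-frame Results
print(results[0].boxes.id)  # tracker-assigned IDs for the first frame, if any
```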
"""
if not hasattr(self.predictor, "trackers"):
from ultralytics.trackers import register_tracker
register_tracker(self, persist)
kwargs["conf"] = kwargs.get("conf") or 0.1 # ByteTrack-based method needs low confidence predictions as input
kwargs["batch"] = kwargs.get("batch") or 1 # batch-size 1 for tracking in videos
kwargs["mode"] = "track"
return self.predict(source=source, stream=stream, **kwargs)
def val(
self,
validator=None,
**kwargs,
):
"""
Validates the model using a specified dataset and validation configuration.
This method facilitates the model validation process, allowing for a range of customization through various
settings and configurations. It supports validation with a custom validator or the default validation approach.
The method combines default configurations, method-specific defaults, and user-provided arguments to configure
the validation process. After validation, it updates the model's metrics with the results obtained from the
validator.
The method supports various arguments that allow customization of the validation process. For a comprehensive
list of all configurable options, users should refer to the 'configuration' section in the documentation.
Args:
validator (BaseValidator, optional): An instance of a custom validator class for validating the model. If
None, the method uses a default validator. Defaults to None.
**kwargs (any): Arbitrary keyword arguments representing the validation configuration. These arguments are
used to customize various aspects of the validation process.
Returns:
(dict): Validation metrics obtained from the validation process.
Raises:
AssertionError: If the model is not a PyTorch model.
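Example:
A minimal sketch, assuming 'yolov8n.pt' weights and the bundled 'coco8.yaml' dataset config:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
metrics = model.val(data='coco8.yaml', imgsz=640)
print(metrics.box.map)  # mAP50-95 for the detection task
```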
"""
custom = {"rect": True} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "val"} # highest priority args on the right
validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
validator(model=self.model)
self.metrics = validator.metrics
return validator.metrics
def benchmark(
self,
**kwargs,
):
"""
Benchmarks the model across various export formats to evaluate performance.
This method assesses the model's performance in different export formats, such as ONNX, TorchScript, etc.
It uses the 'benchmark' function from the ultralytics.utils.benchmarks module. The benchmarking is configured
using a combination of default configuration values, model-specific arguments, method-specific defaults, and
any additional user-provided keyword arguments.
The method supports various arguments that allow customization of the benchmarking process, such as dataset
choice, image size, precision modes, device selection, and verbosity. For a comprehensive list of all
configurable options, users should refer to the 'configuration' section in the documentation.
Args:
**kwargs (any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with
default configurations, model-specific arguments, and method defaults.
Returns:
(dict): A dictionary containing the results of the benchmarking process.
Raises:
AssertionError: If the model is not a PyTorch model.
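Example:
A minimal sketch, assuming 'yolov8n.pt' weights; benchmarking exports and times every supported format, so it can take a while:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model.benchmark(data='coco8.yaml', imgsz=640, half=False)
```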
"""
self._check_is_pytorch_model()
from ultralytics.utils.benchmarks import benchmark
custom = {"verbose": False} # method defaults
args = {**DEFAULT_CFG_DICT, **self.model.args, **custom, **kwargs, "mode": "benchmark"}
return benchmark(
model=self,
data=kwargs.get("data"), # if no 'data' argument passed set data=None for default datasets
imgsz=args["imgsz"],
half=args["half"],
int8=args["int8"],
device=args["device"],
verbose=kwargs.get("verbose"),
)
def export(
self,
**kwargs,
):
"""
Exports the model to a different format suitable for deployment.
This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment
purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method
defaults, and any additional arguments provided. The combined arguments are used to configure export settings.
The method supports a wide range of arguments to customize the export process. For a comprehensive list of all
possible arguments, refer to the 'configuration' section in the documentation.
Args:
**kwargs (any): Arbitrary keyword arguments to customize the export process. These are combined with the
model's overrides and method defaults.
Returns:
(object): The exported model in the specified format, or an object related to the export process.
Raises:
AssertionError: If the model is not a PyTorch model.
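Example:
A minimal sketch, assuming 'yolov8n.pt' weights and that the ONNX export dependencies are installed:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
path = model.export(format='onnx', imgsz=640)  # path to the exported ONNX file
```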
"""
self._check_is_pytorch_model()
from .exporter import Exporter
custom = {"imgsz": self.model.args["imgsz"], "batch": 1, "data": None, "verbose": False} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "export"} # highest priority args on the right
return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model)
def train(
self,
trainer=None,
**kwargs,
):
"""
Trains the model using the specified dataset and training configuration.
This method facilitates model training with a range of customizable settings and configurations. It supports
training with a custom trainer or the default training approach defined in the method. The method handles
different scenarios, such as resuming training from a checkpoint, integrating with Ultralytics HUB, and
updating model and configuration after training.
When using Ultralytics HUB, if the session already has a loaded model, the method prioritizes HUB training
arguments and issues a warning if local arguments are provided. It checks for pip updates and combines default
configurations, method-specific defaults, and user-provided arguments to configure the training process. After
training, it updates the model and its configurations, and optionally attaches metrics.
Args:
trainer (BaseTrainer, optional): An instance of a custom trainer class for training the model. If None, the
method uses a default trainer. Defaults to None.
**kwargs (any): Arbitrary keyword arguments representing the training configuration. These arguments are
used to customize various aspects of the training process.
Returns:
(dict | None): Training metrics if available and training is successful; otherwise, None.
Raises:
AssertionError: If the model is not a PyTorch model.
PermissionError: If there is a permission issue with the HUB session.
ModuleNotFoundError: If the HUB SDK is not installed.
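Example:
A minimal sketch, assuming 'yolov8n.pt' weights and the bundled 'coco8.yaml' dataset config:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model.train(data='coco8.yaml', epochs=3, imgsz=640)  # returns validation metrics on rank 0
```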
"""
self._check_is_pytorch_model()
if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model
if any(kwargs):
LOGGER.warning("WARNING ⚠️ using HUB training arguments, ignoring local training arguments.")
kwargs = self.session.train_args # overwrite kwargs
checks.check_pip_update_available()
overrides = yaml_load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
custom = {"data": DEFAULT_CFG_DICT["data"] or TASK2DATA[self.task]} # method defaults
args = {**overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
if args.get("resume"):
args["resume"] = self.ckpt_path
self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
if not args.get("resume"): # manually set model only if not resuming
self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
self.model = self.trainer.model
if SETTINGS["hub"] is True and not self.session:
# Create a model in HUB
try:
self.session = self._get_hub_session(self.model_name)
if self.session:
self.session.create_model(args)
# Check model was created
if not getattr(self.session.model, "id", None):
self.session = None
except (PermissionError, ModuleNotFoundError):
# Ignore PermissionError and ModuleNotFoundError which indicates hub-sdk not installed
pass
self.trainer.hub_session = self.session # attach optional HUB session
self.trainer.train()
# Update model and cfg after training
if RANK in (-1, 0):
ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
self.model, _ = attempt_load_one_weight(ckpt)
self.overrides = self.model.args
self.metrics = getattr(self.trainer.validator, "metrics", None) # TODO: no metrics returned by DDP
return self.metrics
def tune(
self,
use_ray=False,
iterations=10,
*args,
**kwargs,
):
"""
Conducts hyperparameter tuning for the model, with an option to use Ray Tune.
This method supports two modes of hyperparameter tuning: using Ray Tune or a custom tuning method.
When Ray Tune is enabled, it leverages the 'run_ray_tune' function from the ultralytics.utils.tuner module.
Otherwise, it uses the internal 'Tuner' class for tuning. The method combines default, overridden, and
custom arguments to configure the tuning process.
Args:
use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False.
iterations (int): The number of tuning iterations to perform. Defaults to 10.
*args (list): Variable length argument list for additional arguments.
**kwargs (any): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.
Returns:
(dict): A dictionary containing the results of the hyperparameter search.
Raises:
AssertionError: If the model is not a PyTorch model.
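Example:
A minimal sketch, assuming the bundled 'coco8.yaml' dataset config; each iteration is a full (short) training run:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model.tune(data='coco8.yaml', epochs=5, iterations=10, use_ray=False)
```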
"""
self._check_is_pytorch_model()
if use_ray:
from ultralytics.utils.tuner import run_ray_tune
return run_ray_tune(self, max_samples=iterations, *args, **kwargs)
else:
from .tuner import Tuner
custom = {} # method defaults
args = {**self.overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)
def _apply(self, fn) -> "Model":
"""Apply to(), cpu(), cuda(), half(), float() to model tensors that are not parameters or registered buffers."""
self._check_is_pytorch_model()
self = super()._apply(fn) # noqa
self.predictor = None # reset predictor as device may have changed
self.overrides["device"] = self.device # was str(self.device) i.e. device(type='cuda', index=0) -> 'cuda:0'
return self
@property
def names(self) -> list:
"""
Retrieves the class names associated with the loaded model.
This property returns the class names if they are defined in the model. It checks the class names for validity
using the 'check_class_names' function from the ultralytics.nn.autobackend module.
Returns:
(list | None): The class names of the model if available, otherwise None.
"""
from ultralytics.nn.autobackend import check_class_names
return check_class_names(self.model.names) if hasattr(self.model, "names") else None
@property
def device(self) -> torch.device:
"""
Retrieves the device on which the model's parameters are allocated.
This property is used to determine whether the model's parameters are on CPU or GPU. It only applies to models
that are instances of nn.Module.
Returns:
(torch.device | None): The device (CPU/GPU) of the model if it is a PyTorch model, otherwise None.
"""
return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None
@property
def transforms(self):
"""
Retrieves the transformations applied to the input data of the loaded model.
This property returns the transformations if they are defined in the model.
Returns:
(object | None): The transform object of the model if available, otherwise None.
"""
return self.model.transforms if hasattr(self.model, "transforms") else None
def add_callback(self, event: str, func) -> None:
"""
Adds a callback function for a specified event.
This method allows the user to register a custom callback function that is triggered on a specific event during
model training or inference.
Args:
event (str): The name of the event to attach the callback to.
func (callable): The callback function to be registered.
Raises:
ValueError: If the event name is not recognized.
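Example:
A minimal sketch; 'on_train_start' is one of the default callback events and 'print_device' is an illustrative function name:
```python
from ultralytics import YOLO

def print_device(trainer):
    print('Training starting on device:', trainer.device)

model = YOLO('yolov8n.pt')
model.add_callback('on_train_start', print_device)
```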
"""
self.callbacks[event].append(func)
def clear_callback(self, event: str) -> None:
"""
Clears all callback functions registered for a specified event.
This method removes all custom and default callback functions associated with the given event.
Args:
event (str): The name of the event for which to clear the callbacks.
Raises:
ValueError: If the event name is not recognized.
"""
self.callbacks[event] = []
def reset_callbacks(self) -> None:
"""
Resets all callbacks to their default functions.
This method reinstates the default callback functions for all events, removing any custom callbacks that were
added previously.
"""
for event in callbacks.default_callbacks.keys():
self.callbacks[event] = [callbacks.default_callbacks[event][0]]
@staticmethod
def _reset_ckpt_args(args: dict) -> dict:
"""Reset arguments when loading a PyTorch model."""
include = {"imgsz", "data", "task", "single_cls"} # only remember these arguments when loading a PyTorch model
return {k: v for k, v in args.items() if k in include}
# def __getattr__(self, attr):
# """Raises error if object has no requested attribute."""
# name = self.__class__.__name__
# raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
def _smart_load(self, key: str):
"""Load model/trainer/validator/predictor."""
try:
return self.task_map[self.task][key]
except Exception as e:
name = self.__class__.__name__
mode = inspect.stack()[1][3] # get the function name.
raise NotImplementedError(
emojis(f"WARNING ⚠️ '{name}' model does not support '{mode}' mode for '{self.task}' task yet.")
) from e
@property
def task_map(self) -> dict:
"""
Map head to model, trainer, validator, and predictor classes.
Returns:
task_map (dict): The map of model task to mode classes.
"""
raise NotImplementedError("Please provide task map for your model!")
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ yolo mode=predict model=yolov8n.pt source=0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP stream
Usage - formats:
$ yolo mode=predict model=yolov8n.pt # PyTorch
yolov8n.torchscript # TorchScript
yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
yolov8n_openvino_model # OpenVINO
yolov8n.engine # TensorRT
yolov8n.mlpackage # CoreML (macOS-only)
yolov8n_saved_model # TensorFlow SavedModel
yolov8n.pb # TensorFlow GraphDef
yolov8n.tflite # TensorFlow Lite
yolov8n_edgetpu.tflite # TensorFlow Edge TPU
yolov8n_paddle_model # PaddlePaddle
yolov8n_ncnn_model # NCNN
"""
import platform
import re
import threading
from pathlib import Path
import cv2
import numpy as np
import torch
from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.data import load_inference_source
from ultralytics.data.augment import LetterBox, classify_transforms
from ultralytics.nn.autobackend import AutoBackend
from ultralytics.utils import DEFAULT_CFG, LOGGER, MACOS, WINDOWS, callbacks, colorstr, ops
from ultralytics.utils.checks import check_imgsz, check_imshow
from ultralytics.utils.files import increment_path
from ultralytics.utils.torch_utils import select_device, smart_inference_mode
STREAM_WARNING = """
WARNING ⚠️ inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.
Example:
results = model(source=..., stream=True) # generator of Results objects
for r in results:
boxes = r.boxes # Boxes object for bbox outputs
masks = r.masks # Masks object for segment masks outputs
probs = r.probs # Class probabilities for classification outputs
"""
class BasePredictor:
"""
BasePredictor.
A base class for creating predictors.
Attributes:
args (SimpleNamespace): Configuration for the predictor.
save_dir (Path): Directory to save results.
done_warmup (bool): Whether the predictor has finished setup.
model (nn.Module): Model used for prediction.
data (dict): Data configuration.
device (torch.device): Device used for prediction.
dataset (Dataset): Dataset used for prediction.
vid_writer (dict): Dictionary of {save_path: video_writer, ...} writers for saving video output.
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""
Initializes the BasePredictor class.
Args:
cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
overrides (dict, optional): Configuration overrides. Defaults to None.
_callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
"""
self.args = get_cfg(cfg, overrides)
self.save_dir = get_save_dir(self.args)
if self.args.conf is None:
self.args.conf = 0.25 # default conf=0.25
self.done_warmup = False
if self.args.show:
self.args.show = check_imshow(warn=True)
# Usable if setup is done
self.model = None
self.data = self.args.data # data_dict
self.imgsz = None
self.device = None
self.dataset = None
self.vid_writer = {} # dict of {save_path: video_writer, ...}
self.plotted_img = None
self.source_type = None
self.seen = 0
self.windows = []
self.batch = None
self.results = None
self.transforms = None
self.callbacks = _callbacks or callbacks.get_default_callbacks()
self.txt_path = None
self._lock = threading.Lock() # for automatic thread-safe inference
callbacks.add_integration_callbacks(self)
def preprocess(self, im):
"""
Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
"""
not_tensor = not isinstance(im, torch.Tensor)
if not_tensor:
im = np.stack(self.pre_transform(im))
im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
im = np.ascontiguousarray(im) # contiguous
im = torch.from_numpy(im)
im = im.to(self.device)
im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32
if not_tensor:
im /= 255 # 0 - 255 to 0.0 - 1.0
return im
def inference(self, im, *args, **kwargs):
"""Runs inference on a given image using the specified model and arguments."""
visualize = (
increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True)
if self.args.visualize and (not self.source_type.tensor)
else False
)
return self.model(im, augment=self.args.augment, visualize=visualize, embed=self.args.embed, *args, **kwargs)
def pre_transform(self, im):
"""
Pre-transform input image before inference.
Args:
im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
Returns:
(list): A list of transformed images.
"""
same_shapes = len({x.shape for x in im}) == 1
letterbox = LetterBox(self.imgsz, auto=same_shapes and self.model.pt, stride=self.model.stride)
return [letterbox(image=x) for x in im]
def postprocess(self, preds, img, orig_imgs):
"""Post-processes predictions for an image and returns them."""
return preds
def __call__(self, source=None, model=None, stream=False, *args, **kwargs):
"""Performs inference on an image or stream."""
self.stream = stream
if stream:
return self.stream_inference(source, model, *args, **kwargs)
else:
return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
def predict_cli(self, source=None, model=None):
"""
Method used for CLI prediction.
It always streams outputs through a generator, since results do not need to be accumulated in CLI mode.
"""
gen = self.stream_inference(source, model)
for _ in gen: # noqa, running CLI inference without accumulating any outputs (do not modify)
pass
def setup_source(self, source):
"""Sets up source and inference mode."""
self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2) # check image size
self.transforms = (
getattr(
self.model.model,
"transforms",
classify_transforms(self.imgsz[0], crop_fraction=self.args.crop_fraction),
)
if self.args.task == "classify"
else None
)
self.dataset = load_inference_source(
source=source,
batch=self.args.batch,
vid_stride=self.args.vid_stride,
buffer=self.args.stream_buffer,
)
self.source_type = self.dataset.source_type
if not getattr(self, "stream", True) and (
self.source_type.stream
or self.source_type.screenshot
or len(self.dataset) > 1000 # many images
or any(getattr(self.dataset, "video_flag", [False]))
): # videos
LOGGER.warning(STREAM_WARNING)
self.vid_writer = {}
@smart_inference_mode()
def stream_inference(self, source=None, model=None, *args, **kwargs):
"""Streams real-time inference on camera feed and saves results to file."""
if self.args.verbose:
LOGGER.info("")
# Setup model
if not self.model:
self.setup_model(model)
with self._lock: # for thread-safe inference
# Setup source every time predict is called
self.setup_source(source if source is not None else self.args.source)
# Check if save_dir/ label file exists
if self.args.save or self.args.save_txt:
(self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
# Warmup model
if not self.done_warmup:
self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
self.done_warmup = True
self.seen, self.windows, self.batch = 0, [], None
profilers = (
ops.Profile(device=self.device),
ops.Profile(device=self.device),
ops.Profile(device=self.device),
)
self.run_callbacks("on_predict_start")
for self.batch in self.dataset:
self.run_callbacks("on_predict_batch_start")
paths, im0s, s = self.batch
# Preprocess
with profilers[0]:
im = self.preprocess(im0s)
# Inference
with profilers[1]:
preds = self.inference(im, *args, **kwargs)
if self.args.embed:
yield from [preds] if isinstance(preds, torch.Tensor) else preds # yield embedding tensors
continue
# Postprocess
with profilers[2]:
self.results = self.postprocess(preds, im, im0s)
self.run_callbacks("on_predict_postprocess_end")
# Visualize, save, write results
n = len(im0s)
for i in range(n):
self.seen += 1
self.results[i].speed = {
"preprocess": profilers[0].dt * 1e3 / n,
"inference": profilers[1].dt * 1e3 / n,
"postprocess": profilers[2].dt * 1e3 / n,
}
if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
s[i] += self.write_results(i, Path(paths[i]), im, s)
# Print batch results
if self.args.verbose:
LOGGER.info("\n".join(s))
self.run_callbacks("on_predict_batch_end")
yield from self.results
# Release assets
for v in self.vid_writer.values():
if isinstance(v, cv2.VideoWriter):
v.release()
# Print final results
if self.args.verbose and self.seen:
t = tuple(x.t / self.seen * 1e3 for x in profilers) # speeds per image
LOGGER.info(
f"Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape "
f"{(min(self.args.batch, self.seen), 3, *im.shape[2:])}" % t
)
if self.args.save or self.args.save_txt or self.args.save_crop:
nl = len(list(self.save_dir.glob("labels/*.txt"))) # number of labels
s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}")
self.run_callbacks("on_predict_end")
def setup_model(self, model, verbose=True):
"""Initialize YOLO model with given parameters and set it to evaluation mode."""
self.model = AutoBackend(
weights=model or self.args.model,
device=select_device(self.args.device, verbose=verbose),
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half,
batch=self.args.batch,
fuse=True,
verbose=verbose,
)
self.device = self.model.device # update device
self.args.half = self.model.fp16 # update half
self.model.eval()
def write_results(self, i, p, im, s):
"""Write inference results to a file or directory."""
string = "" # print string
if len(im.shape) == 3:
im = im[None] # expand for batch dim
if self.source_type.stream or self.source_type.from_img or self.source_type.tensor: # batch_size >= 1
string += f"{i}: "
frame = self.dataset.count
else:
match = re.search(r"frame (\d+)/", s[i])
frame = int(match.group(1)) if match else None # None if frame undetermined
self.txt_path = self.save_dir / "labels" / (p.stem + ("" if self.dataset.mode == "image" else f"_{frame}"))
string += "%gx%g " % im.shape[2:]
result = self.results[i]
result.save_dir = self.save_dir.__str__() # used in other locations
string += result.verbose() + f"{result.speed['inference']:.1f}ms"
# Add predictions to image
if self.args.save or self.args.show:
self.plotted_img = result.plot(
line_width=self.args.line_width,
boxes=self.args.show_boxes,
conf=self.args.show_conf,
labels=self.args.show_labels,
im_gpu=None if self.args.retina_masks else im[i],
)
# Save results
if self.args.save_txt:
result.save_txt(f"{self.txt_path}.txt", save_conf=self.args.save_conf)
if self.args.save_crop:
result.save_crop(save_dir=self.save_dir / "crops", file_name=self.txt_path.stem)
if self.args.show:
self.show(str(p))
if self.args.save:
self.save_predicted_images(str(self.save_dir / p.name), frame)
return string
def save_predicted_images(self, save_path="", frame=0):
"""Save video predictions as mp4 at specified path."""
im = self.plotted_img
# Save videos and streams
if self.dataset.mode in {"stream", "video"}:
fps = self.dataset.fps if self.dataset.mode == "video" else 30
frames_path = f'{save_path.split(".", 1)[0]}_frames/'
if save_path not in self.vid_writer: # new video
if self.args.save_frames:
Path(frames_path).mkdir(parents=True, exist_ok=True)
suffix, fourcc = (".mp4", "avc1") if MACOS else (".avi", "WMV2") if WINDOWS else (".avi", "MJPG")
self.vid_writer[save_path] = cv2.VideoWriter(
filename=str(Path(save_path).with_suffix(suffix)),
fourcc=cv2.VideoWriter_fourcc(*fourcc),
fps=fps, # integer required, floats produce error in MP4 codec
frameSize=(im.shape[1], im.shape[0]), # (width, height)
)
# Save video
self.vid_writer[save_path].write(im)
if self.args.save_frames:
cv2.imwrite(f"{frames_path}{frame}.jpg", im)
# Save images
else:
cv2.imwrite(save_path, im)
def show(self, p=""):
"""Display an image in a window using OpenCV imshow()."""
im = self.plotted_img
if platform.system() == "Linux" and p not in self.windows:
self.windows.append(p)
cv2.namedWindow(p, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(p, im.shape[1], im.shape[0]) # (width, height)
cv2.imshow(p, im)
cv2.waitKey(300 if self.dataset.mode == "image" else 1) # 1 millisecond
def run_callbacks(self, event: str):
"""Runs all registered callbacks for a specific event."""
for callback in self.callbacks.get(event, []):
callback(self)
def add_callback(self, event: str, func):
"""Add callback."""
self.callbacks[event].append(func)
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics Results, Boxes and Masks classes for handling inference results.
Usage: See https://docs.ultralytics.com/modes/predict/
"""
from copy import deepcopy
from functools import lru_cache
from pathlib import Path
import numpy as np
import torch
from ultralytics.data.augment import LetterBox
from ultralytics.utils import LOGGER, SimpleClass, ops
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from ultralytics.utils.torch_utils import smart_inference_mode
class BaseTensor(SimpleClass):
"""Base tensor class with additional methods for easy manipulation and device handling."""
def __init__(self, data, orig_shape) -> None:
"""
Initialize BaseTensor with data and original shape.
Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
orig_shape (tuple): Original shape of image.
"""
assert isinstance(data, (torch.Tensor, np.ndarray))
self.data = data
self.orig_shape = orig_shape
@property
def shape(self):
"""Return the shape of the data tensor."""
return self.data.shape
def cpu(self):
"""Return a copy of the tensor on CPU memory."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self):
"""Return a copy of the tensor as a numpy array."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
def cuda(self):
"""Return a copy of the tensor on GPU memory."""
return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
def to(self, *args, **kwargs):
"""Return a copy of the tensor with the specified device and dtype."""
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
def __len__(self): # override len(results)
"""Return the length of the data tensor."""
return len(self.data)
def __getitem__(self, idx):
"""Return a BaseTensor with the specified index of the data tensor."""
return self.__class__(self.data[idx], self.orig_shape)
class Results(SimpleClass):
"""
A class for storing and manipulating inference results.
Attributes:
orig_img (numpy.ndarray): Original image as a numpy array.
orig_shape (tuple): Original image shape in (height, width) format.
boxes (Boxes, optional): Object containing detection bounding boxes.
masks (Masks, optional): Object containing detection masks.
probs (Probs, optional): Object containing class probabilities for classification tasks.
keypoints (Keypoints, optional): Object containing detected keypoints for each object.
speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image).
names (dict): Dictionary of class names.
path (str): Path to the image file.
Methods:
update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results.
cpu(): Returns a copy of the Results object with all tensors on CPU memory.
numpy(): Returns a copy of the Results object with all tensors as numpy arrays.
cuda(): Returns a copy of the Results object with all tensors on GPU memory.
to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype.
new(): Returns a new Results object with the same image, path, and names.
plot(...): Plots detection results on an input image, returning an annotated image.
show(): Show annotated results to screen.
save(filename): Save annotated results to file.
verbose(): Returns a log string for each task, detailing detections and classifications.
save_txt(txt_file, save_conf=False): Saves detection results to a text file.
save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images.
tojson(normalize=False): Converts detection results to JSON format.
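Example:
A minimal sketch, assuming 'yolov8n.pt' weights and a local 'bus.jpg' image:
```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
r = model('bus.jpg')[0]  # first Results object
print(r.boxes.xyxy)  # box coordinates in xyxy format
print(r.verbose())  # human-readable detection summary
r.save(filename='annotated_bus.jpg')  # save the annotated image (illustrative filename)
```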
"""
def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None) -> None:
"""
Initialize the Results class.
Args:
orig_img (numpy.ndarray): The original image as a numpy array.
path (str): The path to the image file.
names (dict): A dictionary of class names.
boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection.
obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
"""
self.orig_img = orig_img
self.orig_shape = orig_img.shape[:2]
self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks
self.probs = Probs(probs) if probs is not None else None
self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
self.obb = OBB(obb, self.orig_shape) if obb is not None else None
self.speed = {"preprocess": None, "inference": None, "postprocess": None} # milliseconds per image
self.names = names
self.path = path
self.save_dir = None
self._keys = "boxes", "masks", "probs", "keypoints", "obb"
def __getitem__(self, idx):
"""Return a Results object for the specified index."""
return self._apply("__getitem__", idx)
def __len__(self):
"""Return the number of detections in the Results object."""
for k in self._keys:
v = getattr(self, k)
if v is not None:
return len(v)
def update(self, boxes=None, masks=None, probs=None, obb=None):
"""Update the boxes, masks, and probs attributes of the Results object."""
if boxes is not None:
self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
if masks is not None:
self.masks = Masks(masks, self.orig_shape)
if probs is not None:
self.probs = probs
if obb is not None:
self.obb = OBB(obb, self.orig_shape)
def _apply(self, fn, *args, **kwargs):
"""
Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
function is internally called by methods like .to(), .cuda(), .cpu(), etc.
Args:
fn (str): The name of the function to apply.
*args: Variable length argument list to pass to the function.
**kwargs: Arbitrary keyword arguments to pass to the function.
Returns:
Results: A new Results object with attributes modified by the applied function.
"""
r = self.new()
for k in self._keys:
v = getattr(self, k)
if v is not None:
setattr(r, k, getattr(v, fn)(*args, **kwargs))
return r
def cpu(self):
"""Return a copy of the Results object with all tensors on CPU memory."""
return self._apply("cpu")
def numpy(self):
"""Return a copy of the Results object with all tensors as numpy arrays."""
return self._apply("numpy")
def cuda(self):
"""Return a copy of the Results object with all tensors on GPU memory."""
return self._apply("cuda")
def to(self, *args, **kwargs):
"""Return a copy of the Results object with tensors on the specified device and dtype."""
return self._apply("to", *args, **kwargs)
def new(self):
"""Return a new Results object with the same image, path, and names."""
return Results(orig_img=self.orig_img, path=self.path, names=self.names)
def plot(
self,
conf=True,
line_width=None,
font_size=None,
font="Arial.ttf",
pil=False,
img=None,
im_gpu=None,
kpt_radius=5,
kpt_line=True,
labels=True,
boxes=True,
masks=True,
probs=True,
show=False,
save=False,
filename=None,
):
"""
Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.
Args:
conf (bool): Whether to plot the detection confidence score.
line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
font (str): The font to use for the text.
pil (bool): Whether to return the image as a PIL Image.
img (numpy.ndarray): Plot to another image. If None, plot on the original image.
im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
kpt_line (bool): Whether to draw lines connecting keypoints.
labels (bool): Whether to plot the label of bounding boxes.
boxes (bool): Whether to plot the bounding boxes.
masks (bool): Whether to plot the masks.
probs (bool): Whether to plot classification probability
show (bool): Whether to display the annotated image directly.
save (bool): Whether to save the annotated image to `filename`.
filename (str): Filename to save image to if save is True.
Returns:
(numpy.ndarray): A numpy array of the annotated image.
Example:
```python
from PIL import Image
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('bus.jpg') # results list
for r in results:
im_array = r.plot() # plot a BGR numpy array of predictions
im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
im.show() # show image
im.save('results.jpg') # save image
```
"""
if img is None and isinstance(self.orig_img, torch.Tensor):
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
names = self.names
is_obb = self.obb is not None
pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
pred_masks, show_masks = self.masks, masks
pred_probs, show_probs = self.probs, probs
annotator = Annotator(
deepcopy(self.orig_img if img is None else img),
line_width,
font_size,
font,
pil or (pred_probs is not None and show_probs), # Classify tasks default to pil=True
example=names,
)
# Plot Segment results
if pred_masks and show_masks:
if im_gpu is None:
img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
im_gpu = (
torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
.permute(2, 0, 1)
.flip(0)
.contiguous()
/ 255
)
idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)
# Plot Detect results
if pred_boxes is not None and show_boxes:
for d in reversed(pred_boxes):
c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ("" if id is None else f"id:{id} ") + names[c]
label = (f"{name} {conf:.2f}" if conf else name) if labels else None
box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)
# Plot Classify results
if pred_probs is not None and show_probs:
text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
x = round(self.orig_shape[0] * 0.03)
annotator.text([x, x], text, txt_color=(255, 255, 255)) # TODO: allow setting colors
# Plot Pose results
if self.keypoints is not None:
for k in reversed(self.keypoints.data):
annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)
# Show results
if show:
annotator.show(self.path)
# Save results
if save:
annotator.save(filename)
return annotator.result()
def show(self, *args, **kwargs):
"""Show annotated results image."""
self.plot(show=True, *args, **kwargs)
def save(self, filename=None, *args, **kwargs):
"""Save annotated results image."""
if not filename:
filename = f"results_{Path(self.path).name}"
self.plot(save=True, filename=filename, *args, **kwargs)
return filename
def verbose(self):
"""Return log string for each task."""
log_string = ""
probs = self.probs
boxes = self.boxes
if len(self) == 0:
return log_string if probs is not None else f"{log_string}(no detections), "
if probs is not None:
log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
if boxes:
for c in boxes.cls.unique():
n = (boxes.cls == c).sum() # detections per class
log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
return log_string
def save_txt(self, txt_file, save_conf=False):
"""
Save predictions into txt file.
Args:
txt_file (str): Path of the output txt file.
save_conf (bool): Whether to include the confidence score in each line.
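Example:
A minimal usage sketch; the weights file 'yolov8n.pt', image 'bus.jpg', and output path 'output.txt' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('bus.jpg')  # run inference
results[0].save_txt('output.txt', save_conf=True)  # one line per detection: class, normalized coords, conf
```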
"""
is_obb = self.obb is not None
boxes = self.obb if is_obb else self.boxes
masks = self.masks
probs = self.probs
kpts = self.keypoints
texts = []
if probs is not None:
# Classify
texts.extend(f"{probs.data[j]:.2f} {self.names[j]}" for j in probs.top5)  # top-5 class scores
elif boxes:
# Detect/segment/pose
for j, d in enumerate(boxes):
c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
if masks:
seg = masks[j].xyn[0].copy().reshape(-1)  # flatten mask segment from (n, 2) to (n*2,)
line = (c, *seg)
if kpts is not None:
kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
line += (*kpt.reshape(-1).tolist(),)
line += (conf,) * save_conf + (() if id is None else (id,))
texts.append(("%g " * len(line)).rstrip() % line)
if texts:
Path(txt_file).parent.mkdir(parents=True, exist_ok=True) # make directory
with open(txt_file, "a") as f:
f.writelines(text + "\n" for text in texts)
def save_crop(self, save_dir, file_name=Path("im.jpg")):
"""
Save cropped predictions to `save_dir/cls/file_name.jpg`.
Args:
save_dir (str | pathlib.Path): Save path.
file_name (str | pathlib.Path): File name.
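Example:
A minimal usage sketch; 'yolov8n.pt', 'bus.jpg', and the 'crops/' directory are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('bus.jpg')
results[0].save_crop(save_dir='crops/', file_name='bus')  # writes crops/<class_name>/bus.jpg per box
```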
"""
if self.probs is not None:
LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
return
if self.obb is not None:
LOGGER.warning("WARNING ⚠️ OBB task do not support `save_crop`.")
return
for d in self.boxes:
save_one_box(
d.xyxy,
self.orig_img.copy(),
file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
BGR=True,
)
def summary(self, normalize=False, decimals=5):
"""Convert the results to a summarized format."""
if self.probs is not None:
LOGGER.warning("Warning: Classify results do not support the `summary()` method yet.")
return
# Create list of detection dictionaries
results = []
data = self.boxes.data.cpu().tolist()
h, w = self.orig_shape if normalize else (1, 1)
for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id
box = {
"x1": round(row[0] / w, decimals),
"y1": round(row[1] / h, decimals),
"x2": round(row[2] / w, decimals),
"y2": round(row[3] / h, decimals),
}
conf = round(row[-2], decimals)
class_id = int(row[-1])
result = {"name": self.names[class_id], "class": class_id, "confidence": conf, "box": box}
if self.boxes.is_track:
result["track_id"] = int(row[-3]) # track ID
if self.masks:
result["segments"] = {
"x": (self.masks.xy[i][:, 0] / w).round(decimals).tolist(),
"y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
}
if self.keypoints is not None:
x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1) # torch Tensor
result["keypoints"] = {
"x": (x / w).numpy().round(decimals).tolist(), # decimals named argument required
"y": (y / h).numpy().round(decimals).tolist(),
"visible": visible.numpy().round(decimals).tolist(),
}
results.append(result)
return results
def tojson(self, normalize=False, decimals=5):
"""Convert the results to JSON format."""
import json
return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
class Boxes(BaseTensor):
"""
Manages detection boxes, providing easy access and manipulation of box coordinates, confidence scores, class
identifiers, and optional tracking IDs. Supports multiple formats for box coordinates, including both absolute and
normalized forms.
Attributes:
data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
is_track (bool): Indicates whether tracking IDs are included in the box data.
Properties:
xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
cls (torch.Tensor | numpy.ndarray): Class labels for each box.
id (torch.Tensor | numpy.ndarray, optional): Tracking IDs for each box, if available.
xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, calculated on demand.
xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes, relative to `orig_shape`.
xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes, relative to `orig_shape`.
Methods:
cpu(): Moves the boxes to CPU memory.
numpy(): Converts the boxes to a numpy array format.
cuda(): Moves the boxes to CUDA (GPU) memory.
to(device, dtype=None): Moves the boxes to the specified device.
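Example:
A minimal usage sketch; 'yolov8n.pt' and 'bus.jpg' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
results = model('bus.jpg')
boxes = results[0].boxes  # Boxes object for the first image
print(boxes.xyxy)  # absolute [x1, y1, x2, y2] coordinates
print(boxes.conf, boxes.cls)  # confidence scores and class indices
```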
"""
def __init__(self, boxes, orig_shape) -> None:
"""
Initialize the Boxes class.
Args:
boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, with
shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
If present, the third last column contains track IDs.
orig_shape (tuple): Original image size, in the format (height, width).
"""
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
assert n in (6, 7), f"expected 6 or 7 values but got {n}" # xyxy, track_id, conf, cls
super().__init__(boxes, orig_shape)
self.is_track = n == 7
self.orig_shape = orig_shape
@property
def xyxy(self):
"""Return the boxes in xyxy format."""
return self.data[:, :4]
@property
def conf(self):
"""Return the confidence values of the boxes."""
return self.data[:, -2]
@property
def cls(self):
"""Return the class values of the boxes."""
return self.data[:, -1]
@property
def id(self):
"""Return the track IDs of the boxes (if available)."""
return self.data[:, -3] if self.is_track else None
@property
@lru_cache(maxsize=2) # maxsize 1 should suffice
def xywh(self):
"""Return the boxes in xywh format."""
return ops.xyxy2xywh(self.xyxy)
@property
@lru_cache(maxsize=2)
def xyxyn(self):
"""Return the boxes in xyxy format normalized by original image size."""
xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
xyxy[..., [0, 2]] /= self.orig_shape[1]
xyxy[..., [1, 3]] /= self.orig_shape[0]
return xyxy
@property
@lru_cache(maxsize=2)
def xywhn(self):
"""Return the boxes in xywh format normalized by original image size."""
xywh = ops.xyxy2xywh(self.xyxy)
xywh[..., [0, 2]] /= self.orig_shape[1]
xywh[..., [1, 3]] /= self.orig_shape[0]
return xywh
class Masks(BaseTensor):
"""
A class for storing and manipulating detection masks.
Attributes:
xy (list): A list of segments in pixel coordinates.
xyn (list): A list of normalized segments.
Methods:
cpu(): Returns the masks tensor on CPU memory.
numpy(): Returns the masks tensor as a numpy array.
cuda(): Returns the masks tensor on GPU memory.
to(device, dtype): Returns the masks tensor with the specified device and dtype.
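Example:
A minimal usage sketch; the segmentation weights 'yolov8n-seg.pt' and image 'bus.jpg' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n-seg.pt')
results = model('bus.jpg')
masks = results[0].masks  # Masks object, or None if nothing was segmented
print(masks.xy)  # list of (n, 2) segment polygons in pixel coordinates
print(masks.xyn)  # the same polygons normalized by image size
```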
"""
def __init__(self, masks, orig_shape) -> None:
"""Initialize the Masks class with the given masks tensor and original image shape."""
if masks.ndim == 2:
masks = masks[None, :]
super().__init__(masks, orig_shape)
@property
@lru_cache(maxsize=1)
def xyn(self):
"""Return normalized segments."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data)
]
@property
@lru_cache(maxsize=1)
def xy(self):
"""Return segments in pixel coordinates."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data)
]
class Keypoints(BaseTensor):
"""
A class for storing and manipulating detection keypoints.
Attributes:
xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.
Methods:
cpu(): Returns a copy of the keypoints tensor on CPU memory.
numpy(): Returns a copy of the keypoints tensor as a numpy array.
cuda(): Returns a copy of the keypoints tensor on GPU memory.
to(device, dtype): Returns a copy of the keypoints tensor with the specified device and dtype.
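Example:
A minimal usage sketch; the pose weights 'yolov8n-pose.pt' and image 'bus.jpg' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n-pose.pt')
results = model('bus.jpg')
kpts = results[0].keypoints  # Keypoints object
print(kpts.xy)  # per-detection keypoint (x, y) pixel coordinates
print(kpts.conf)  # per-keypoint confidence, or None if the model does not predict visibility
```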
"""
@smart_inference_mode() # avoid keypoints < conf in-place error
def __init__(self, keypoints, orig_shape) -> None:
"""Initializes the Keypoints object with detection keypoints and original image size."""
if keypoints.ndim == 2:
keypoints = keypoints[None, :]
if keypoints.shape[2] == 3: # x, y, conf
mask = keypoints[..., 2] < 0.5 # points with conf < 0.5 (not visible)
keypoints[..., :2][mask] = 0
super().__init__(keypoints, orig_shape)
self.has_visible = self.data.shape[-1] == 3
@property
@lru_cache(maxsize=1)
def xy(self):
"""Returns x, y coordinates of keypoints."""
return self.data[..., :2]
@property
@lru_cache(maxsize=1)
def xyn(self):
"""Returns normalized x, y coordinates of keypoints."""
xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
xy[..., 0] /= self.orig_shape[1]
xy[..., 1] /= self.orig_shape[0]
return xy
@property
@lru_cache(maxsize=1)
def conf(self):
"""Returns confidence values of keypoints if available, else None."""
return self.data[..., 2] if self.has_visible else None
class Probs(BaseTensor):
"""
A class for storing and manipulating classification predictions.
Attributes:
top1 (int): Index of the top 1 class.
top5 (list[int]): Indices of the top 5 classes.
top1conf (torch.Tensor): Confidence of the top 1 class.
top5conf (torch.Tensor): Confidences of the top 5 classes.
Methods:
cpu(): Returns a copy of the probs tensor on CPU memory.
numpy(): Returns a copy of the probs tensor as a numpy array.
cuda(): Returns a copy of the probs tensor on GPU memory.
to(): Returns a copy of the probs tensor with the specified device and dtype.
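Example:
A minimal usage sketch; the classification weights 'yolov8n-cls.pt' and image 'bus.jpg' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n-cls.pt')
results = model('bus.jpg')
probs = results[0].probs  # Probs object
print(probs.top1, probs.top1conf)  # best class index and its confidence
print(probs.top5)  # indices of the five most likely classes
```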
"""
def __init__(self, probs, orig_shape=None) -> None:
"""Initialize the Probs class with classification probabilities and optional original shape of the image."""
super().__init__(probs, orig_shape)
@property
@lru_cache(maxsize=1)
def top1(self):
"""Return the index of top 1."""
return int(self.data.argmax())
@property
@lru_cache(maxsize=1)
def top5(self):
"""Return the indices of top 5."""
return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy.
@property
@lru_cache(maxsize=1)
def top1conf(self):
"""Return the confidence of top 1."""
return self.data[self.top1]
@property
@lru_cache(maxsize=1)
def top5conf(self):
"""Return the confidences of top 5."""
return self.data[self.top5]
class OBB(BaseTensor):
"""
A class for storing and manipulating Oriented Bounding Boxes (OBB).
Args:
boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
orig_shape (tuple): Original image size, in the format (height, width).
Attributes:
xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by orig image size.
xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format.
xyxy (torch.Tensor | numpy.ndarray): The horizontal (axis-aligned) boxes in xyxy format.
data (torch.Tensor): The raw OBB tensor (alias for `boxes`).
Methods:
cpu(): Move the object to CPU memory.
numpy(): Convert the object to a numpy array.
cuda(): Move the object to CUDA memory.
to(*args, **kwargs): Move the object to the specified device.
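Example:
A minimal usage sketch; the OBB weights 'yolov8n-obb.pt' and image 'boats.jpg' are assumed placeholders:
```python
from ultralytics import YOLO
model = YOLO('yolov8n-obb.pt')
results = model('boats.jpg')
obb = results[0].obb  # OBB object, or None for non-OBB models
print(obb.xywhr)  # [x_center, y_center, width, height, rotation] per box
print(obb.xyxyxyxy)  # (N, 4, 2) rotated box corner points
```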
"""
def __init__(self, boxes, orig_shape) -> None:
"""Initialize the Boxes class."""
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
assert n in (7, 8), f"expected 7 or 8 values but got {n}" # xywh, rotation, track_id, conf, cls
super().__init__(boxes, orig_shape)
self.is_track = n == 8
self.orig_shape = orig_shape
@property
def xywhr(self):
"""Return the rotated boxes in xywhr format."""
return self.data[:, :5]
@property
def conf(self):
"""Return the confidence values of the boxes."""
return self.data[:, -2]
@property
def cls(self):
"""Return the class values of the boxes."""
return self.data[:, -1]
@property
def id(self):
"""Return the track IDs of the boxes (if available)."""
return self.data[:, -3] if self.is_track else None
@property
@lru_cache(maxsize=2)
def xyxyxyxy(self):
"""Return the boxes in xyxyxyxy format, (N, 4, 2)."""
return ops.xywhr2xyxyxyxy(self.xywhr)
@property
@lru_cache(maxsize=2)
def xyxyxyxyn(self):
"""Return the boxes in xyxyxyxy format, (N, 4, 2)."""
xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
xyxyxyxyn[..., 0] /= self.orig_shape[1]
xyxyxyxyn[..., 1] /= self.orig_shape[0]
return xyxyxyxyn
@property
@lru_cache(maxsize=2)
def xyxy(self):
"""
Return the horizontal boxes in xyxy format, (N, 4).
Accepts both torch and numpy boxes.
"""
is_numpy = isinstance(self.data, np.ndarray)  # torch .min(dim)/.max(dim) return namedtuples, numpy returns arrays
x1 = self.xyxyxyxy[..., 0].min(1) if is_numpy else self.xyxyxyxy[..., 0].min(1).values
x2 = self.xyxyxyxy[..., 0].max(1) if is_numpy else self.xyxyxyxy[..., 0].max(1).values
y1 = self.xyxyxyxy[..., 1].min(1) if is_numpy else self.xyxyxyxy[..., 1].min(1).values
y2 = self.xyxyxyxy[..., 1].max(1) if is_numpy else self.xyxyxyxy[..., 1].max(1).values
xyxy = [x1, y1, x2, y2]
return np.stack(xyxy, axis=-1) if is_numpy else torch.stack(xyxy, dim=-1)