# YOLOv8 OpenVINO Inference in C++ 🦾
Welcome to the YOLOv8 OpenVINO Inference example in C++! This guide will help you get started with leveraging the powerful YOLOv8 models using OpenVINO and OpenCV API in your C++ projects. Whether you're looking to enhance performance or add flexibility to your applications, this example has got you covered.
## 🌟 Features
- 🚀 **Model Format Support**: Compatible with `ONNX` and `OpenVINO IR` formats.
- **Precision Options**: Run models in `FP32`, `FP16`, and `INT8` precisions.
- 🔄 **Dynamic Shape Loading**: Easily handle models with dynamic input shapes (see the Python sketch after this list).
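For reference, the same load/reshape/compile flow that the C++ example implements can be sketched in a few lines with OpenVINO's Python API; the model path here is only an illustrative placeholder for an exported IR (an `.onnx` path works the same way):
```python
import openvino as ov  # OpenVINO >= 2023.3, matching the dependency table below

core = ov.Core()
model = core.read_model("yolov8s_openvino_model/yolov8s.xml")  # placeholder path; .onnx also works

# Pin dynamic inputs to a fixed NCHW shape, just as the C++ Inference class does
if model.is_dynamic():
    model.reshape([1, 3, 640, 640])

compiled_model = core.compile_model(model, "AUTO")
print(compiled_model.input(0).get_shape())
```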
## 📋 Dependencies
To ensure smooth execution, please make sure you have the following dependencies installed:
| Dependency | Version |
| ---------- | -------- |
| OpenVINO | >=2023.3 |
| OpenCV | >=4.5.0 |
| C++ | >=14 |
| CMake | >=3.12.0 |
## ⚙️ Build Instructions
Follow these steps to build the project:
1. Clone the repository:
```bash
git clone https://github.com/ultralytics/ultralytics.git
cd ultralytics/YOLOv8-OpenVINO-CPP-Inference
```
2. Create a build directory and compile the project:
```bash
mkdir build
cd build
cmake ..
make
```
## 🛠️ Usage
Once built, you can run inference on an image using the following command:
```bash
./detect <model_path.{onnx, xml}> <image_path.jpg>
```
## 🔄 Exporting YOLOv8 Models
To use your YOLOv8 model with OpenVINO, you need to export it first. Use the command below to export the model:
```bash
yolo export model=yolov8s.pt imgsz=640 format=openvino
```
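If you prefer the Python API over the CLI, the same export can be done with the `ultralytics` package; a minimal sketch (it writes a `yolov8s_openvino_model/` directory next to the weights):
```python
from ultralytics import YOLO

# Load the PyTorch weights and export them to OpenVINO IR format
model = YOLO("yolov8s.pt")
model.export(format="openvino", imgsz=640)  # creates yolov8s_openvino_model/
```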
## 📸 Screenshots
### Running with the OpenVINO Model
![Running OpenVINO Model](https://github.com/ultralytics/ultralytics/assets/76827698/2d7cf201-3def-4357-824c-12446ccf85a9)
### Running with the ONNX Model
![Running ONNX Model](https://github.com/ultralytics/ultralytics/assets/76827698/9b90031c-cc81-4cfb-8b34-c619e09035a7)
## ❤️ Contributions
We hope this example helps you integrate YOLOv8 with OpenVINO and OpenCV into your C++ projects effortlessly. Happy coding! 🚀
#include "inference.h"
#include <memory>
#include <opencv2/dnn.hpp>
#include <random>
namespace yolo {
// Constructor to initialize the model with default input shape
Inference::Inference(const std::string &model_path, const float &model_confidence_threshold, const float &model_NMS_threshold) {
model_input_shape_ = cv::Size(640, 640); // Set the default size for models with dynamic shapes to prevent errors.
model_confidence_threshold_ = model_confidence_threshold;
model_NMS_threshold_ = model_NMS_threshold;
InitializeModel(model_path);
}
// Constructor to initialize the model with specified input shape
Inference::Inference(const std::string &model_path, const cv::Size model_input_shape, const float &model_confidence_threshold, const float &model_NMS_threshold) {
model_input_shape_ = model_input_shape;
model_confidence_threshold_ = model_confidence_threshold;
model_NMS_threshold_ = model_NMS_threshold;
InitializeModel(model_path);
}
void Inference::InitializeModel(const std::string &model_path) {
ov::Core core; // OpenVINO core object
std::shared_ptr<ov::Model> model = core.read_model(model_path); // Read the model from file
// If the model has dynamic shapes, reshape it to the specified input shape
if (model->is_dynamic()) {
model->reshape({1, 3, static_cast<long int>(model_input_shape_.height), static_cast<long int>(model_input_shape_.width)});
}
// Preprocessing setup for the model
ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({255, 255, 255});
ppp.input().model().set_layout("NCHW");
ppp.output().tensor().set_element_type(ov::element::f32);
model = ppp.build(); // Build the preprocessed model
// Compile the model for inference
compiled_model_ = core.compile_model(model, "AUTO");
inference_request_ = compiled_model_.create_infer_request(); // Create inference request
short width, height;
// Get input shape from the model
const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
const ov::Shape input_shape = inputs[0].get_shape();
height = input_shape[1];
width = input_shape[2];
model_input_shape_ = cv::Size2f(width, height);
// Get output shape from the model
const std::vector<ov::Output<ov::Node>> outputs = model->outputs();
const ov::Shape output_shape = outputs[0].get_shape();
height = output_shape[1];
width = output_shape[2];
model_output_shape_ = cv::Size(width, height);
}
// Method to run inference on an input frame
void Inference::RunInference(cv::Mat &frame) {
Preprocessing(frame); // Preprocess the input frame
inference_request_.infer(); // Run inference
PostProcessing(frame); // Postprocess the inference results
}
// Method to preprocess the input frame
void Inference::Preprocessing(const cv::Mat &frame) {
cv::Mat resized_frame;
cv::resize(frame, resized_frame, model_input_shape_, 0, 0, cv::INTER_AREA); // Resize the frame to match the model input shape
// Calculate scaling factor
scale_factor_.x = static_cast<float>(frame.cols / model_input_shape_.width);
scale_factor_.y = static_cast<float>(frame.rows / model_input_shape_.height);
float *input_data = (float *)resized_frame.data; // Get pointer to resized frame data
const ov::Tensor input_tensor = ov::Tensor(compiled_model_.input().get_element_type(), compiled_model_.input().get_shape(), input_data); // Create input tensor
inference_request_.set_input_tensor(input_tensor); // Set input tensor for inference
}
// Method to postprocess the inference results
void Inference::PostProcessing(cv::Mat &frame) {
std::vector<int> class_list;
std::vector<float> confidence_list;
std::vector<cv::Rect> box_list;
// Get the output tensor from the inference request
const float *detections = inference_request_.get_output_tensor().data<const float>();
const cv::Mat detection_outputs(model_output_shape_, CV_32F, (float *)detections); // Create OpenCV matrix from output tensor
// Iterate over detections and collect class IDs, confidence scores, and bounding boxes
for (int i = 0; i < detection_outputs.cols; ++i) {
const cv::Mat classes_scores = detection_outputs.col(i).rowRange(4, detection_outputs.rows);
cv::Point class_id;
double score;
cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id); // Find the class with the highest score
// Check if the detection meets the confidence threshold
if (score > model_confidence_threshold_) {
class_list.push_back(class_id.y);
confidence_list.push_back(score);
const float x = detection_outputs.at<float>(0, i);
const float y = detection_outputs.at<float>(1, i);
const float w = detection_outputs.at<float>(2, i);
const float h = detection_outputs.at<float>(3, i);
cv::Rect box;
box.x = static_cast<int>(x);
box.y = static_cast<int>(y);
box.width = static_cast<int>(w);
box.height = static_cast<int>(h);
box_list.push_back(box);
}
}
// Apply Non-Maximum Suppression (NMS) to filter overlapping bounding boxes
std::vector<int> NMS_result;
cv::dnn::NMSBoxes(box_list, confidence_list, model_confidence_threshold_, model_NMS_threshold_, NMS_result);
// Collect final detections after NMS
for (int i = 0; i < NMS_result.size(); ++i) {
Detection result;
const unsigned short id = NMS_result[i];
result.class_id = class_list[id];
result.confidence = confidence_list[id];
result.box = GetBoundingBox(box_list[id]);
DrawDetectedObject(frame, result);
}
}
// Method to get the bounding box in the correct scale
cv::Rect Inference::GetBoundingBox(const cv::Rect &src) const {
cv::Rect box = src;
box.x = (box.x - box.width / 2) * scale_factor_.x;
box.y = (box.y - box.height / 2) * scale_factor_.y;
box.width *= scale_factor_.x;
box.height *= scale_factor_.y;
return box;
}
void Inference::DrawDetectedObject(cv::Mat &frame, const Detection &detection) const {
const cv::Rect &box = detection.box;
const float &confidence = detection.confidence;
const int &class_id = detection.class_id;
// Generate a random color for the bounding box
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(120, 255);
const cv::Scalar &color = cv::Scalar(dis(gen), dis(gen), dis(gen));
// Draw the bounding box around the detected object
cv::rectangle(frame, cv::Point(box.x, box.y), cv::Point(box.x + box.width, box.y + box.height), color, 3);
// Prepare the class label and confidence text
std::string classString = classes_[class_id] + std::to_string(confidence).substr(0, 4);
// Get the size of the text box
cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 0.75, 2, 0);
cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
// Draw the text box
cv::rectangle(frame, textBox, color, cv::FILLED);
// Put the class label and confidence text above the bounding box
cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 0.75, cv::Scalar(0, 0, 0), 2, 0);
}
} // namespace yolo
#ifndef YOLO_INFERENCE_H_
#define YOLO_INFERENCE_H_
#include <string>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <openvino/openvino.hpp>
namespace yolo {
struct Detection {
short class_id;
float confidence;
cv::Rect box;
};
class Inference {
public:
Inference() {}
// Constructor to initialize the model with default input shape
Inference(const std::string &model_path, const float &model_confidence_threshold, const float &model_NMS_threshold);
// Constructor to initialize the model with specified input shape
Inference(const std::string &model_path, const cv::Size model_input_shape, const float &model_confidence_threshold, const float &model_NMS_threshold);
void RunInference(cv::Mat &frame);
private:
void InitializeModel(const std::string &model_path);
void Preprocessing(const cv::Mat &frame);
void PostProcessing(cv::Mat &frame);
cv::Rect GetBoundingBox(const cv::Rect &src) const;
void DrawDetectedObject(cv::Mat &frame, const Detection &detections) const;
cv::Point2f scale_factor_; // Scaling factor for the input frame
cv::Size2f model_input_shape_; // Input shape of the model
cv::Size model_output_shape_; // Output shape of the model
ov::InferRequest inference_request_; // OpenVINO inference request
ov::CompiledModel compiled_model_; // OpenVINO compiled model
float model_confidence_threshold_; // Confidence threshold for detections
float model_NMS_threshold_; // Non-Maximum Suppression threshold
std::vector<std::string> classes_ {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"
};
};
} // namespace yolo
#endif // YOLO_INFERENCE_H_
#include "inference.h"
#include <iostream>
#include <opencv2/highgui.hpp>
int main(int argc, char **argv) {
// Check if the correct number of arguments is provided
if (argc != 3) {
std::cerr << "usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
return 1;
}
// Get the model and image paths from the command-line arguments
const std::string model_path = argv[1];
const std::string image_path = argv[2];
// Read the input image
cv::Mat image = cv::imread(image_path);
// Check if the image was successfully loaded
if (image.empty()) {
std::cerr << "ERROR: image is empty" << std::endl;
return 1;
}
// Define the confidence and NMS thresholds
const float confidence_threshold = 0.5;
const float NMS_threshold = 0.5;
// Initialize the YOLO inference with the specified model and parameters
yolo::Inference inference(model_path, cv::Size(640, 640), confidence_threshold, NMS_threshold);
// Run inference on the input image
inference.RunInference(image);
// Display the image with the detections
cv::imshow("image", image);
cv::waitKey(0);
return 0;
}
# Regions Counting Using YOLOv8 (Inference on Video)
> **Region Counter** is now part of **[Ultralytics Solutions](https://docs.ultralytics.com/solutions/)**, offering improved features and regular updates. Enjoy!
🔗 **[Explore Object Counting in Regions Here](https://docs.ultralytics.com/guides/region-counting/)**
> 🔔 **Notice:**
> The GitHub example will remain available but **will no longer be actively maintained**. For the latest updates and improvements, please use the official [link](https://docs.ultralytics.com/guides/region-counting/). Thank you!
Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. Regions can be adjusted interactively with a left mouse click to suit the user's preferences and requirements, and counting occurs in real time.
<div>
<p align="center">
<img src="https://github.com/RizwanMunawar/ultralytics/assets/62513924/5ab3bbd7-fd12-4849-928e-5f294d6c3fcf" width="45%" alt="YOLOv8 region counting visual 1">
<img src="https://github.com/RizwanMunawar/ultralytics/assets/62513924/e7c1aea7-474d-4d78-8d48-b50854ffe1ca" width="45%" alt="YOLOv8 region counting visual 2">
</p>
</div>
## Table of Contents
- [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries)
- [Step 2: Run the Region Counting Using Ultralytics YOLOv8](#step-2-run-the-region-counting-using-ultralytics-yolov8)
- [Usage Options](#usage-options)
- [FAQ](#faq)
## Step 1: Install the Required Libraries
Clone the repository, install the dependencies, and `cd` into this local directory for the commands in Step 2.
```bash
# Clone ultralytics repo
git clone https://github.com/ultralytics/ultralytics
# Install dependencies
pip install -U ultralytics shapely
# cd to local directory
cd ultralytics/examples/YOLOv8-Region-Counter
```
## Step 2: Run the Region Counting Using Ultralytics YOLOv8
Here are the basic commands for running the inference:
### Note
After the video begins playing, you can freely move the region anywhere within the video by simply clicking and dragging using the left mouse button.
```bash
# If you want to save results
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --view-img
# If you want to run model on CPU
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --view-img --device cpu
# If you want to change model file
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --weights "path/to/model.pt"
# If you want to detect specific classes (e.g., class 0 and class 2)
python yolov8_region_counter.py --source "path/to/video.mp4" --classes 0 2 --weights "path/to/model.pt"
# If you don't want to save results
python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
```
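If you prefer to drive the counter from Python rather than the command line, the `run()` function defined in `yolov8_region_counter.py` accepts the same options as keyword arguments. A minimal sketch, assuming you execute it from this example directory so the module is importable:
```python
from yolov8_region_counter import run

# Equivalent to: python yolov8_region_counter.py --source "path/to/video.mp4" --view-img --save-img
run(
    weights="yolov8n.pt",
    source="path/to/video.mp4",  # placeholder video path
    device="cpu",
    view_img=True,
    save_img=True,
    classes=None,  # or e.g. [0, 2] to count only specific classes
)
```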
## Usage Options
- `--source`: Specifies the path to the video file you want to run inference on.
- `--device`: Specifies the device to run on: `cpu` or a GPU index such as `0`.
- `--save-img`: Flag to save the detection results as images.
- `--weights`: Specifies a different YOLOv8 model file (e.g., `yolov8n.pt`, `yolov8s.pt`, `yolov8m.pt`, `yolov8l.pt`, `yolov8x.pt`).
- `--classes`: Specifies the classes to detect and track (e.g., `--classes 0 2`).
- `--line-thickness`: Specifies the bounding box thickness.
- `--region-thickness`: Specifies the region outline thickness.
- `--track-thickness`: Specifies the track line thickness.
## FAQ
**1. What Does Region Counting Involve?**
Region counting is a computational method utilized to ascertain the quantity of objects within a specific area in recorded video or real-time streams. This technique finds frequent application in image processing, computer vision, and pattern recognition, facilitating the analysis and segmentation of objects or features based on their spatial relationships.
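At its core, the per-frame check is a point-in-polygon test applied to each detection's box center. A minimal sketch of that idea using `shapely` (the same library this example relies on), with made-up region and detection coordinates:
```python
from shapely.geometry import Point, Polygon

# A rectangular counting region and a few hypothetical detection box centers (x, y)
region = Polygon([(200, 250), (440, 250), (440, 550), (200, 550)])
box_centers = [(300, 400), (100, 100), (420, 260)]

# Count how many detections fall inside the region on this frame
count = sum(region.contains(Point(cx, cy)) for cx, cy in box_centers)
print(f"Objects in region: {count}")  # -> Objects in region: 2
```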
**2. Is Friendly Region Plotting Supported by the Region Counter?**
The Region Counter offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code:
```python
from shapely.geometry import Polygon
counting_regions = [
{
"name": "YOLOv8 Polygon Region",
"polygon": Polygon(
[(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]
), # Polygon with five points (Pentagon)
"counts": 0,
"dragging": False,
"region_color": (255, 42, 4), # BGR Value
"text_color": (255, 255, 255), # Region Text Color
},
{
"name": "YOLOv8 Rectangle Region",
"polygon": Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Rectangle with four points
"counts": 0,
"dragging": False,
"region_color": (37, 255, 225), # BGR Value
"text_color": (0, 0, 0), # Region Text Color
},
]
```
**3. Why Combine Region Counting with YOLOv8?**
YOLOv8 specializes in the detection and tracking of objects in video streams. Region counting complements this by enabling object counting within designated areas, making it a valuable application of YOLOv8.
**4. How Can I Troubleshoot Issues?**
To see exactly what is being detected and counted while the script runs, include the `--view-img` flag so the annotated frames are displayed in real time:
```bash
python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
```
**5. Can I Employ Other YOLO Versions?**
Certainly, you have the flexibility to specify different YOLO model weights using the `--weights` option.
**6. Where Can I Access Additional Information?**
For a comprehensive guide on using YOLOv8 with Object Tracking, please refer to [Multi-Object Tracking with Ultralytics YOLO](https://docs.ultralytics.com/modes/track/).
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
from collections import defaultdict
from pathlib import Path
import cv2
import numpy as np
from shapely.geometry import Polygon
from shapely.geometry.point import Point
from ultralytics import YOLO
from ultralytics.utils.files import increment_path
from ultralytics.utils.plotting import Annotator, colors
track_history = defaultdict(list)
current_region = None
counting_regions = [
{
"name": "YOLOv8 Polygon Region",
"polygon": Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points
"counts": 0,
"dragging": False,
"region_color": (255, 42, 4), # BGR Value
"text_color": (255, 255, 255), # Region Text Color
},
{
"name": "YOLOv8 Rectangle Region",
"polygon": Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points
"counts": 0,
"dragging": False,
"region_color": (37, 255, 225), # BGR Value
"text_color": (0, 0, 0), # Region Text Color
},
]
def mouse_callback(event, x, y, flags, param):
"""
Handles mouse events for region manipulation.
Args:
event (int): The mouse event type (e.g., cv2.EVENT_LBUTTONDOWN).
x (int): The x-coordinate of the mouse pointer.
y (int): The y-coordinate of the mouse pointer.
flags (int): Additional flags passed by OpenCV.
param: Additional parameters passed to the callback (not used in this function).
Global Variables:
current_region (dict): A dictionary representing the current selected region.
Mouse Events:
- LBUTTONDOWN: Initiates dragging for the region containing the clicked point.
- MOUSEMOVE: Moves the selected region if dragging is active.
- LBUTTONUP: Ends dragging for the selected region.
Notes:
- This function is intended to be used as a callback for OpenCV mouse events.
- Requires the existence of the 'counting_regions' list and the 'Polygon' class.
Example:
>>> cv2.setMouseCallback(window_name, mouse_callback)
"""
global current_region
# Mouse left button down event
if event == cv2.EVENT_LBUTTONDOWN:
for region in counting_regions:
if region["polygon"].contains(Point((x, y))):
current_region = region
current_region["dragging"] = True
current_region["offset_x"] = x
current_region["offset_y"] = y
# Mouse move event
elif event == cv2.EVENT_MOUSEMOVE:
if current_region is not None and current_region["dragging"]:
dx = x - current_region["offset_x"]
dy = y - current_region["offset_y"]
current_region["polygon"] = Polygon(
[(p[0] + dx, p[1] + dy) for p in current_region["polygon"].exterior.coords]
)
current_region["offset_x"] = x
current_region["offset_y"] = y
# Mouse left button up event
elif event == cv2.EVENT_LBUTTONUP:
if current_region is not None and current_region["dragging"]:
current_region["dragging"] = False
def run(
weights="yolov8n.pt",
source=None,
device="cpu",
view_img=False,
save_img=False,
exist_ok=False,
classes=None,
line_thickness=2,
track_thickness=2,
region_thickness=2,
):
"""
Run Region counting on a video using YOLOv8 and ByteTrack.
Supports movable region for real time counting inside specific area.
Supports multiple regions counting.
Regions can be Polygons or rectangle in shape
Args:
weights (str): Model weights path.
source (str): Video file path.
device (str): processing device cpu, 0, 1
view_img (bool): Show results.
save_img (bool): Save results.
exist_ok (bool): Overwrite existing files.
classes (list): classes to detect and track
line_thickness (int): Bounding box thickness.
track_thickness (int): Tracking line thickness
region_thickness (int): Region thickness.
"""
vid_frame_count = 0
# Check source path
if not Path(source).exists():
raise FileNotFoundError(f"Source path '{source}' does not exist.")
# Setup Model
model = YOLO(f"{weights}")
model.to("cuda") if device == "0" else model.to("cpu")
# Extract classes names
names = model.names
# Video setup
videocapture = cv2.VideoCapture(source)
frame_width = int(videocapture.get(3))
frame_height = int(videocapture.get(4))
fps = int(videocapture.get(5))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# Output setup
save_dir = increment_path(Path("ultralytics_rc_output") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.avi"), fourcc, fps, (frame_width, frame_height))
# Iterate over video frames
while videocapture.isOpened():
success, frame = videocapture.read()
if not success:
break
vid_frame_count += 1
# Extract the results
results = model.track(frame, persist=True, classes=classes)
if results[0].boxes.id is not None:
boxes = results[0].boxes.xyxy.cpu()
track_ids = results[0].boxes.id.int().cpu().tolist()
clss = results[0].boxes.cls.cpu().tolist()
annotator = Annotator(frame, line_width=line_thickness, example=str(names))
for box, track_id, cls in zip(boxes, track_ids, clss):
annotator.box_label(box, str(names[cls]), color=colors(cls, True))
bbox_center = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2 # Bbox center
track = track_history[track_id] # Tracking Lines plot
track.append((float(bbox_center[0]), float(bbox_center[1])))
if len(track) > 30:
track.pop(0)
points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
cv2.polylines(frame, [points], isClosed=False, color=colors(cls, True), thickness=track_thickness)
# Check if detection inside region
for region in counting_regions:
if region["polygon"].contains(Point((bbox_center[0], bbox_center[1]))):
region["counts"] += 1
# Draw regions (Polygons/Rectangles)
for region in counting_regions:
region_label = str(region["counts"])
region_color = region["region_color"]
region_text_color = region["text_color"]
polygon_coordinates = np.array(region["polygon"].exterior.coords, dtype=np.int32)
centroid_x, centroid_y = int(region["polygon"].centroid.x), int(region["polygon"].centroid.y)
text_size, _ = cv2.getTextSize(
region_label, cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.7, thickness=line_thickness
)
text_x = centroid_x - text_size[0] // 2
text_y = centroid_y + text_size[1] // 2
cv2.rectangle(
frame,
(text_x - 5, text_y - text_size[1] - 5),
(text_x + text_size[0] + 5, text_y + 5),
region_color,
-1,
)
cv2.putText(
frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color, line_thickness
)
cv2.polylines(frame, [polygon_coordinates], isClosed=True, color=region_color, thickness=region_thickness)
if view_img:
if vid_frame_count == 1:
cv2.namedWindow("Ultralytics YOLOv8 Region Counter Movable")
cv2.setMouseCallback("Ultralytics YOLOv8 Region Counter Movable", mouse_callback)
cv2.imshow("Ultralytics YOLOv8 Region Counter Movable", frame)
if save_img:
video_writer.write(frame)
for region in counting_regions: # Reinitialize count for each region
region["counts"] = 0
if cv2.waitKey(1) & 0xFF == ord("q"):
break
del vid_frame_count
video_writer.release()
videocapture.release()
cv2.destroyAllWindows()
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--line-thickness", type=int, default=2, help="bounding box thickness")
parser.add_argument("--track-thickness", type=int, default=2, help="Tracking line thickness")
parser.add_argument("--region-thickness", type=int, default=4, help="Region thickness")
return parser.parse_args()
def main(options):
"""Main function."""
run(**vars(options))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
# YOLO11 with SAHI (Inference on Video)
[SAHI](https://docs.ultralytics.com/guides/sahi-tiled-inference/) is designed to optimize object detection algorithms for large-scale and high-resolution imagery. It partitions images into manageable slices, performs object detection on each slice, and then stitches the results back together. This tutorial will guide you through the process of running YOLO11 inference on video files with the aid of SAHI.
## Table of Contents
- [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries)
- [Step 2: Run the Inference with SAHI using Ultralytics YOLO11](#step-2-run-the-inference-with-sahi-using-ultralytics-yolo11)
- [Usage Options](#usage-options)
- [FAQ](#faq)
## Step 1: Install the Required Libraries
Clone the repository, install dependencies and `cd` to this local directory for commands in Step 2.
```bash
# Clone ultralytics repo
git clone https://github.com/ultralytics/ultralytics
# Install dependencies
pip install -U sahi ultralytics
# cd to local directory
cd ultralytics/examples/YOLOv8-SAHI-Inference-Video
```
## Step 2: Run the Inference with SAHI using Ultralytics YOLO11
Here are the basic commands for running the inference:
```bash
#if you want to save results
python yolov8_sahi.py --source "path/to/video.mp4" --save-img
#if you want to change model file
python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolo11n.pt"
```
## Usage Options
- `--source`: Specifies the path to the video file you want to run inference on.
- `--save-img`: Flag to save the detection results as images.
- `--weights`: Specifies a different YOLO11 model file (e.g., `yolo11n.pt`, `yolo11s.pt`, `yolo11m.pt`, `yolo11l.pt`, `yolo11x.pt`).
## FAQ
**1. What is SAHI?**
SAHI stands for Slicing Aided Hyper Inference. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images. The library source code is available on [GitHub](https://github.com/obss/sahi).
**2. Why use SAHI with YOLO11?**
SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLO11, especially when working with high-resolution videos.
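For a single high-resolution image, the sliced workflow looks roughly like the sketch below; the image path, slice sizes, and overlap ratios are illustrative, while the `AutoDetectionModel` and `get_sliced_prediction` calls are the same ones `yolov8_sahi.py` uses:
```python
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# Load a YOLO11 model through SAHI's Ultralytics wrapper
detection_model = AutoDetectionModel.from_pretrained(
    model_type="ultralytics", model_path="yolo11n.pt", confidence_threshold=0.3, device="cpu"
)

# Slice the image into 512x512 tiles, detect on each tile, and merge the results
image = cv2.imread("large_image.jpg")[..., ::-1]  # BGR -> RGB
result = get_sliced_prediction(
    image, detection_model, slice_height=512, slice_width=512, overlap_height_ratio=0.2, overlap_width_ratio=0.2
)
print(f"Detections after merging slices: {len(result.object_prediction_list)}")
```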
**3. How do I debug issues?**
You can add the `--view-img` flag to your command to display the annotated frames while inference runs:
```bash
python yolov8_sahi.py --source "path/to/video.mp4" --view-img
```
**4. Can I use other YOLO versions?**
Yes, you can specify different YOLO model weights using the `--weights` option.
**5. Where can I find more information?**
For a full guide to YOLO11 with SAHI see [https://docs.ultralytics.com/guides/sahi-tiled-inference](https://docs.ultralytics.com/guides/sahi-tiled-inference/).
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
from pathlib import Path
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
from sahi.utils.ultralytics import download_yolo11n_model
from ultralytics.utils.files import increment_path
from ultralytics.utils.plotting import Annotator, colors
class SAHIInference:
"""Runs Ultralytics YOLO11 and SAHI for object detection on video with options to view, save, and track results."""
def __init__(self):
"""Initializes the SAHIInference class for performing sliced inference using SAHI with YOLO11 models."""
self.detection_model = None
def load_model(self, weights):
"""Loads a YOLO11 model with specified weights for object detection using SAHI."""
yolo11_model_path = f"models/{weights}"
download_yolo11n_model(yolo11_model_path)
self.detection_model = AutoDetectionModel.from_pretrained(
model_type="ultralytics", model_path=yolo11_model_path, device="cpu"
)
def inference(
self,
weights="yolo11n.pt",
source="test.mp4",
view_img=False,
save_img=False,
exist_ok=False,
):
"""
Run object detection on a video using YOLO11 and SAHI.
Args:
weights (str): Model weights path.
source (str): Video file path.
view_img (bool): Show results.
save_img (bool): Save results.
exist_ok (bool): Overwrite existing files.
"""
# Video setup
cap = cv2.VideoCapture(source)
assert cap.isOpened(), "Error reading video file"
frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
# Output setup
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(
str(save_dir / f"{Path(source).stem}.avi"),
cv2.VideoWriter_fourcc(*"MJPG"),
int(cap.get(5)),
(frame_width, frame_height),
)
# Load model
self.load_model(weights)
while cap.isOpened():
success, frame = cap.read()
if not success:
break
annotator = Annotator(frame) # Initialize annotator for plotting detection and tracking results
results = get_sliced_prediction(
frame[..., ::-1],
self.detection_model,
slice_height=512,
slice_width=512,
)
detection_data = [
(det.category.name, det.category.id, (det.bbox.minx, det.bbox.miny, det.bbox.maxx, det.bbox.maxy))
for det in results.object_prediction_list
]
for det in detection_data:
annotator.box_label(det[2], label=str(det[0]), color=colors(int(det[1]), True))
if view_img:
cv2.imshow(Path(source).stem, frame)
if save_img:
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
video_writer.release()
cap.release()
cv2.destroyAllWindows()
def parse_opt(self):
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default="yolo11n.pt", help="initial weights path")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
return parser.parse_args()
if __name__ == "__main__":
inference = SAHIInference()
inference.inference(**vars(inference.parse_opt()))
# YOLOv8-Segmentation-ONNXRuntime-Python Demo
This example provides a Python demo for performing segmentation with YOLOv8 using ONNX Runtime, highlighting the interoperability of YOLOv8 models without the need for the full PyTorch stack.
## Features
- **Framework Agnostic**: Runs segmentation inference purely on ONNX Runtime without importing PyTorch.
- **Efficient Inference**: Supports both FP32 and FP16 precision for ONNX models, catering to different computational needs.
- **Ease of Use**: Utilizes simple command-line arguments for model execution.
- **Broad Compatibility**: Leverages Numpy and OpenCV for image processing, ensuring broad compatibility with various environments.
## Installation
Install the required packages using pip. You will need `ultralytics` for exporting YOLOv8-seg ONNX model and using some utility functions, `onnxruntime-gpu` for GPU-accelerated inference, and `opencv-python` for image processing.
```bash
pip install ultralytics
pip install onnxruntime-gpu # For GPU support
# pip install onnxruntime # Use this instead if you don't have an NVIDIA GPU
pip install numpy
pip install opencv-python
```
## Getting Started
### 1. Export the YOLOv8 ONNX Model
Export the YOLOv8 segmentation model to ONNX format using the provided `ultralytics` package.
```bash
yolo export model=yolov8s-seg.pt imgsz=640 format=onnx opset=12 simplify
```
### 2. Run Inference
Perform inference with the exported ONNX model on your images.
```bash
python main.py --model <MODEL_PATH> --source <IMAGE_PATH>
```
### Example Output
After running the command, you should see segmentation results similar to this:
<img src="https://user-images.githubusercontent.com/51357717/279988626-eb74823f-1563-4d58-a8e4-0494025b7c9a.jpg" alt="Segmentation Demo" width="800">
## Advanced Usage
For more advanced usage, including real-time video processing, please refer to the `main.py` script's command-line arguments.
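As a starting point for video, the `YOLOv8Seg` class defined in `main.py` can be called frame by frame. The sketch below is only illustrative: it assumes you run it from this example directory, that `yolov8s-seg.onnx` has been exported as shown above, and that the video path exists.
```python
import cv2

from main import YOLOv8Seg  # the class defined in this example's main.py

model = YOLOv8Seg("yolov8s-seg.onnx")
cap = cv2.VideoCapture("path/to/video.mp4")  # placeholder video path

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    boxes, segments, _ = model(frame, conf_threshold=0.25, iou_threshold=0.45)
    if len(boxes) > 0:
        model.draw_and_visualize(frame, boxes, segments, vis=False, save=False)  # draws boxes and contours on frame
    cv2.imshow("YOLOv8-seg", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
```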
## Contributing
We welcome contributions to improve this demo! Please submit issues and pull requests for bug reports, feature requests, or new algorithm enhancements.
## License
This project is licensed under the AGPL-3.0 License - see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for details.
## Acknowledgments
- The YOLOv8-Segmentation-ONNXRuntime-Python demo is contributed by GitHub user [jamjamjon](https://github.com/jamjamjon).
- Thanks to the ONNX Runtime community for providing a robust and efficient inference engine.
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
import cv2
import numpy as np
import onnxruntime as ort
from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
from ultralytics.utils.plotting import Colors
class YOLOv8Seg:
"""YOLOv8 segmentation model."""
def __init__(self, onnx_model):
"""
Initialization.
Args:
onnx_model (str): Path to the ONNX model.
"""
# Build Ort session
self.session = ort.InferenceSession(
onnx_model,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
if ort.get_device() == "GPU"
else ["CPUExecutionProvider"],
)
# Numpy dtype: support both FP32 and FP16 onnx model
self.ndtype = np.half if self.session.get_inputs()[0].type == "tensor(float16)" else np.single
# Get model width and height(YOLOv8-seg only has one input)
self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]
# Load COCO class names
self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]
# Create color palette
self.color_palette = Colors()
def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32):
"""
The whole pipeline: pre-process -> inference -> post-process.
Args:
im0 (Numpy.ndarray): original input image.
conf_threshold (float): confidence threshold for filtering predictions.
iou_threshold (float): iou threshold for NMS.
nm (int): the number of masks.
Returns:
boxes (List): list of bounding boxes.
segments (List): list of segments.
masks (np.ndarray): [N, H, W], output masks.
"""
# Pre-process
im, ratio, (pad_w, pad_h) = self.preprocess(im0)
# Ort inference
preds = self.session.run(None, {self.session.get_inputs()[0].name: im})
# Post-process
boxes, segments, masks = self.postprocess(
preds,
im0=im0,
ratio=ratio,
pad_w=pad_w,
pad_h=pad_h,
conf_threshold=conf_threshold,
iou_threshold=iou_threshold,
nm=nm,
)
return boxes, segments, masks
def preprocess(self, img):
"""
Pre-processes the input image.
Args:
img (Numpy.ndarray): image about to be processed.
Returns:
img_process (Numpy.ndarray): image preprocessed for inference.
ratio (tuple): width, height ratios in letterbox.
pad_w (float): width padding in letterbox.
pad_h (float): height padding in letterbox.
"""
# Resize and pad input image using letterbox() (Borrowed from Ultralytics)
shape = img.shape[:2] # original image shape
new_shape = (self.model_height, self.model_width)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
ratio = r, r
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
# Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
img = np.ascontiguousarray(np.einsum("HWC->CHW", img)[::-1], dtype=self.ndtype) / 255.0
img_process = img[None] if len(img.shape) == 3 else img
return img_process, ratio, (pad_w, pad_h)
def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32):
"""
Post-process the prediction.
Args:
preds (Numpy.ndarray): predictions come from ort.session.run().
im0 (Numpy.ndarray): [h, w, c] original input image.
ratio (tuple): width, height ratios in letterbox.
pad_w (float): width padding in letterbox.
pad_h (float): height padding in letterbox.
conf_threshold (float): conf threshold.
iou_threshold (float): iou threshold.
nm (int): the number of masks.
Returns:
boxes (List): list of bounding boxes.
segments (List): list of segments.
masks (np.ndarray): [N, H, W], output masks.
"""
x, protos = preds[0], preds[1] # Two outputs: predictions and protos
# Transpose dim 1: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
x = np.einsum("bcn->bnc", x)
# Predictions filtering by conf-threshold
x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
# Create a new matrix which merge these(box, score, cls, nm) into one
# For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html
x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]
# NMS filtering
x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
# Decode and return
if len(x) > 0:
# Bounding boxes format change: cxcywh -> xyxy
x[..., [0, 1]] -= x[..., [2, 3]] / 2
x[..., [2, 3]] += x[..., [0, 1]]
# Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image
x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
x[..., :4] /= min(ratio)
# Bounding boxes boundary clamp
x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])
# Process masks
masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape)
# Masks -> Segments(contours)
segments = self.masks2segments(masks)
return x[..., :6], segments, masks # boxes, segments, masks
else:
return [], [], []
@staticmethod
def masks2segments(masks):
"""
Takes a list of masks(n,h,w) and returns a list of segments(n,xy), from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160).
Returns:
segments (List): list of segment masks.
"""
segments = []
for x in masks.astype("uint8"):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE
if c:
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
else:
c = np.zeros((0, 2)) # no segments found
segments.append(c.astype("float32"))
return segments
@staticmethod
def crop_mask(masks, boxes):
"""
Takes a mask and a bounding box, and returns a mask that is cropped to the bounding box, from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (Numpy.ndarray): [n, h, w] tensor of masks.
boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form.
Returns:
(Numpy.ndarray): The masks are being cropped to the bounding box.
"""
n, h, w = masks.shape
x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)
r = np.arange(w, dtype=x1.dtype)[None, None, :]
c = np.arange(h, dtype=x1.dtype)[None, :, None]
return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
def process_mask(self, protos, masks_in, bboxes, im0_shape):
"""
Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
quality but is slower, from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
protos (numpy.ndarray): [mask_dim, mask_h, mask_w].
masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms.
bboxes (numpy.ndarray): bboxes re-scaled to original image shape.
im0_shape (tuple): the size of the input image (h,w,c).
Returns:
(numpy.ndarray): The upsampled masks.
"""
c, mh, mw = protos.shape
masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN
masks = np.ascontiguousarray(masks)
masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape
masks = np.einsum("HWN -> NHW", masks) # HWN -> NHW
masks = self.crop_mask(masks, bboxes)
return np.greater(masks, 0.5)
@staticmethod
def scale_mask(masks, im0_shape, ratio_pad=None):
"""
Takes a mask, and resizes it to the original image size, from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
im0_shape (tuple): the original image shape.
ratio_pad (tuple): the ratio of the padding to the original image.
Returns:
masks (np.ndarray): The masks that are being returned.
"""
im1_shape = masks.shape[:2]
if ratio_pad is None: # calculate from im0_shape
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
else:
pad = ratio_pad[1]
# Calculate tlbr of mask
top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x
bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[top:bottom, left:right]
masks = cv2.resize(
masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR
) # INTER_CUBIC would be better
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True):
"""
Draw and visualize results.
Args:
im (np.ndarray): original image, shape [h, w, c].
bboxes (numpy.ndarray): [n, 4], n is number of bboxes.
segments (List): list of segment masks.
vis (bool): imshow using OpenCV.
save (bool): save image annotated.
Returns:
None
"""
# Draw rectangles and polygons
im_canvas = im.copy()
for (*box, conf, cls_), segment in zip(bboxes, segments):
# draw contour and fill mask
cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # white borderline
cv2.fillPoly(im_canvas, np.int32([segment]), self.color_palette(int(cls_), bgr=True))
# draw bbox rectangle
cv2.rectangle(
im,
(int(box[0]), int(box[1])),
(int(box[2]), int(box[3])),
self.color_palette(int(cls_), bgr=True),
1,
cv2.LINE_AA,
)
cv2.putText(
im,
f"{self.classes[cls_]}: {conf:.3f}",
(int(box[0]), int(box[1] - 9)),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
self.color_palette(int(cls_), bgr=True),
2,
cv2.LINE_AA,
)
# Mix image
im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)
# Show image
if vis:
cv2.imshow("demo", im)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Save image
if save:
cv2.imwrite("demo.jpg", im)
if __name__ == "__main__":
# Create an argument parser to handle command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, required=True, help="Path to ONNX model")
parser.add_argument("--source", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
args = parser.parse_args()
# Build model
model = YOLOv8Seg(args.model)
# Read image by OpenCV
img = cv2.imread(args.source)
# Inference
boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
# Draw bboxes and polygons
if len(boxes) > 0:
model.draw_and_visualize(img, boxes, segments, vis=False, save=True)
# YOLOv8 - TFLite Runtime
This example shows how to run inference with a YOLOv8 TFLite model. It supports FP32, FP16, and INT8 models.
## Installation
### Installing `tflite-runtime`
To load TFLite models, install the `tflite-runtime` package using:
```bash
pip install tflite-runtime
```
### Installing `tensorflow-gpu` (For NVIDIA GPU Users)
Leverage GPU acceleration on NVIDIA GPUs by installing `tensorflow-gpu` (note that recent TensorFlow releases include GPU support in the standard `tensorflow` package, and the separate `tensorflow-gpu` package is deprecated):
```bash
pip install tensorflow-gpu
```
**Note:** Ensure you have compatible GPU drivers installed on your system.
### Installing `tensorflow` (CPU Version)
For CPU usage or non-NVIDIA GPUs, install TensorFlow with:
```bash
pip install tensorflow
```
## Usage
Follow these instructions to run YOLOv8 after successful installation.
Convert the YOLOv8 model to TFLite format:
```bash
yolo export model=yolov8n.pt imgsz=640 format=tflite int8
```
Locate the TFLite model in `yolov8n_saved_model`. Then, execute the following in your terminal:
```bash
python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf 0.25 --iou 0.45 --metadata "metadata.yaml"
```
Replace `yolov8n_full_integer_quant.tflite` with your TFLite model path, `image.jpg` with your input image path, and `metadata.yaml` with the file generated by `ultralytics` during export, then adjust the confidence (`--conf`) and IoU (`--iou`) thresholds as necessary.
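The same flow is also available from Python through the `YOLOv8TFLite` class defined in `main.py`; a minimal sketch, assuming you run it from this example directory and use the default export paths shown above:
```python
import cv2

from main import YOLOv8TFLite  # the class defined in this example's main.py

detector = YOLOv8TFLite(
    "yolov8n_saved_model/yolov8n_full_integer_quant.tflite",
    conf=0.25,
    iou=0.45,
    metadata="yolov8n_saved_model/metadata.yaml",
)
result = detector.detect("image.jpg")  # returns the image with detections drawn on it
cv2.imwrite("output.jpg", result)
```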
### Output
The output shows the detections along with the class label and confidence of each detected object.
![image](https://github.com/wamiqraza/Attribute-recognition-and-reidentification-Market1501-dataset/blob/main/img/bus.jpg)
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
from typing import Tuple, Union
import cv2
import numpy as np
import yaml
from ultralytics.utils import ASSETS
try:
from tflite_runtime.interpreter import Interpreter
except ImportError:
import tensorflow as tf
Interpreter = tf.lite.Interpreter
class YOLOv8TFLite:
"""
YOLOv8TFLite.
A class for performing object detection using the YOLOv8 model with TensorFlow Lite.
Attributes:
model (str): Path to the TensorFlow Lite model file.
conf (float): Confidence threshold for filtering detections.
iou (float): Intersection over Union threshold for non-maximum suppression.
metadata (Optional[str]): Path to the metadata file, if any.
Methods:
detect(img_path: str) -> np.ndarray:
Performs inference and returns the output image with drawn detections.
"""
def __init__(self, model: str, conf: float = 0.25, iou: float = 0.45, metadata: Union[str, None] = None):
"""
Initializes an instance of the YOLOv8TFLite class.
Args:
model (str): Path to the TFLite model.
conf (float, optional): Confidence threshold for filtering detections. Defaults to 0.25.
iou (float, optional): IoU (Intersection over Union) threshold for non-maximum suppression. Defaults to 0.45.
metadata (Union[str, None], optional): Path to the metadata file or None if not used. Defaults to None.
"""
self.conf = conf
self.iou = iou
if metadata is None:
self.classes = {i: i for i in range(1000)}
else:
with open(metadata) as f:
self.classes = yaml.safe_load(f)["names"]
np.random.seed(42)
self.color_palette = np.random.uniform(128, 255, size=(len(self.classes), 3))
self.model = Interpreter(model_path=model)
self.model.allocate_tensors()
input_details = self.model.get_input_details()[0]
self.in_width, self.in_height = input_details["shape"][1:3]
self.in_index = input_details["index"]
self.in_scale, self.in_zero_point = input_details["quantization"]
self.int8 = input_details["dtype"] == np.int8
output_details = self.model.get_output_details()[0]
self.out_index = output_details["index"]
self.out_scale, self.out_zero_point = output_details["quantization"]
def letterbox(self, img: np.ndarray, new_shape: Tuple = (640, 640)) -> Tuple[np.ndarray, Tuple[float, float]]:
"""Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models."""
shape = img.shape[:2] # current shape [height, width]
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
# Compute padding
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
return img, (top / img.shape[0], left / img.shape[1])
def draw_detections(self, img: np.ndarray, box: np.ndarray, score: np.float32, class_id: int) -> None:
"""
Draws bounding boxes and labels on the input image based on the detected objects.
Args:
img (np.ndarray): The input image to draw detections on.
box (np.ndarray): Detected bounding box in the format [x1, y1, width, height].
score (np.float32): Corresponding detection score.
class_id (int): Class ID for the detected object.
Returns:
None
"""
x1, y1, w, h = box
color = self.color_palette[class_id]
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
label = f"{self.classes[class_id]}: {score:.2f}"
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
label_x = x1
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
cv2.rectangle(
img,
(int(label_x), int(label_y - label_height)),
(int(label_x + label_width), int(label_y + label_height)),
color,
cv2.FILLED,
)
cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, Tuple[float, float]]:
"""
Preprocesses the input image before performing inference.
Args:
img (np.ndarray): The input image to be preprocessed.
Returns:
Tuple[np.ndarray, Tuple[float, float]]: A tuple containing:
- The preprocessed image (np.ndarray).
- A tuple of two float values representing the padding applied (top/bottom, left/right).
"""
img, pad = self.letterbox(img, (self.in_width, self.in_height))
img = img[..., ::-1][None] # N,H,W,C for TFLite
img = np.ascontiguousarray(img)
img = img.astype(np.float32)
return img / 255, pad
def postprocess(self, img: np.ndarray, outputs: np.ndarray, pad: Tuple[float, float]) -> np.ndarray:
"""
Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
Args:
img (numpy.ndarray): The input image.
outputs (numpy.ndarray): The output of the model.
pad (Tuple[float, float]): Padding used by letterbox.
Returns:
numpy.ndarray: The input image with detections drawn on it.
"""
outputs[:, 0] -= pad[1]
outputs[:, 1] -= pad[0]
outputs[:, :4] *= max(img.shape)
outputs = outputs.transpose(0, 2, 1)
outputs[..., 0] -= outputs[..., 2] / 2
outputs[..., 1] -= outputs[..., 3] / 2
for out in outputs:
scores = out[:, 4:].max(-1)
keep = scores > self.conf
boxes = out[keep, :4]
scores = scores[keep]
class_ids = out[keep, 4:].argmax(-1)
indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf, self.iou).flatten()
[self.draw_detections(img, boxes[i], scores[i], class_ids[i]) for i in indices]
return img
def detect(self, img_path: str) -> np.ndarray:
"""
Performs inference using a TFLite model and returns the output image with drawn detections.
Args:
img_path (str): The path to the input image file.
Returns:
np.ndarray: The output image with drawn detections.
"""
img = cv2.imread(img_path)
x, pad = self.preprocess(img)
if self.int8:
x = (x / self.in_scale + self.in_zero_point).astype(np.int8)
self.model.set_tensor(self.in_index, x)
self.model.invoke()
y = self.model.get_tensor(self.out_index)
if self.int8:
y = (y.astype(np.float32) - self.out_zero_point) * self.out_scale
return self.postprocess(img, y, pad)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--model",
type=str,
default="yolov8n_saved_model/yolov8n_full_integer_quant.tflite",
help="Path to TFLite model.",
)
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
parser.add_argument("--metadata", type=str, default="yolov8n_saved_model/metadata.yaml", help="Metadata yaml")
args = parser.parse_args()
detector = YOLOv8TFLite(args.model, args.conf, args.iou, args.metadata)
    result = detector.detect(args.img)
cv2.imshow("Output", result)
cv2.waitKey(0)
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "PN1cAxdvd61e"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolo\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/heatmaps.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolo11\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
" <a href=\"https://ultralytics.com/discord\"><img alt=\"Discord\" src=\"https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue\"></a>\n",
"\n",
"Welcome to the Ultralytics YOLO11 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLO11</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
"YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
"We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 <a href=\"https://docs.ultralytics.com/guides/heatmaps\">Heatmap Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
"\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "o68Sg1oOeZm2"
},
"source": [
"# Setup\n",
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
"[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9dSwz_uOReMI",
"outputId": "99866c77-e210-41e1-d581-8508371ce634"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ultralytics 8.2.17 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
],
"source": [
"%pip install ultralytics\n",
"import ultralytics\n",
"\n",
"ultralytics.checks()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "m7VkxQ2aeg7k"
},
"source": [
"# Introduction to Heatmaps\n",
"\n",
"A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.\n",
"\n",
"## Real World Applications\n",
"\n",
"| Transportation | Retail |\n",
"|:-----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------:|\n",
"| ![Ultralytics YOLO11 Transportation Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/288d7053-622b-4452-b4e4-1f41aeb764aa) | ![Ultralytics YOLO11 Retail Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/edef75ad-50a7-4c0a-be4a-a66cdfc12802) |\n",
"| Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap |\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Cx-u59HQdu2o"
},
"outputs": [],
"source": [
"import cv2\n",
"\n",
"from ultralytics import solutions\n",
"\n",
"# Open video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
"assert cap.isOpened(), \"Error reading video file\"\n",
"\n",
"# Get video properties\n",
"w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))\n",
"\n",
"# Initialize video writer\n",
"video_writer = cv2.VideoWriter(\"heatmap_output.avi\", cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (w, h))\n",
"\n",
"# Initialize heatmap object\n",
"heatmap_obj = solutions.Heatmap(\n",
" colormap=cv2.COLORMAP_PARULA, # Color of the heatmap\n",
" show=True, # Display the image during processing\n",
" model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n",
")\n",
"\n",
"while cap.isOpened():\n",
" success, im0 = cap.read()\n",
" if not success:\n",
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
" # Generate heatmap on the frame\n",
" im0 = heatmap_obj.generate_heatmap(im0)\n",
"\n",
" # Write the frame to the output video\n",
" video_writer.write(im0)\n",
"\n",
"# Release resources\n",
"cap.release()\n",
"video_writer.release()\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QrlKg-y3fEyD"
},
"source": [
"# Additional Resources\n",
"\n",
"## Community Support\n",
"\n",
"For more information on using heatmaps with Ultralytics, you can explore the comprehensive [Ultralytics Heatmaps Docs](https://docs.ultralytics.com/guides/heatmaps/). This guide covers everything from basic concepts to advanced techniques, ensuring you get the most out of your heatmap visualizations.\n",
"\n",
"## Ultralytics ⚡ Resources\n",
"\n",
"At Ultralytics, we are committed to providing cutting-edge AI solutions. Here are some key resources to learn more about our company and get involved with our community:\n",
"\n",
"- [Ultralytics HUB](https://ultralytics.com/hub): Simplify your AI projects with Ultralytics HUB, our no-code tool for effortless YOLO training and deployment.\n",
"- [Ultralytics Licensing](https://ultralytics.com/license): Review our licensing terms to understand how you can use our software in your projects.\n",
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
"## YOLO11 🚀 Resources\n",
"\n",
"YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
"- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
"- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
"These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "FIzICjaph_Wy"
},
"source": [
"<a align=\"center\" href=\"https://ultralytics.com/hub\" target=\"_blank\">\n",
"<img width=\"1024\", src=\"https://github.com/ultralytics/assets/raw/main/im/ultralytics-hub.png\"></a>\n",
"\n",
"<div align=\"center\">\n",
"\n",
"[中文](https://docs.ultralytics.com/zh/hub/) | [한국어](https://docs.ultralytics.com/ko/hub/) | [日本語](https://docs.ultralytics.com/ja/hub/) | [Русский](https://docs.ultralytics.com/ru/hub/) | [Deutsch](https://docs.ultralytics.com/de/hub/) | [Français](https://docs.ultralytics.com/fr/hub/) | [Español](https://docs.ultralytics.com/es/hub/) | [Português](https://docs.ultralytics.com/pt/hub/) | [Türkçe](https://docs.ultralytics.com/tr/hub/) | [Tiếng Việt](https://docs.ultralytics.com/vi/hub/) | [العربية](https://docs.ultralytics.com/ar/hub/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/hub/actions/workflows/ci.yml\"><img src=\"https://github.com/ultralytics/hub/actions/workflows/ci.yml/badge.svg\" alt=\"CI CPU\"></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/hub/blob/main/hub.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"\n",
" <a href=\"https://ultralytics.com/discord\"><img alt=\"Discord\" src=\"https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue\"></a>\n",
" <a href=\"https://community.ultralytics.com\"><img alt=\"Ultralytics Forums\" src=\"https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue\"></a>\n",
" <a href=\"https://reddit.com/r/ultralytics\"><img alt=\"Ultralytics Reddit\" src=\"https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue\"></a>\n",
"\n",
"Welcome to the [Ultralytics](https://ultralytics.com/) HUB notebook!\n",
"\n",
"This notebook allows you to train Ultralytics [YOLO](https://github.com/ultralytics/ultralytics) 🚀 models using [HUB](https://hub.ultralytics.com/). Please browse the HUB <a href=\"https://docs.ultralytics.com/hub/\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/hub/issues/new/choose\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eRQ2ow94MiOv"
},
"source": [
"# Setup\n",
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
"[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FyDnXd-n4c7Y",
"outputId": "e1d713ec-e8a6-4422-fe61-c76ec9f03df5"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ultralytics 8.2.3 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 28.8/78.2 GB disk)\n"
]
}
],
"source": [
"%pip install ultralytics # install\n",
"from ultralytics import YOLO, checks, hub\n",
"\n",
"checks() # checks"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cQ9BwaAqxAm4"
},
"source": [
"# Start\n",
"\n",
"⚡ Login with your API key, load your YOLO 🚀 model and start training in 3 lines of code!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XSlZaJ9Iw_iZ"
},
"outputs": [],
"source": [
"# Log in to HUB using your API key (https://hub.ultralytics.com/settings?tab=api+keys)\n",
"hub.login(\"YOUR_API_KEY\")\n",
"\n",
"# Load your model from HUB (replace 'YOUR_MODEL_ID' with your model ID)\n",
"model = YOLO(\"https://hub.ultralytics.com/models/YOUR_MODEL_ID\")\n",
"\n",
"# Train the model\n",
"results = model.train()"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "Ultralytics HUB",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "PN1cAxdvd61e"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolo\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/object_counting.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolo11\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
" <a href=\"https://ultralytics.com/discord\"><img alt=\"Discord\" src=\"https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue\"></a>\n",
"\n",
"Welcome to the Ultralytics YOLO11 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLO11</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
"YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
"We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 <a href=\"https://docs.ultralytics.com/guides/object-counting/\"> Object Counting Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
"\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "o68Sg1oOeZm2"
},
"source": [
"# Setup\n",
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
"[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9dSwz_uOReMI",
"outputId": "fd3bab88-2f25-46c0-cae9-04d2beedc0c1"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ultralytics 8.2.18 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
],
"source": [
"%pip install ultralytics\n",
"import ultralytics\n",
"\n",
"ultralytics.checks()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "m7VkxQ2aeg7k"
},
"source": [
"# Object Counting using Ultralytics YOLO11 🚀\n",
"\n",
"## What is Object Counting?\n",
"\n",
"Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLO11 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and deep learning capabilities.\n",
"\n",
"## Advantages of Object Counting?\n",
"\n",
"- **Resource Optimization:** Object counting facilitates efficient resource management by providing accurate counts, and optimizing resource allocation in applications like inventory management.\n",
"- **Enhanced Security:** Object counting enhances security and surveillance by accurately tracking and counting entities, aiding in proactive threat detection.\n",
"- **Informed Decision-Making:** Object counting offers valuable insights for decision-making, optimizing processes in retail, traffic management, and various other domains.\n",
"\n",
"## Real World Applications\n",
"\n",
"| Logistics | Aquaculture |\n",
"|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------:|\n",
"| ![Conveyor Belt Packets Counting Using Ultralytics YOLO11](https://github.com/RizwanMunawar/ultralytics/assets/62513924/70e2d106-510c-4c6c-a57a-d34a765aa757) | ![Fish Counting in Sea using Ultralytics YOLO11](https://github.com/RizwanMunawar/ultralytics/assets/62513924/c60d047b-3837-435f-8d29-bb9fc95d2191) |\n",
"| Conveyor Belt Packets Counting Using Ultralytics YOLO11 | Fish Counting in Sea using Ultralytics YOLO11 |\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Cx-u59HQdu2o"
},
"outputs": [],
"source": [
"import cv2\n",
"\n",
"from ultralytics import solutions\n",
"\n",
"# Open the video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
"assert cap.isOpened(), \"Error reading video file\"\n",
"\n",
"# Get video properties: width, height, and frames per second (fps)\n",
"w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))\n",
"\n",
"# Define points for a line or region of interest in the video frame\n",
"line_points = [(20, 400), (1080, 400)] # Line coordinates\n",
"\n",
"# Initialize the video writer to save the output video\n",
"video_writer = cv2.VideoWriter(\"object_counting_output.avi\", cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (w, h))\n",
"\n",
"# Initialize the Object Counter with visualization options and other parameters\n",
"counter = solutions.ObjectCounter(\n",
" show=True, # Display the image during processing\n",
" region=line_points, # Region of interest points\n",
" model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n",
" line_width=2, # Thickness of the lines and bounding boxes\n",
")\n",
"\n",
"# Process video frames in a loop\n",
"while cap.isOpened():\n",
" success, im0 = cap.read()\n",
" if not success:\n",
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
" # Use the Object Counter to count objects in the frame and get the annotated image\n",
" im0 = counter.count(im0)\n",
"\n",
" # Write the annotated frame to the output video\n",
" video_writer.write(im0)\n",
"\n",
"# Release the video capture and writer objects\n",
"cap.release()\n",
"video_writer.release()\n",
"\n",
"# Close all OpenCV windows\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QrlKg-y3fEyD"
},
"source": [
"# Additional Resources\n",
"\n",
"## Community Support\n",
"\n",
"For more information on counting objects with Ultralytics, you can explore the comprehensive [Ultralytics Object Counting Docs](https://docs.ultralytics.com/guides/object-counting/). This guide covers everything from basic concepts to advanced techniques, ensuring you get the most out of counting and visualization.\n",
"\n",
"## Ultralytics ⚡ Resources\n",
"\n",
"At Ultralytics, we are committed to providing cutting-edge AI solutions. Here are some key resources to learn more about our company and get involved with our community:\n",
"\n",
"- [Ultralytics HUB](https://ultralytics.com/hub): Simplify your AI projects with Ultralytics HUB, our no-code tool for effortless YOLO training and deployment.\n",
"- [Ultralytics Licensing](https://ultralytics.com/license): Review our licensing terms to understand how you can use our software in your projects.\n",
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
"## YOLO11 🚀 Resources\n",
"\n",
"YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
"- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
"- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
"These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "PN1cAxdvd61e"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolo\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/object_tracking.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolo11\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
" <a href=\"https://ultralytics.com/discord\"><img alt=\"Discord\" src=\"https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue\"></a>\n",
"\n",
"Welcome to the Ultralytics YOLO11 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLO11</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
"YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
"We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 <a href=\"https://docs.ultralytics.com/modes/track/\"> Tracking Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
"\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "o68Sg1oOeZm2"
},
"source": [
"# Setup\n",
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
"[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9dSwz_uOReMI",
"outputId": "ed8c2370-8fc7-4e4e-f669-d0bae4d944e9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ultralytics 8.2.17 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
],
"source": [
"%pip install ultralytics\n",
"import ultralytics\n",
"\n",
"ultralytics.checks()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "m7VkxQ2aeg7k"
},
"source": [
"# Ultralytics Object Tracking\n",
"\n",
"[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.\n",
"\n",
"There are two types of instance segmentation tracking available in the Ultralytics package:\n",
"\n",
"- **Instance Segmentation with Class Objects:** Each class object is assigned a unique color for clear visual separation.\n",
"\n",
"- **Instance Segmentation with Object Tracks:** Every track is represented by a distinct color, facilitating easy identification and tracking.\n",
"\n",
"## Samples\n",
"\n",
"| Instance Segmentation | Instance Segmentation + Object Tracking |\n",
"|:---------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------:|\n",
"| ![Ultralytics Instance Segmentation](https://github.com/RizwanMunawar/ultralytics/assets/62513924/d4ad3499-1f33-4871-8fbc-1be0b2643aa2) | ![Ultralytics Instance Segmentation with Object Tracking](https://github.com/RizwanMunawar/ultralytics/assets/62513924/2e5c38cc-fd5c-4145-9682-fa94ae2010a0) |\n",
"| Ultralytics Instance Segmentation 😍 | Ultralytics Instance Segmentation with Object Tracking 🔥 |"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-ZF9DM6e6gz0"
},
"source": [
"## CLI\n",
"\n",
"Command-Line Interface (CLI) example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-XJqhOwo6iqT"
},
"outputs": [],
"source": [
"!yolo track source=\"/path/to/video/file.mp4\" save=True"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XRcw0vIE6oNb"
},
"source": [
"## Python\n",
"\n",
"Python Instance Segmentation and Object tracking example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Cx-u59HQdu2o"
},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"\n",
"import cv2\n",
"\n",
"from ultralytics import YOLO\n",
"from ultralytics.utils.plotting import Annotator, colors\n",
"\n",
"# Dictionary to store tracking history with default empty lists\n",
"track_history = defaultdict(lambda: [])\n",
"\n",
"# Load the YOLO model with segmentation capabilities\n",
"model = YOLO(\"yolo11n-seg.pt\")\n",
"\n",
"# Open the video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
"\n",
"# Retrieve video properties: width, height, and frames per second\n",
"w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))\n",
"\n",
"# Initialize video writer to save the output video with the specified properties\n",
"out = cv2.VideoWriter(\"instance-segmentation-object-tracking.avi\", cv2.VideoWriter_fourcc(*\"MJPG\"), fps, (w, h))\n",
"\n",
"while True:\n",
" # Read a frame from the video\n",
" ret, im0 = cap.read()\n",
" if not ret:\n",
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
" # Create an annotator object to draw on the frame\n",
" annotator = Annotator(im0, line_width=2)\n",
"\n",
" # Perform object tracking on the current frame\n",
" results = model.track(im0, persist=True)\n",
"\n",
" # Check if tracking IDs and masks are present in the results\n",
" if results[0].boxes.id is not None and results[0].masks is not None:\n",
" # Extract masks and tracking IDs\n",
" masks = results[0].masks.xy\n",
" track_ids = results[0].boxes.id.int().cpu().tolist()\n",
"\n",
" # Annotate each mask with its corresponding tracking ID and color\n",
" for mask, track_id in zip(masks, track_ids):\n",
" annotator.seg_bbox(mask=mask, mask_color=colors(int(track_id), True), label=str(track_id))\n",
"\n",
" # Write the annotated frame to the output video\n",
" out.write(im0)\n",
" # Display the annotated frame\n",
" cv2.imshow(\"instance-segmentation-object-tracking\", im0)\n",
"\n",
" # Exit the loop if 'q' is pressed\n",
" if cv2.waitKey(1) & 0xFF == ord(\"q\"):\n",
" break\n",
"\n",
"# Release the video writer and capture objects, and close all OpenCV windows\n",
"out.release()\n",
"cap.release()\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QrlKg-y3fEyD"
},
"source": [
"# Additional Resources\n",
"\n",
"## Community Support\n",
"\n",
"For more information on using tracking with Ultralytics, you can explore the comprehensive [Ultralytics Tracking Docs](https://docs.ultralytics.com/modes/track/). This guide covers everything from basic concepts to advanced techniques, ensuring you get the most out of tracking and visualization.\n",
"\n",
"## Ultralytics ⚡ Resources\n",
"\n",
"At Ultralytics, we are committed to providing cutting-edge AI solutions. Here are some key resources to learn more about our company and get involved with our community:\n",
"\n",
"- [Ultralytics HUB](https://ultralytics.com/hub): Simplify your AI projects with Ultralytics HUB, our no-code tool for effortless YOLO training and deployment.\n",
"- [Ultralytics Licensing](https://ultralytics.com/license): Review our licensing terms to understand how you can use our software in your projects.\n",
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
"## YOLO11 🚀 Resources\n",
"\n",
"YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
"- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
"- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
"These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "YOLO11 Tutorial",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "t6MPjfT5NrKQ"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolo\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolo11\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"\n",
" <a href=\"https://ultralytics.com/discord\"><img alt=\"Discord\" src=\"https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue\"></a>\n",
" <a href=\"https://community.ultralytics.com\"><img alt=\"Ultralytics Forums\" src=\"https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue\"></a>\n",
" <a href=\"https://reddit.com/r/ultralytics\"><img alt=\"Ultralytics Reddit\" src=\"https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue\"></a>\n",
"\n",
"Welcome to the Ultralytics YOLO11 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLO11</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
"YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
"We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 <a href=\"https://docs.ultralytics.com/\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
"\n",
" <a href=\"https://www.youtube.com/watch?v=ZN3nRZT7b24\" target=\"_blank\">\n",
" <img src=\"https://img.youtube.com/vi/ZN3nRZT7b24/maxresdefault.jpg\" alt=\"Ultralytics Video\" width=\"720\" style=\"border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);\"></a>\n",
" \n",
" <p style=\"font-size: 16px; font-family: Arial, sans-serif; color: #555;\">\n",
" <strong>Watch: </strong> How to Train\n",
" <a href=\"https://github.com/ultralytics/ultralytics\">Ultralytics</a>\n",
" <a href=\"https://docs.ultralytics.com/models/yolo11/\">YOLO11</a> Model on Custom Dataset using Google Colab Notebook 🚀</p>\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7mGmQbAO5pQb"
},
"source": [
"# Setup\n",
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
"[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "wbvMlHd_QwMG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2e992f9f-90bb-4668-de12-fed629975285"
},
"source": [
"%pip install ultralytics\n",
"import ultralytics\n",
"ultralytics.checks()"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 41.1/112.6 GB disk)\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JnkELT0cIJg"
},
"source": [
"# 1. Predict\n",
"\n",
"YOLO11 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLO11 Predict Docs](https://docs.ultralytics.com/modes/train/).\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zR9ZbuQCH7FX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e3ebec6f-658a-4803-d80c-e07d12908767"
},
"source": [
"# Run inference on an image with YOLO11n\n",
"!yolo predict model=yolo11n.pt source='https://ultralytics.com/images/zidane.jpg'"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...\n",
"100% 5.35M/5.35M [00:00<00:00, 72.7MB/s]\n",
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
"YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
"Downloading https://ultralytics.com/images/zidane.jpg to 'zidane.jpg'...\n",
"100% 49.2k/49.2k [00:00<00:00, 5.37MB/s]\n",
"image 1/1 /content/zidane.jpg: 384x640 2 persons, 1 tie, 63.4ms\n",
"Speed: 14.5ms preprocess, 63.4ms inference, 820.9ms postprocess per image at shape (1, 3, 384, 640)\n",
"Results saved to \u001b[1mruns/detect/predict\u001b[0m\n",
"💡 Learn more at https://docs.ultralytics.com/modes/predict\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hkAzDWJ7cWTr"
},
"source": [
"&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
"<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/212889447-69e5bdf1-5800-4e29-835e-2ed2336dede2.jpg\" width=\"600\">"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0eq1SMWl6Sfn"
},
"source": [
"# 2. Val\n",
"Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLO11 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLO11 Val Docs](https://docs.ultralytics.com/modes/val/) for more information."
]
},
{
"cell_type": "code",
"metadata": {
"id": "WQPtK1QYVaD_"
},
"source": [
"# Download COCO val\n",
"import torch\n",
"torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip') # download (780M - 5000 images)\n",
"!unzip -q tmp.zip -d datasets && rm tmp.zip # unzip"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "X58w8JLpMnjH",
"outputId": "af2a5deb-029b-466d-96a4-bd3e406987fa",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"source": [
"# Validate YOLO11n on COCO8 val\n",
"!yolo val model=yolo11n.pt data=coco8.yaml"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
"YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
"Dataset 'coco8.yaml' images not found ⚠️, missing path '/content/datasets/coco8/images/val'\n",
"Downloading https://ultralytics.com/assets/coco8.zip to '/content/datasets/coco8.zip'...\n",
"100% 433k/433k [00:00<00:00, 15.8MB/s]\n",
"Unzipping /content/datasets/coco8.zip to /content/datasets/coco8...: 100% 25/25 [00:00<00:00, 1188.35file/s]\n",
"Dataset download success ✅ (1.4s), saved to \u001b[1m/content/datasets\u001b[0m\n",
"\n",
"Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...\n",
"100% 755k/755k [00:00<00:00, 17.7MB/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 142.04it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco8/labels/val.cache\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:04<00:00, 4.75s/it]\n",
" all 4 17 0.57 0.85 0.847 0.632\n",
" person 3 10 0.557 0.6 0.585 0.272\n",
" dog 1 1 0.548 1 0.995 0.697\n",
" horse 1 2 0.531 1 0.995 0.674\n",
" elephant 1 2 0.371 0.5 0.516 0.256\n",
" umbrella 1 1 0.569 1 0.995 0.995\n",
" potted plant 1 1 0.847 1 0.995 0.895\n",
"Speed: 1.0ms preprocess, 73.8ms inference, 0.0ms loss, 561.4ms postprocess per image\n",
"Results saved to \u001b[1mruns/detect/val\u001b[0m\n",
"💡 Learn more at https://docs.ultralytics.com/modes/val\n"
]
}
]
},
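{
 "cell_type": "code",
 "metadata": {},
 "source": [
  "# Optional sketch (not part of the original notebook): validate on the full COCO val split\n",
  "# downloaded by the 'Download COCO val' cell above. 'coco.yaml' is the standard Ultralytics COCO\n",
  "# dataset config; this takes much longer than the quick COCO8 check above.\n",
  "!yolo val model=yolo11n.pt data=coco.yaml"
 ],
 "execution_count": null,
 "outputs": []
},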
{
"cell_type": "markdown",
"metadata": {
"id": "ZY2VXXXu74w5"
},
"source": [
"# 3. Train\n",
"\n",
"<p align=\"\"><a href=\"https://ultralytics.com/hub\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png\"/></a></p>\n",
"\n",
"Train YOLO11 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLO11 Train Docs](https://docs.ultralytics.com/modes/train/) for more information."
]
},
{
"cell_type": "code",
"source": [
"#@title Select YOLO11 🚀 logger {run: 'auto'}\n",
"logger = 'Comet' #@param ['Comet', 'TensorBoard']\n",
"\n",
"if logger == 'Comet':\n",
" %pip install -q comet_ml\n",
" import comet_ml; comet_ml.init()\n",
"elif logger == 'TensorBoard':\n",
" %load_ext tensorboard\n",
" %tensorboard --logdir ."
],
"metadata": {
"id": "ktegpM42AooT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1NcFxRcFdJ_O",
"outputId": "952f35f7-666f-4121-fbdf-2b3a33b28081",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"source": [
"# Train YOLO11n on COCO8 for 3 epochs\n",
"!yolo train model=yolo11n.pt data=coco8.yaml epochs=3 imgsz=640"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
"\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolo11n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train3\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] \n",
" 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] \n",
" 2 -1 1 6640 ultralytics.nn.modules.block.C3k2 [32, 64, 1, False, 0.25] \n",
" 3 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n",
" 4 -1 1 26080 ultralytics.nn.modules.block.C3k2 [64, 128, 1, False, 0.25] \n",
" 5 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n",
" 6 -1 1 87040 ultralytics.nn.modules.block.C3k2 [128, 128, 1, True] \n",
" 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] \n",
" 8 -1 1 346112 ultralytics.nn.modules.block.C3k2 [256, 256, 1, True] \n",
" 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] \n",
" 10 -1 1 249728 ultralytics.nn.modules.block.C2PSA [256, 256, 1] \n",
" 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 12 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
" 13 -1 1 111296 ultralytics.nn.modules.block.C3k2 [384, 128, 1, False] \n",
" 14 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 15 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
" 16 -1 1 32096 ultralytics.nn.modules.block.C3k2 [256, 64, 1, False] \n",
" 17 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n",
" 18 [-1, 13] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
" 19 -1 1 86720 ultralytics.nn.modules.block.C3k2 [192, 128, 1, False] \n",
" 20 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n",
" 21 [-1, 10] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
" 22 -1 1 378880 ultralytics.nn.modules.block.C3k2 [384, 256, 1, True] \n",
" 23 [16, 19, 22] 1 464912 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n",
"YOLO11n summary: 319 layers, 2,624,080 parameters, 2,624,064 gradients, 6.6 GFLOPs\n",
"\n",
"Transferred 499/499 items from pretrained weights\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/detect/train', view at http://localhost:6006/\n",
"Freezing layer 'model.23.dfl.conv.weight'\n",
"\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLO11n...\n",
"\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<?, ?it/s]\n",
"\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<?, ?it/s]\n",
"Plotting labels to runs/detect/train/labels.jpg... \n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... \n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mmodel graph visualization added ✅\n",
"Image sizes 640 train, 640 val\n",
"Using 2 dataloader workers\n",
"Logging results to \u001b[1mruns/detect/train\u001b[0m\n",
"Starting training for 3 epochs...\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
" 1/3 0.719G 1.004 3.249 1.367 30 640: 100% 1/1 [00:00<00:00, 1.16it/s]\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 5.07it/s]\n",
" all 4 17 0.58 0.85 0.849 0.631\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
" 2/3 0.715G 1.31 4.043 1.603 35 640: 100% 1/1 [00:00<00:00, 6.88it/s]\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 9.08it/s]\n",
" all 4 17 0.581 0.85 0.851 0.63\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
" 3/3 0.692G 1.134 3.174 1.599 18 640: 100% 1/1 [00:00<00:00, 6.75it/s]\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 11.60it/s]\n",
" all 4 17 0.582 0.85 0.855 0.632\n",
"\n",
"3 epochs completed in 0.003 hours.\n",
"Optimizer stripped from runs/detect/train/weights/last.pt, 5.5MB\n",
"Optimizer stripped from runs/detect/train/weights/best.pt, 5.5MB\n",
"\n",
"Validating runs/detect/train/weights/best.pt...\n",
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
"YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 23.42it/s]\n",
" all 4 17 0.579 0.85 0.855 0.615\n",
" person 3 10 0.579 0.6 0.623 0.268\n",
" dog 1 1 0.549 1 0.995 0.697\n",
" horse 1 2 0.553 1 0.995 0.675\n",
" elephant 1 2 0.364 0.5 0.528 0.261\n",
" umbrella 1 1 0.571 1 0.995 0.895\n",
" potted plant 1 1 0.857 1 0.995 0.895\n",
"Speed: 0.2ms preprocess, 4.3ms inference, 0.0ms loss, 1.2ms postprocess per image\n",
"Results saved to \u001b[1mruns/detect/train\u001b[0m\n",
"💡 Learn more at https://docs.ultralytics.com/modes/train\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# 4. Export\n",
"\n",
"Export a YOLO11 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLO11 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n",
"\n",
"- 💡 ProTip: Export to [ONNX](https://docs.ultralytics.com/integrations/onnx/) or [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) for up to 3x CPU speedup. \n",
"- 💡 ProTip: Export to [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/) for up to 5x GPU speedup.\n",
"\n",
"| Format | `format` Argument | Model | Metadata | Arguments |\n",
"|--------------------------------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------|\n",
"| [PyTorch](https://pytorch.org/) | - | `yolo11n.pt` | ✅ | - |\n",
"| [TorchScript](https://docs.ultralytics.com/integrations/torchscript) | `torchscript` | `yolo11n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |\n",
"| [ONNX](https://docs.ultralytics.com/integrations/onnx) | `onnx` | `yolo11n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |\n",
"| [OpenVINO](https://docs.ultralytics.com/integrations/openvino) | `openvino` | `yolo11n_openvino_model/` | ✅ | `imgsz`, `half`, `dynamic`, `int8`, `batch` |\n",
"| [TensorRT](https://docs.ultralytics.com/integrations/tensorrt) | `engine` | `yolo11n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |\n",
"| [CoreML](https://docs.ultralytics.com/integrations/coreml) | `coreml` | `yolo11n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |\n",
"| [TF SavedModel](https://docs.ultralytics.com/integrations/tf-savedmodel) | `saved_model` | `yolo11n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |\n",
"| [TF GraphDef](https://docs.ultralytics.com/integrations/tf-graphdef) | `pb` | `yolo11n.pb` | ❌ | `imgsz`, `batch` |\n",
"| [TF Lite](https://docs.ultralytics.com/integrations/tflite) | `tflite` | `yolo11n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n",
"| [TF Edge TPU](https://docs.ultralytics.com/integrations/edge-tpu) | `edgetpu` | `yolo11n_edgetpu.tflite` | ✅ | `imgsz` |\n",
"| [TF.js](https://docs.ultralytics.com/integrations/tfjs) | `tfjs` | `yolo11n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |\n",
"| [PaddlePaddle](https://docs.ultralytics.com/integrations/paddlepaddle) | `paddle` | `yolo11n_paddle_model/` | ✅ | `imgsz`, `batch` |\n",
"| [MNN](https://docs.ultralytics.com/integrations/mnn) | `mnn` | `yolo11n.mnn` | ✅ | `imgsz`, `batch`, `int8`, `half` |\n",
"| [NCNN](https://docs.ultralytics.com/integrations/ncnn) | `ncnn` | `yolo11n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |\n",
"| [IMX500](https://docs.ultralytics.com/integrations/sony-imx500) | `imx` | `yolov8n_imx_model/` | ✅ | `imgsz`, `int8` |"
],
"metadata": {
"id": "nPZZeNrLCQG6"
}
},
{
"cell_type": "code",
"source": [
"!yolo export model=yolo11n.pt format=torchscript"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CYIjW4igCjqD",
"outputId": "5357fa04-6749-4508-effe-8d4078533539"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Ultralytics 8.3.2 🚀 Python-3.10.12 torch-2.4.1+cu121 CPU (Intel Xeon 2.20GHz)\n",
"YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
"\u001b[34m\u001b[1mPyTorch:\u001b[0m starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB)\n",
"\n",
"\u001b[34m\u001b[1mTorchScript:\u001b[0m starting export with torch 2.4.1+cu121...\n",
"\u001b[34m\u001b[1mTorchScript:\u001b[0m export success ✅ 2.4s, saved as 'yolo11n.torchscript' (10.5 MB)\n",
"\n",
"Export complete (4.2s)\n",
"Results saved to \u001b[1m/content\u001b[0m\n",
"Predict: yolo predict task=detect model=yolo11n.torchscript imgsz=640 \n",
"Validate: yolo val task=detect model=yolo11n.torchscript imgsz=640 data=coco.yaml \n",
"Visualize: https://netron.app\n",
"💡 Learn more at https://docs.ultralytics.com/modes/export\n"
]
}
]
},
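{
 "cell_type": "code",
 "metadata": {},
 "source": [
  "# Optional sketch (not part of the original notebook): export the same model to ONNX, one of the\n",
  "# CPU-friendly formats highlighted in the ProTips above; the export is saved as 'yolo11n.onnx'.\n",
  "!yolo export model=yolo11n.pt format=onnx"
 ],
 "execution_count": null,
 "outputs": []
},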
{
"cell_type": "markdown",
"source": [
"# 5. Python Usage\n",
"\n",
"YOLO11 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLO11 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLO11 Python Docs](https://docs.ultralytics.com/usage/python/)."
],
"metadata": {
"id": "kUMOQ0OeDBJG"
}
},
{
"cell_type": "code",
"source": [
"from ultralytics import YOLO\n",
"\n",
"# Load a model\n",
"model = YOLO('yolo11n.yaml') # build a new model from scratch\n",
"model = YOLO('yolo11n.pt') # load a pretrained model (recommended for training)\n",
"\n",
"# Use the model\n",
"results = model.train(data='coco8.yaml', epochs=3) # train the model\n",
"results = model.val() # evaluate model performance on the validation set\n",
"results = model('https://ultralytics.com/images/bus.jpg') # predict on an image\n",
"results = model.export(format='onnx') # export the model to ONNX format"
],
"metadata": {
"id": "bpF9-vS_DAaf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# 6. Tasks\n",
"\n",
"YOLO11 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLO11 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n",
"\n",
"<br><img width=\"1024\" src=\"https://raw.githubusercontent.com/ultralytics/assets/main/im/banner-tasks.png\">\n"
],
"metadata": {
"id": "Phm9ccmOKye5"
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Detection\n",
"\n",
"YOLO11 _detection_ models have no suffix and are the default YOLO11 models, i.e. `yolo11n.pt` and are pretrained on COCO. See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n"
],
"metadata": {
"id": "yq26lwpYK1lq"
}
},
{
"cell_type": "code",
"source": [
"# Load YOLO11n, train it on COCO128 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
"model = YOLO('yolo11n.pt') # load a pretrained YOLO detection model\n",
"model.train(data='coco8.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
"metadata": {
"id": "8Go5qqS9LbC5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 2. Segmentation\n",
"\n",
"YOLO11 _segmentation_ models use the `-seg` suffix, i.e. `yolo11n-seg.pt` and are pretrained on COCO. See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n"
],
"metadata": {
"id": "7ZW58jUzK66B"
}
},
{
"cell_type": "code",
"source": [
"# Load YOLO11n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
"model = YOLO('yolo11n-seg.pt') # load a pretrained YOLO segmentation model\n",
"model.train(data='coco8-seg.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
"metadata": {
"id": "WFPJIQl_L5HT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 3. Classification\n",
"\n",
"YOLO11 _classification_ models use the `-cls` suffix, i.e. `yolo11n-cls.pt` and are pretrained on ImageNet. See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n"
],
"metadata": {
"id": "ax3p94VNK9zR"
}
},
{
"cell_type": "code",
"source": [
"# Load YOLO11n-cls, train it on mnist160 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
"model = YOLO('yolo11n-cls.pt') # load a pretrained YOLO classification model\n",
"model.train(data='mnist160', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
"metadata": {
"id": "5q9Zu6zlL5rS"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 4. Pose\n",
"\n",
"YOLO11 _pose_ models use the `-pose` suffix, i.e. `yolo11n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details."
],
"metadata": {
"id": "SpIaFLiO11TG"
}
},
{
"cell_type": "code",
"source": [
"# Load YOLO11n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
"model = YOLO('yolo11n-pose.pt') # load a pretrained YOLO pose model\n",
"model.train(data='coco8-pose.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
"metadata": {
"id": "si4aKFNg19vX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 4. Oriented Bounding Boxes (OBB)\n",
"\n",
"YOLO11 _OBB_ models use the `-obb` suffix, i.e. `yolo11n-obb.pt` and are pretrained on the DOTA dataset. See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for full details."
],
"metadata": {
"id": "cf5j_T9-B5F0"
}
},
{
"cell_type": "code",
"source": [
"# Load YOLO11n-obb, train it on DOTA8 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
"model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n",
"model.train(data='dota8.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/boats.jpg') # predict on an image"
],
"metadata": {
"id": "IJNKClOOB5YS"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "IEijrePND_2I"
},
"source": [
"# Appendix\n",
"\n",
"Additional content below."
]
},
{
"cell_type": "code",
"source": [
"# Pip install from source\n",
"!pip install git+https://github.com/ultralytics/ultralytics@main"
],
"metadata": {
"id": "pIdE6i8C3LYp"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Git clone and run tests on updates branch\n",
"!git clone https://github.com/ultralytics/ultralytics -b main\n",
"%pip install -qe ultralytics"
],
"metadata": {
"id": "uRKlwxSJdhd1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Run tests (Git clone only)\n",
"!pytest ultralytics/tests"
],
"metadata": {
"id": "GtPlh7mcCGZX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Validate multiple models\n",
"for x in 'nsmlx':\n",
" !yolo val model=yolo11{x}.pt data=coco.yaml"
],
"metadata": {
"id": "Wdc6t_bfzDDk"
},
"execution_count": null,
"outputs": []
}
]
}
from ultralytics import YOLO

model = YOLO('yolov13n.pt')  # Replace with the desired model scale
model.export(format="onnx", half=True)  # export to ONNX with FP16 (half-precision) weights
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Configuration file for building the Ultralytics YOLO documentation site using MkDocs.
# Provides settings to control site metadata, customize the appearance using the
# Material theme, define the navigation structure, and enable various plugins.
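# Example (assumed typical MkDocs workflow): to preview or build this site locally, install the
# theme and the plugins listed below, then run MkDocs, e.g.
#   pip install mkdocs-material "mkdocstrings[python]" mkdocs-redirects mkdocs-macros-plugin mkdocs-ultralytics-plugin
#   mkdocs serve   # live preview at http://127.0.0.1:8000
#   mkdocs build   # static site written to the site_dir configured below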
# Site metadata
site_name: Ultralytics YOLO Docs
site_description: Explore Ultralytics YOLO, a cutting-edge real-time object detection and image segmentation model for various applications and hardware platforms.
site_url: https://docs.ultralytics.com
site_author: Ultralytics
repo_url: https://github.com/ultralytics/ultralytics
edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/en/
repo_name: ultralytics/ultralytics
remote_name: https://github.com/ultralytics/docs
docs_dir: "docs/en/" # where to find the markdown files
site_dir: "site/" # where to publish to
use_directory_urls: true # don't display 'index.html' in slugs
# Theme customization
theme:
name: material
language: en
custom_dir: docs/overrides/
logo: https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Reverse.svg
favicon: https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/logo/favicon-yolo.png
icon:
repo: fontawesome/brands/github
# font: # disabled for faster page load times
# text: Helvetica
# code: Roboto Mono
palette:
- media: "(prefers-color-scheme)"
toggle:
icon: material/brightness-auto
name: Switch to light mode
- media: "(prefers-color-scheme: dark)"
scheme: slate
primary: black
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to system preference
- media: "(prefers-color-scheme: light)"
scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
features:
- content.action.edit
- content.code.annotate
- content.code.copy
- content.tooltips
- search.highlight
- search.share
- search.suggest
- toc.follow
- navigation.top
- navigation.tabs
- navigation.tabs.sticky
- navigation.prune
- navigation.footer
- navigation.tracking
- navigation.instant
- navigation.instant.progress
- navigation.indexes
- navigation.sections # navigation.expand or navigation.sections
- content.tabs.link # all code tabs change simultaneously
# Customization
copyright: <a href="https://www.ultralytics.com/" target="_blank">© 2025 Ultralytics Inc.</a> All rights reserved.
extra: # version:
homepage: https://www.ultralytics.com/
# provider: mike # version drop-down menu
robots: robots.txt
analytics:
provider: google
property: G-2M5EHKC0BH
social:
- icon: fontawesome/brands/github
link: https://github.com/ultralytics
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/company/ultralytics/
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/ultralytics
- icon: fontawesome/brands/youtube
link: https://youtube.com/ultralytics?sub_confirmation=1
- icon: fontawesome/brands/docker
link: https://hub.docker.com/r/ultralytics/ultralytics/
- icon: fontawesome/brands/python
link: https://pypi.org/project/ultralytics/
- icon: fontawesome/brands/discord
link: https://discord.com/invite/ultralytics
- icon: fontawesome/brands/reddit
link: https://reddit.com/r/ultralytics
extra_css:
- stylesheets/style.css
extra_javascript:
- javascript/extra.js
- javascript/giscus.js
markdown_extensions:
- admonition
- md_in_html
- tables
- attr_list
- def_list
- pymdownx.critic
- pymdownx.caret
- pymdownx.keys
- pymdownx.mark
- pymdownx.tilde
- pymdownx.details
- pymdownx.superfences
- pymdownx.inlinehilite
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.snippets:
base_path: ./
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- pymdownx.tabbed:
alternate_style: true
# Validation settings https://www.mkdocs.org/user-guide/configuration/#validation
validation:
nav:
omitted_files: info
not_found: warn
absolute_links: info
links:
absolute_links: relative_to_docs
anchors: warn
unrecognized_links: warn
# Primary navigation ---------------------------------------------------------------------------------------------------
nav:
- Home:
- Home: index.md
- Quickstart: quickstart.md
- Modes:
- modes/index.md
- Train: modes/train.md
- Val: modes/val.md
- Predict: modes/predict.md
- Export: modes/export.md
- Track: modes/track.md
- Benchmark: modes/benchmark.md
- Tasks:
- tasks/index.md
- Detect: tasks/detect.md
- Segment: tasks/segment.md
- Classify: tasks/classify.md
- Pose: tasks/pose.md
- OBB: tasks/obb.md
- Models:
- models/index.md
- Datasets:
- datasets/index.md
- Solutions:
- solutions/index.md
- Guides:
- guides/index.md
- YOLO11 🚀 NEW: models/yolo11.md # for promotion of new pages
- Languages:
- 🇬🇧&nbsp English: https://ultralytics.com/docs/
- 🇨🇳&nbsp 简体中文: https://docs.ultralytics.com/zh/
- 🇰🇷&nbsp 한국어: https://docs.ultralytics.com/ko/
- 🇯🇵&nbsp 日本語: https://docs.ultralytics.com/ja/
- 🇷🇺&nbsp Русский: https://docs.ultralytics.com/ru/
- 🇩🇪&nbsp Deutsch: https://docs.ultralytics.com/de/
- 🇫🇷&nbsp Français: https://docs.ultralytics.com/fr/
- 🇪🇸&nbsp Español: https://docs.ultralytics.com/es/
- 🇵🇹&nbsp Português: https://docs.ultralytics.com/pt/
- 🇮🇹&nbsp Italiano: https://docs.ultralytics.com/it/
- 🇹🇷&nbsp Türkçe: https://docs.ultralytics.com/tr/
- 🇻🇳&nbsp Tiếng Việt: https://docs.ultralytics.com/vi/
- 🇸🇦&nbsp العربية: https://docs.ultralytics.com/ar/
- Quickstart:
- quickstart.md
- Usage:
- CLI: usage/cli.md
- Python: usage/python.md
- Callbacks: usage/callbacks.md
- Configuration: usage/cfg.md
- Simple Utilities: usage/simple-utilities.md
- Advanced Customization: usage/engine.md
- Modes:
- modes/index.md
- Train: modes/train.md
- Val: modes/val.md
- Predict: modes/predict.md
- Export: modes/export.md
- Track: modes/track.md
- Benchmark: modes/benchmark.md
- Tasks:
- tasks/index.md
- Detect: tasks/detect.md
- Segment: tasks/segment.md
- Classify: tasks/classify.md
- Pose: tasks/pose.md
- OBB: tasks/obb.md
- Models:
- models/index.md
- Datasets:
- datasets/index.md
- Solutions:
- solutions/index.md
- Guides:
- guides/index.md
- Modes:
- modes/index.md
- Train: modes/train.md
- Val: modes/val.md
- Predict: modes/predict.md
- Export: modes/export.md
- Track: modes/track.md
- Benchmark: modes/benchmark.md
- Tasks:
- tasks/index.md
- Detect: tasks/detect.md
- Segment: tasks/segment.md
- Classify: tasks/classify.md
- Pose: tasks/pose.md
- OBB: tasks/obb.md
- Tasks:
- tasks/index.md
- Detect: tasks/detect.md
- Segment: tasks/segment.md
- Classify: tasks/classify.md
- Pose: tasks/pose.md
- OBB: tasks/obb.md
- Modes:
- modes/index.md
- Train: modes/train.md
- Val: modes/val.md
- Predict: modes/predict.md
- Export: modes/export.md
- Track: modes/track.md
- Benchmark: modes/benchmark.md
- Models:
- models/index.md
- YOLOv3: models/yolov3.md
- YOLOv4: models/yolov4.md
- YOLOv5: models/yolov5.md
- YOLOv6: models/yolov6.md
- YOLOv7: models/yolov7.md
- YOLOv8: models/yolov8.md
- YOLOv9: models/yolov9.md
- YOLOv10: models/yolov10.md
- YOLO11 🚀 NEW: models/yolo11.md
- SAM (Segment Anything Model): models/sam.md
- SAM 2 (Segment Anything Model 2): models/sam-2.md
- MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md
- FastSAM (Fast Segment Anything Model): models/fast-sam.md
- YOLO-NAS (Neural Architecture Search): models/yolo-nas.md
- RT-DETR (Realtime Detection Transformer): models/rtdetr.md
- YOLO-World (Real-Time Open-Vocabulary Object Detection): models/yolo-world.md
- Datasets:
- datasets/index.md
- Detection:
- datasets/detect/index.md
- Argoverse: datasets/detect/argoverse.md
- COCO: datasets/detect/coco.md
- LVIS: datasets/detect/lvis.md
- COCO8: datasets/detect/coco8.md
- GlobalWheat2020: datasets/detect/globalwheat2020.md
- Objects365: datasets/detect/objects365.md
- OpenImagesV7: datasets/detect/open-images-v7.md
- SKU-110K: datasets/detect/sku-110k.md
- VisDrone: datasets/detect/visdrone.md
- VOC: datasets/detect/voc.md
- xView: datasets/detect/xview.md
- RF100: datasets/detect/roboflow-100.md
- Brain-tumor: datasets/detect/brain-tumor.md
- African-wildlife: datasets/detect/african-wildlife.md
- Signature: datasets/detect/signature.md
- Medical-pills: datasets/detect/medical-pills.md
- Segmentation:
- datasets/segment/index.md
- COCO: datasets/segment/coco.md
- COCO8-seg: datasets/segment/coco8-seg.md
- Crack-seg: datasets/segment/crack-seg.md
- Carparts-seg: datasets/segment/carparts-seg.md
- Package-seg: datasets/segment/package-seg.md
- Pose:
- datasets/pose/index.md
- COCO: datasets/pose/coco.md
- COCO8-pose: datasets/pose/coco8-pose.md
- Tiger-pose: datasets/pose/tiger-pose.md
- Hand-keypoints: datasets/pose/hand-keypoints.md
- Dog-pose: datasets/pose/dog-pose.md
- Classification:
- datasets/classify/index.md
- Caltech 101: datasets/classify/caltech101.md
- Caltech 256: datasets/classify/caltech256.md
- CIFAR-10: datasets/classify/cifar10.md
- CIFAR-100: datasets/classify/cifar100.md
- Fashion-MNIST: datasets/classify/fashion-mnist.md
- ImageNet: datasets/classify/imagenet.md
- ImageNet-10: datasets/classify/imagenet10.md
- Imagenette: datasets/classify/imagenette.md
- Imagewoof: datasets/classify/imagewoof.md
- MNIST: datasets/classify/mnist.md
- Oriented Bounding Boxes (OBB):
- datasets/obb/index.md
- DOTAv2: datasets/obb/dota-v2.md
- DOTA8: datasets/obb/dota8.md
- Multi-Object Tracking:
- datasets/track/index.md
- Solutions 🚀 NEW:
- solutions/index.md
- Object Counting: guides/object-counting.md
- Object Cropping: guides/object-cropping.md
- Object Blurring: guides/object-blurring.md
- Workouts Monitoring: guides/workouts-monitoring.md
- Objects Counting in Regions: guides/region-counting.md
- Security Alarm System: guides/security-alarm-system.md
- Heatmaps: guides/heatmaps.md
- Instance Segmentation with Object Tracking: guides/instance-segmentation-and-tracking.md
- VisionEye Mapping: guides/vision-eye.md
- Speed Estimation: guides/speed-estimation.md
- Distance Calculation: guides/distance-calculation.md
- Queue Management: guides/queue-management.md
- Parking Management: guides/parking-management.md
- Analytics: guides/analytics.md
- Live Inference: guides/streamlit-live-inference.md
- Track Objects in Zone 🚀 NEW: guides/trackzone.md
- Guides:
- guides/index.md
- YOLO Common Issues: guides/yolo-common-issues.md
- YOLO Performance Metrics: guides/yolo-performance-metrics.md
- YOLO Thread-Safe Inference: guides/yolo-thread-safe-inference.md
- Model Deployment Options: guides/model-deployment-options.md
- K-Fold Cross Validation: guides/kfold-cross-validation.md
- Hyperparameter Tuning: guides/hyperparameter-tuning.md
- SAHI Tiled Inference: guides/sahi-tiled-inference.md
- AzureML Quickstart: guides/azureml-quickstart.md
- Conda Quickstart: guides/conda-quickstart.md
- Docker Quickstart: guides/docker-quickstart.md
- Raspberry Pi: guides/raspberry-pi.md
- NVIDIA Jetson: guides/nvidia-jetson.md
- DeepStream on NVIDIA Jetson: guides/deepstream-nvidia-jetson.md
- Triton Inference Server: guides/triton-inference-server.md
- Isolating Segmentation Objects: guides/isolating-segmentation-objects.md
- Edge TPU on Raspberry Pi: guides/coral-edge-tpu-on-raspberry-pi.md
- Viewing Inference Images in a Terminal: guides/view-results-in-terminal.md
- OpenVINO Latency vs Throughput modes: guides/optimizing-openvino-latency-vs-throughput-modes.md
- ROS Quickstart: guides/ros-quickstart.md
- Steps of a Computer Vision Project: guides/steps-of-a-cv-project.md
- Defining A Computer Vision Project's Goals: guides/defining-project-goals.md
- Data Collection and Annotation: guides/data-collection-and-annotation.md
- Preprocessing Annotated Data: guides/preprocessing_annotated_data.md
- Tips for Model Training: guides/model-training-tips.md
- Insights on Model Evaluation and Fine-Tuning: guides/model-evaluation-insights.md
- A Guide on Model Testing: guides/model-testing.md
- Best Practices for Model Deployment: guides/model-deployment-practices.md
- Maintaining Your Computer Vision Model: guides/model-monitoring-and-maintenance.md
- Explorer:
- datasets/explorer/index.md
- Explorer API: datasets/explorer/api.md
- Explorer Dashboard Demo: datasets/explorer/dashboard.md
- VOC Exploration Example: datasets/explorer/explorer.md
- YOLOv5:
- yolov5/index.md
- Quickstart: yolov5/quickstart_tutorial.md
- Environments:
- Amazon Web Services (AWS): yolov5/environments/aws_quickstart_tutorial.md
- Google Cloud (GCP): yolov5/environments/google_cloud_quickstart_tutorial.md
- AzureML: yolov5/environments/azureml_quickstart_tutorial.md
- Docker Image: yolov5/environments/docker_image_quickstart_tutorial.md
- Tutorials:
- Train Custom Data: yolov5/tutorials/train_custom_data.md
- Tips for Best Training Results: yolov5/tutorials/tips_for_best_training_results.md
- Multi-GPU Training: yolov5/tutorials/multi_gpu_training.md
- PyTorch Hub: yolov5/tutorials/pytorch_hub_model_loading.md
- TFLite, ONNX, CoreML, TensorRT Export: yolov5/tutorials/model_export.md
- Test-Time Augmentation (TTA): yolov5/tutorials/test_time_augmentation.md
- Model Ensembling: yolov5/tutorials/model_ensembling.md
- Pruning/Sparsity Tutorial: yolov5/tutorials/model_pruning_and_sparsity.md
- Hyperparameter evolution: yolov5/tutorials/hyperparameter_evolution.md
- Transfer learning with frozen layers: yolov5/tutorials/transfer_learning_with_frozen_layers.md
- Architecture Summary: yolov5/tutorials/architecture_description.md
- Roboflow Datasets: yolov5/tutorials/roboflow_datasets_integration.md
- Neural Magic's DeepSparse: yolov5/tutorials/neural_magic_pruning_quantization.md
- Comet Logging: yolov5/tutorials/comet_logging_integration.md
- ClearML Logging: yolov5/tutorials/clearml_logging_integration.md
- Integrations:
- integrations/index.md
- Amazon SageMaker: integrations/amazon-sagemaker.md
- ClearML: integrations/clearml.md
- Comet ML: integrations/comet.md
- CoreML: integrations/coreml.md
- DVC: integrations/dvc.md
- Google Colab: integrations/google-colab.md
- Gradio: integrations/gradio.md
- IBM Watsonx: integrations/ibm-watsonx.md
- JupyterLab: integrations/jupyterlab.md
- Kaggle: integrations/kaggle.md
- MLflow: integrations/mlflow.md
- Neural Magic: integrations/neural-magic.md
- ONNX: integrations/onnx.md
- OpenVINO: integrations/openvino.md
- PaddlePaddle: integrations/paddlepaddle.md
- MNN: integrations/mnn.md
- NCNN: integrations/ncnn.md
- Paperspace Gradient: integrations/paperspace.md
- Ray Tune: integrations/ray-tune.md
- Roboflow: integrations/roboflow.md
- TF GraphDef: integrations/tf-graphdef.md
- TF SavedModel: integrations/tf-savedmodel.md
- TF.js: integrations/tfjs.md
- TFLite: integrations/tflite.md
- TFLite Edge TPU: integrations/edge-tpu.md
- TensorBoard: integrations/tensorboard.md
- TensorRT: integrations/tensorrt.md
- TorchScript: integrations/torchscript.md
- VS Code: integrations/vscode.md
- Weights & Biases: integrations/weights-biases.md
- Albumentations: integrations/albumentations.md
- SONY IMX500: integrations/sony-imx500.md
- HUB:
- hub/index.md
- Web:
- hub/index.md
- Quickstart: hub/quickstart.md
- Datasets: hub/datasets.md
- Projects: hub/projects.md
- Models: hub/models.md
- Pro: hub/pro.md
- Cloud Training: hub/cloud-training.md
- Inference API: hub/inference-api.md
- Teams: hub/teams.md
- Integrations: hub/integrations.md
- App:
- hub/app/index.md
- iOS: hub/app/ios.md
- Android: hub/app/android.md
- Python SDK:
- hub/sdk/index.md
- Quickstart: hub/sdk/quickstart.md
- Model: hub/sdk/model.md
- Dataset: hub/sdk/dataset.md
- Project: hub/sdk/project.md
- Reference:
- base:
- api_client: hub/sdk/reference/base/api_client.md
- auth: hub/sdk/reference/base/auth.md
- crud_client: hub/sdk/reference/base/crud_client.md
- paginated_list: hub/sdk/reference/base/paginated_list.md
- server_clients: hub/sdk/reference/base/server_clients.md
- helpers:
- error_handler: hub/sdk/reference/helpers/error_handler.md
- exceptions: hub/sdk/reference/helpers/exceptions.md
- logger: hub/sdk/reference/helpers/logger.md
- utils: hub/sdk/reference/helpers/utils.md
- hub_client: hub/sdk/reference/hub_client.md
- modules:
- datasets: hub/sdk/reference/modules/datasets.md
- models: hub/sdk/reference/modules/models.md
- projects: hub/sdk/reference/modules/projects.md
- teams: hub/sdk/reference/modules/teams.md
- users: hub/sdk/reference/modules/users.md
- REST API:
- hub/api/index.md
- Reference:
- cfg:
- __init__: reference/cfg/__init__.md
- data:
- annotator: reference/data/annotator.md
- augment: reference/data/augment.md
- base: reference/data/base.md
- build: reference/data/build.md
- converter: reference/data/converter.md
- dataset: reference/data/dataset.md
- loaders: reference/data/loaders.md
- split_dota: reference/data/split_dota.md
- utils: reference/data/utils.md
- engine:
- exporter: reference/engine/exporter.md
- model: reference/engine/model.md
- predictor: reference/engine/predictor.md
- results: reference/engine/results.md
- trainer: reference/engine/trainer.md
- tuner: reference/engine/tuner.md
- validator: reference/engine/validator.md
- hub:
- __init__: reference/hub/__init__.md
- auth: reference/hub/auth.md
- google:
- __init__: reference/hub/google/__init__.md
- session: reference/hub/session.md
- utils: reference/hub/utils.md
- models:
- fastsam:
- model: reference/models/fastsam/model.md
- predict: reference/models/fastsam/predict.md
- utils: reference/models/fastsam/utils.md
- val: reference/models/fastsam/val.md
- nas:
- model: reference/models/nas/model.md
- predict: reference/models/nas/predict.md
- val: reference/models/nas/val.md
- rtdetr:
- model: reference/models/rtdetr/model.md
- predict: reference/models/rtdetr/predict.md
- train: reference/models/rtdetr/train.md
- val: reference/models/rtdetr/val.md
- sam:
- amg: reference/models/sam/amg.md
- build: reference/models/sam/build.md
- model: reference/models/sam/model.md
- modules:
- blocks: reference/models/sam/modules/blocks.md
- decoders: reference/models/sam/modules/decoders.md
- encoders: reference/models/sam/modules/encoders.md
- memory_attention: reference/models/sam/modules/memory_attention.md
- sam: reference/models/sam/modules/sam.md
- tiny_encoder: reference/models/sam/modules/tiny_encoder.md
- transformer: reference/models/sam/modules/transformer.md
- utils: reference/models/sam/modules/utils.md
- predict: reference/models/sam/predict.md
- utils:
- loss: reference/models/utils/loss.md
- ops: reference/models/utils/ops.md
- yolo:
- classify:
- predict: reference/models/yolo/classify/predict.md
- train: reference/models/yolo/classify/train.md
- val: reference/models/yolo/classify/val.md
- detect:
- predict: reference/models/yolo/detect/predict.md
- train: reference/models/yolo/detect/train.md
- val: reference/models/yolo/detect/val.md
- model: reference/models/yolo/model.md
- obb:
- predict: reference/models/yolo/obb/predict.md
- train: reference/models/yolo/obb/train.md
- val: reference/models/yolo/obb/val.md
- pose:
- predict: reference/models/yolo/pose/predict.md
- train: reference/models/yolo/pose/train.md
- val: reference/models/yolo/pose/val.md
- segment:
- predict: reference/models/yolo/segment/predict.md
- train: reference/models/yolo/segment/train.md
- val: reference/models/yolo/segment/val.md
- world:
- train: reference/models/yolo/world/train.md
- train_world: reference/models/yolo/world/train_world.md
- nn:
- autobackend: reference/nn/autobackend.md
- modules:
- activation: reference/nn/modules/activation.md
- block: reference/nn/modules/block.md
- conv: reference/nn/modules/conv.md
- head: reference/nn/modules/head.md
- transformer: reference/nn/modules/transformer.md
- utils: reference/nn/modules/utils.md
- tasks: reference/nn/tasks.md
- solutions:
- ai_gym: reference/solutions/ai_gym.md
- analytics: reference/solutions/analytics.md
- distance_calculation: reference/solutions/distance_calculation.md
- heatmap: reference/solutions/heatmap.md
- object_counter: reference/solutions/object_counter.md
- parking_management: reference/solutions/parking_management.md
- queue_management: reference/solutions/queue_management.md
- region_counter: reference/solutions/region_counter.md
- security_alarm: reference/solutions/security_alarm.md
- solutions: reference/solutions/solutions.md
- speed_estimation: reference/solutions/speed_estimation.md
- streamlit_inference: reference/solutions/streamlit_inference.md
- trackzone: reference/solutions/trackzone.md
- trackers:
- basetrack: reference/trackers/basetrack.md
- bot_sort: reference/trackers/bot_sort.md
- byte_tracker: reference/trackers/byte_tracker.md
- track: reference/trackers/track.md
- utils:
- gmc: reference/trackers/utils/gmc.md
- kalman_filter: reference/trackers/utils/kalman_filter.md
- matching: reference/trackers/utils/matching.md
- utils:
- __init__: reference/utils/__init__.md
- autobatch: reference/utils/autobatch.md
- benchmarks: reference/utils/benchmarks.md
- callbacks:
- base: reference/utils/callbacks/base.md
- clearml: reference/utils/callbacks/clearml.md
- comet: reference/utils/callbacks/comet.md
- dvc: reference/utils/callbacks/dvc.md
- hub: reference/utils/callbacks/hub.md
- mlflow: reference/utils/callbacks/mlflow.md
- neptune: reference/utils/callbacks/neptune.md
- raytune: reference/utils/callbacks/raytune.md
- tensorboard: reference/utils/callbacks/tensorboard.md
- wb: reference/utils/callbacks/wb.md
- checks: reference/utils/checks.md
- dist: reference/utils/dist.md
- downloads: reference/utils/downloads.md
- errors: reference/utils/errors.md
- files: reference/utils/files.md
- instance: reference/utils/instance.md
- loss: reference/utils/loss.md
- metrics: reference/utils/metrics.md
- ops: reference/utils/ops.md
- patches: reference/utils/patches.md
- plotting: reference/utils/plotting.md
- tal: reference/utils/tal.md
- torch_utils: reference/utils/torch_utils.md
- triton: reference/utils/triton.md
- tuner: reference/utils/tuner.md
- Help:
- Help: help/index.md
- Frequently Asked Questions (FAQ): help/FAQ.md
- Contributing Guide: help/contributing.md
- Continuous Integration (CI) Guide: help/CI.md
- Contributor License Agreement (CLA): help/CLA.md
- Minimum Reproducible Example (MRE) Guide: help/minimum-reproducible-example.md
- Code of Conduct: help/code-of-conduct.md
- Environmental, Health and Safety (EHS) Policy: help/environmental-health-safety.md
- Security Policy: help/security.md
- Privacy Policy: help/privacy.md
# Plugins including 301 redirects navigation ---------------------------------------------------------------------------
plugins:
- macros
# - search:
# lang: en
- mkdocstrings:
enabled: true
default_handler: python
handlers:
python:
options:
docstring_options:
ignore_init_summary: true
merge_init_into_class: true
docstring_style: google
show_root_heading: true
show_source: true
separate_signature: true
line_length: 80
show_signature_annotations: true
show_symbol_type_heading: true # insiders
show_symbol_type_toc: true # insiders
show_inheritance_diagram: true # insiders
- ultralytics:
add_desc: False
add_image: True
add_authors: True
add_json_ld: True
add_share_buttons: True
add_css: False
default_image: https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png
- redirects:
redirect_maps:
hi/index.md: index.md
nl/index.md: index.md
callbacks.md: usage/callbacks.md
cfg.md: usage/cfg.md
cli.md: usage/cli.md
config.md: usage/cfg.md
engine.md: usage/engine.md
environments/AWS-Quickstart.md: yolov5/environments/aws_quickstart_tutorial.md
environments/Docker-Quickstart.md: yolov5/environments/docker_image_quickstart_tutorial.md
environments/GCP-Quickstart.md: yolov5/environments/google_cloud_quickstart_tutorial.md
FAQ/augmentation.md: yolov5/tutorials/tips_for_best_training_results.md
package-framework.md: index.md
package-framework/mock_detector.md: index.md
predict.md: modes/predict.md
python.md: usage/python.md
quick-start.md: quickstart.md
app.md: hub/app/index.md
sdk.md: index.md
hub/inference_api.md: hub/inference-api.md
usage/hyperparameter_tuning.md: integrations/ray-tune.md
models/sam2.md: models/sam-2.md
reference/base_pred.md: reference/engine/predictor.md
reference/base_trainer.md: reference/engine/trainer.md
reference/exporter.md: reference/engine/exporter.md
reference/model.md: reference/engine/model.md
reference/nn.md: reference/nn/modules/head.md
reference/ops.md: reference/utils/ops.md
reference/results.md: reference/engine/results.md
reference/base_val.md: index.md
reference/index.md: reference/cfg/__init__.md
tasks/classification.md: tasks/classify.md
tasks/detection.md: tasks/detect.md
tasks/segmentation.md: tasks/segment.md
tasks/keypoints.md: tasks/pose.md
tasks/tracking.md: modes/track.md
SECURITY.md: help/security.md
help/minimum_reproducible_example.md: help/minimum-reproducible-example.md
help/code_of_conduct.md: help/code-of-conduct.md
tutorials/architecture-summary.md: yolov5/tutorials/architecture_description.md
tutorials/clearml-logging.md: yolov5/tutorials/clearml_logging_integration.md
tutorials/comet-logging.md: yolov5/tutorials/comet_logging_integration.md
tutorials/hyperparameter-evolution.md: yolov5/tutorials/hyperparameter_evolution.md
tutorials/model-ensembling.md: yolov5/tutorials/model_ensembling.md
tutorials/multi-gpu-training.md: yolov5/tutorials/multi_gpu_training.md
tutorials/nvidia-jetson.md: guides/nvidia-jetson.md
tutorials/pruning-sparsity.md: yolov5/tutorials/model_pruning_and_sparsity.md
tutorials/pytorch-hub.md: yolov5/tutorials/pytorch_hub_model_loading.md
tutorials/roboflow.md: yolov5/tutorials/roboflow_datasets_integration.md
tutorials/test-time-augmentation.md: yolov5/tutorials/test_time_augmentation.md
tutorials/torchscript-onnx-coreml-export.md: yolov5/tutorials/model_export.md
tutorials/train-custom-datasets.md: yolov5/tutorials/train_custom_data.md
tutorials/training-tips-best-results.md: yolov5/tutorials/tips_for_best_training_results.md
tutorials/transfer-learning-froze-layers.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md
tutorials/weights-and-biasis-logging.md: yolov5/tutorials/comet_logging_integration.md
yolov5/pytorch_hub.md: yolov5/tutorials/pytorch_hub_model_loading.md
yolov5/hyp_evolution.md: yolov5/tutorials/hyperparameter_evolution.md
yolov5/pruning_sparsity.md: yolov5/tutorials/model_pruning_and_sparsity.md
yolov5/roboflow.md: yolov5/tutorials/roboflow_datasets_integration.md
yolov5/comet.md: yolov5/tutorials/comet_logging_integration.md
yolov5/clearml.md: yolov5/tutorials/clearml_logging_integration.md
yolov5/tta.md: yolov5/tutorials/test_time_augmentation.md
yolov5/multi_gpu_training.md: yolov5/tutorials/multi_gpu_training.md
yolov5/ensemble.md: yolov5/tutorials/model_ensembling.md
yolov5/jetson_nano.md: guides/nvidia-jetson.md
yolov5/transfer_learn_frozen.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md
yolov5/neural_magic.md: yolov5/tutorials/neural_magic_pruning_quantization.md
yolov5/train_custom_data.md: yolov5/tutorials/train_custom_data.md
yolov5/architecture.md: yolov5/tutorials/architecture_description.md
yolov5/export.md: yolov5/tutorials/model_export.md
yolov5/yolov5_quickstart_tutorial.md: yolov5/quickstart_tutorial.md
yolov5/tips_for_best_training_results.md: yolov5/tutorials/tips_for_best_training_results.md
yolov5/tutorials/yolov5_neural_magic_tutorial.md: yolov5/tutorials/neural_magic_pruning_quantization.md
yolov5/tutorials/model_ensembling_tutorial.md: yolov5/tutorials/model_ensembling.md
yolov5/tutorials/pytorch_hub_tutorial.md: yolov5/tutorials/pytorch_hub_model_loading.md
yolov5/tutorials/yolov5_architecture_tutorial.md: yolov5/tutorials/architecture_description.md
yolov5/tutorials/multi_gpu_training_tutorial.md: yolov5/tutorials/multi_gpu_training.md
yolov5/tutorials/yolov5_pytorch_hub_tutorial.md: yolov5/tutorials/pytorch_hub_model_loading.md
yolov5/tutorials/model_export_tutorial.md: yolov5/tutorials/model_export.md
yolov5/tutorials/jetson_nano_tutorial.md: guides/nvidia-jetson.md
yolov5/tutorials/yolov5_model_ensembling_tutorial.md: yolov5/tutorials/model_ensembling.md
yolov5/tutorials/roboflow_integration.md: yolov5/tutorials/roboflow_datasets_integration.md
yolov5/tutorials/pruning_and_sparsity_tutorial.md: yolov5/tutorials/model_pruning_and_sparsity.md
yolov5/tutorials/yolov5_transfer_learning_with_frozen_layers_tutorial.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md
yolov5/tutorials/transfer_learning_with_frozen_layers_tutorial.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md
yolov5/tutorials/yolov5_model_export_tutorial.md: yolov5/tutorials/model_export.md
yolov5/tutorials/neural_magic_tutorial.md: yolov5/tutorials/neural_magic_pruning_quantization.md
yolov5/tutorials/yolov5_clearml_integration_tutorial.md: yolov5/tutorials/clearml_logging_integration.md
yolov5/tutorials/yolov5_train_custom_data.md: yolov5/tutorials/train_custom_data.md
yolov5/tutorials/comet_integration_tutorial.md: yolov5/tutorials/comet_logging_integration.md
yolov5/tutorials/yolov5_pruning_and_sparsity_tutorial.md: yolov5/tutorials/model_pruning_and_sparsity.md
yolov5/tutorials/yolov5_jetson_nano_tutorial.md: guides/nvidia-jetson.md
yolov5/tutorials/running_on_jetson_nano.md: guides/nvidia-jetson.md
yolov5/tutorials/yolov5_roboflow_integration.md: yolov5/tutorials/roboflow_datasets_integration.md
yolov5/tutorials/hyperparameter_evolution_tutorial.md: yolov5/tutorials/hyperparameter_evolution.md
yolov5/tutorials/yolov5_hyperparameter_evolution_tutorial.md: yolov5/tutorials/hyperparameter_evolution.md
yolov5/tutorials/clearml_integration_tutorial.md: yolov5/tutorials/clearml_logging_integration.md
yolov5/tutorials/test_time_augmentation_tutorial.md: yolov5/tutorials/test_time_augmentation.md
yolov5/tutorials/yolov5_test_time_augmentation_tutorial.md: yolov5/tutorials/test_time_augmentation.md
yolov5/environments/yolov5_amazon_web_services_quickstart_tutorial.md: yolov5/environments/aws_quickstart_tutorial.md
yolov5/environments/yolov5_google_cloud_platform_quickstart_tutorial.md: yolov5/environments/google_cloud_quickstart_tutorial.md
yolov5/environments/yolov5_docker_image_quickstart_tutorial.md: yolov5/environments/docker_image_quickstart_tutorial.md
reference/data/explorer/explorer.md: datasets/explorer/index.md
reference/data/explorer/gui/dash.md: datasets/explorer/index.md
reference/data/explorer/utils.md: datasets/explorer/index.md