Commit 7a650e36 authored by mashun1

yolov5-qat
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
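A quick sketch of how parse_model() (defined in models/yolo.py later in this commit) applies the multiples above: block repeat counts are scaled by depth_multiple, and channel counts by width_multiple rounded up to a multiple of 8. The make_divisible helper below mirrors the YOLOv5 utility; the layer picked is the C3Ghost stage from this backbone.

import math

def make_divisible(x, divisor=8):
    # round up to the nearest multiple of `divisor` (YOLOv5 channel granularity)
    return math.ceil(x / divisor) * divisor

depth_multiple, width_multiple = 0.33, 0.50

# backbone entry [-1, 9, C3Ghost, [512]] after scaling:
n = max(round(9 * depth_multiple), 1)         # 3 repeats
c2 = make_divisible(512 * width_multiple, 8)  # 256 output channels
print(n, c2)  # 3 256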
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
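To instantiate any of these variant configs, DetectionModel (defined in models/yolo.py below) accepts the YAML path directly. A minimal sketch, assuming the transformer config above is saved as models/hub/yolov5s-transformer.yaml (the path is illustrative):

import torch
from models.yolo import DetectionModel

model = DetectionModel(cfg="models/hub/yolov5s-transformer.yaml", ch=3, nc=80)
model.eval()
with torch.no_grad():
    pred, _ = model(torch.zeros(1, 3, 640, 640))
print(pred.shape)  # (1, 25200, 85): 3 anchors x (80^2 + 40^2 + 20^2) cells, 85 = 5 + 80 classes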
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
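This P6 variant adds a fourth detection level at stride 64, which is why the anchor list above has four rows. A worked sketch of the per-image prediction count, assuming the 1280x1280 input size P6 models are typically run at:

strides = (8, 16, 32, 64)  # P3, P4, P5, P6 from the Detect line above
na, imgsz = 3, 1280        # 3 anchors per level (each anchor row is 3 w,h pairs)
cells = [(imgsz // s) ** 2 for s in strides]  # [25600, 6400, 1600, 400]
print(sum(cells) * na)     # 102000 raw predictions before NMS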
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
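Relative to the detection configs above, the only change is the final Segment layer, whose extra args (32, 256) are nm=32 mask coefficients per box and npr=256 proto channels (see the Segment class in models/yolo.py below). A minimal sketch of building this large (1.0/1.0) variant; the cfg path is illustrative:

import torch
from models.yolo import SegmentationModel

model = SegmentationModel(cfg="models/segment/yolov5l-seg.yaml", ch=3, nc=80)
model.eval()
with torch.no_grad():
    pred, proto, _ = model(torch.zeros(1, 3, 640, 640))
print(pred.shape, proto.shape)  # (1, 25200, 117) and (1, 32, 160, 160); 117 = 5 + 80 classes + 32 mask coeffs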
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (
C3,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3x,
Concat,
Conv,
CrossConv,
DWConv,
DWConvTranspose2d,
Focus,
autopad,
)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
"""Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps,
)
def call(self, inputs):
"""Applies batch normalization to the inputs."""
return self.bn(inputs)
class TFPad(keras.layers.Layer):
# Pad inputs in spatial dimensions 1 and 2
def __init__(self, pad):
"""
Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and tuple
inputs.
Inputs are an int (symmetric padding) or a 2-element tuple/list of (pad_h, pad_w).
"""
super().__init__()
if isinstance(pad, int):
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
else: # tuple/list
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
def call(self, inputs):
"""Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""
Initializes a standard convolution layer with optional batch normalization and activation; supports only
group=1.
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding="SAME" if s == 1 else "VALID",
use_bias=not hasattr(w, "bn"),
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
)
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
"""Applies convolution, batch normalization, and activation function to input tensors."""
return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
# Depthwise convolution
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
"""
Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
models.
Input are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
conv = keras.layers.DepthwiseConv2D(
kernel_size=k,
depth_multiplier=c2 // c1,
strides=s,
padding="SAME" if s == 1 else "VALID",
use_bias=not hasattr(w, "bn"),
depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
)
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
"""Applies convolution, batch normalization, and activation function to input tensors."""
return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
# Depthwise ConvTranspose2d
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
"""
Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert c1 == c2, f"TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels"
assert k == 4 and p1 == 1, "TFDWConvTranspose2d() only valid for k=4 and p1=1"
weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
self.c1 = c1
self.conv = [
keras.layers.Conv2DTranspose(
filters=1,
kernel_size=k,
strides=s,
padding="VALID",
output_padding=p2,
use_bias=True,
kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
bias_initializer=keras.initializers.Constant(bias[i]),
)
for i in range(c1)
]
def call(self, inputs):
"""Processes input through parallel convolutions and concatenates results, trimming border pixels."""
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""
Initializes TFFocus layer to focus width and height information into channel space with custom convolution
parameters.
Inputs are ch_in, ch_out, kernel, stride, padding, groups.
"""
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs):
"""
Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4.
Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
"""
inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
return self.conv(tf.concat(inputs, 3))
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with optional
shortcut.
Arguments are ch_in, ch_out, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
"""Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
result.
"""
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
# Cross Convolution
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
"""Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
"""Passes input through two convolutions optionally adding the input if channel dimensions match."""
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2D
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
"""Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
sizes and stride.
"""
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding="VALID",
use_bias=bias,
kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
)
def call(self, inputs):
"""Applies a convolution operation to the inputs and returns the result."""
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
ratio.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
"""Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
dimensions.
"""
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
"""
Processes input through a sequence of transformations for object detection (YOLOv5).
See https://github.com/ultralytics/yolov5.
"""
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFC3x(keras.layers.Layer):
# 3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential(
[TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
)
def call(self, inputs):
"""Processes input through cascaded convolutions and merges features, returning the final tensor output."""
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
"""Initializes a YOLOv3-SPP layer with specific input/output channels and kernel sizes for pooling."""
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding="SAME") for x in k]
def call(self, inputs):
"""Processes input through two TFConv layers and concatenates with max-pooled outputs at intermediate stage."""
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
"""Initializes a fast spatial pyramid pooling layer with customizable in/out channels, kernel size, and
weights.
"""
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
def call(self, inputs):
"""Executes the model's forward pass, concatenating input features with three max-pooled versions before final
convolution.
"""
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
# TF YOLOv5 Detect layer
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
"""Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
size.
"""
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
"""Performs forward pass through the model layers to predict object bounding boxes and classifications."""
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
@staticmethod
def _make_grid(nx=20, ny=20):
"""Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
"""Initializes YOLOv5 Segment head with specified channel depths, anchors, and input size for segmentation
models.
"""
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
"""Applies detection and proto layers on input, returning detections and optionally protos if training."""
p = self.proto(x[0])
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
"""Initializes TFProto layer with convolutional and upsampling layers for feature extraction and
transformation.
"""
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
"""Performs forward pass through the model, applying convolutions and upscaling on input tensor."""
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None):
"""
Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor is
even.
Warning: all arguments are required, including 'w'.
"""
super().__init__()
assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
"""Applies upsample operation to inputs using nearest neighbor interpolation."""
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
# TF version of torch.concat()
def __init__(self, dimension=1, w=None):
"""Initializes a TensorFlow layer for NCHW to NHWC concatenation, requiring dimension=1."""
super().__init__()
assert dimension == 1, "convert only NCHW to NHWC concat"
self.d = 3
def call(self, inputs):
"""Concatenates a list of tensors along the last dimension, used for NCHW to NHWC conversion."""
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz):
"""Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments."""
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, ch_mul = (
d["anchors"],
d["nc"],
d["depth_multiple"],
d["width_multiple"],
d.get("channel_multiple"),
)
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
if not ch_mul:
ch_mul = 8
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [
nn.Conv2d,
Conv,
DWConv,
DWConvTranspose2d,
Bottleneck,
SPP,
SPPF,
MixConv2d,
Focus,
CrossConv,
BottleneckCSP,
C3,
C3x,
]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3x]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, ch_mul)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval("TF" + m_str.replace("nn.", ""))
m_ = (
keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
if n > 1
else tf_m(*args, w=model.model[i])
) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace("__main__.", "") # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f"{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}") # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
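# Note (sketch of an implementation detail): unlike parse_model() in models/yolo.py, which resets `ch` to []
# after layer 0, this TF variant keeps the original input channels at ch[0]; that is why the Concat and
# Detect/Segment branches above index the channel list with a +1 offset (ch[x + 1]).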
class TFModel:
# TF YOLOv5 model
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
"""Initializes TF YOLOv5 model with specified configuration, channels, classes, model instance, and input
size.
"""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(
self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25,
):
y = [] # outputs
x = inputs
for m in self.model.layers:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(
boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
)
return (nms,)
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
"""Converts bounding box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom-
right.
"""
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
# TF Agnostic NMS
def call(self, input, topk_all, iou_thres, conf_thres):
"""Performs agnostic NMS on input tensors using given thresholds and top-K selection."""
return tf.map_fn(
lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name="agnostic_nms",
)
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
"""Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence
thresholds.
"""
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(
boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(
selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode="CONSTANT",
constant_values=0.0,
)
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(
selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0,
)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(
selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0,
)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
"""Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish."""
if isinstance(act, nn.LeakyReLU):
return lambda x: keras.activations.relu(x, alpha=0.1)
elif isinstance(act, nn.Hardswish):
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
elif isinstance(act, (nn.SiLU, SiLU)):
return lambda x: keras.activations.swish(x)
else:
raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
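# Note: 0.166666667 above approximates 1/6, i.e. Hardswish(x) = x * ReLU6(x + 3) / 6, and the SiLU/Swish
# branch maps onto keras.activations.swish(x) = x * sigmoid(x).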
def representative_dataset_gen(dataset, ncalib=100):
"""Generates a representative dataset for calibration by yielding transformed numpy arrays from the input
dataset.
"""
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
im = np.transpose(img, [1, 2, 0])
im = np.expand_dims(im, axis=0).astype(np.float32)
im /= 255
yield [im]
if n >= ncalib:
break
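# Example (sketch, not executed here): this generator is what feeds TFLite post-training int8 calibration.
# Assuming a LoadImages-style `dataset` and a built `keras_model`, a typical converter setup looks roughly like:
#
#   converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
#   converter.optimizations = [tf.lite.Optimize.DEFAULT]
#   converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
#   converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
#   converter.inference_input_type = tf.uint8  # or tf.int8 / tf.float32 depending on deployment
#   converter.inference_output_type = tf.uint8
#   tflite_model = converter.convert()
#
# export.py drives the real export; the lines above only illustrate how the generator is consumed.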
def run(
weights=ROOT / "yolov5s.pt", # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
def parse_opt():
"""Parses and returns command-line options for model inference, including weights path, image size, batch size, and
dynamic batching.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--batch-size", type=int, default=1, help="batch size")
parser.add_argument("--dynamic", action="store_true", help="dynamic batch size")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""Executes the YOLOv5 model run function with parsed command line options."""
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
YOLO-specific modules.
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import math
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
import torch
import torch.nn as nn
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != "Windows":
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import (
C3,
C3SPP,
C3TR,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3Ghost,
C3x,
Classify,
Concat,
Contract,
Conv,
CrossConv,
DetectMultiBackend,
DWConv,
DWConvTranspose2d,
Expand,
Focus,
GhostBottleneck,
GhostConv,
Proto,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (
fuse_conv_and_bn,
initialize_weights,
model_info,
profile,
scale_img,
select_device,
time_sync,
)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
"""Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment)
def forward(self, x):
"""Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
if isinstance(self, Segment): # (boxes + masks)
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
else: # Detect (boxes only)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, self.na * nx * ny, self.no))
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
"""Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x) # 'indexing' arg requires torch>=1.10; fall back for older versions
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
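# A worked sketch of the decode above: _make_grid() folds the -0.5 offset into `grid` and scales
# `anchor_grid` to pixels (anchors * stride), so for a cell at integer coords (cx, cy) on a stride-8 level:
#   xy = (sigmoid(tx, ty) * 2 + grid) * 8          # centre offset spans (-0.5, 1.5) cells from (cx, cy)
#   wh = (sigmoid(tw, th) * 2) ** 2 * anchor_grid  # width/height bounded to (0, 4) x the anchor size
# Sigmoid outputs of 0.5 recover the cell centre and the anchor dimensions exactly.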
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
"""Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
"""Processes input through the network, returning detections and prototypes; adjusts output based on
training/export mode.
"""
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
"""YOLOv5 base model."""
def forward(self, x, profile=False, visualize=False):
"""Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
visualization.
"""
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
"""Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
"""Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self):
"""Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
LOGGER.info("Fusing layers... ")
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, "bn") # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640):
"""Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
model_info(self, verbose, img_size)
def _apply(self, fn):
"""Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
buffers.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
"""Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding="ascii", errors="ignore") as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
if anchors:
LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
self.yaml["anchors"] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml["nc"])] # default names
self.inplace = self.yaml.get("inplace", True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
s = 256 # 2x min stride
m.inplace = self.inplace
forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info("")
def forward(self, x, augment=False, profile=False, visualize=False):
"""Performs single-scale or augmented inference and may include profiling or visualization."""
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
"""Performs augmented inference across different scales and flips, returning combined detections."""
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
"""De-scales predictions from augmented inference, adjusting for flips and image size."""
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
"""Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
layer counts.
"""
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4**x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None):
"""
Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
For details see https://arxiv.org/abs/1708.02002 section 3.3.
"""
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5 : 5 + m.nc] += (
math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
"""Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list)."""
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
"""Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cuttoff`
index.
"""
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
"""Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
layer.
"""
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
def _from_yaml(self, cfg):
"""Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
self.model = None
def parse_model(d, ch):
# ch: running list of per-layer output channel counts (starts as the input channels)
"""Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, act, ch_mul = (
d["anchors"],
d["nc"],
d["depth_multiple"],
d["width_multiple"],
d.get("activation"),
d.get("channel_multiple"),
)
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
if not ch_mul:
ch_mul = 8
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5); the 5 = objectness score, box centre offsets (x, y), width and height
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings module
for j, a in enumerate(args):
with contextlib.suppress(NameError):
args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in {
Conv,
GhostConv,
Bottleneck,
GhostBottleneck,
SPP,
SPPF,
DWConv,
MixConv2d,
Focus,
CrossConv,
BottleneckCSP,
C3,
C3TR,
C3SPP,
C3Ghost,
nn.ConvTranspose2d,
DWConvTranspose2d,
C3x,
}:
c1, c2 = ch[f], args[0] # input channels, output channels
if c2 != no: # if not the final Detect/Segment output layer
c2 = make_divisible(c2 * gw, ch_mul) # scale output channels by the width multiple, rounded to a multiple of ch_mul
args = [c1, c2, *args[1:]]
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
# TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, ch_mul)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace("__main__.", "") # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}") # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
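# Worked example of one parse_model() step (sketch): with yolov5s multiples gd=0.33, gw=0.50 and ch_mul=8,
# the backbone entry [-1, 6, C3, [256]] becomes
#   n  = max(round(6 * 0.33), 1)       = 2   repeats (inserted into args, so the C3 builds 2 bottlenecks)
#   c2 = make_divisible(256 * 0.50, 8) = 128 output channels
# giving C3(c1=128, c2=128, n=2), with 128 appended to `ch` for downstream layers to consume.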
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--profile", action="store_true", help="profile model speed")
parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
try:
_ = Model(cfg)
except Exception as e:
print(f"Error in {cfg}: {e}")
else: # report fused model summary
model.fuse()
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8 (anchor width, height pairs)
- [30, 61, 62, 45, 59, 119] # P4/16 (anchor width, height pairs)
- [116, 90, 156, 198, 373, 326] # P5/32 (anchor width, height pairs)
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# Ultralytics YOLOv5 🚀, AGPL-3.0 license
# Overview:
# This pyproject.toml file manages the build, packaging, and distribution of the Ultralytics library.
# It defines essential project metadata, dependencies, and settings used to develop and deploy the library.
# Key Sections:
# - [build-system]: Specifies the build requirements and backend (e.g., setuptools, wheel).
# - [project]: Includes details like name, version, description, authors, dependencies and more.
# - [project.optional-dependencies]: Provides additional, optional packages for extended features.
# - [tool.*]: Configures settings for various tools (pytest, yapf, etc.) used in the project.
# Installation:
# The Ultralytics library can be installed using the command: 'pip install ultralytics'
# For development purposes, you can install the package in editable mode with: 'pip install -e .'
# This approach allows for real-time code modifications without the need for re-installation.
# Documentation:
# For comprehensive documentation and usage instructions, visit: https://docs.ultralytics.com
[build-system]
requires = ["setuptools>=43.0.0", "wheel"]
build-backend = "setuptools.build_meta"
# Project settings -----------------------------------------------------------------------------------------------------
[project]
name = "YOLOv5"
description = "Ultralytics YOLOv5 for SOTA object detection, instance segmentation and image classification."
readme = "README.md"
requires-python = ">=3.8"
license = { "text" = "AGPL-3.0" }
keywords = ["machine-learning", "deep-learning", "computer-vision", "ML", "DL", "AI", "YOLO", "YOLOv3", "YOLOv5", "YOLOv8", "HUB", "Ultralytics"]
authors = [
{ name = "Glenn Jocher" },
{ name = "Ayush Chaurasia" },
{ name = "Jing Qiu" }
]
maintainers = [
{ name = "Glenn Jocher" },
{ name = "Ayush Chaurasia" },
{ name = "Jing Qiu" }
]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Software Development",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Image Recognition",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS",
"Operating System :: Microsoft :: Windows",
]
# Required dependencies ------------------------------------------------------------------------------------------------
dependencies = [
"matplotlib>=3.3.0",
"numpy>=1.22.2",
"opencv-python>=4.6.0",
"pillow>=7.1.2",
"pyyaml>=5.3.1",
"requests>=2.23.0",
"scipy>=1.4.1",
"torch>=1.8.0",
"torchvision>=0.9.0",
"tqdm>=4.64.0", # progress bars
"psutil", # system utilization
"py-cpuinfo", # display CPU info
"thop>=0.1.1", # FLOPs computation
"pandas>=1.1.4",
"seaborn>=0.11.0", # plotting
"ultralytics>=8.0.232"
]
# Optional dependencies ------------------------------------------------------------------------------------------------
[project.optional-dependencies]
dev = [
"ipython",
"check-manifest",
"pre-commit",
"pytest",
"pytest-cov",
"coverage[toml]",
"mkdocs-material",
"mkdocstrings[python]",
"mkdocs-redirects", # for 301 redirects
"mkdocs-ultralytics-plugin>=0.0.34", # for meta descriptions and images, dates and authors
]
export = [
"onnx>=1.12.0", # ONNX export
"coremltools>=7.0; platform_system != 'Windows'", # CoreML only supported on macOS and Linux
"openvino-dev>=2023.0", # OpenVINO export
"tensorflow<=2.13.1", # TF bug https://github.com/ultralytics/ultralytics/issues/5161
"tensorflowjs>=3.9.0", # TF.js export, automatically installs tensorflow
]
# tensorflow>=2.4.1,<=2.13.1 # TF exports (-cpu, -aarch64, -macos)
# tflite-support # for TFLite model metadata
# scikit-learn==0.19.2 # CoreML quantization
# nvidia-pyindex # TensorRT export
# nvidia-tensorrt # TensorRT export
logging = [
"comet", # https://docs.ultralytics.com/integrations/comet/
"tensorboard>=2.13.0",
"dvclive>=2.12.0",
]
extra = [
"ipython", # interactive notebook
"albumentations>=1.0.3", # training augmentations
"pycocotools>=2.0.6", # COCO mAP
]
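# Example (assumption): the optional extras defined above can be installed together with the package,
# e.g. 'pip install -e ".[dev,export]"' for a development setup with export support.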
[project.urls]
"Bug Reports" = "https://github.com/ultralytics/yolov5/issues"
"Funding" = "https://ultralytics.com"
"Source" = "https://github.com/ultralytics/yolov5/"
# Tools settings -------------------------------------------------------------------------------------------------------
[tool.pytest]
norecursedirs = [".git", "dist", "build"]
addopts = "--doctest-modules --durations=30 --color=yes"
[tool.isort]
line_length = 120
multi_line_output = 0
[tool.ruff]
line-length = 120
[tool.docformatter]
wrap-summaries = 120
wrap-descriptions = 120
in-place = true
pre-summary-newline = true
close-quotes-on-newline = true
[tool.codespell]
ignore-words-list = "crate,nd,strack,dota,ane,segway,fo,gool,winn,commend"
skip = '*.csv,*venv*,docs/??/,docs/mkdocs_??.yml'
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import os
import re
from typing import List, Callable, Union, Dict
from tqdm import tqdm
from copy import deepcopy
# PyTorch
import torch
import torch.optim as optim
from torch.cuda import amp
# Pytorch Quantization
from pytorch_quantization import nn as quant_nn
from pytorch_quantization.nn.modules import _utils
from pytorch_quantization.nn.modules import _utils as quant_nn_utils
from pytorch_quantization import calib
from pytorch_quantization.tensor_quant import QuantDescriptor
from pytorch_quantization import quant_modules
from pytorch_quantization import tensor_quant
from absl import logging as quant_logging
import models
# Custom Rules
from quantization.rules import find_quantizer_pairs
class QuantConcat(torch.nn.Module, _utils.QuantInputMixin):
def __init__(self, dimension=1):
super(QuantConcat, self).__init__()
self._input_quantizer = quant_nn.TensorQuantizer(QuantDescriptor(num_bits=8, calib_method="histogram"))
self._input_quantizer._calibrator._torch_hist = True # use the torch-based histogram for faster calibration
self.dimension = dimension
def forward(self, inputs):
inputs = [self._input_quantizer(input) for input in inputs]
return torch.cat(inputs, self.dimension)
class QuantSiLU(torch.nn.Module, _utils.QuantInputMixin):
def __init__(self, **kwargs):
super(QuantSiLU, self).__init__()
self._input0_quantizer = quant_nn.TensorQuantizer(QuantDescriptor(num_bits=8, calib_method="histogram"))
self._input1_quantizer = quant_nn.TensorQuantizer(QuantDescriptor(num_bits=8, calib_method="histogram"))
self._input0_quantizer._calibrator._torch_hist = True
self._input1_quantizer._calibrator._torch_hist = True
def forward(self, input):
return self._input0_quantizer(input) * self._input1_quantizer(torch.sigmoid(input))
class QuantAdd(torch.nn.Module):
def __init__(self, quantization):
super().__init__()
if quantization:
self._input0_quantizer = quant_nn.TensorQuantizer(QuantDescriptor(num_bits=8, calib_method="histogram"))
self._input0_quantizer._calibrator._torch_hist = True
self._fake_quant = True
self.quantization = quantization
def forward(self, x, y):
if self.quantization:
return self._input0_quantizer(x) + self._input0_quantizer(y)
return x + y
class disable_quantization:
def __init__(self, model):
self.model = model
def apply(self, disabled=True):
for name, module in self.model.named_modules():
if isinstance(module, quant_nn.TensorQuantizer):
module._disabled = disabled
def __enter__(self):
self.apply(True)
def __exit__(self, *args, **kwargs):
self.apply(False)
class enable_quantization:
def __init__(self, model):
self.model = model
def apply(self, enabled=True):
for name, module in self.model.named_modules():
if isinstance(module, quant_nn.TensorQuantizer):
module._disabled = not enabled
def __enter__(self):
self.apply(True)
return self
def __exit__(self, *args, **kwargs):
self.apply(False)
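# Usage sketch (illustrative): both helpers work as context managers or via .apply(); for example,
# to run an FP32 baseline on an already-instrumented model (`model` and `imgs` are assumptions):
#
#   with disable_quantization(model):
#       baseline = model(imgs)          # all TensorQuantizer modules are bypassed here
#   enable_quantization(model).apply()  # explicitly re-enable fake quantization afterwards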
def have_quantizer(module):
for name, m in module.named_modules():
if isinstance(m, quant_nn.TensorQuantizer):
return True
return False
def print_quantizer(module):
for name, m in module.named_modules():
if isinstance(m, quant_nn.TensorQuantizer):
print(name, m)
# Initialize PyTorch Quantization
def initialize(all_node_with_qdq = False):
quant_desc_input = QuantDescriptor(calib_method="histogram")
quant_nn.QuantConv2d.set_default_quant_desc_input(quant_desc_input)
quant_nn.QuantMaxPool2d.set_default_quant_desc_input(quant_desc_input)
quant_nn.QuantLinear.set_default_quant_desc_input(quant_desc_input)
quant_logging.set_verbosity(quant_logging.ERROR)
if all_node_with_qdq:
quant_modules._DEFAULT_QUANT_MAP.extend(
[quant_modules._quant_entry(torch.nn, "SiLU", QuantSiLU),
quant_modules._quant_entry(models.common, "Concat", QuantConcat)]
)
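# Usage sketch (illustrative): call initialize() once before building or loading the model so that
# QuantConv2d / QuantMaxPool2d / QuantLinear default to histogram input calibration, e.g.
#
#   initialize(all_node_with_qdq=False)   # histogram calibrators for conv/pool/linear inputs
#   model = Model("models/yolov5s.yaml")  # assumed YOLOv5 Model class from models.yolo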
def transfer_torch_to_quantization(nninstance : torch.nn.Module, quantmodule, all_node_with_qdq):
quant_instance = quantmodule.__new__(quantmodule)
for k, val in vars(nninstance).items():
setattr(quant_instance, k, val)
def __init__(self):
if all_node_with_qdq and self.__class__.__name__ in ('QuantSiLU', 'QuantConcat'):
self.__init__()
elif isinstance(self, quant_nn_utils.QuantInputMixin):
quant_desc_input = quant_nn_utils.pop_quant_desc_in_kwargs(self.__class__, input_only=True)
self.init_quantizer(quant_desc_input)
# Turn on torch_hist to enable higher calibration speeds
if isinstance(self._input_quantizer._calibrator, calib.HistogramCalibrator):
self._input_quantizer._calibrator._torch_hist = True
else:
quant_desc_input, quant_desc_weight = quant_nn_utils.pop_quant_desc_in_kwargs(self.__class__)
self.init_quantizer(quant_desc_input, quant_desc_weight)
# Turn on torch_hist to enable higher calibration speeds
if isinstance(self._input_quantizer._calibrator, calib.HistogramCalibrator):
self._input_quantizer._calibrator._torch_hist = True
self._weight_quantizer._calibrator._torch_hist = True
__init__(quant_instance)
return quant_instance
def quantization_ignore_match(ignore_policy : Union[str, List[str], Callable], path : str) -> bool:
if ignore_policy is None: return False
if isinstance(ignore_policy, Callable):
return ignore_policy(path)
if isinstance(ignore_policy, str) or isinstance(ignore_policy, List):
if isinstance(ignore_policy, str):
ignore_policy = [ignore_policy]
if path in ignore_policy: return True
for item in ignore_policy:
if re.match(item, path):
return True
return False
def bottleneck_quant_forward(self, x):
if hasattr(self, "addop"):
return self.addop(x, self.cv2(self.cv1(x))) if self.add else self.cv2(self.cv1(x))
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
def replace_bottleneck_forward(model):
for name, bottleneck in model.named_modules():
if bottleneck.__class__.__name__ == "Bottleneck":
if bottleneck.add:
if not hasattr(bottleneck, "addop"):
print(f"Add QuantAdd to {name}")
bottleneck.addop = QuantAdd(bottleneck.add)
bottleneck.__class__.forward = bottleneck_quant_forward
def replace_to_quantization_module(model : torch.nn.Module, ignore_policy : Union[str, List[str], Callable] = None, all_node_with_qdq = False):
module_dict = {}
for entry in quant_modules._DEFAULT_QUANT_MAP:
module = getattr(entry.orig_mod, entry.mod_name)
module_dict[id(module)] = entry.replace_mod
def recursive_and_replace_module(module, prefix=""):
for name in module._modules:
submodule = module._modules[name]
path = name if prefix == "" else prefix + "." + name
recursive_and_replace_module(submodule, path)
submodule_id = id(type(submodule))
if submodule_id in module_dict:
ignored = quantization_ignore_match(ignore_policy, path)
if ignored:
print(f"Quantization: {path} has ignored.")
continue
module._modules[name] = transfer_torch_to_quantization(submodule, module_dict[submodule_id], all_node_with_qdq)
recursive_and_replace_module(model)
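# Usage sketch (illustrative): swap eligible nn modules for their quantized counterparts while
# keeping part of the network in FP32 via a regex ignore policy (the layer path below is an assumption):
#
#   replace_to_quantization_module(model, ignore_policy=r"model\.24\..*")
#
# Any module whose dotted path matches the policy (string, list of regexes, or callable) is skipped.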
def get_attr_with_path(m, path):
def sub_attr(m, names):
name = names[0]
value = getattr(m, name)
if len(names) == 1:
return value
return sub_attr(value, names[1:])
array = [item for item in re.split(r"\.|/", path) if item]
return sub_attr(m, array)
def apply_custom_rules_to_quantizer(model : torch.nn.Module, export_onnx : Callable):
# apply rules to graph
export_onnx(model, "quantization-custom-rules-temp.onnx")
pairs = find_quantizer_pairs("quantization-custom-rules-temp.onnx")
print(pairs)
for major, sub in pairs:
print(f"Rules: {sub} match to {major}")
get_attr_with_path(model, sub)._input_quantizer = get_attr_with_path(model, major)._input_quantizer
os.remove("quantization-custom-rules-temp.onnx")
for name, bottleneck in model.named_modules():
if bottleneck.__class__.__name__ == "Bottleneck":
if bottleneck.add:
print(f"Rules: {name}.add match to {name}.cv1")
major = bottleneck.cv1.conv._input_quantizer
bottleneck.addop._input0_quantizer = major
bottleneck.addop._input1_quantizer = major
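# Usage sketch (illustrative): the export callable only needs to accept (model, onnx_path); a minimal
# lambda built on the export_onnx() helper defined later in this file could look like:
#
#   dummy = torch.zeros(1, 3, 640, 640, device=next(model.parameters()).device)
#   apply_custom_rules_to_quantizer(model, lambda m, f: export_onnx(m, dummy, f, opset_version=13))
#
# so that input quantizers feeding the same Concat/MaxPool end up sharing a single scale.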
def calibrate_model(model : torch.nn.Module, dataloader, device, num_batch=25):
def compute_amax(model, **kwargs):
for name, module in model.named_modules():
if isinstance(module, quant_nn.TensorQuantizer):
if module._calibrator is not None:
if isinstance(module._calibrator, calib.MaxCalibrator):
module.load_calib_amax()
else:
module.load_calib_amax(**kwargs)
module._amax = module._amax.to(device)
def collect_stats(model, data_loader, device, num_batch=200):
"""Feed data to the network and collect statistics"""
# Enable calibrators
model.eval()
for name, module in model.named_modules():
if isinstance(module, quant_nn.TensorQuantizer):
if module._calibrator is not None:
module.disable_quant()
module.enable_calib()
else:
module.disable()
# Feed data to the network for collecting stats
with torch.no_grad():
for i, datas in tqdm(enumerate(data_loader), total=num_batch, desc="Collect stats for calibrating"):
imgs = datas[0].to(device, non_blocking=True).float() / 255.0
model(imgs)
if i >= num_batch:
break
# Disable calibrators
for name, module in model.named_modules():
if isinstance(module, quant_nn.TensorQuantizer):
if module._calibrator is not None:
module.enable_quant()
module.disable_calib()
else:
module.enable()
collect_stats(model, dataloader, device, num_batch=num_batch)
compute_amax(model, method="mse")
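# Usage sketch (illustrative): after replace_to_quantization_module(), run a short calibration pass
# over a representative dataloader (`calib_loader` is an assumption):
#
#   calibrate_model(model, calib_loader, device, num_batch=25)
#
# collect_stats() feeds images scaled to [0, 1] through the model, then compute_amax() derives
# per-quantizer amax values with the MSE method.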
def finetune(
model : torch.nn.Module, train_dataloader, per_epoch_callback : Callable = None, preprocess : Callable = None,
nepochs=10, early_exit_batchs_per_epoch=1000, lrschedule : Dict = None, fp16=True, learningrate=1e-5,
supervision_policy : Callable = None
):
origin_model = deepcopy(model).eval()
disable_quantization(origin_model).apply()
model.train()
model.requires_grad_(True)
scaler = amp.GradScaler(enabled=fp16)
optimizer = optim.Adam(model.parameters(), learningrate)
quant_lossfn = torch.nn.MSELoss()
device = next(model.parameters()).device
if lrschedule is None:
lrschedule = {
0: 1e-6,
3: 1e-5,
8: 1e-6
}
def make_layer_forward_hook(l):
def forward_hook(m, input, output):
l.append(output)
return forward_hook
supervision_module_pairs = []
for ((mname, ml), (oriname, ori)) in zip(model.named_modules(), origin_model.named_modules()):
if isinstance(ml, quant_nn.TensorQuantizer): continue
if supervision_policy:
if not supervision_policy(mname, ml):
continue
supervision_module_pairs.append([ml, ori])
for iepoch in range(nepochs):
if iepoch in lrschedule:
learningrate = lrschedule[iepoch]
for g in optimizer.param_groups:
g["lr"] = learningrate
model_outputs = []
origin_outputs = []
remove_handle = []
for ml, ori in supervision_module_pairs:
remove_handle.append(ml.register_forward_hook(make_layer_forward_hook(model_outputs)))
remove_handle.append(ori.register_forward_hook(make_layer_forward_hook(origin_outputs)))
model.train()
pbar = tqdm(train_dataloader, desc="QAT", total=early_exit_batchs_per_epoch)
for ibatch, imgs in enumerate(pbar):
if ibatch >= early_exit_batchs_per_epoch:
break
if preprocess:
imgs = preprocess(imgs)
imgs = imgs.to(device)
with amp.autocast(enabled=fp16):
model(imgs)
with torch.no_grad():
origin_model(imgs)
quant_loss = 0
for index, (mo, fo) in enumerate(zip(model_outputs, origin_outputs)):
quant_loss += quant_lossfn(mo, fo)
model_outputs.clear()
origin_outputs.clear()
if fp16:
scaler.scale(quant_loss).backward()
scaler.step(optimizer)
scaler.update()
else:
quant_loss.backward()
optimizer.step()
optimizer.zero_grad()
pbar.set_description(f"QAT Finetuning {iepoch + 1} / {nepochs}, Loss: {quant_loss.detach().item():.5f}, LR: {learningrate:g}")
# Hooks must be removed before ONNX export or torch.save
for rm in remove_handle:
rm.remove()
if per_epoch_callback:
if per_epoch_callback(model, iepoch, learningrate):
break
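# Usage sketch (illustrative): a per-epoch callback can checkpoint the model and stop early; it
# returns True to break out of the QAT loop (file names and the loader name are assumptions):
#
#   def save_ckpt(m, epoch, lr):
#       torch.save(m.state_dict(), f"qat_epoch{epoch}.pt")
#       return False  # keep training
#
#   finetune(model, train_loader, per_epoch_callback=save_ckpt, nepochs=10, fp16=True)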
def export_onnx(model, input, file, *args, **kwargs):
quant_nn.TensorQuantizer.use_fb_fake_quant = True
model.eval()
with torch.no_grad():
torch.onnx.export(model, input, file, *args, **kwargs)
quant_nn.TensorQuantizer.use_fb_fake_quant = False
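# Usage sketch (illustrative): export the QAT model with Q/DQ nodes using the wrapper above
# (file name and input shape are assumptions):
#
#   dummy = torch.zeros(1, 3, 640, 640)
#   export_onnx(model.cpu(), dummy, "yolov5s_qat.onnx", opset_version=13,
#               input_names=["images"], output_names=["output"])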
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import onnx
def find_with_input_node(model, name):
for node in model.graph.node:
if len(node.input) > 0 and name in node.input:
return node
def find_all_with_input_node(model, name):
nodes = []
for node in model.graph.node:
if len(node.input) > 0 and name in node.input:
nodes.append(node)
return nodes
def find_with_output_node(model, name):
for node in model.graph.node:
if len(node.output) > 0 and name in node.output:
return node
def find_with_no_change_parent_node(model, node):
parent = find_with_output_node(model, node.input[0])
if parent is not None:
if parent.op_type in ["Concat", "MaxPool"]:
return find_with_no_change_parent_node(model, parent)
return parent
def find_quantizelinear_conv(model, qnode):
dq = find_with_input_node(model, qnode.output[0])
conv = find_with_input_node(model, dq.output[0])
return conv
def find_quantize_conv_name(model, weight_qname):
dq = find_with_output_node(model, weight_qname)
q = find_with_output_node(model, dq.input[0])
return ".".join(q.input[0].split(".")[:-1])
def find_quantizer_pairs(onnx_file):
model = onnx.load(onnx_file)
match_pairs = []
for node in model.graph.node:
if node.op_type == "Concat":
qnodes = find_all_with_input_node(model, node.output[0])
major = None
for qnode in qnodes:
if qnode.op_type != "QuantizeLinear":
continue
conv = find_quantizelinear_conv(model, qnode)
if major is None:
major = find_quantize_conv_name(model, conv.input[1])
else:
match_pairs.append([major, find_quantize_conv_name(model, conv.input[1])])
for subnode in model.graph.node:
if len(subnode.input) > 0 and subnode.op_type == "QuantizeLinear" and subnode.input[0] in node.input:
subconv = find_quantizelinear_conv(model, subnode)
match_pairs.append([major, find_quantize_conv_name(model, subconv.input[1])])
elif node.op_type == "MaxPool":
qnode = find_with_input_node(model, node.output[0])
if not (qnode and qnode.op_type == "QuantizeLinear"):
continue
major = find_quantizelinear_conv(model, qnode)
major = find_quantize_conv_name(model, major.input[1])
same_input_nodes = find_all_with_input_node(model, node.input[0])
for same_input_node in same_input_nodes:
if same_input_node.op_type == "QuantizeLinear":
subconv = find_quantizelinear_conv(model, same_input_node)
match_pairs.append([major, find_quantize_conv_name(model, subconv.input[1])])
return match_pairs
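# Usage sketch (illustrative): inspect which input quantizers will be tied together for a given
# exported graph (the file name is an assumption):
#
#   pairs = find_quantizer_pairs("yolov5s_qat.onnx")
#   for major, sub in pairs:
#       print(f"{sub} will reuse the input quantizer of {major}")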