Initial commit

d1faa4b3 · fengyf1 · dca509fe · d1faa4b3 · d1faa4b3 · d1faa4b3
Commit d1faa4b3 authored Feb 05, 2026 by fengyf1
3 changed files
--- a/README.md
+++ b/README.md
--- a/onnxruntime-1.19.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
+++ b/onnxruntime-1.19.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
--- a/scripts/amg_amp_compile_v2_warmup.py
+++ b/scripts/amg_amp_compile_v2_warmup.py
+      
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import cv2  # type: ignore
+
+from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
+
+import argparse
+import json
+import os
+from typing import Any, Dict, List
+
+# Command-line interface for the benchmark script.
+# All AMG options default to None so that only values explicitly given on the
+# command line are forwarded to the mask generator (see get_amg_kwargs).
+parser = argparse.ArgumentParser(
+    description=(
+        "Runs automatic mask generation on an input image or directory of images, "
+        "and outputs masks as either PNGs or COCO-style RLEs. Requires open-cv, "
+        "as well as pycocotools if saving in RLE format."
+    )
+)
+
+# NOTE(review): the path defaults below are absolute, site-specific locations;
+# override them on the command line when running anywhere else.
+parser.add_argument(
+    "--input",
+    type=str,
+    required=False,
+    default='/work/home/yuhai/ll/segment-anything-main/datasets',
+    help="Path to either a single input image or folder of images.",
+)
+
+# NOTE(review): 'ouputs' looks like a typo for 'outputs'; it is left unchanged
+# because it is a runtime path that existing runs may already rely on.
+parser.add_argument(
+    "--output",
+    type=str,
+    required=False,
+    default='/work/home/yuhai/ll/segment-anything-main/ouputs',
+    help=(
+        "Path to the directory where masks will be output. Output will be either a folder "
+        "of PNGs per image or a single json with COCO-style masks."
+    ),
+)
+
+parser.add_argument(
+    "--model-type",
+    type=str,
+    default='vit_h',
+    required=False,
+    help="The type of model to load, in ['default', 'vit_h', 'vit_l', 'vit_b']",
+)
+
+parser.add_argument(
+    "--checkpoint",
+    type=str,
+    required=False,
+    default='/work/home/yuhai/ll/segment-anything-main/sam_vit_h_4b8939.pth',
+    help="The path to the SAM checkpoint to use for mask generation.",
+)
+
+parser.add_argument("--device", type=str, default="cuda", help="The device to run generation on.")
+
+parser.add_argument(
+    "--convert-to-rle",
+    action="store_true",
+    help=(
+        "Save masks as COCO RLEs in a single json instead of as a folder of PNGs. "
+        "Requires pycocotools."
+    ),
+)
+
+amg_settings = parser.add_argument_group("AMG Settings")
+
+amg_settings.add_argument(
+    "--points-per-side",
+    type=int,
+    default=None,
+    help="Generate masks by sampling a grid over the image with this many points to a side.",
+)
+
+amg_settings.add_argument(
+    "--points-per-batch",
+    type=int,
+    default=None,
+    help="How many input points to process simultaneously in one batch.",
+)
+
+amg_settings.add_argument(
+    "--pred-iou-thresh",
+    type=float,
+    default=None,
+    help="Exclude masks with a predicted score from the model that is lower than this threshold.",
+)
+
+amg_settings.add_argument(
+    "--stability-score-thresh",
+    type=float,
+    default=None,
+    help="Exclude masks with a stability score lower than this threshold.",
+)
+
+amg_settings.add_argument(
+    "--stability-score-offset",
+    type=float,
+    default=None,
+    help="Larger values perturb the mask more when measuring stability score.",
+)
+
+amg_settings.add_argument(
+    "--box-nms-thresh",
+    type=float,
+    default=None,
+    help="The overlap threshold for excluding a duplicate mask.",
+)
+
+amg_settings.add_argument(
+    "--crop-n-layers",
+    type=int,
+    default=None,
+    help=(
+        "If >0, mask generation is run on smaller crops of the image to generate more masks. "
+        "The value sets how many different scales to crop at."
+    ),
+)
+
+amg_settings.add_argument(
+    "--crop-nms-thresh",
+    type=float,
+    default=None,
+    help="The overlap threshold for excluding duplicate masks across different crops.",
+)
+
+# The overlap is a ratio (this script itself passes 0.0), so it must parse as a
+# float; with type=int a fractional value such as 0.3 was rejected by argparse.
+amg_settings.add_argument(
+    "--crop-overlap-ratio",
+    type=float,
+    default=None,
+    help="Larger numbers mean image crops will overlap more.",
+)
+
+amg_settings.add_argument(
+    "--crop-n-points-downscale-factor",
+    type=int,
+    default=None,
+    help="The number of points-per-side in each layer of crop is reduced by this factor.",
+)
+
+amg_settings.add_argument(
+    "--min-mask-region-area",
+    type=int,
+    default=None,
+    help=(
+        "Disconnected mask regions or holes with area smaller than this value "
+        "in pixels are removed by postprocessing."
+    ),
+)
+
+
+def write_masks_to_folder(masks: List[Dict[str, Any]], path: str) -> None:
+    """Save every mask as ``<index>.png`` in *path* and write ``metadata.csv``.
+
+    Args:
+        masks: Mask records; each must provide the keys ``segmentation``,
+            ``area``, ``bbox``, ``point_coords``, ``predicted_iou``,
+            ``stability_score`` and ``crop_box``.
+        path: Directory that receives the PNG files and the CSV; it is not
+            created here.
+
+    The CSV starts with a header line followed by one comma-separated line per
+    mask, joined with ``\\n`` and without a trailing newline.
+    """
+    csv_lines = [
+        "id,area,bbox_x0,bbox_y0,bbox_w,bbox_h,point_input_x,point_input_y,predicted_iou,stability_score,crop_box_x0,crop_box_y0,crop_box_w,crop_box_h"  # noqa
+    ]
+    for index, entry in enumerate(masks):
+        segmentation = entry["segmentation"]
+        # Scale by 255 before writing; presumably the mask is binary so the
+        # foreground becomes white - confirm against the generator output.
+        cv2.imwrite(os.path.join(path, f"{index}.png"), segmentation * 255)
+
+        # Collect the raw field values in header order, then stringify once.
+        fields = [index, entry["area"]]
+        fields.extend(entry["bbox"])
+        fields.extend(entry["point_coords"][0])
+        fields.append(entry["predicted_iou"])
+        fields.append(entry["stability_score"])
+        fields.extend(entry["crop_box"])
+        csv_lines.append(",".join(str(field) for field in fields))
+
+    with open(os.path.join(path, "metadata.csv"), "w") as csv_file:
+        csv_file.write("\n".join(csv_lines))
+
+
+def get_amg_kwargs(args):
+    """Return the AMG options from *args* that are not ``None``.
+
+    Each listed attribute is read from *args*; attributes whose value is
+    ``None`` are left out so the result can be splatted into the mask
+    generator without overriding its own defaults.
+    """
+    option_names = (
+        "points_per_side",
+        "points_per_batch",
+        "pred_iou_thresh",
+        "stability_score_thresh",
+        "stability_score_offset",
+        "box_nms_thresh",
+        "crop_n_layers",
+        "crop_nms_thresh",
+        "crop_overlap_ratio",
+        "crop_n_points_downscale_factor",
+        "min_mask_region_area",
+    )
+    selected = {}
+    for option in option_names:
+        value = getattr(args, option)
+        if value is not None:
+            selected[option] = value
+    return selected
+
+import torch
+import time
+import numpy as np
+
+def main(args: argparse.Namespace) -> None:
+    """Benchmark SAM automatic mask generation with torch.compile and fp16 autocast.
+
+    Loads the checkpoint named by ``args.checkpoint``, wraps the model, its
+    image encoder and its mask decoder with ``torch.compile``, and then, for
+    every readable image under ``args.input``, runs two untimed warm-up
+    generations followed by one timed generation. Masks from the timed run are
+    written below ``args.output``: a folder of PNGs plus ``metadata.csv`` per
+    image, or one ``.json`` file per image when ``args.convert_to_rle`` is set.
+    The number of timed images and their mean wall-clock time are printed at
+    the end.
+
+    Args:
+        args: Parsed command-line options produced by the module-level parser.
+
+    Raises:
+        FileExistsError: In PNG mode, if the per-image output folder already
+            exists (it is created with ``exist_ok=False``).
+    """
+    print("Loading model...")
+    # NOTE(review): autocast and the synchronize calls below are hard-wired to
+    # CUDA even though --device is configurable; confirm before running on
+    # another device type.
+    with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16):
+        sam = sam_model_registry[args.model_type](checkpoint=args.checkpoint)
+        # NOTE(review): the whole-model wrapper is kept from the original
+        # script; whether it has any effect next to the per-submodule
+        # compilation below depends on how the generator calls the model.
+        sam = torch.compile(sam, mode="max-autotune")
+
+        _ = sam.to(device=args.device)
+        output_mode = "coco_rle" if args.convert_to_rle else "binary_mask"
+
+        # Benchmark defaults; values given on the command line override them.
+        # Passing these as literal keywords next to **amg_kwargs raised
+        # "got multiple values for keyword argument" as soon as one of the
+        # matching options was supplied.
+        generator_kwargs = {
+            "points_per_side": 32,  # fewer sample points
+            "crop_n_layers": 0,  # no multi-layer cropping
+            "crop_overlap_ratio": 0.0,  # no crop overlap
+        }
+        generator_kwargs.update(get_amg_kwargs(args))
+
+        sam.image_encoder = torch.compile(sam.image_encoder, mode="default", backend="inductor")
+        sam.mask_decoder = torch.compile(sam.mask_decoder, mode="default", backend="inductor")
+
+        generator = SamAutomaticMaskGenerator(
+            sam,
+            output_mode=output_mode,
+            **generator_kwargs,
+        )
+
+        if not os.path.isdir(args.input):
+            targets = [args.input]
+        else:
+            # Sorted so the processing order is deterministic across runs.
+            targets = [
+                os.path.join(args.input, f)
+                for f in sorted(os.listdir(args.input))
+                if not os.path.isdir(os.path.join(args.input, f))
+            ]
+
+        os.makedirs(args.output, exist_ok=True)
+        inference_times = []
+        for t in targets:
+            print(f"Processing '{t}'...")
+            image = cv2.imread(t)
+            if image is None:
+                print(f"Could not load '{t}' as an image, skipping...")
+                continue
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+            # Warm-up (not timed). The enclosing autocast context already
+            # applies here, so no nested context is needed.
+            for _ in range(2):
+                _ = generator.generate(image)
+                torch.cuda.synchronize()
+
+            # Timed run. perf_counter is the clock intended for measuring
+            # intervals; synchronize so pending GPU work is included.
+            start_time = time.perf_counter()
+            masks = generator.generate(image)
+            torch.cuda.synchronize()
+            inference_times.append(time.perf_counter() - start_time)
+
+            base = os.path.splitext(os.path.basename(t))[0]
+            save_base = os.path.join(args.output, base)
+            if output_mode == "binary_mask":
+                # exist_ok=False: refuse to write into an existing result folder.
+                os.makedirs(save_base, exist_ok=False)
+                write_masks_to_folder(masks, save_base)
+            else:
+                save_file = save_base + ".json"
+                with open(save_file, "w") as f:
+                    json.dump(masks, f)
+    print("Done!")
+    print(f"inference_images:{len(inference_times)}")
+    if inference_times:
+        print(f"Average inference time:{np.mean(inference_times):.2f}")
+    else:
+        # np.mean([]) would return nan and emit a RuntimeWarning.
+        print("Average inference time:n/a (no images were processed)")
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line and run the benchmark.
+    main(parser.parse_args())
\ No newline at end of file