Commit b2293819 authored by sunzhq2's avatar sunzhq2
Browse files

facenet use_flipped_images

parent a4a4ae0f
export HIP_VISIBLE_DEVICES=0
nohup numactl -N 0 -m 0 python3 src/migraphx_infer.py --use_flipped_images 2>&1 | tee result_0.log &
export HIP_VISIBLE_DEVICES=1
nohup numactl -N 1 -m 1 python3 src/migraphx_infer.py --use_flipped_images 2>&1 | tee result_1.log &
export HIP_VISIBLE_DEVICES=2
nohup numactl -N 2 -m 2 python3 src/migraphx_infer.py --use_flipped_images 2>&1 | tee result_2.log &
export HIP_VISIBLE_DEVICES=3
nohup numactl -N 3 -m 3 python3 src/migraphx_infer.py --use_flipped_images 2>&1 | tee result_3.log &
# python3 src/validate_on_lfw.py /datasets/lfw_mtcnnpy_160 models_m/$1 \
......
# export HIP_VISIBLE_DEVICES=$1
for i in {0..3}
do
python3 facenet_post.py /datasets/lfw_mtcnnpy_160 results/${i} results/${i} \
--distance_metric 1 \
......
import os
import numpy as np
from PIL import Image # 推荐用于读取和调整图像
import onnxruntime as ort
import argparse
import lfw # 假设你有这个模块来读取 pairs 和 paths
import sys
# Import metrics and interpolation functions for AUC/EER calculation
from sklearn import metrics
from scipy.optimize import brentq
from scipy import interpolate
import time
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import migraphx
# Per-GPU result directory keyed by HIP_VISIBLE_DEVICES.
# os.getenv returns None when the variable is unset, which would make
# os.path.join raise TypeError — fall back to '0' so the script still runs.
gpuid = os.getenv('HIP_VISIBLE_DEVICES') or '0'
resultdir = os.path.join('results', gpuid)
os.makedirs(resultdir, exist_ok=True)
def AllocateOutputMemory(model):
    """Pre-allocate one GPU buffer per model output.

    Returns a dict mapping each output name to a migraphx GPU argument;
    this dict is reused as the parameter map passed to model.run().
    """
    return {
        name: migraphx.allocate_gpu(s=model.get_outputs()[name])
        for name in model.get_outputs().keys()
    }
def preprocess_image(image_path, target_size=(160, 160), flip=False):
    """Load one face image and normalise it for FaceNet inference.

    Args:
        image_path: path to an image file readable by PIL.
        target_size: (width, height) passed to PIL's resize.
        flip: when True, mirror the image left-right before normalisation
            (test-time augmentation for flipped-image evaluation). Defaults
            to False, so existing callers are unaffected.

    Returns:
        float32 HWC array scaled to roughly [-1, 1] via (x - 127.5) / 128.
    """
    img = Image.open(image_path).convert('RGB')  # force 3-channel RGB
    img = img.resize(target_size, Image.Resampling.BILINEAR)
    img_np = np.array(img, dtype=np.float32)
    if flip:
        img_np = np.fliplr(img_np)
    img_np = (img_np - 127.5) / 128.0
    return img_np
def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)):
    """Generator yielding (images, labels, issame) batches for the LFW pairs.

    images: float32 NCHW batch, normalised by preprocess_image.
    labels: int32 identity labels (LabelEncoder over parent directory names).
    issame: ground-truth same/different flags, one entry per image.
    """
    pairs = lfw.read_pairs(os.path.expanduser(pairs_file))
    paths, actual_issame = lfw.get_paths(os.path.expanduser(lfw_dir), pairs)

    images, names, issame_flags = [], [], []
    for pair_idx, same in enumerate(actual_issame):
        for img_path in (paths[pair_idx * 2], paths[pair_idx * 2 + 1]):
            # The identity name is the parent directory of the image file.
            names.append(os.path.basename(os.path.dirname(img_path)))
            chw = np.transpose(
                preprocess_image(img_path, target_size=image_size), (2, 0, 1)
            )
            images.append(np.ascontiguousarray(chw))
            issame_flags.append(same)

    # Map identity names to integer class labels.
    encoded = LabelEncoder().fit_transform(names).astype(np.int32)

    total = len(images)
    num_batches, remainder = divmod(total, batch_size)
    if remainder:
        print(f"Warning: Number of images ({total}) is not evenly divisible by batch size ({batch_size}). Last batch will be smaller.")
        num_batches += 1

    for b in range(num_batches):
        lo = b * batch_size
        hi = min(lo + batch_size, total)
        batch_array = np.stack(images[lo:hi], axis=0).astype(np.float32)
        yield batch_array, encoded[lo:hi], issame_flags[lo:hi]
def main_onnx(args):
    """Evaluate a FaceNet MIGraphX model on LFW pairs.

    Streams preprocessed LFW batches through the compiled model (static
    batch 64 — see the padding below), collects all embeddings, dumps each
    batch to results/<gpu>/ as .bin files for offline post-processing, then
    runs lfw.evaluate and prints accuracy, AUC, EER and throughput.
    """
    embedding_size = 512  # dimensionality of the model's embedding output
    pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
    paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs)
    nrof_pairs = len(actual_issame)
    nrof_images = nrof_pairs * 2  # two images per pair
    print(f"Number of pairs: {nrof_pairs}, Number of images: {nrof_images}, Embedding size: {embedding_size}")
    data_generator = load_lfw_for_onnx(
        args.lfw_dir,
        args.lfw_pairs,
        args.lfw_batch_size,
        image_size=(args.image_size, args.image_size),
    )
    # Batch count is only used as the tqdm total; the generator drives iteration.
    total_images_calculated = len(paths)
    num_batches_calculated = total_images_calculated // args.lfw_batch_size
    if total_images_calculated % args.lfw_batch_size != 0:
        num_batches_calculated += 1
    all_embeddings = np.zeros((nrof_images, embedding_size), dtype=np.float32)
    current_image_index = 0
    model = migraphx.load(args.model_path)
    inputName=list(model.get_inputs().keys())[0]
    modelData=AllocateOutputMemory(model)
    # warm up: one dummy run so first-launch cost is excluded from the timings
    modelData[inputName] = migraphx.to_gpu(migraphx.argument(np.ones([64,3,160,160]).astype(np.float32)))
    model.run(modelData)
    infer_times = []        # pure model.run() durations
    total_infer_times = []  # data loading + inference per batch
    total_start = time.time()
    for i, (batch_images, batch_label, _) in enumerate(tqdm(data_generator, total=num_batches_calculated, desc="Processing Batches")):
        original_batch_size = batch_images.shape[0]
        # import pdb;pdb.set_trace()
        # The compiled model expects a fixed batch of 64: pad a short final
        # batch by repeating its last image; outputs are trimmed again below.
        if original_batch_size < 64:
            pad_size = 64 - original_batch_size
            padding_images = np.repeat(batch_images[-1:], pad_size, axis=0)
            batch_images = np.concatenate((batch_images, padding_images), axis=0)
        modelData[inputName] = migraphx.to_gpu(migraphx.argument(batch_images))
        start = time.time()
        embeddings_dcu = model.run(modelData)
        infer_time_taken = time.time() - start
        embeddings = np.array(migraphx.from_gpu(embeddings_dcu[0]))
        infer_times.append(infer_time_taken)
        if original_batch_size != 64:
            embeddings = embeddings[:original_batch_size]  # drop padded rows
        print(f"**********************************embeddings: {embeddings.shape}")
        batch_size_current = embeddings.shape[0]
        all_embeddings[current_image_index:current_image_index + batch_size_current] = embeddings
        current_image_index += batch_size_current # Move index forward
        # Persist raw embeddings and labels for facenet_post.py.
        embeddings.tofile(os.path.join(f'{resultdir}', '{}_0.bin'.format(str(i).zfill(6))))
        batch_label.tofile(os.path.join(f'{resultdir}', '{}.bin'.format(str(i).zfill(6))))
        if i % 10 == 9:
            print('.', end='')
            sys.stdout.flush()
        total_infer_times.append(time.time() - total_start)
        total_start = time.time() # Reset timer for next batch's data loading + inference
    print("\nAll batches processed.")
    nrof_embeddings = nrof_pairs * 2
    # NOTE(review): this zeros allocation is immediately discarded by the
    # rebinding on the next line; final_embeddings is simply all_embeddings.
    final_embeddings = np.zeros((nrof_embeddings, embedding_size), dtype=np.float32)
    final_embeddings = all_embeddings
    distance_metric = 1 # Euclidean
    subtract_mean = True
    nrof_folds = 10
    tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
        final_embeddings,
        actual_issame,
        nrof_folds=nrof_folds,
        distance_metric=distance_metric,
        subtract_mean=subtract_mean
    )
    print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy)))
    print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    auc = metrics.auc(fpr, tpr)
    print('Area Under Curve (AUC): %1.3f' % auc)
    # EER: the point where FPR == 1 - TPR on the interpolated ROC curve.
    eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
    print('Equal Error Rate (EER): %1.3f' % eer)
    print("***************************")
    # Throughput assumes every timed batch ran at the padded size of 64.
    infer_time = sum(infer_times)
    avg_infer_fps = 64 * len(infer_times) / sum(infer_times)
    print(f"total_infer_time: {infer_time}s")
    print(f'avg_infer_fps: {avg_infer_fps}samples/s')
    load_data_infer_time = sum(total_infer_times)
    load_data_avg_infer_fps = len(total_infer_times) * 64 / sum(total_infer_times)
    print(f'load_data_total_infer_time: {load_data_infer_time}s')
    print(f'load_data_avg_total_Infer_fps: {load_data_avg_infer_fps} samples/s')
    print("******************************")
def parse_arguments_onnx():
    """Build and parse the command-line arguments for LFW evaluation."""
    ap = argparse.ArgumentParser()
    ap.add_argument('--lfw_dir', type=str, default="/datasets/lfw_mtcnnpy_160", help='Path to the data directory containing aligned LFW face patches.')
    ap.add_argument('--lfw_batch_size', type=int, help='Number of images to process in a batch in the LFW test set.', default=64)
    ap.add_argument('--model_path', type=str, default="/home/sunzhq/workspace/yidong-infer/facenet/facenet/tools/onnx-models/facenet_static_bs64.mxr", help='Path to the ONNX model file.')
    ap.add_argument('--image_size', type=int, help='Image size (height, width) in pixels.', default=160)
    ap.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt')
    return ap.parse_args()
if __name__ == '__main__':
    # Parse CLI options and run the evaluation.
    main_onnx(parse_arguments_onnx())
import os import os
import numpy as np import numpy as np
from PIL import Image # 推荐用于读取和调整图像 from PIL import Image
import onnxruntime as ort # import onnxruntime as ort
import argparse import argparse
import lfw # 假设你有这个模块来读取 pairs 和 paths import lfw
import sys import sys
# Import metrics and interpolation functions for AUC/EER calculation
from sklearn import metrics from sklearn import metrics
from scipy.optimize import brentq from scipy.optimize import brentq
from scipy import interpolate from scipy import interpolate
import time import time
from tqdm import tqdm from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import migraphx import migraphx
gpuid = os.getenv('HIP_VISIBLE_DEVICES')
resultdir = os.path.join('results', gpuid)
os.makedirs(resultdir, exist_ok=True)
def AllocateOutputMemory(model): def AllocateOutputMemory(model):
outputData={} outputData={}
for key in model.get_outputs().keys(): for key in model.get_outputs().keys():
outputData[key] = migraphx.allocate_gpu(s=model.get_outputs()[key]) outputData[key] = migraphx.allocate_gpu(s=model.get_outputs()[key])
return outputData return outputData
def preprocess_image(image_path, target_size=(160, 160)): def preprocess_image(image_path, target_size=(160, 160), flip=False):
img = Image.open(image_path).convert('RGB') # 确保是 RGB 三通道 img = Image.open(image_path).convert('RGB')
img = img.resize(target_size, Image.Resampling.BILINEAR) # 或 Image.LANCZOS img = img.resize(target_size, Image.Resampling.BILINEAR)
img_np = np.array(img, dtype=np.float32) img_np = np.array(img, dtype=np.float32)
if flip:
img_np = np.fliplr(img_np)
img_np = (img_np - 127.5) / 128.0 img_np = (img_np - 127.5) / 128.0
return img_np return img_np
def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)): def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160), use_flipped_images=False):
pairs = lfw.read_pairs(os.path.expanduser(pairs_file)) pairs = lfw.read_pairs(os.path.expanduser(pairs_file))
paths, actual_issame = lfw.get_paths(os.path.expanduser(lfw_dir), pairs) paths, actual_issame = lfw.get_paths(os.path.expanduser(lfw_dir), pairs)
...@@ -38,6 +45,8 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)): ...@@ -38,6 +45,8 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)):
all_labels = [] all_labels = []
all_actual_issame_full = [] all_actual_issame_full = []
current_idx = 0
for i in range(nrof_pairs): for i in range(nrof_pairs):
path0 = paths[i*2] path0 = paths[i*2]
path1 = paths[i*2+1] path1 = paths[i*2+1]
...@@ -45,18 +54,35 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)): ...@@ -45,18 +54,35 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)):
# Process each image in the pair # Process each image in the pair
for img_path in [path0, path1]: for img_path in [path0, path1]:
label = os.path.basename(os.path.dirname(img_path))
# Original image
processed_img = preprocess_image( processed_img = preprocess_image(
img_path, img_path,
target_size=image_size, target_size=image_size,
flip=False
) )
processed_img = np.transpose(processed_img, (2, 0, 1)) processed_img = np.transpose(processed_img, (2, 0, 1))
processed_img = np.ascontiguousarray(processed_img) processed_img = np.ascontiguousarray(processed_img)
all_processed_images.append(processed_img) all_processed_images.append(processed_img)
all_labels.append(label) all_labels.append(current_idx) # 使用递增的索引作为标签
all_actual_issame_full.append(actual_same) all_actual_issame_full.append(actual_same)
current_idx += 1
if use_flipped_images:
processed_img_flipped = preprocess_image(
img_path,
target_size=image_size,
flip=True
)
processed_img_flipped = np.transpose(processed_img_flipped, (2, 0, 1))
processed_img_flipped = np.ascontiguousarray(processed_img_flipped)
all_processed_images.append(processed_img_flipped)
all_labels.append(current_idx) # 使用递增的索引作为标签
all_actual_issame_full.append(actual_same)
current_idx += 1
# 转换为numpy数组
all_labels = np.array(all_labels, dtype=np.int32)
num_batches = len(all_processed_images) // batch_size num_batches = len(all_processed_images) // batch_size
if len(all_processed_images) % batch_size != 0: if len(all_processed_images) % batch_size != 0:
...@@ -76,14 +102,16 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)): ...@@ -76,14 +102,16 @@ def load_lfw_for_onnx(lfw_dir, pairs_file, batch_size, image_size=(160, 160)):
yield batch_array, batch_labels, batch_actual_issame_part yield batch_array, batch_labels, batch_actual_issame_part
def main_onnx(args): def main_onnx(args):
embedding_size = 512 embedding_size = 512
pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs)
nrof_pairs = len(actual_issame) nrof_pairs = len(actual_issame)
nrof_images = nrof_pairs * 2
if args.use_flipped_images:
nrof_images = nrof_pairs * 4 # 每对图像有4张:A原图、A翻转、B原图、B翻转
else:
nrof_images = nrof_pairs * 2 # 每对图像有2张:A原图、B原图
print(f"Number of pairs: {nrof_pairs}, Number of images: {nrof_images}, Embedding size: {embedding_size}") print(f"Number of pairs: {nrof_pairs}, Number of images: {nrof_images}, Embedding size: {embedding_size}")
...@@ -93,9 +121,10 @@ def main_onnx(args): ...@@ -93,9 +121,10 @@ def main_onnx(args):
args.lfw_pairs, args.lfw_pairs,
args.lfw_batch_size, args.lfw_batch_size,
image_size=(args.image_size, args.image_size), image_size=(args.image_size, args.image_size),
use_flipped_images=args.use_flipped_images
) )
total_images_calculated = len(paths) total_images_calculated = nrof_images
num_batches_calculated = total_images_calculated // args.lfw_batch_size num_batches_calculated = total_images_calculated // args.lfw_batch_size
if total_images_calculated % args.lfw_batch_size != 0: if total_images_calculated % args.lfw_batch_size != 0:
num_batches_calculated += 1 num_batches_calculated += 1
...@@ -113,9 +142,8 @@ def main_onnx(args): ...@@ -113,9 +142,8 @@ def main_onnx(args):
infer_times = [] infer_times = []
total_infer_times = [] total_infer_times = []
total_start = time.time() total_start = time.time()
for i, (batch_images, _, _) in enumerate(tqdm(data_generator, total=num_batches_calculated, desc="Processing Batches")): for i, (batch_images, batch_label, _) in enumerate(tqdm(data_generator, total=num_batches_calculated, desc="Processing Batches")):
original_batch_size = batch_images.shape[0] original_batch_size = batch_images.shape[0]
if original_batch_size < 64: if original_batch_size < 64:
pad_size = 64 - original_batch_size pad_size = 64 - original_batch_size
padding_images = np.repeat(batch_images[-1:], pad_size, axis=0) padding_images = np.repeat(batch_images[-1:], pad_size, axis=0)
...@@ -124,28 +152,44 @@ def main_onnx(args): ...@@ -124,28 +152,44 @@ def main_onnx(args):
start = time.time() start = time.time()
embeddings_dcu = model.run(modelData) embeddings_dcu = model.run(modelData)
embeddings_1 = np.array(migraphx.from_gpu(embeddings_dcu[0]))
infer_time_taken = time.time() - start infer_time_taken = time.time() - start
embeddings = np.array(migraphx.from_gpu(embeddings_dcu[0]))
infer_times.append(infer_time_taken) infer_times.append(infer_time_taken)
if original_batch_size == 64: if original_batch_size != 64:
embeddings = embeddings_1 embeddings = embeddings[:original_batch_size]
else: # print(f"**********************************embeddings: {embeddings.shape}")
embeddings = embeddings_1[:original_batch_size]
batch_size_current = embeddings.shape[0] batch_size_current = embeddings.shape[0]
all_embeddings[current_image_index:current_image_index + batch_size_current] = embeddings all_embeddings[current_image_index:current_image_index + batch_size_current] = embeddings
current_image_index += batch_size_current # Move index forward current_image_index += batch_size_current
embeddings.tofile(os.path.join(f'{resultdir}', '{}_0.bin'.format(str(i).zfill(6))))
batch_label.tofile(os.path.join(f'{resultdir}', '{}.bin'.format(str(i).zfill(6))))
if i % 10 == 9: if i % 10 == 9:
print('.', end='') print('.', end='')
sys.stdout.flush() sys.stdout.flush()
total_infer_times.append(time.time() - total_start) total_infer_times.append(time.time() - total_start)
total_start = time.time() # Reset timer for next batch's data loading + inference total_start = time.time()
print("\nAll batches processed.") print("\nAll batches processed.")
nrof_embeddings = nrof_pairs * 2 # 验证嵌入向量数量是否正确
final_embeddings = np.zeros((nrof_embeddings, embedding_size), dtype=np.float32) print(f"Total embeddings collected: {current_image_index}")
final_embeddings = all_embeddings print(f"Expected embeddings: {nrof_images}")
if current_image_index != nrof_images:
print(f"Warning: Expected {nrof_images} embeddings but collected {current_image_index}")
if args.use_flipped_images:
# 使用翻转图像时,每张图像有两个嵌入向量(原图和翻转图),将它们合并成一个增强的嵌入向量
nrof_original_images = nrof_pairs * 2
final_embeddings = np.zeros((nrof_original_images, embedding_size * 2), dtype=np.float32)
# 将原图和翻转图的嵌入向量拼接起来
for i in range(nrof_original_images):
final_embeddings[i, :embedding_size] = all_embeddings[i*2]
final_embeddings[i, embedding_size:] = all_embeddings[i*2+1]
else:
final_embeddings = all_embeddings
distance_metric = 1 # Euclidean distance_metric = 1 # Euclidean
subtract_mean = True subtract_mean = True
...@@ -165,9 +209,14 @@ def main_onnx(args): ...@@ -165,9 +209,14 @@ def main_onnx(args):
print('Area Under Curve (AUC): %1.3f' % auc) print('Area Under Curve (AUC): %1.3f' % auc)
eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.) eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
print('Equal Error Rate (EER): %1.3f' % eer) print('Equal Error Rate (EER): %1.3f' % eer)
# 额外打印使用翻转图像的配置信息
if args.use_flipped_images:
print(f"Configuration: Using flipped images (original + flipped concatenated)")
print(f"Embedding dimension: {embedding_size*2}")
print("***************************") print("***************************")
infer_time = sum(infer_times) infer_time = sum(infer_times)
avg_infer_fps = 64 * len(infer_times) / sum(infer_times) avg_infer_fps = 64 * len(infer_times) / sum(infer_times)
print(f"total_infer_time: {infer_time}s") print(f"total_infer_time: {infer_time}s")
...@@ -178,7 +227,6 @@ def main_onnx(args): ...@@ -178,7 +227,6 @@ def main_onnx(args):
print(f'load_data_avg_total_Infer_fps: {load_data_avg_infer_fps} samples/s') print(f'load_data_avg_total_Infer_fps: {load_data_avg_infer_fps} samples/s')
print("******************************") print("******************************")
def parse_arguments_onnx(): def parse_arguments_onnx():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--lfw_dir', type=str, default="/datasets/lfw_mtcnnpy_160", help='Path to the data directory containing aligned LFW face patches.') parser.add_argument('--lfw_dir', type=str, default="/datasets/lfw_mtcnnpy_160", help='Path to the data directory containing aligned LFW face patches.')
...@@ -186,8 +234,9 @@ def parse_arguments_onnx(): ...@@ -186,8 +234,9 @@ def parse_arguments_onnx():
parser.add_argument('--model_path', type=str, default="/home/sunzhq/workspace/yidong-infer/facenet/facenet/tools/onnx-models/facenet_static_bs64.mxr", help='Path to the ONNX model file.') parser.add_argument('--model_path', type=str, default="/home/sunzhq/workspace/yidong-infer/facenet/facenet/tools/onnx-models/facenet_static_bs64.mxr", help='Path to the ONNX model file.')
parser.add_argument('--image_size', type=int, help='Image size (height, width) in pixels.', default=160) parser.add_argument('--image_size', type=int, help='Image size (height, width) in pixels.', default=160)
parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt') parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt')
parser.add_argument('--use_flipped_images', action='store_true', help='Use flipped images for evaluation (original + flipped concatenated).')
return parser.parse_args() return parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
args = parse_arguments_onnx() args = parse_arguments_onnx()
main_onnx(args) main_onnx(args)
\ No newline at end of file
import os
import numpy as np
from PIL import Image
import argparse
import lfw
import sys
from sklearn import metrics
from scipy.optimize import brentq
from scipy import interpolate
import time
from tqdm import tqdm
import migraphx
def AllocateOutputMemory(model):
    """Allocate a reusable GPU buffer for each of the model's outputs.

    The returned name -> GPU-argument dict doubles as the parameter map
    handed to model.run().
    """
    buffers = {}
    outputs = model.get_outputs()
    for name in outputs.keys():
        buffers[name] = migraphx.allocate_gpu(s=outputs[name])
    return buffers
def evaluate_embeddings_with_different_methods(embeddings, actual_issame, use_flipped_images, embedding_size):
    """Evaluate LFW embeddings with or without flipped-image concatenation.

    Returns a dict mapping method name to {"accuracy", "std", "auc"}.
    """
    results = {}
    nrof_pairs = len(actual_issame)

    def score(emb, key):
        # Shared lfw.evaluate call: 10 folds, Euclidean distance, mean-subtracted.
        tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
            emb,
            actual_issame,
            nrof_folds=10,
            distance_metric=1,
            subtract_mean=True
        )
        results[key] = {
            "accuracy": np.mean(accuracy),
            "std": np.std(accuracy),
            "auc": metrics.auc(fpr, tpr)
        }

    if not use_flipped_images:
        # Method 0: plain embeddings, no flip augmentation.
        score(embeddings, "original")
    elif embeddings.shape[0] == nrof_pairs * 4:
        # Method 1: interleaved layout — even rows are originals, odd rows
        # their flips; concatenate the two vectors per image into 2*d dims.
        combined = np.zeros((nrof_pairs * 2, embedding_size * 2))
        combined[:, :embedding_size] = embeddings[0::2]
        combined[:, embedding_size:] = embeddings[1::2]
        score(combined, "original+flipped")
    return results
def main_optimized(args):
    """Optimised main: preprocess each batch on the fly, infer with MIGraphX, then evaluate."""
    # Load the compiled MIGraphX model and pre-allocate its output buffers.
    model = migraphx.load(args.migraphx_model_path)
    input_name = list(model.get_inputs().keys())[0]
    modelData=AllocateOutputMemory(model)
    embedding_size=512  # FaceNet embedding dimensionality
    print("="*70)
    # Load the LFW pair metadata.
    pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
    paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs)
    nrof_pairs = len(actual_issame)
    # Build a flat list of (path, flip) jobs. Each flip is interleaved right
    # after its original so downstream code can pair rows 2i / 2i+1.
    all_image_paths = []
    flip_flags = []
    print("\nPreparing image paths...")
    for i in tqdm(range(nrof_pairs), desc="Organizing pairs"):
        path0 = paths[i*2]
        path1 = paths[i*2+1]
        # First image of the pair
        all_image_paths.append(path0)
        flip_flags.append(False)
        if args.use_flipped_images:
            all_image_paths.append(path0)
            flip_flags.append(True)
        # Second image of the pair
        all_image_paths.append(path1)
        flip_flags.append(False)
        if args.use_flipped_images:
            all_image_paths.append(path1)
            flip_flags.append(True)
    nrof_images = len(all_image_paths)
    print(f"Total images to process: {nrof_images}")
    # Pre-allocate storage for every embedding.
    all_embeddings = np.zeros((nrof_images, embedding_size), dtype=np.float32)
    # Inference loop.
    print("\nRunning inference...")
    infer_times = []
    for start_idx in tqdm(range(0, nrof_images, args.lfw_batch_size), desc="Processing"):
        end_idx = min(start_idx + args.lfw_batch_size, nrof_images)
        batch_paths = all_image_paths[start_idx:end_idx]
        batch_flip_flags = flip_flags[start_idx:end_idx]
        # Preprocess this batch.
        batch_images = []
        for img_path, flip_flag in zip(batch_paths, batch_flip_flags):
            # Read and resize with PIL.
            img = Image.open(img_path).convert('RGB')
            img = img.resize((args.image_size, args.image_size), Image.Resampling.BILINEAR)
            img_np = np.array(img, dtype=np.float32)
            if flip_flag:
                img_np = np.fliplr(img_np)
            # FaceNet normalisation to roughly [-1, 1].
            img_np = (img_np - 127.5) / 128.0
            # HWC -> CHW for the model input.
            img_np = np.transpose(img_np, (2, 0, 1))
            batch_images.append(img_np)
        batch_array = np.stack(batch_images, axis=0).astype(np.float32)
        # Pad a short final batch up to the model's static batch size of 64
        # by repeating the last image; padded rows are trimmed after inference.
        if batch_array.shape[0] < 64:
            pad_size = 64 - batch_array.shape[0]
            padding = np.repeat(batch_array[-1:], pad_size, axis=0)
            batch_for_infer = np.concatenate([batch_array, padding], axis=0)
        else:
            batch_for_infer = batch_array
        # Hand the batch to MIGraphX, reusing the pre-allocated output buffers.
        batch_for_infer = np.ascontiguousarray(batch_for_infer)
        # mgx_arg = migraphx.to_gpu(migraphx.argument(batch_for_infer))
        # model_data = {input_name: mgx_arg}
        modelData[input_name] = migraphx.to_gpu(migraphx.argument(batch_for_infer))
        # Time only the model call itself.
        infer_start = time.time()
        output = model.run(modelData)
        infer_time = time.time() - infer_start
        infer_times.append(infer_time)
        # Copy embeddings back to host, trimming any padded rows.
        embeddings_np = np.array(migraphx.from_gpu(output[0]))
        if batch_array.shape[0] < 64:
            embeddings_np = embeddings_np[:batch_array.shape[0]]
        all_embeddings[start_idx:end_idx] = embeddings_np
    print("\n" + "="*70)
    print("EVALUATION RESULTS")
    print("="*70)
    # Score the embeddings with the applicable method(s).
    results = evaluate_embeddings_with_different_methods(
        all_embeddings,
        actual_issame,
        args.use_flipped_images,
        embedding_size
    )
    # Report the comparison table.
    print("\nComparison of different methods:")
    print("-"*70)
    for method_name, result in results.items():
        print(f"{method_name:20} | Accuracy: {result['accuracy']:.5f} ± {result['std']:.5f} | AUC: {result['auc']:.3f}")
    # Performance statistics.
    if infer_times:
        total_infer_time = sum(infer_times)
        avg_fps = nrof_images / total_infer_time
        print("\n" + "="*70)
        print("PERFORMANCE STATISTICS")
        print("-"*70)
        print(f"Total inference time: {total_infer_time:.3f}s")
        print(f"Average FPS: {avg_fps:.1f} images/s")
        print(f"Number of images: {nrof_images}")
        if args.use_flipped_images:
            print(f" (Note: {nrof_pairs * 2} original images + their flips)")
if __name__ == '__main__':
    # Build the CLI, parse, and run the optimised evaluation pipeline.
    cli = argparse.ArgumentParser()
    cli.add_argument('--lfw_dir', type=str, default="/datasets/lfw_mtcnnpy_160")
    cli.add_argument('--lfw_batch_size', type=int, default=64)
    cli.add_argument('--migraphx_model_path', type=str,
                     default="/home/sunzhq/workspace/yidong-infer/facenet/facenet/tools/onnx-models/facenet_static_bs64_fp32.mxr")
    cli.add_argument('--image_size', type=int, default=160)
    cli.add_argument('--lfw_pairs', type=str, default='data/pairs.txt')
    cli.add_argument('--use_flipped_images', action='store_true')
    cli.add_argument('--use_fixed_image_standardization', action='store_true')
    main_optimized(cli.parse_args())
\ No newline at end of file
import tensorflow as tf
from tensorflow.python.platform import gfile
# Exploratory script: load a frozen FaceNet GraphDef and try to locate the
# graph node that produces the final embedding output.
pb_path = '/home/sunzhq/workspace/yidong-infer/facenet/facenet/models_m/facenet-tmp/20180408-102900.pb'
# Read and parse the serialized GraphDef from disk.
with tf.io.gfile.GFile(pb_path, 'rb') as f:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())
# Import it into a fresh graph so we can walk its operations.
with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
ops = graph.get_operations()
# --- 查找 Identity 操作，特别关注可能代表最终输出的 ---
# (Find Identity ops, focusing on ones likely to be the model's final outputs.)
print("--- Identity Nodes (Potential Model Outputs) ---")
identity_ops = [op for op in ops if op.type == "Identity"]
# Collected info about Identity nodes that look like final outputs.
final_outputs = []
for op in identity_ops:
    input_to_identity = op.inputs[0]
    producer_op = input_to_identity.op
    # Heuristic: treat an Identity as a candidate output if its input's name
    # mentions embedding/bottleneck, or its producer is a typical final-layer
    # computation (MatMul / Add / BatchMatMulV2).
    # In Facenet, the final layer before normalization might be called 'embeddings' or similar.
    # Common final ops could be MatMul, Add, BatchMatMul, etc., followed by normalization like L2Norm or similar.
    # Let's look for the most likely candidate based on name patterns common in Facenet models.
    # Look for a name pattern often used for the final embedding vector
    if ('embedding' in input_to_identity.name.lower() or
            'bottleneck' in input_to_identity.name.lower() or
            producer_op.type in ['MatMul', 'Add', 'BatchMatMulV2']): # Common final computation types
        final_outputs.append({
            'identity_name': op.name,
            'final_output_name': input_to_identity.name,
            'producer_name': producer_op.name,
            'type': producer_op.type,
            'shape': input_to_identity.shape,
            'dtype': input_to_identity.dtype
        })
# Print only the potentially relevant final outputs
if final_outputs:
    print("Potential Model Output Candidates:")
    for out_info in final_outputs:
        print(f" Identity Op: {out_info['identity_name']}")
        print(f" Maps Output From: {out_info['producer_name']} (Type: {out_info['type']})")
        print(f" Final Output Name: {out_info['final_output_name']}")
        print(f" Shape: {out_info['shape']}, Dtype: {out_info['dtype']}")
        print(" ---")
else:
    print("No obvious final output Identity nodes found matching common patterns.")
# --- Alternative: Check for common output layer names directly (if not mapped by Identity) ---
# This is less likely if Identity is used, but worth a quick check
print("\n--- Checking for Common Embedding Layer Names ---")
potential_embedding_names = [
    'embeddings', 'Bottleneck/BatchNorm/Reshape_1', 'prelogits', 'InceptionResnetV1/Bottleneck/Identity'
    # Add more potential names based on the specific model architecture if needed
]
# Add a suffix like :0 if necessary, TensorFlow usually adds this implicitly for the first output
potential_embedding_names_with_suffix = [name + ':0' for name in potential_embedding_names]
found_embedding_directly = False
for op in ops:
    if op.name in potential_embedding_names or op.name + ':0' in potential_embedding_names_with_suffix:
        print(f"Found potential embedding node directly: {op.name}")
        for output in op.outputs:
            print(f" Output: {output.name}, Shape: {output.shape}, Dtype: {output.dtype}")
        found_embedding_directly = True
        break # Assume the first match is the one
if not found_embedding_directly:
    print(" No direct match found for common embedding names.")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment