# evaluate_ocrbench_v2.py

import argparse
import json
import subprocess
import sys

import nltk

nltk.download("wordnet")

from .evaluate_mmmu import get_input_output_paths


def convert_to_ocrbench_v2_format(input_path, groundtruth_path):
    """Convert input files to OCRBenchV2 compatible format.

    Every non-blank line of every input file is parsed as a JSON object that
    carries a ``sample_id`` and a ``predict`` field. The ground-truth entry
    selected by ``sample_id`` is copied, extended with the prediction under
    the ``predict`` key, and all collected entries are written out as a single
    JSON list sorted by their ``id`` field.

    Args:
        input_path: Location of the prediction file(s); resolved to concrete
            input paths and an output path by ``get_input_output_paths``.
        groundtruth_path: Path to the ground-truth JSON file. The loaded
            object is indexed with each prediction's ``sample_id``.

    Returns:
        The path of the merged JSON file that was written.
    """
    input_file_paths, output_file_path = get_input_output_paths(input_path, "OCRBench_v2")

    # JSON text is UTF-8; be explicit instead of relying on the locale default,
    # which breaks on non-ASCII OCR text under e.g. a C/POSIX locale.
    with open(groundtruth_path, encoding="utf-8") as f:
        gt = json.load(f)

    output = []

    for input_file_path in input_file_paths:
        with open(input_file_path, "r", encoding="utf-8") as input_file:
            for line in input_file:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue

                res = json.loads(line)

                # Copy the ground-truth entry rather than mutating it in place:
                # if a sample_id shows up more than once, each output record
                # keeps its own prediction instead of all aliasing one dict.
                out = dict(gt[res["sample_id"]])
                out["predict"] = res["predict"]

                output.append(out)

    output.sort(key=lambda x: x["id"])

    with open(output_file_path, "w", encoding="utf-8") as output_file:
        json.dump(output, output_file)

    return output_file_path


def _run_ocrbench_script(script_path, *script_args):
    """Run one OCRBenchV2 script, echo its stderr/stdout, and fail on a non-zero exit."""
    result = subprocess.run(
        # sys.executable guarantees the child uses the same interpreter (and
        # therefore the same environment) as this process; a bare "python"
        # depends on PATH and may be missing or point elsewhere.
        [sys.executable, script_path, *script_args],
        capture_output=True,
        text=True,
    )
    print(result.stderr)
    print(result.stdout)

    # Raise only after echoing, so the script's diagnostics stay visible.
    result.check_returncode()

    return result


def ocrbench_v2_eval(input_path, groundtruth_path, output_path):
    """Run OCRBenchV2 evaluation.

    Converts the predictions to the OCRBenchV2 format, then runs the
    benchmark's ``eval.py`` followed by ``get_score.py`` as subprocesses,
    printing the captured stderr and stdout of each.

    The script paths are relative, so the current working directory must
    contain ``examples/multimodal/MultimodalOCR/OCRBench_v2/eval_scripts``.

    Args:
        input_path: Location of the prediction file(s).
        groundtruth_path: Path to the ground-truth JSON file.
        output_path: File that ``eval.py`` writes and ``get_score.py`` reads.

    Raises:
        subprocess.CalledProcessError: If either script exits with a non-zero
            status. Scoring is not attempted when the evaluation step fails,
            so a stale or missing ``output_path`` is never scored.
    """
    result_file = convert_to_ocrbench_v2_format(input_path, groundtruth_path)

    # The OCRBenchV2 repo has scripts for running the actual evaluation
    _run_ocrbench_script(
        "examples/multimodal/MultimodalOCR/OCRBench_v2/eval_scripts/eval.py",
        "--output_path",
        output_path,
        "--input_path",
        result_file,
    )

    _run_ocrbench_script(
        "examples/multimodal/MultimodalOCR/OCRBench_v2/eval_scripts/get_score.py",
        "--json_file",
        output_path,
    )


def main():
    """Parse the command-line options and launch the OCRBenchV2 evaluation."""
    cli = argparse.ArgumentParser()

    # Every option is a mandatory string path; register them from one table.
    path_options = (
        ("--input-path", "Path to input file(s)"),
        ("--groundtruth-path", "Path to groundtruth file"),
        ("--output-path", "Path to dump outputs from the OCRBench V2 eval script"),
    )
    for flag, description in path_options:
        cli.add_argument(flag, type=str, required=True, help=description)

    parsed = cli.parse_args()

    ocrbench_v2_eval(parsed.input_path, parsed.groundtruth_path, parsed.output_path)


# Script entry point: run the CLI only when this module is executed directly,
# not when it is imported by another module.
if __name__ == "__main__":
    main()