eval_demo.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import dataclasses
import os
from typing import Any, cast, Dict, List, Optional, Tuple

import lpips
import torch
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import (
    CO3D_CATEGORIES,
)
from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import (
    aggregate_nvs_results,
    eval_batch,
    pretty_print_nvs_metrics,
    summarize_nvs_eval_results,
)
from pytorch3d.implicitron.models.model_dbir import ModelDBIR
from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
from tqdm import tqdm


def main() -> None:
    """
    Evaluates new view synthesis metrics of a simple depth-based image rendering
    (DBIR) model for multisequence/singlesequence tasks for several categories.

    The evaluation is conducted on the same data as in [1] and, hence, the results
    are directly comparable to the numbers reported in [1].

    References:
        [1] J. Reizenstein, R. Shapovalov, P. Henzler, L. Sbordone,
                P. Labatut, D. Novotny:
            Common Objects in 3D: Large-Scale Learning
                and Evaluation of Real-life 3D Category Reconstruction
    """

    task_results = {}
    for task in (Task.SINGLE_SEQUENCE, Task.MULTI_SEQUENCE):
        task_results[task] = []
        for category in CO3D_CATEGORIES[: (20 if task == Task.SINGLE_SEQUENCE else 10)]:
            for single_sequence_id in (
                (0, 1) if task == Task.SINGLE_SEQUENCE else (None,)
            ):
                category_result = evaluate_dbir_for_category(
                    category, task=task, single_sequence_id=single_sequence_id
                )
                print("")
                print(
                    f"Results for task={task}; category={category};"
                    + (
                        f" sequence={single_sequence_id}:"
                        if single_sequence_id is not None
                        else ":"
                    )
                )
                pretty_print_nvs_metrics(category_result)
                print("")

                task_results[task].append(category_result)
            _print_aggregate_results(task, task_results)

    for task in task_results:
        _print_aggregate_results(task, task_results)


def evaluate_dbir_for_category(
    category: str,
    task: Task,
    bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0),
    single_sequence_id: Optional[int] = None,
    num_workers: int = 16,
):
    """
    Evaluates new view synthesis metrics of a simple depth-based image rendering
    (DBIR) model for a given task, category, and sequence (in case task=='singlesequence').

    Args:
        category: Object category.
        bg_color: Background color of the renders.
        task: Evaluation task. Either singlesequence or multisequence.
        single_sequence_id: The ID of the evaluiation sequence for the singlesequence task.
        num_workers: The number of workers for the employed dataloaders.
        path_manager: (optional) Used for interpreting paths.

    Returns:
        category_result: A dictionary of quantitative metrics.
    """

    single_sequence_id = single_sequence_id if single_sequence_id is not None else -1

    torch.manual_seed(42)

    dataset_map_provider_args = {
        "category": category,
        "dataset_root": os.environ["CO3D_DATASET_ROOT"],
        "assert_single_seq": task == Task.SINGLE_SEQUENCE,
        "task_str": task.value,
        "test_on_train": False,
        "test_restrict_sequence_id": single_sequence_id,
        "dataset_JsonIndexDataset_args": {"load_point_clouds": True},
    }
    data_source = ImplicitronDataSource(
        dataset_map_provider_JsonIndexDatasetMapProvider_args=dataset_map_provider_args
    )

    datasets, dataloaders = data_source.get_datasets_and_dataloaders()

    test_dataset = datasets.test
    test_dataloader = dataloaders.test
    if test_dataset is None or test_dataloader is None:
        raise ValueError("must have a test dataset.")

    all_train_cameras = data_source.get_all_train_cameras()

    image_size = cast(JsonIndexDataset, test_dataset).image_width

    if image_size is None:
        raise ValueError("Image size should be set in the dataset")

    # init the simple DBIR model
    model = ModelDBIR(  # pyre-ignore[28]: c’tor implicitly overridden
        render_image_width=image_size,
        render_image_height=image_size,
        bg_color=bg_color,
        max_points=int(1e5),
    )
    model.cuda()

    # init the lpips model for eval
    lpips_model = lpips.LPIPS(net="vgg")
    lpips_model = lpips_model.cuda()

    per_batch_eval_results = []
    print("Evaluating DBIR model ...")
    for frame_data in tqdm(test_dataloader):
        frame_data = dataclass_to_cuda_(frame_data)
        preds = model(**dataclasses.asdict(frame_data))
        per_batch_eval_results.append(
            eval_batch(
                frame_data,
                preds["implicitron_render"],
                bg_color=bg_color,
                lpips_model=lpips_model,
                source_cameras=all_train_cameras,
            )
        )

    if task == Task.SINGLE_SEQUENCE:
        camera_difficulty_bin_breaks = 0.97, 0.98
    else:
        camera_difficulty_bin_breaks = 2.0 / 3, 5.0 / 6

    category_result_flat, category_result = summarize_nvs_eval_results(
        per_batch_eval_results, task, camera_difficulty_bin_breaks
    )

    return category_result["results"]


def _print_aggregate_results(
    task: Task, task_results: Dict[Task, List[List[Dict[str, Any]]]]
) -> None:
    """
    Prints the aggregate metrics for a given task.
    """
    aggregate_task_result = aggregate_nvs_results(task_results[task])
    print("")
    print(f"Aggregate results for task={task}:")
    pretty_print_nvs_metrics(aggregate_task_result)
    print("")


if __name__ == "__main__":
    main()