"scripts/convert_zero123_to_diffusers.py" did not exist on "a7ca03aa85f94574f06576d2155b3ec061fe8d63"
Unverified Commit 162267ca authored by Ponku's avatar Ponku Committed by GitHub
Browse files

Add FallingThings dataset (#6346)



* Added Falling Things datasets

* Renamed split to variant

* Update torchvision/datasets/_stereo_matching.py

Changed constant formatting
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
parent 8120c594
......@@ -111,6 +111,7 @@ Stereo Matching
CarlaStereo
Kitti2012Stereo
Kitti2015Stereo
FallingThingsStereo
SceneFlowStereo
SintelStereo
InStereo2k
......
......@@ -2841,6 +2841,69 @@ class CarlaStereoTestCase(datasets_utils.ImageDatasetTestCase):
datasets_utils.shape_test_for_stereo(left, right, disparity)
class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase):
    """Dataset tests for ``datasets.FallingThingsStereo`` using a generated fake directory tree."""

    DATASET_CLASS = datasets.FallingThingsStereo
    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(variant=("single", "mixed", "both"))
    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))

    @staticmethod
    def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]):
        """Save an all-ones, single-channel ``uint8`` image of shape ``size`` as ``root / name``."""
        depth_values = np.ones((size[0], size[1]), dtype=np.uint8)
        target_path = pathlib.Path(root) / name
        PIL.Image.fromarray(depth_values).save(target_path)

    @staticmethod
    def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> None:
        """Create ``root / scene_name`` holding one stereo pair, its depth maps and camera settings."""
        scene_dir = pathlib.Path(root) / scene_name
        scene_dir.mkdir(parents=True, exist_ok=True)

        sides = ("left", "right")

        # jpg images (note the swapped size[1] / size[0] order passed to the helper)
        for side in sides:
            datasets_utils.create_image_file(scene_dir, f"image1.{side}.jpg", size=(3, size[1], size[0]))

        # single channel depth maps
        for side in sides:
            FallingThingsStereoTestCase._make_dummy_depth_map(
                scene_dir, f"image1.{side}.depth.png", size=(size[0], size[1])
            )

        # camera settings json. Minimal example for _read_disparity function testing
        camera_settings = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]}
        with open(scene_dir / "_camera_settings.json", "w") as settings_file:
            json.dump(camera_settings, settings_file)

    def inject_fake_data(self, tmpdir, config):
        """Populate ``tmpdir / "FallingThings"`` for ``config["variant"]`` and return the example count."""
        dataset_dir = pathlib.Path(tmpdir) / "FallingThings"
        os.makedirs(dataset_dir, exist_ok=True)

        requested = config["variant"]
        scenes_per_variant = {"single": 2, "mixed": 3, "both": 4}.get(requested, 0)
        variant_folders = {
            "single": ["single"],
            "mixed": ["mixed"],
            "both": ["single", "mixed"],
        }.get(requested, [])

        for folder_name in variant_folders:
            folder = pathlib.Path(dataset_dir) / folder_name
            os.makedirs(folder, exist_ok=True)
            for scene_idx in range(scenes_per_variant):
                self._make_scene_folder(
                    root=folder,
                    scene_name=f"scene_{scene_idx:06d}",
                    size=(100, 200),
                )

        # "both" writes the same number of scenes into each of the two variant folders
        if requested == "both":
            return scenes_per_variant * 2
        return scenes_per_variant

    def test_splits(self):
        """Run the stereo shape check on every sample of the "single" and "mixed" variants."""
        for variant_name in ("single", "mixed"):
            with self.create_dataset(variant=variant_name) as (dataset, _):
                for sample in dataset:
                    left, right, disparity = sample
                    datasets_utils.shape_test_for_stereo(left, right, disparity)

    def test_bad_input(self):
        """An unknown variant name must raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown value 'bad' for argument variant"):
            with self.create_dataset(variant="bad"):
                pass
class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.SceneFlowStereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
......
from ._optical_flow import FlyingChairs, FlyingThings3D, HD1K, KittiFlow, Sintel
from ._stereo_matching import CarlaStereo, InStereo2k, Kitti2012Stereo, Kitti2015Stereo, SceneFlowStereo, SintelStereo
from ._stereo_matching import (
CarlaStereo,
FallingThingsStereo,
InStereo2k,
Kitti2012Stereo,
Kitti2015Stereo,
SceneFlowStereo,
SintelStereo,
)
from .caltech import Caltech101, Caltech256
from .celeba import CelebA
from .cifar import CIFAR10, CIFAR100
......@@ -109,6 +117,7 @@ __all__ = (
"Kitti2012Stereo",
"Kitti2015Stereo",
"CarlaStereo",
"FallingThingsStereo",
"SceneFlowStereo",
"SintelStereo",
"InStereo2k",
......
import functools
import json
import os
from abc import ABC, abstractmethod
from glob import glob
......@@ -362,6 +363,104 @@ class Kitti2015Stereo(StereoMatchingDataset):
return super().__getitem__(index)
class FallingThingsStereo(StereoMatchingDataset):
    """`FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right.jpg
                        ...
                    scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right.jpg
                        ...
                    scene2
                    ...

    Args:
        root (string): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: str, variant: str = "single", transforms: Optional[Callable] = None):
        super().__init__(root, transforms)

        dataset_dir = Path(root) / "FallingThings"

        verify_str_arg(variant, "variant", valid_values=("single", "mixed", "both"))

        # "both" expands to the two on-disk variant folders; otherwise the folder is the variant itself
        selected = ["single", "mixed"] if variant == "both" else [variant]

        for variant_name in selected:
            # every scene folder directly below the variant folder
            scenes = dataset_dir / variant_name / "*"

            image_patterns = (str(scenes / "*.left.jpg"), str(scenes / "*.right.jpg"))
            self._images += self._scan_pairs(*image_patterns)

            depth_patterns = (str(scenes / "*.left.depth.png"), str(scenes / "*.right.depth.png"))
            self._disparities += self._scan_pairs(*depth_patterns)

    def _read_disparity(self, file_path: str) -> Tuple:
        """Convert the depth map at ``file_path`` into a disparity map.

        The focal length is read from the ``_camera_settings.json`` file located
        next to the depth map.

        Returns:
            tuple: ``(disparity_map, valid_mask)`` where ``disparity_map`` is the
            computed array with a leading channel axis and ``valid_mask`` is always ``None``.
        """
        # (H, W) image
        depth = np.asarray(Image.open(file_path))

        # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt
        # in order to extract disparity from depth maps
        settings_file = Path(file_path).parent / "_camera_settings.json"
        with open(settings_file, "r") as f:
            camera_settings = json.load(f)
        focal = camera_settings["camera_settings"][0]["intrinsic_settings"]["fx"]

        # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
        baseline = 6
        pixel_constant = 100  # pixel constant is inverted
        numerator = baseline * focal * pixel_constant
        disparity_map = numerator / depth.astype(np.float32)

        # unsqueeze disparity to (C, H, W); no validity mask is produced for this dataset
        return disparity_map[np.newaxis, :, :], None

    def __getitem__(self, index: int) -> Tuple:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        sample = super().__getitem__(index)
        return sample
class SceneFlowStereo(StereoMatchingDataset):
"""Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
This interface provides access to the `FlyingThings3D`, `Monkaa` and `Driving` datasets.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment