"...en/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "3fab6624fdd2753233a10984b62025076a7e9889"
Unverified commit b8a5dda5, authored by estelleafl, committed by GitHub
Browse files

[ldm3d] Update code to be functional with the new checkpoints (#3875)



* fixed typo

* updated doc to be consistent in naming

* make style/quality

* preprocessing for 4 channels and not 6

* make style

* test for 4c

* make style/quality

* fixed test on cpu

---------
Co-authored-by: Aflalo <estellea@isl-iam1.rr.intel.com>
Co-authored-by: Aflalo <estellea@isl-gpu33.rr.intel.com>
Co-authored-by: Aflalo <estellea@isl-gpu38.rr.intel.com>
parent 572d8e20
...@@ -312,12 +312,17 @@ class VaeImageProcessorLDM3D(VaeImageProcessor): ...@@ -312,12 +312,17 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
""" """
if images.ndim == 3: if images.ndim == 3:
images = images[None, ...] images = images[None, ...]
images = (images * 255).round().astype("uint8") images_depth = images[:, :, :, 3:]
if images.shape[-1] == 1: if images.shape[-1] == 6:
# special case for grayscale (single channel) images images_depth = (images_depth * 255).round().astype("uint8")
raise Exception("Not supported") pil_images = [
Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth
]
elif images.shape[-1] == 4:
images_depth = (images_depth * 65535.0).astype(np.uint16)
pil_images = [Image.fromarray(image_depth, mode="I;16") for image_depth in images_depth]
else: else:
pil_images = [Image.fromarray(self.rgblike_to_depthmap(image[:, :, 3:]), mode="I;16") for image in images] raise Exception("Not supported")
return pil_images return pil_images
...@@ -349,7 +354,11 @@ class VaeImageProcessorLDM3D(VaeImageProcessor): ...@@ -349,7 +354,11 @@ class VaeImageProcessorLDM3D(VaeImageProcessor):
image = self.pt_to_numpy(image) image = self.pt_to_numpy(image)
if output_type == "np": if output_type == "np":
return image[:, :, :, :3], np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0) if image.shape[-1] == 6:
image_depth = np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0)
else:
image_depth = image[:, :, :, 3:]
return image[:, :, :, :3], image_depth
if output_type == "pil": if output_type == "pil":
return self.numpy_to_pil(image), self.numpy_to_depth(image) return self.numpy_to_pil(image), self.numpy_to_depth(image)
......
...@@ -130,9 +130,9 @@ class StableDiffusionLDM3DPipelineFastTests(unittest.TestCase): ...@@ -130,9 +130,9 @@ class StableDiffusionLDM3DPipelineFastTests(unittest.TestCase):
assert depth.shape == (1, 64, 64) assert depth.shape == (1, 64, 64)
expected_slice_rgb = np.array( expected_slice_rgb = np.array(
[0.37301102, 0.7023895, 0.7418312, 0.5163375, 0.5825485, 0.60929704, 0.4188174, 0.48407027, 0.46555096] [0.37338176, 0.70247, 0.74203193, 0.51643604, 0.58256793, 0.60932136, 0.4181095, 0.48355877, 0.46535262]
) )
expected_slice_depth = np.array([103.4673, 85.81202, 87.84926]) expected_slice_depth = np.array([103.46727, 85.812004, 87.849236])
assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2 assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2
assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2 assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2
...@@ -280,10 +280,30 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase): ...@@ -280,10 +280,30 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase):
output = ldm3d_pipe(**inputs) output = ldm3d_pipe(**inputs)
rgb, depth = output.rgb, output.depth rgb, depth = output.rgb, output.depth
expected_rgb_mean = 0.54461557 expected_rgb_mean = 0.495586
expected_rgb_std = 0.2806707 expected_rgb_std = 0.33795515
expected_depth_mean = 143.64595 expected_depth_mean = 112.48518
expected_depth_std = 83.491776 expected_depth_std = 98.489746
assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3
assert np.abs(expected_rgb_std - rgb.std()) < 1e-3
assert np.abs(expected_depth_mean - depth.mean()) < 1e-3
assert np.abs(expected_depth_std - depth.std()) < 1e-3
def test_ldm3d_v2(self):
ldm3d_pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d-4c").to(torch_device)
ldm3d_pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
output = ldm3d_pipe(**inputs)
rgb, depth = output.rgb, output.depth
expected_rgb_mean = 0.4194127
expected_rgb_std = 0.35375586
expected_depth_mean = 0.5638502
expected_depth_std = 0.34686103
assert rgb.shape == (1, 512, 512, 3)
assert depth.shape == (1, 512, 512, 1)
assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3 assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3
assert np.abs(expected_rgb_std - rgb.std()) < 1e-3 assert np.abs(expected_rgb_std - rgb.std()) < 1e-3
assert np.abs(expected_depth_mean - depth.mean()) < 1e-3 assert np.abs(expected_depth_mean - depth.mean()) < 1e-3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment