Unverified Commit 541fd6dc authored by mbuke_repo, committed by GitHub

Add Cog configuration for replicate deployment (#27)

* feat: add cog configuration for replicate deployment

* fix: clean up

* fix: clean up the configuration

* fix: download the weights with pget

* fix: download with huggingface

* fix: clean up

* fix: add missing bits

* fix: fix loading the image

* fix: default num_outputs to 1
parent 8303453b
cog.yaml
build:
  gpu: true
  cuda: "11.7"
  python_version: "3.8"
  python_packages:
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "diffusers==0.25.0"
    - "transformers==4.36.2"
    - "huggingface-hub==0.20.2"
    - "numpy"
    - "accelerate"
    - "safetensors"
    - "omegaconf"
    - "peft"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
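With this configuration in place, the model can be exercised locally before pushing to Replicate. Assuming the Cog CLI is installed and the weights are in place, a typical invocation is `cog predict -i prompt="..." -i image=@face.jpg` (the `face.jpg` path here is just an example input).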
predict.py
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
import logging
import os
import shutil
import time
from typing import List

import torch
from cog import BasePredictor, Input, Path
from diffusers import EulerDiscreteScheduler
from diffusers.utils import load_image

from photomaker.pipeline import PhotoMakerStableDiffusionXLPipeline

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
logger = logging.getLogger(__name__)
base_model_path = 'SG161222/RealVisXL_V3.0'
photomaker_path = 'release_model/photomaker-v1.bin'
device = "cuda"
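The commit message says the weights are downloaded with huggingface, but the download step itself isn't shown in this diff. Below is a minimal sketch of how that could look with huggingface_hub, assuming the adapter is published as photomaker-v1.bin in the upstream TencentARC/PhotoMaker repository (the helper name is hypothetical, not part of the repo):

# Hypothetical pre-setup step (not in this diff): fetch the PhotoMaker
# adapter so that release_model/photomaker-v1.bin exists before setup() runs.
from huggingface_hub import hf_hub_download

def download_photomaker_weights(target_dir: str = "release_model") -> str:
    os.makedirs(target_dir, exist_ok=True)
    return hf_hub_download(
        repo_id="TencentARC/PhotoMaker",  # assumed upstream weights repo
        filename="photomaker-v1.bin",
        local_dir=target_dir,
    )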
class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        start = time.time()
        logger.info("Loading model...")
        self.pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
            base_model_path,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
            variant="fp16",
        ).to(device)
        self.pipe.load_photomaker_adapter(
            os.path.dirname(photomaker_path),
            subfolder="",
            weight_name=os.path.basename(photomaker_path),
            trigger_word="img",
        )
        self.pipe.scheduler = EulerDiscreteScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.fuse_lora()
        logger.info(f"Loaded model in {time.time() - start:.2f}s")

    def _load_image(self, path):
        # Copy to a stable path with a known extension before loading, and
        # convert to RGB so RGBA or greyscale inputs don't break the pipeline.
        shutil.copyfile(path, "/tmp/image.png")
        return load_image("/tmp/image.png").convert("RGB")
    @torch.inference_mode()
    def predict(
        self,
        prompt: str = Input(
            description="Input prompt",
            default="sci-fi, closeup portrait photo of a man img wearing sunglasses in an Iron Man suit, face, slim body, high quality, film grain",
        ),
        negative_prompt: str = Input(
            description="Negative input prompt",
            default="(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth",
        ),
        image: Path = Input(
            description="Input ID image of the person to generate",
            default=None,
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed",
            default=None,
        ),
        num_outputs: int = Input(
            description="Number of images to output.",
            ge=1,
            le=4,
            default=1,
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", ge=1, le=500, default=40
        ),
    ) -> List[Path]:
        """Run a single prediction on the model"""
        if image is None:
            raise ValueError("An input ID image is required.")
        if seed is None:
            seed = int.from_bytes(os.urandom(4), "big")
        logger.info(f"Using seed: {seed}")
        generator = torch.Generator(device).manual_seed(seed)
        # PhotoMaker merges the ID embedding in only after start_merge_step;
        # with 20% style strength and 40 steps, merging starts at step 8
        # (capped at step 30).
        style_strength_ratio = 20
        start_merge_step = int(style_strength_ratio / 100 * num_inference_steps)
        if start_merge_step > 30:
            start_merge_step = 30
        images = self.pipe(
            prompt=prompt,
            input_id_images=[self._load_image(image)],
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_inference_steps,
            start_merge_step=start_merge_step,
            generator=generator,
        ).images
        output_paths = []
        for i, output_image in enumerate(images):
            output_path = f"/tmp/out-{i}.png"
            output_image.save(output_path)
            output_paths.append(Path(output_path))
        return output_paths
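For a quick local check outside of Cog, the predictor can also be driven directly. The following is a hypothetical smoke test, not part of this diff: the face.jpg path and all argument values are assumptions, and it requires a CUDA GPU plus the downloaded weights.

# Hypothetical local smoke test (not in the diff); assumes a GPU,
# the weights in place, and an ID photo at ./face.jpg.
if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()
    outputs = predictor.predict(
        prompt="closeup portrait photo of a man img, high quality",  # "img" is the trigger word
        negative_prompt="worst quality, low quality, sketch",
        image=Path("face.jpg"),  # assumed local ID photo
        seed=42,
        num_outputs=1,
        num_inference_steps=40,
    )
    print([str(p) for p in outputs])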