Unverified Commit 541fd6dc authored by mbuke_repo, committed by GitHub

Add Cog configuration for replicate deployment (#27)

* feat: add cog configuration for replicate deployment

* fix: clean up

* fix: clean up the configuration

* fix: download the weights with pget

* fix: download with huggingface

* fix: clean up

* fix: add missing bits

* fix: fix loading the image

* fix: default num_outputs to 1
parent 8303453b
cog.yaml
build:
  gpu: true
  cuda: "11.7"
  python_version: "3.8"
  python_packages:
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "diffusers==0.25.0"
    - "transformers==4.36.2"
    - "huggingface-hub==0.20.2"
    - "numpy"
    - "accelerate"
    - "safetensors"
    - "omegaconf"
    - "peft"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
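With this configuration in place, the model can be exercised locally before pushing to Replicate. Assuming the Cog CLI is installed and the weights are in place, a typical invocation is `cog predict -i prompt="..." -i image=@face.jpg` (the `face.jpg` path here is just an example input).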
predict.py
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
import logging
import os
import shutil
import time
from typing import List

import torch
from cog import BasePredictor, Input, Path
from diffusers import EulerDiscreteScheduler
from diffusers.utils import load_image

from photomaker.pipeline import PhotoMakerStableDiffusionXLPipeline

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
logger = logging.getLogger(__name__)
base_model_path = 'SG161222/RealVisXL_V3.0'
photomaker_path = 'release_model/photomaker-v1.bin'
device = "cuda"
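The commit message says the weights are downloaded with huggingface, but the download step itself isn't shown in this diff. Below is a minimal sketch of how that could look with huggingface_hub, assuming the adapter is published as photomaker-v1.bin in the upstream TencentARC/PhotoMaker repository (the helper name is hypothetical, not part of the repo):

# Hypothetical pre-setup step (not in this diff): fetch the PhotoMaker
# adapter so that release_model/photomaker-v1.bin exists before setup() runs.
from huggingface_hub import hf_hub_download

def download_photomaker_weights(target_dir: str = "release_model") -> str:
    os.makedirs(target_dir, exist_ok=True)
    return hf_hub_download(
        repo_id="TencentARC/PhotoMaker",  # assumed upstream weights repo
        filename="photomaker-v1.bin",
        local_dir=target_dir,
    )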
class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        start = time.time()
        logger.info("Loading model...")
        self.pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
            base_model_path,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
            variant="fp16",
        ).to(device)
        self.pipe.load_photomaker_adapter(
            os.path.dirname(photomaker_path),
            subfolder="",
            weight_name=os.path.basename(photomaker_path),
            trigger_word="img",
        )
        self.pipe.scheduler = EulerDiscreteScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.fuse_lora()
        logger.info(f"Loaded model in {time.time() - start:.2f}s")

    def _load_image(self, path):
        # Copy to a stable path with a known extension before loading, and
        # convert to RGB so RGBA or greyscale inputs don't break the pipeline.
        shutil.copyfile(path, "/tmp/image.png")
        return load_image("/tmp/image.png").convert("RGB")
    @torch.inference_mode()
    def predict(
        self,
        prompt: str = Input(
            description="Input prompt",
            default="sci-fi, closeup portrait photo of a man img wearing sunglasses in an Iron Man suit, face, slim body, high quality, film grain",
        ),
        negative_prompt: str = Input(
            description="Negative input prompt",
            default="(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth",
        ),
        image: Path = Input(
            description="Input ID image of the person to generate",
            default=None,
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed",
            default=None,
        ),
        num_outputs: int = Input(
            description="Number of images to output.",
            ge=1,
            le=4,
            default=1,
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", ge=1, le=500, default=40
        ),
    ) -> List[Path]:
        """Run a single prediction on the model"""
        if image is None:
            raise ValueError("An input ID image is required.")
        if seed is None:
            seed = int.from_bytes(os.urandom(4), "big")
        logger.info(f"Using seed: {seed}")
        generator = torch.Generator(device).manual_seed(seed)
        # PhotoMaker merges the ID embedding in only after start_merge_step;
        # with 20% style strength and 40 steps, merging starts at step 8
        # (capped at step 30).
        style_strength_ratio = 20
        start_merge_step = int(style_strength_ratio / 100 * num_inference_steps)
        if start_merge_step > 30:
            start_merge_step = 30
        images = self.pipe(
            prompt=prompt,
            input_id_images=[self._load_image(image)],
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_inference_steps,
            start_merge_step=start_merge_step,
            generator=generator,
        ).images
        output_paths = []
        for i, output_image in enumerate(images):
            output_path = f"/tmp/out-{i}.png"
            output_image.save(output_path)
            output_paths.append(Path(output_path))
        return output_paths
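For a quick local check outside of Cog, the predictor can also be driven directly. The following is a hypothetical smoke test, not part of this diff: the face.jpg path and all argument values are assumptions, and it requires a CUDA GPU plus the downloaded weights.

# Hypothetical local smoke test (not in the diff); assumes a GPU,
# the weights in place, and an ID photo at ./face.jpg.
if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()
    outputs = predictor.predict(
        prompt="closeup portrait photo of a man img, high quality",  # "img" is the trigger word
        negative_prompt="worst quality, low quality, sketch",
        image=Path("face.jpg"),  # assumed local ID photo
        seed=42,
        num_outputs=1,
        num_inference_steps=40,
    )
    print([str(p) for p in outputs])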