# Default negative prompt applied when the caller does not provide one;
# lists artifacts the sampler should steer away from.
NEGATIVE_PROMPT = "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion"

# Registry of prompt-encoding presets. Each entry pairs a template string
# (defined earlier in this module) with `crop_start`, the number of leading
# tokens to drop from the text-encoder output — presumably the length of the
# template preamble (TODO confirm against the template definitions above).
PROMPT_TEMPLATE = {
    "dit-llm-encode": {
        "template": PROMPT_TEMPLATE_ENCODE,
        "crop_start": 36,
    },
    "dit-llm-encode-video": {
        "template": PROMPT_TEMPLATE_ENCODE_VIDEO,
        "crop_start": 95,
    },
}
# ======================= Model ======================
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[kforkincallback_on_step_end_tensor_inputsifknotinself._callback_tensor_inputs]}"
)
ifpromptisnotNoneandprompt_embedsisnotNone:
raiseValueError(
f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
" only forward one of the two.")
elifpromptisNoneandprompt_embedsisNone:
raiseValueError(
"Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.")
# Check existence to make it compatible with FlowMatchEulerDiscreteScheduler
ifhasattr(self.scheduler,"init_noise_sigma"):
# scale the initial noise by the standard deviation required by the scheduler
latents=latents*self.scheduler.init_noise_sigma
returnlatents
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
defget_guidance_scale_embedding(
self,
w:torch.Tensor,
embedding_dim:int=512,
dtype:torch.dtype=torch.float32,
)->torch.Tensor:
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
w (`torch.Tensor`):
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
embedding_dim (`int`, *optional*, defaults to 512):
Dimension of the embeddings to generate.
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
Data type of the generated embeddings.
Returns:
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.