From 5d848ec07c2011d600ce5e5c1aa02a03152aea9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?= <46008593+standardAI@users.noreply.github.com> Date: Thu, 14 Mar 2024 22:17:35 +0300 Subject: [PATCH] [`Tests`] Update a deprecated parameter in test files and fix several typos (#7277) * Add properties and `IPAdapterTesterMixin` tests for `StableDiffusionPanoramaPipeline` * Fix variable name typo and update comments * Update deprecated `output_type="numpy"` to "np" in test files * Discard changes to src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py * Update test_stable_diffusion_panorama.py * Update numbers in README.md * Update get_guidance_scale_embedding method to use timesteps instead of w * Update number of checkpoints in README.md * Add type hints and fix var name * Fix PyTorch's convention for inplace functions * Fix a typo * Revert "Fix PyTorch's convention for inplace functions" This reverts commit 74350cf65b2c9aa77f08bec7937d7a8b13edb509. * Fix typos * Indent * Refactor get_guidance_scale_embedding method in LEditsPPPipelineStableDiffusionXL class --- README.md | 4 +- .../train_unconditional.py | 2 +- .../train_unconditional.py | 2 +- src/diffusers/models/attention.py | 2 +- .../models/transformers/transformer_2d.py | 2 +- src/diffusers/models/unets/unet_2d_blocks.py | 18 +++--- src/diffusers/models/unets/unet_3d_blocks.py | 6 +- .../amused/pipeline_amused_img2img.py | 4 +- .../controlnet/pipeline_controlnet.py | 16 ++--- .../controlnet/pipeline_controlnet_sd_xl.py | 16 ++--- .../pipeline_spectrogram_diffusion.py | 10 ++-- .../versatile_diffusion/modeling_text_unet.py | 6 +- .../pipeline_latent_consistency_img2img.py | 16 ++--- .../pipeline_latent_consistency_text2img.py | 16 ++--- .../pipeline_leditspp_stable_diffusion_xl.py | 16 ++--- .../pipeline_stable_diffusion.py | 16 ++--- .../pipeline_stable_diffusion_img2img.py | 16 ++--- .../pipeline_stable_diffusion_inpaint.py | 16 ++--- .../pipeline_stable_diffusion_diffedit.py | 4 +- .../pipeline_stable_diffusion_ldm3d.py | 16 ++--- .../pipeline_stable_diffusion_sag.py | 6 +- .../pipeline_stable_diffusion_xl.py | 16 ++--- .../pipeline_stable_diffusion_xl_img2img.py | 16 ++--- .../pipeline_stable_diffusion_xl_inpaint.py | 16 ++--- .../pipeline_stable_diffusion_adapter.py | 16 ++--- .../pipeline_stable_diffusion_xl_adapter.py | 16 ++--- .../pipeline_text_to_video_zero.py | 4 +- tests/pipelines/controlnet/test_controlnet.py | 6 +- .../controlnet/test_controlnet_img2img.py | 4 +- .../controlnet/test_controlnet_inpaint.py | 4 +- .../test_controlnet_inpaint_sdxl.py | 2 +- .../test_controlnet_sdxl_img2img.py | 2 +- tests/pipelines/ddim/test_ddim.py | 6 +- tests/pipelines/ddpm/test_ddpm.py | 10 ++-- tests/pipelines/deepfloyd_if/test_if.py | 2 +- .../pipelines/deepfloyd_if/test_if_img2img.py | 2 +- .../test_if_img2img_superresolution.py | 2 +- .../deepfloyd_if/test_if_inpainting.py | 2 +- .../test_if_inpainting_superresolution.py | 2 +- .../deepfloyd_if/test_if_superresolution.py | 2 +- tests/pipelines/dit/test_dit.py | 2 +- .../latent_diffusion/test_latent_diffusion.py | 6 +- .../test_latent_diffusion_superresolution.py | 6 +- .../paint_by_example/test_paint_by_example.py | 2 +- tests/pipelines/pndm/test_pndm.py | 6 +- .../test_onnx_stable_diffusion.py | 2 +- .../test_onnx_stable_diffusion_img2img.py | 2 +- .../test_onnx_stable_diffusion_upscale.py | 2 +- .../stable_diffusion/test_stable_diffusion.py | 6 +- .../test_stable_diffusion_img2img.py | 2 +- .../test_stable_diffusion_inpaint.py | 
10 ++-- ...st_stable_diffusion_instruction_pix2pix.py | 4 +- .../test_stable_diffusion.py | 6 +- ...test_stable_diffusion_attend_and_excite.py | 4 +- .../test_stable_diffusion_depth.py | 6 +- .../test_stable_diffusion_diffedit.py | 8 +-- .../test_stable_diffusion_inpaint.py | 2 +- .../test_stable_diffusion_latent_upscale.py | 2 +- .../test_stable_diffusion_v_pred.py | 8 +-- .../test_stable_diffusion_adapter.py | 2 +- .../test_stable_diffusion_image_variation.py | 4 +- .../test_stable_diffusion_ldm3d.py | 6 +- .../test_stable_diffusion_panorama.py | 4 +- .../test_stable_diffusion_xl_adapter.py | 2 +- ...stable_diffusion_xl_instruction_pix2pix.py | 2 +- .../stable_unclip/test_stable_unclip.py | 2 +- tests/pipelines/test_pipelines.py | 58 +++++++++---------- tests/pipelines/unclip/test_unclip.py | 2 +- .../pipelines/unidiffuser/test_unidiffuser.py | 8 +-- 69 files changed, 272 insertions(+), 244 deletions(-) diff --git a/README.md b/README.md index 945064ac28ec..c57a5e9ab8b1 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi ## Quickstart -Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 19000+ checkpoints): +Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 22000+ checkpoints): ```python from diffusers import DiffusionPipeline @@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") # Notice that normalization is always applied before the real computation in the following blocks. # 0. Self-Attention diff --git a/src/diffusers/models/transformers/transformer_2d.py b/src/diffusers/models/transformers/transformer_2d.py index 555ea4f63808..b2a188ddfbc2 100644 --- a/src/diffusers/models/transformers/transformer_2d.py +++ b/src/diffusers/models/transformers/transformer_2d.py @@ -308,7 +308,7 @@ def forward( """ if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. 
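The context line above from `Transformer2DModel.forward` mentions converting a 2-D attention mask into an additive bias with a singleton query-token dimension. A minimal sketch of that conversion, assuming the usual diffusers convention of 1 = keep and 0 = discard; the helper name here is illustrative and not part of this patch:

```python
import torch

def mask_to_bias(attention_mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    # A 2-D tensor is a keep/discard mask rather than a bias: map kept positions
    # to 0.0 and discarded positions to a large negative value, then add the
    # singleton query_tokens dimension expected by the attention blocks.
    if attention_mask is not None and attention_mask.ndim == 2:
        attention_mask = (1 - attention_mask.to(dtype)) * -10000.0
        attention_mask = attention_mask.unsqueeze(1)
    return attention_mask
```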
diff --git a/src/diffusers/models/unets/unet_2d_blocks.py b/src/diffusers/models/unets/unet_2d_blocks.py index b9e9e63bbc18..d54630376961 100644 --- a/src/diffusers/models/unets/unet_2d_blocks.py +++ b/src/diffusers/models/unets/unet_2d_blocks.py @@ -846,7 +846,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): @@ -986,7 +986,7 @@ def forward( ) -> torch.FloatTensor: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") if attention_mask is None: # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask. @@ -1116,7 +1116,7 @@ def forward( ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") output_states = () @@ -1241,7 +1241,7 @@ def forward( ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") output_states = () @@ -1986,7 +1986,7 @@ def forward( ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") output_states = () @@ -2201,7 +2201,7 @@ def forward( ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") output_states = () @@ -2483,7 +2483,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. 
`scale` will be ignored.") is_freeu_enabled = ( getattr(self, "s1", None) @@ -3312,7 +3312,7 @@ def forward( ) -> torch.FloatTensor: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") if attention_mask is None: # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask. @@ -3694,7 +3694,7 @@ def forward( ) -> torch.FloatTensor: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") # 1. Self-Attention if self.add_self_attention: diff --git a/src/diffusers/models/unets/unet_3d_blocks.py b/src/diffusers/models/unets/unet_3d_blocks.py index a48f1841c683..97c91f61da1c 100644 --- a/src/diffusers/models/unets/unet_3d_blocks.py +++ b/src/diffusers/models/unets/unet_3d_blocks.py @@ -1183,7 +1183,7 @@ def forward( ): if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") output_states = () @@ -1367,7 +1367,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") is_freeu_enabled = ( getattr(self, "s1", None) @@ -1707,7 +1707,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") hidden_states = self.resnets[0](hidden_states, temb) diff --git a/src/diffusers/pipelines/amused/pipeline_amused_img2img.py b/src/diffusers/pipelines/amused/pipeline_amused_img2img.py index 444d6354b7da..8b49d1a64578 100644 --- a/src/diffusers/pipelines/amused/pipeline_amused_img2img.py +++ b/src/diffusers/pipelines/amused/pipeline_amused_img2img.py @@ -127,7 +127,7 @@ def __call__( on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising process runs for the full number of iterations specified in `num_inference_steps`. A value of 1 essentially ignores `image`. - num_inference_steps (`int`, *optional*, defaults to 16): + num_inference_steps (`int`, *optional*, defaults to 12): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. 
guidance_scale (`float`, *optional*, defaults to 10.0): @@ -191,7 +191,7 @@ def __call__( negative_prompt_embeds is None and negative_encoder_hidden_states is not None ): raise ValueError( - "pass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neither" + "pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither" ) if (prompt is None and prompt_embeds is None) or (prompt is not None and prompt_embeds is not None): diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index 8f31dfc2678a..bdba7833b6e1 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -824,20 +824,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype return latents # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index eca81083be7b..0d7e20dc3725 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -869,20 +869,22 @@ def upcast_vae(self): self.vae.decoder.mid_block.to(dtype) # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. 
Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py b/src/diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py index 496a1f7658f1..475da0b6d188 100644 --- a/src/diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +++ b/src/diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py @@ -133,7 +133,7 @@ def __call__( generator: Optional[torch.Generator] = None, num_inference_steps: int = 100, return_dict: bool = True, - output_type: str = "numpy", + output_type: str = "np", callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback_steps: int = 1, ) -> Union[AudioPipelineOutput, Tuple]: @@ -157,7 +157,7 @@ def __call__( expense of slower inference. return_dict (`bool`, *optional*, defaults to `True`): Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple. - output_type (`str`, *optional*, defaults to `"numpy"`): + output_type (`str`, *optional*, defaults to `"np"`): The output format of the generated audio. callback (`Callable`, *optional*): A function that calls every `callback_steps` steps during inference. The function is called with the @@ -249,16 +249,16 @@ def __call__( logger.info("Generated segment", i) - if output_type == "numpy" and not is_onnx_available(): + if output_type == "np" and not is_onnx_available(): raise ValueError( "Cannot return output in 'np' format if ONNX is not available. Make sure to have ONNX installed or set 'output_type' to 'mel'." ) - elif output_type == "numpy" and self.melgan is None: + elif output_type == "np" and self.melgan is None: raise ValueError( "Cannot return output in 'np' format if melgan component is not defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'." ) - if output_type == "numpy": + if output_type == "np": output = self.melgan(input_features=full_pred_mel.astype(np.float32)) else: output = full_pred_mel diff --git a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py index 62a3a8728a2a..d66693d9b7e9 100644 --- a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py @@ -2004,7 +2004,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") is_freeu_enabled = ( getattr(self, "s1", None) @@ -2338,7 +2338,7 @@ def forward( ) -> torch.FloatTensor: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. 
`scale` will be ignored.") hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): @@ -2479,7 +2479,7 @@ def forward( ) -> torch.FloatTensor: cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") if attention_mask is None: # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask. diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index f64854ea982b..e8482ffe9ce2 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -548,20 +548,22 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. 
""" assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index e9bacaa89ba5..259a65c80782 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -490,20 +490,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py index 874a10a7ccd5..b1f773cb864b 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py @@ -713,20 +713,22 @@ def upcast_vae(self): self.vae.decoder.mid_block.to(dtype) # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. 
""" assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 9e4e6c186ffa..b9b9b60e759d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -669,20 +669,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype return latents # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index b43e0eb2abcd..540eed6ebd56 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -767,20 +767,22 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. 
""" assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 221d5c2cfd3f..79039badd2e5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -909,20 +909,22 @@ def get_timesteps(self, num_inference_steps, strength, device): return timesteps, num_inference_steps - t_start # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py index 4c90ce0646c4..9bb68c1d3ec9 100644 --- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py @@ -1304,7 +1304,7 @@ def __call__( callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback_steps: int = 1, cross_attention_kwargs: Optional[Dict[str, Any]] = None, - clip_ckip: int = None, + clip_skip: int = None, ): r""" The call function to the pipeline for generation. @@ -1426,7 +1426,7 @@ def __call__( prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, lora_scale=text_encoder_lora_scale, - clip_skip=clip_ckip, + clip_skip=clip_skip, ) # For classifier free guidance, we need to do two forward passes. 
# Here we concatenate the unconditional and text embeddings into a single batch diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index c7c05feaf013..170551312782 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -644,20 +644,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype return latents # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py index 96aa006d2ab3..82d7474ac4f3 100644 --- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py @@ -632,7 +632,7 @@ def __call__( # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 # and `sag_scale` is` `s` of equation (16) - # of the self-attentnion guidance paper: https://arxiv.org/pdf/2210.00939.pdf + # of the self-attention guidance paper: https://arxiv.org/pdf/2210.00939.pdf # `sag_scale = 0` means no self-attention guidance do_self_attention_guidance = sag_scale > 0.0 @@ -667,7 +667,7 @@ def __call__( if timesteps.dtype not in [torch.int16, torch.int32, torch.int64]: raise ValueError( - f"{self.__class__.__name__} does not support using a scheduler of type {self.scheduler.__class__.__name__}. Please make sure to use one of 'DDIMScheduler, PNDMScheduler, DDPMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, DPMSolverMultistepScheduler, DPMSolverSinlgestepScheduler'." + f"{self.__class__.__name__} does not support using a scheduler of type {self.scheduler.__class__.__name__}. Please make sure to use one of 'DDIMScheduler, PNDMScheduler, DDPMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, DPMSolverMultistepScheduler, DPMSolverSinglestepScheduler'." ) # 5. 
Prepare latent variables @@ -723,7 +723,7 @@ def get_map_size(module, input, output): noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - # perform self-attention guidance with the stored self-attentnion map + # perform self-attention guidance with the stored self-attention map if do_self_attention_guidance: # classifier-free guidance produces two chunks of attention map # and we only use unconditional one according to equation (25) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 776696e9d486..66f33a65e8da 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -740,20 +740,22 @@ def upcast_vae(self): self.vae.decoder.mid_block.to(dtype) # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index fd4c412f48cb..4b0ea1e3f3d1 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -874,20 +874,22 @@ def upcast_vae(self): self.vae.decoder.mid_block.to(dtype) # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. 
embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index c25628c22c7b..cf5a3319333c 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -1110,20 +1110,22 @@ def upcast_vae(self): self.vae.decoder.mid_block.to(dtype) # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 0b55bb38b5eb..10f8dc66f79d 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -613,20 +613,22 @@ def _default_height_width(self, height, width, image): return height, width # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. 
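The phrase "enrich timestep embeddings" refers to how guidance-distilled UNets consume this embedding: when `time_cond_proj_dim` is set on the UNet config, the pipeline passes the result as `timestep_cond`. A small sketch of that wiring, assuming an already loaded pipeline; the helper name is hypothetical and the pattern mirrors the diffusers convention rather than quoting this patch:

```python
import torch

def make_timestep_cond(pipe, guidance_scale: float, batch_size: int, device, dtype):
    # Only guidance-distilled UNets (e.g. LCM-style checkpoints) define
    # `time_cond_proj_dim`; for other models no guidance embedding is needed.
    if pipe.unet.config.time_cond_proj_dim is None:
        return None
    w = torch.tensor(guidance_scale - 1).repeat(batch_size)
    return pipe.get_guidance_scale_embedding(
        w, embedding_dim=pipe.unet.config.time_cond_proj_dim
    ).to(device=device, dtype=dtype)
```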
embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 4e0cc61f5c1d..59d4022923eb 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -784,20 +784,22 @@ def _default_height_width(self, height, width, image): return height, width # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): + def get_guidance_scale_embedding( + self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32 + ) -> torch.FloatTensor: """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 Args: - timesteps (`torch.Tensor`): - generate embedding vectors at these timesteps + w (`torch.Tensor`): + Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings. embedding_dim (`int`, *optional*, defaults to 512): - dimension of the embeddings to generate - dtype: - data type of the generated embeddings + Dimension of the embeddings to generate. + dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): + Data type of the generated embeddings. Returns: - `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` + `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`. """ assert len(w.shape) == 1 w = w * 1000.0 diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py index d3ff3728c22e..d45408e9543a 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py @@ -575,8 +575,8 @@ def __call__( Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor is generated by sampling using the supplied random `generator`. - output_type (`str`, *optional*, defaults to `"numpy"`): - The output format of the generated video. Choose between `"latent"` and `"numpy"`. + output_type (`str`, *optional*, defaults to `"np"`): + The output format of the generated video. Choose between `"latent"` and `"np"`. 
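The `output_type` values above behave the same way across the pipelines touched by this patch: `"np"` returns NumPy arrays, while `"latent"` skips decoding and returns the raw latents. An illustrative call, shown with a text-to-image pipeline for brevity and using an example checkpoint id that is not part of this patch:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16  # example checkpoint id
).to("cuda")

out = pipe("an astronaut riding a horse", output_type="np")
print(out.images.shape)  # NumPy array, e.g. (1, 512, 512, 3)

latents = pipe("an astronaut riding a horse", output_type="latent").images  # undecoded latents
```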
return_dict (`bool`, *optional*, defaults to `True`): Whether or not to return a [`~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput`] instead of diff --git a/tests/pipelines/controlnet/test_controlnet.py b/tests/pipelines/controlnet/test_controlnet.py index 114a36b37f74..bd6330c12c40 100644 --- a/tests/pipelines/controlnet/test_controlnet.py +++ b/tests/pipelines/controlnet/test_controlnet.py @@ -211,7 +211,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, } @@ -402,7 +402,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": images, } @@ -602,7 +602,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": images, } diff --git a/tests/pipelines/controlnet/test_controlnet_img2img.py b/tests/pipelines/controlnet/test_controlnet_img2img.py index 89e2b3803dee..0e04325d85b2 100644 --- a/tests/pipelines/controlnet/test_controlnet_img2img.py +++ b/tests/pipelines/controlnet/test_controlnet_img2img.py @@ -164,7 +164,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, "control_image": control_image, } @@ -313,7 +313,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, "control_image": control_image, } diff --git a/tests/pipelines/controlnet/test_controlnet_inpaint.py b/tests/pipelines/controlnet/test_controlnet_inpaint.py index 67e0da4de9cd..4cefa4a0463c 100644 --- a/tests/pipelines/controlnet/test_controlnet_inpaint.py +++ b/tests/pipelines/controlnet/test_controlnet_inpaint.py @@ -155,7 +155,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, "mask_image": mask_image, "control_image": control_image, @@ -375,7 +375,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, "mask_image": mask_image, "control_image": control_image, diff --git a/tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py b/tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py index 5f38263e1225..970247d249c8 100644 --- a/tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py +++ b/tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py @@ -172,7 +172,7 @@ def get_dummy_inputs(self, device, seed=0, img_res=64): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": init_image, "mask_image": mask_image, "control_image": control_image, diff --git a/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py b/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py index 7d2ba8cc28fd..0e648a339a2a 100644 --- a/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py +++ b/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py @@ -163,7 +163,7 @@ def get_dummy_inputs(self, device, seed=0): 
"generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "image": image, "control_image": image, } diff --git a/tests/pipelines/ddim/test_ddim.py b/tests/pipelines/ddim/test_ddim.py index 0d84a8e7e5bc..0f0654397a34 100644 --- a/tests/pipelines/ddim/test_ddim.py +++ b/tests/pipelines/ddim/test_ddim.py @@ -63,7 +63,7 @@ def get_dummy_inputs(self, device, seed=0): "batch_size": 1, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -113,7 +113,7 @@ def test_inference_cifar10(self): ddim.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = ddim(generator=generator, eta=0.0, output_type="numpy").images + image = ddim(generator=generator, eta=0.0, output_type="np").images image_slice = image[0, -3:, -3:, -1] @@ -133,7 +133,7 @@ def test_inference_ema_bedroom(self): ddpm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = ddpm(generator=generator, output_type="numpy").images + image = ddpm(generator=generator, output_type="np").images image_slice = image[0, -3:, -3:, -1] diff --git a/tests/pipelines/ddpm/test_ddpm.py b/tests/pipelines/ddpm/test_ddpm.py index bf25ced4cae8..c0cce3a2f237 100644 --- a/tests/pipelines/ddpm/test_ddpm.py +++ b/tests/pipelines/ddpm/test_ddpm.py @@ -50,10 +50,10 @@ def test_fast_inference(self): ddpm.set_progress_bar_config(disable=None) generator = torch.Generator(device=device).manual_seed(0) - image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images + image = ddpm(generator=generator, num_inference_steps=2, output_type="np").images generator = torch.Generator(device=device).manual_seed(0) - image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", return_dict=False)[0] + image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="np", return_dict=False)[0] image_slice = image[0, -3:, -3:, -1] image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] @@ -75,10 +75,10 @@ def test_inference_predict_sample(self): ddpm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images + image = ddpm(generator=generator, num_inference_steps=2, output_type="np").images generator = torch.manual_seed(0) - image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="numpy")[0] + image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="np")[0] image_slice = image[0, -3:, -3:, -1] image_eps_slice = image_eps[0, -3:, -3:, -1] @@ -102,7 +102,7 @@ def test_inference_cifar10(self): ddpm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = ddpm(generator=generator, output_type="numpy").images + image = ddpm(generator=generator, output_type="np").images image_slice = image[0, -3:, -3:, -1] diff --git a/tests/pipelines/deepfloyd_if/test_if.py b/tests/pipelines/deepfloyd_if/test_if.py index 96fd013eca55..b595fa71278d 100644 --- a/tests/pipelines/deepfloyd_if/test_if.py +++ b/tests/pipelines/deepfloyd_if/test_if.py @@ -50,7 +50,7 @@ def get_dummy_inputs(self, device, seed=0): "prompt": "A painting of a squirrel eating a burger", "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/deepfloyd_if/test_if_img2img.py b/tests/pipelines/deepfloyd_if/test_if_img2img.py index 
17a5e371aef5..7fee1b39e3b6 100644 --- a/tests/pipelines/deepfloyd_if/test_if_img2img.py +++ b/tests/pipelines/deepfloyd_if/test_if_img2img.py @@ -55,7 +55,7 @@ def get_dummy_inputs(self, device, seed=0): "image": image, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py index d37f7f46cd5c..d3506d709e12 100644 --- a/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py @@ -57,7 +57,7 @@ def get_dummy_inputs(self, device, seed=0): "original_image": original_image, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/deepfloyd_if/test_if_inpainting.py b/tests/pipelines/deepfloyd_if/test_if_inpainting.py index 85dea36605d5..ebff78eed5e0 100644 --- a/tests/pipelines/deepfloyd_if/test_if_inpainting.py +++ b/tests/pipelines/deepfloyd_if/test_if_inpainting.py @@ -57,7 +57,7 @@ def get_dummy_inputs(self, device, seed=0): "mask_image": mask_image, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py index f8e782d3e566..efef51847489 100644 --- a/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py @@ -59,7 +59,7 @@ def get_dummy_inputs(self, device, seed=0): "mask_image": mask_image, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/deepfloyd_if/test_if_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_superresolution.py index ca2051721c4d..7e43a2f60371 100644 --- a/tests/pipelines/deepfloyd_if/test_if_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_superresolution.py @@ -52,7 +52,7 @@ def get_dummy_inputs(self, device, seed=0): "image": image, "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/dit/test_dit.py b/tests/pipelines/dit/test_dit.py index 1f36776bdfd2..13e72f2ea287 100644 --- a/tests/pipelines/dit/test_dit.py +++ b/tests/pipelines/dit/test_dit.py @@ -74,7 +74,7 @@ def get_dummy_inputs(self, device, seed=0): "class_labels": [1], "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion.py b/tests/pipelines/latent_diffusion/test_latent_diffusion.py index 4faa0e7690d7..b1ff68400769 100644 --- a/tests/pipelines/latent_diffusion/test_latent_diffusion.py +++ b/tests/pipelines/latent_diffusion/test_latent_diffusion.py @@ -113,7 +113,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -153,7 +153,7 @@ def get_inputs(self, device, dtype=torch.float32, seed=0): "generator": generator, "num_inference_steps": 3, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -189,7 +189,7 @@ def get_inputs(self, device, dtype=torch.float32, seed=0): 
"generator": generator, "num_inference_steps": 50, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py index a9df2c1130aa..576fe24bfbfa 100644 --- a/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py +++ b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py @@ -84,7 +84,7 @@ def test_inference_superresolution(self): init_image = self.dummy_image.to(device) generator = torch.Generator(device=device).manual_seed(0) - image = ldm(image=init_image, generator=generator, num_inference_steps=2, output_type="numpy").images + image = ldm(image=init_image, generator=generator, num_inference_steps=2, output_type="np").images image_slice = image[0, -3:, -3:, -1] @@ -109,7 +109,7 @@ def test_inference_superresolution_fp16(self): init_image = self.dummy_image.to(torch_device) - image = ldm(init_image, num_inference_steps=2, output_type="numpy").images + image = ldm(init_image, num_inference_steps=2, output_type="np").images assert image.shape == (1, 64, 64, 3) @@ -128,7 +128,7 @@ def test_inference_superresolution(self): ldm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = ldm(image=init_image, generator=generator, num_inference_steps=20, output_type="numpy").images + image = ldm(image=init_image, generator=generator, num_inference_steps=20, output_type="np").images image_slice = image[0, -3:, -3:, -1] diff --git a/tests/pipelines/paint_by_example/test_paint_by_example.py b/tests/pipelines/paint_by_example/test_paint_by_example.py index 7771977d7cff..cb76570d6b9a 100644 --- a/tests/pipelines/paint_by_example/test_paint_by_example.py +++ b/tests/pipelines/paint_by_example/test_paint_by_example.py @@ -117,7 +117,7 @@ def get_dummy_inputs(self, device="cpu", seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/pndm/test_pndm.py b/tests/pipelines/pndm/test_pndm.py index d4cb6a546dd9..5efb244919da 100644 --- a/tests/pipelines/pndm/test_pndm.py +++ b/tests/pipelines/pndm/test_pndm.py @@ -49,10 +49,10 @@ def test_inference(self): pndm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = pndm(generator=generator, num_inference_steps=20, output_type="numpy").images + image = pndm(generator=generator, num_inference_steps=20, output_type="np").images generator = torch.manual_seed(0) - image_from_tuple = pndm(generator=generator, num_inference_steps=20, output_type="numpy", return_dict=False)[0] + image_from_tuple = pndm(generator=generator, num_inference_steps=20, output_type="np", return_dict=False)[0] image_slice = image[0, -3:, -3:, -1] image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] @@ -77,7 +77,7 @@ def test_inference_cifar10(self): pndm.to(torch_device) pndm.set_progress_bar_config(disable=None) generator = torch.manual_seed(0) - image = pndm(generator=generator, output_type="numpy").images + image = pndm(generator=generator, output_type="np").images image_slice = image[0, -3:, -3:, -1] diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py index 229b1665bdd1..c3bfa6b5dabc 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py +++ 
b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py @@ -46,7 +46,7 @@ def get_dummy_inputs(self, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py index 33b461bac336..274cb6718233 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py @@ -55,7 +55,7 @@ def get_dummy_inputs(self, seed=0): "num_inference_steps": 3, "strength": 0.75, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py index 56c10adbd6ae..2df64ad1d685 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py @@ -55,7 +55,7 @@ def get_dummy_inputs(self, seed=0): "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 82afacaa2a8b..86bdc9af1b90 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -775,7 +775,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -950,7 +950,7 @@ def test_stable_diffusion_vae_tiling(self): generator=generator, guidance_scale=7.5, num_inference_steps=2, - output_type="numpy", + output_type="np", ) image_chunked = output_chunked.images @@ -966,7 +966,7 @@ def test_stable_diffusion_vae_tiling(self): generator=generator, guidance_scale=7.5, num_inference_steps=2, - output_type="numpy", + output_type="np", ) image = output.images diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 4483fd8e0b8c..922bf4dba381 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -179,7 +179,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index 218ac3e76a0e..e4e97d7bfc83 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -199,7 +199,7 @@ def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -470,7 +470,7 @@ def get_dummy_inputs_2images(self, device, seed=0, img_res=64): "generator": [generator1, generator2], "num_inference_steps": 2, "guidance_scale": 6.0, - 
"output_type": "numpy", + "output_type": "np", } return inputs @@ -586,7 +586,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -847,7 +847,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -1072,7 +1072,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 50, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py index 0986f02deeaa..fc6bd2f4e043 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py @@ -131,7 +131,7 @@ def get_dummy_inputs(self, device, seed=0): "num_inference_steps": 2, "guidance_scale": 6.0, "image_guidance_scale": 1, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -288,7 +288,7 @@ def get_inputs(self, seed=0): "num_inference_steps": 3, "guidance_scale": 7.5, "image_guidance_scale": 1.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py index 7aef098916ca..63e1cb30e203 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -151,7 +151,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -336,7 +336,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -557,7 +557,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 50, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py index fdc41a2f3bc2..e342ca7c9ee7 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py @@ -138,7 +138,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 1, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "max_iter_to_alter": 2, "thresholds": {0: 0.7}, } @@ -225,7 +225,7 @@ def test_attend_and_excite_fp16(self): generator=generator, num_inference_steps=5, max_iter_to_alter=5, - output_type="numpy", + output_type="np", ).images[0] expected_image = load_numpy( diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py index 76d480ea484c..d247efe581d7 
100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py @@ -174,7 +174,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -395,7 +395,7 @@ def get_inputs(self, device="cpu", dtype=torch.float32, seed=0): "num_inference_steps": 3, "strength": 0.75, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -534,7 +534,7 @@ def get_inputs(self, device="cpu", dtype=torch.float32, seed=0): "num_inference_steps": 3, "strength": 0.75, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py index 76343036de04..8e7b9b56e0ce 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py @@ -143,7 +143,7 @@ def get_dummy_inputs(self, device, seed=0): "num_inference_steps": 2, "inpaint_strength": 1.0, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -165,7 +165,7 @@ def get_dummy_mask_inputs(self, device, seed=0): "num_maps_per_mask": 2, "mask_encode_strength": 1.0, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -186,7 +186,7 @@ def get_dummy_inversion_inputs(self, device, seed=0): "inpaint_strength": 1.0, "guidance_scale": 6.0, "decode_latents": True, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -417,7 +417,7 @@ def test_stable_diffusion_diffedit_dpm(self): negative_prompt=source_prompt, inpaint_strength=0.7, num_inference_steps=25, - output_type="numpy", + output_type="np", ).images[0] expected_image = ( diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py index 6157b32a8734..563d518680d9 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py @@ -129,7 +129,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py index 04721b4a8cc1..8434a6245d32 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py @@ -155,7 +155,7 @@ def get_dummy_inputs(self, device, seed=0): "image": self.dummy_image.cpu(), "generator": generator, "num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py index be5b639cee56..8ef8e8ab5a34 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py @@ -308,7 +308,7 @@ def test_stable_diffusion_v_pred_euler(self): prompt = "A painting of a squirrel eating a burger" generator 
= torch.manual_seed(0) - output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="numpy") + output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="np") image = output.images image_slice = image[0, 253:256, 253:256, -1] @@ -335,7 +335,7 @@ def test_stable_diffusion_v_pred_dpm(self): prompt = "a photograph of an astronaut riding a horse" generator = torch.manual_seed(0) image = sd_pipe( - [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="numpy" + [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="np" ).images image_slice = image[0, 253:256, 253:256, -1] @@ -357,7 +357,7 @@ def test_stable_diffusion_attention_slicing_v_pred(self): pipe.enable_attention_slicing() generator = torch.manual_seed(0) output_chunked = pipe( - [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy" + [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="np" ) image_chunked = output_chunked.images @@ -369,7 +369,7 @@ def test_stable_diffusion_attention_slicing_v_pred(self): # disable slicing pipe.disable_attention_slicing() generator = torch.manual_seed(0) - output = pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy") + output = pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="np") image = output.images # make sure that more than 3.0 GB is allocated diff --git a/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py b/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py index f1b61c3364f0..bd721e6c5064 100644 --- a/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py +++ b/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py @@ -246,7 +246,7 @@ def get_dummy_inputs(self, device, seed=0, height=64, width=64, num_images=1): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py b/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py index 4dd7de7a943b..78d414496562 100644 --- a/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py +++ b/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py @@ -117,7 +117,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -293,7 +293,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 50, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py index 9ac69c895521..a5de5eff5cab 100644 --- a/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py +++ b/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py @@ -107,7 +107,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs 
@@ -222,7 +222,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -268,7 +268,7 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 "generator": generator, "num_inference_steps": 50, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py b/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py index aa7212b0f9ff..bb0bf8c7124d 100644 --- a/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py +++ b/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py @@ -105,7 +105,7 @@ def get_dummy_inputs(self, device, seed=0): "width": None, "num_inference_steps": 1, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -263,7 +263,7 @@ def get_inputs(self, seed=0): "generator": generator, "num_inference_steps": 3, "guidance_scale": 7.5, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 0bcffeb078b8..af5a8f5ccccb 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -290,7 +290,7 @@ def get_dummy_inputs(self, device, seed=0, height=64, width=64, num_images=1): "generator": generator, "num_inference_steps": 2, "guidance_scale": 5.0, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py index 0b4324f60ba3..98cecb4e0f7c 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py @@ -143,7 +143,7 @@ def get_dummy_inputs(self, device, seed=0): "num_inference_steps": 2, "guidance_scale": 6.0, "image_guidance_scale": 1, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py index f05edf6861f1..080fa5bb3267 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip.py @@ -168,7 +168,7 @@ def get_dummy_inputs(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "prior_num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 8954456913c0..25fb5e7182dd 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -117,10 +117,10 @@ def _test_from_save_pretrained_dynamo(in_queue, out_queue, timeout): new_ddpm.to(torch_device) generator = torch.Generator(device=torch_device).manual_seed(0) - image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images + image = ddpm(generator=generator, num_inference_steps=5, output_type="np").images generator = torch.Generator(device=torch_device).manual_seed(0) - new_image = 
new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images + new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="np").images assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass" except Exception: @@ -363,12 +363,12 @@ def test_download_no_safety_checker(self): ) pipe = pipe.to(torch_device) generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="np").images pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") pipe_2 = pipe_2.to(torch_device) generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="np").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -379,7 +379,7 @@ def test_load_no_safety_checker_explicit_locally(self): ) pipe = pipe.to(torch_device) generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="np").images with tempfile.TemporaryDirectory() as tmpdirname: pipe.save_pretrained(tmpdirname) @@ -388,7 +388,7 @@ def test_load_no_safety_checker_explicit_locally(self): generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="np").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -398,7 +398,7 @@ def test_load_no_safety_checker_default_locally(self): pipe = pipe.to(torch_device) generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="np").images with tempfile.TemporaryDirectory() as tmpdirname: pipe.save_pretrained(tmpdirname) @@ -407,7 +407,7 @@ def test_load_no_safety_checker_default_locally(self): generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="np").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -590,7 +590,7 @@ def test_local_save_load_index(self): ) pipe = pipe.to(torch_device) generator = torch.manual_seed(0) - out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="np").images with tempfile.TemporaryDirectory() as tmpdirname: pipe.save_pretrained(tmpdirname) @@ -601,7 +601,7 @@ def test_local_save_load_index(self): generator = torch.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="np").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -626,7 +626,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("<*>", pipe.tokenizer) == "<*>" prompt = "hey <*>" - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # single token load local with 
weight name @@ -642,7 +642,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("<**>", pipe.tokenizer) == "<**>" prompt = "hey <**>" - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # multi token load @@ -665,7 +665,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("<***>", pipe.tokenizer) == "<***> <***>_1 <***>_2" prompt = "hey <***>" - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # multi token load a1111 @@ -693,7 +693,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("<****>", pipe.tokenizer) == "<****> <****>_1 <****>_2" prompt = "hey <****>" - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # multi embedding load @@ -718,7 +718,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("<******>", pipe.tokenizer) == "<******>" prompt = "hey <*****> <******>" - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # single token state dict load @@ -731,7 +731,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("", pipe.tokenizer) == "" prompt = "hey " - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # multi embedding state dict load @@ -751,7 +751,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("", pipe.tokenizer) == "" prompt = "hey " - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # auto1111 multi-token state dict load @@ -777,7 +777,7 @@ def test_text_inversion_download(self): assert pipe._maybe_convert_prompt("", pipe.tokenizer) == " _1 _2" prompt = "hey " - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) # multiple references to multi embedding @@ -789,7 +789,7 @@ def test_text_inversion_download(self): ) prompt = "hey " - out = pipe(prompt, num_inference_steps=1, output_type="numpy").images + out = pipe(prompt, num_inference_steps=1, output_type="np").images assert out.shape == (1, 128, 128, 3) def test_text_inversion_multi_tokens(self): @@ -1739,10 +1739,10 @@ def test_from_save_pretrained(self): new_ddpm.to(torch_device) generator = torch.Generator(device=torch_device).manual_seed(0) - image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images + image = ddpm(generator=generator, num_inference_steps=5, output_type="np").images generator = torch.Generator(device=torch_device).manual_seed(0) - new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images + new_image = new_ddpm(generator=generator, num_inference_steps=5, output_type="np").images assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass" @@ -1765,10 +1765,10 @@ def 
test_from_pretrained_hub(self): ddpm_from_hub.set_progress_bar_config(disable=None) generator = torch.Generator(device=torch_device).manual_seed(0) - image = ddpm(generator=generator, num_inference_steps=5, output_type="numpy").images + image = ddpm(generator=generator, num_inference_steps=5, output_type="np").images generator = torch.Generator(device=torch_device).manual_seed(0) - new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images + new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="np").images assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass" @@ -1788,10 +1788,10 @@ def test_from_pretrained_hub_pass_model(self): ddpm_from_hub_custom_model.set_progress_bar_config(disable=None) generator = torch.Generator(device=torch_device).manual_seed(0) - image = ddpm_from_hub_custom_model(generator=generator, num_inference_steps=5, output_type="numpy").images + image = ddpm_from_hub_custom_model(generator=generator, num_inference_steps=5, output_type="np").images generator = torch.Generator(device=torch_device).manual_seed(0) - new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="numpy").images + new_image = ddpm_from_hub(generator=generator, num_inference_steps=5, output_type="np").images assert np.abs(image - new_image).max() < 1e-5, "Models don't give the same forward pass" @@ -1803,7 +1803,7 @@ def test_output_format(self): pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - images = pipe(output_type="numpy").images + images = pipe(output_type="np").images assert images.shape == (1, 32, 32, 3) assert isinstance(images, np.ndarray) @@ -1878,7 +1878,7 @@ def test_weighted_prompts_compel(self): generator = [torch.Generator(device="cpu").manual_seed(33) for _ in range(prompt_embeds.shape[0])] images = pipe( - prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=20, output_type="numpy" + prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=20, output_type="np" ).images for i, image in enumerate(images): @@ -1916,7 +1916,7 @@ def test_ddpm_ddim_equality_batched(self): ddim.set_progress_bar_config(disable=None) generator = torch.Generator(device=torch_device).manual_seed(seed) - ddpm_images = ddpm(batch_size=2, generator=generator, output_type="numpy").images + ddpm_images = ddpm(batch_size=2, generator=generator, output_type="np").images generator = torch.Generator(device=torch_device).manual_seed(seed) ddim_images = ddim( @@ -1924,7 +1924,7 @@ def test_ddpm_ddim_equality_batched(self): generator=generator, num_inference_steps=1000, eta=1.0, - output_type="numpy", + output_type="np", use_clipped_model_output=True, # Need this to make DDIM match DDPM ).images diff --git a/tests/pipelines/unclip/test_unclip.py b/tests/pipelines/unclip/test_unclip.py index 60c5c52fe431..e3b2222b07f0 100644 --- a/tests/pipelines/unclip/test_unclip.py +++ b/tests/pipelines/unclip/test_unclip.py @@ -233,7 +233,7 @@ def get_dummy_inputs(self, device, seed=0): "prior_num_inference_steps": 2, "decoder_num_inference_steps": 2, "super_res_num_inference_steps": 2, - "output_type": "numpy", + "output_type": "np", } return inputs diff --git a/tests/pipelines/unidiffuser/test_unidiffuser.py b/tests/pipelines/unidiffuser/test_unidiffuser.py index ba8026db6154..11a02a656d3f 100644 --- a/tests/pipelines/unidiffuser/test_unidiffuser.py +++ b/tests/pipelines/unidiffuser/test_unidiffuser.py @@ -158,7 +158,7 @@ def get_dummy_inputs(self, device, 
seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", } return inputs @@ -199,7 +199,7 @@ def get_dummy_inputs_with_latents(self, device, seed=0): "generator": generator, "num_inference_steps": 2, "guidance_scale": 6.0, - "output_type": "numpy", + "output_type": "np", "prompt_latents": latents.get("prompt_latents"), "vae_latents": latents.get("vae_latents"), "clip_latents": latents.get("clip_latents"), @@ -590,7 +590,7 @@ def get_inputs(self, device, seed=0, generate_latents=False): "generator": generator, "num_inference_steps": 3, "guidance_scale": 8.0, - "output_type": "numpy", + "output_type": "np", } if generate_latents: latents = self.get_fixed_latents(device, seed=seed) @@ -706,7 +706,7 @@ def get_inputs(self, device, seed=0, generate_latents=False): "generator": generator, "num_inference_steps": 3, "guidance_scale": 8.0, - "output_type": "numpy", + "output_type": "np", } if generate_latents: latents = self.get_fixed_latents(device, seed=seed)
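
For context, a minimal sketch (not part of the patch) of the parameter rename these hunks apply, using the same tiny test checkpoint that tests/pipelines/test_pipelines.py already loads; the prompt text and step count below are illustrative placeholders:

import numpy as np
import torch

from diffusers import StableDiffusionPipeline

# Tiny checkpoint already referenced by the tests touched in this patch.
pipe = StableDiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None
)

generator = torch.manual_seed(0)

# Deprecated spelling, replaced throughout this patch:
#   pipe("a prompt", num_inference_steps=2, generator=generator, output_type="numpy")
# Current spelling; `.images` is a NumPy array of shape (batch, height, width, 3):
images = pipe(
    "a prompt", num_inference_steps=2, generator=generator, output_type="np"
).images
assert isinstance(images, np.ndarray)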