Commit
[Tests] Update a deprecated parameter in test files and fix several typos (huggingface#7277)

* Add properties and `IPAdapterTesterMixin` tests for `StableDiffusionPanoramaPipeline`

* Fix variable name typo and update comments

* Update deprecated `output_type="numpy"` to "np" in test files

* Discard changes to src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py

* Update test_stable_diffusion_panorama.py

* Update numbers in README.md

* Update get_guidance_scale_embedding method to document `w` instead of `timesteps`

* Update number of checkpoints in README.md

* Add type hints and fix var name

* Fix PyTorch's convention for inplace functions

* Fix a typo

* Revert "Fix PyTorch's convention for inplace functions"

This reverts commit 74350cf.

* Fix typos

* Indent

* Refactor get_guidance_scale_embedding method in LEditsPPPipelineStableDiffusionXL class
tolgacangoz authored Mar 14, 2024
1 parent 4974b84 commit 5d848ec
Showing 69 changed files with 272 additions and 244 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi

## Quickstart

- Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 19000+ checkpoints):
+ Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 22000+ checkpoints):

```python
from diffusers import DiffusionPipeline
```
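The diff shows only the opening line of the quickstart snippet. For orientation, a minimal text-to-image sketch of the pattern this paragraph describes (model ID and prompt are illustrative, not taken from this diff):

```python
import torch
from diffusers import DiffusionPipeline

# Load any pretrained diffusion model from the Hub by its repo ID.
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipeline.to("cuda")

# Generate an image from a text prompt.
image = pipeline("An image of a squirrel in Picasso style").images[0]
image.save("squirrel.png")
```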
@@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
- https://github.com/deep-floyd/IF
- https://github.com/bentoml/BentoML
- https://github.com/bmaltais/kohya_ss
- - +8000 other amazing GitHub repositories 💪
+ - +9000 other amazing GitHub repositories 💪

Thank you for using us ❤️.

@@ -637,7 +637,7 @@ def transform_images(examples):
generator=generator,
batch_size=args.eval_batch_size,
num_inference_steps=args.ddpm_num_inference_steps,
output_type="numpy",
output_type="np",
).images

if args.use_ema:
@@ -648,7 +648,7 @@ def transform_images(examples):
generator=generator,
batch_size=args.eval_batch_size,
num_inference_steps=args.ddpm_num_inference_steps,
output_type="numpy",
output_type="np",
).images

if args.use_ema:
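The two training-script hunks above swap the deprecated `output_type="numpy"` for its supported spelling `"np"`; both return the generated images as a NumPy array. A minimal sketch of the same call in isolation, assuming the `DDPMPipeline` these unconditional-training examples evaluate with (model ID illustrative):

```python
import numpy as np
from diffusers import DDPMPipeline

pipe = DDPMPipeline.from_pretrained("google/ddpm-cat-256")
# "np" is the supported value; "numpy" was a deprecated alias for the same format.
images = pipe(batch_size=1, num_inference_steps=10, output_type="np").images
assert isinstance(images, np.ndarray)  # shape (batch, height, width, 3), floats in [0, 1]
```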
2 changes: 1 addition & 1 deletion src/diffusers/models/attention.py
@@ -293,7 +293,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

# Notice that normalization is always applied before the real computation in the following blocks.
# 0. Self-Attention
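This one-character fix ("depcrecated" → "deprecated") recurs across many attention and UNet blocks below. Stripped of model code, the guard being corrected follows this shape (a standalone sketch, not the library's exact module):

```python
import logging

logger = logging.getLogger(__name__)

def forward(hidden_states, cross_attention_kwargs=None):
    # `scale` was once routed to attention processors through these kwargs;
    # the blocks now warn and ignore it.
    if cross_attention_kwargs is not None:
        if cross_attention_kwargs.get("scale", None) is not None:
            logger.warning(
                "Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored."
            )
    return hidden_states
```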
2 changes: 1 addition & 1 deletion src/diffusers/models/transformers/transformer_2d.py
@@ -308,7 +308,7 @@ def forward(
"""
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
# ensure attention_mask is a bias, and give it a singleton query_tokens dimension.
# we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward.
# we can tell by counting dims; if ndim == 2: it's a mask rather than a bias.
18 changes: 9 additions & 9 deletions src/diffusers/models/unets/unet_2d_blocks.py
@@ -846,7 +846,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

hidden_states = self.resnets[0](hidden_states, temb)
for attn, resnet in zip(self.attentions, self.resnets[1:]):
@@ -986,7 +986,7 @@ def forward(
) -> torch.FloatTensor:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

if attention_mask is None:
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -1116,7 +1116,7 @@ def forward(
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

output_states = ()

@@ -1241,7 +1241,7 @@ def forward(
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

output_states = ()

@@ -1986,7 +1986,7 @@ def forward(
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

output_states = ()

@@ -2201,7 +2201,7 @@ def forward(
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

output_states = ()

@@ -2483,7 +2483,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

is_freeu_enabled = (
getattr(self, "s1", None)
@@ -3312,7 +3312,7 @@ def forward(
) -> torch.FloatTensor:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

if attention_mask is None:
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -3694,7 +3694,7 @@ def forward(
) -> torch.FloatTensor:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

# 1. Self-Attention
if self.add_self_attention:
6 changes: 3 additions & 3 deletions src/diffusers/models/unets/unet_3d_blocks.py
@@ -1183,7 +1183,7 @@ def forward(
):
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

output_states = ()

@@ -1367,7 +1367,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

is_freeu_enabled = (
getattr(self, "s1", None)
@@ -1707,7 +1707,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

hidden_states = self.resnets[0](hidden_states, temb)

4 changes: 2 additions & 2 deletions src/diffusers/pipelines/amused/pipeline_amused_img2img.py
@@ -127,7 +127,7 @@ def __call__(
on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
essentially ignores `image`.
- num_inference_steps (`int`, *optional*, defaults to 16):
+ num_inference_steps (`int`, *optional*, defaults to 12):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.
guidance_scale (`float`, *optional*, defaults to 10.0):
@@ -191,7 +191,7 @@ def __call__(
negative_prompt_embeds is None and negative_encoder_hidden_states is not None
):
raise ValueError(
"pass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neither"
"pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither"
)

if (prompt is None and prompt_embeds is None) or (prompt is not None and prompt_embeds is not None):
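The corrected default matters for callers who rely on it. A hedged usage sketch for this img2img pipeline (model ID and prompt are illustrative, and the gray placeholder image stands in for a real input):

```python
import torch
from PIL import Image
from diffusers import AmusedImg2ImgPipeline

pipe = AmusedImg2ImgPipeline.from_pretrained(
    "amused/amused-512", torch_dtype=torch.float16
).to("cuda")

init_image = Image.new("RGB", (512, 512), "gray")  # placeholder input image

# strength trades fidelity to `image` against the prompt; at 1.0 the input is
# essentially ignored. num_inference_steps defaults to 12, per the corrected docstring.
result = pipe("winter scenery", init_image, strength=0.5).images[0]
```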
16 changes: 9 additions & 7 deletions src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -824,20 +824,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
return latents

# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+ def get_guidance_scale_embedding(
+     self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+ ) -> torch.FloatTensor:
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
- timesteps (`torch.Tensor`):
-     generate embedding vectors at these timesteps
+ w (`torch.Tensor`):
+     Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
embedding_dim (`int`, *optional*, defaults to 512):
-     dimension of the embeddings to generate
- dtype:
-     data type of the generated embeddings
+     Dimension of the embeddings to generate.
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+     Data type of the generated embeddings.
Returns:
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+ `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
"""
assert len(w.shape) == 1
w = w * 1000.0
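The hunk is truncated right after `w = w * 1000.0`. For orientation, a sketch of how a sinusoidal guidance-scale embedding of this shape is typically completed, following the VDM reference the docstring cites (an assumption, not code taken from this diff):

```python
import torch

def get_guidance_scale_embedding(
    w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
) -> torch.Tensor:
    assert len(w.shape) == 1
    w = w * 1000.0
    # Standard sinusoidal embedding over the scaled guidance values.
    half_dim = embedding_dim // 2
    emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
    emb = w.to(dtype)[:, None] * emb[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:  # zero-pad odd dimensions
        emb = torch.nn.functional.pad(emb, (0, 1))
    assert emb.shape == (w.shape[0], embedding_dim)
    return emb
```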
16 changes: 9 additions & 7 deletions src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -869,20 +869,22 @@ def upcast_vae(self):
self.vae.decoder.mid_block.to(dtype)

# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+ def get_guidance_scale_embedding(
+     self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+ ) -> torch.FloatTensor:
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
- timesteps (`torch.Tensor`):
-     generate embedding vectors at these timesteps
+ w (`torch.Tensor`):
+     Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
embedding_dim (`int`, *optional*, defaults to 512):
-     dimension of the embeddings to generate
- dtype:
-     data type of the generated embeddings
+     Dimension of the embeddings to generate.
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+     Data type of the generated embeddings.
Returns:
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+ `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
"""
assert len(w.shape) == 1
w = w * 1000.0
@@ -133,7 +133,7 @@ def __call__(
generator: Optional[torch.Generator] = None,
num_inference_steps: int = 100,
return_dict: bool = True,
output_type: str = "numpy",
output_type: str = "np",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
) -> Union[AudioPipelineOutput, Tuple]:
@@ -157,7 +157,7 @@ expense of slower inference.
expense of slower inference.
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
output_type (`str`, *optional*, defaults to `"numpy"`):
output_type (`str`, *optional*, defaults to `"np"`):
The output format of the generated audio.
callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the
@@ -249,16 +249,16 @@ def __call__(

logger.info("Generated segment", i)

if output_type == "numpy" and not is_onnx_available():
if output_type == "np" and not is_onnx_available():
raise ValueError(
"Cannot return output in 'np' format if ONNX is not available. Make sure to have ONNX installed or set 'output_type' to 'mel'."
)
elif output_type == "numpy" and self.melgan is None:
elif output_type == "np" and self.melgan is None:
raise ValueError(
"Cannot return output in 'np' format if melgan component is not defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'."
)

if output_type == "numpy":
if output_type == "np":
output = self.melgan(input_features=full_pred_mel.astype(np.float32))
else:
output = full_pred_mel
@@ -2004,7 +2004,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

is_freeu_enabled = (
getattr(self, "s1", None)
@@ -2338,7 +2338,7 @@ def forward(
) -> torch.FloatTensor:
if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

hidden_states = self.resnets[0](hidden_states, temb)
for attn, resnet in zip(self.attentions, self.resnets[1:]):
@@ -2479,7 +2479,7 @@ def forward(
) -> torch.FloatTensor:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

if attention_mask is None:
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -548,20 +548,22 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
return latents

# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+ def get_guidance_scale_embedding(
+     self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+ ) -> torch.FloatTensor:
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
- timesteps (`torch.Tensor`):
-     generate embedding vectors at these timesteps
+ w (`torch.Tensor`):
+     Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
embedding_dim (`int`, *optional*, defaults to 512):
-     dimension of the embeddings to generate
- dtype:
-     data type of the generated embeddings
+     Dimension of the embeddings to generate.
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+     Data type of the generated embeddings.
Returns:
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+ `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
"""
assert len(w.shape) == 1
w = w * 1000.0
@@ -490,20 +490,22 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
latents = latents * self.scheduler.init_noise_sigma
return latents

- def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+ def get_guidance_scale_embedding(
+     self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+ ) -> torch.FloatTensor:
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
- timesteps (`torch.Tensor`):
-     generate embedding vectors at these timesteps
+ w (`torch.Tensor`):
+     Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
embedding_dim (`int`, *optional*, defaults to 512):
-     dimension of the embeddings to generate
- dtype:
-     data type of the generated embeddings
+     Dimension of the embeddings to generate.
+ dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+     Data type of the generated embeddings.
Returns:
- `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+ `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
"""
assert len(w.shape) == 1
w = w * 1000.0
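This last hunk is the canonical definition in the latent consistency pipeline, from which the other copies are generated via "# Copied from" comments. A hedged sketch of how the embedding is consumed there: the pipeline embeds `guidance_scale - 1` per sample and feeds it to the UNet as `timestep_cond` (model ID illustrative; `time_cond_proj_dim` is assumed to be set, as it is for LCM checkpoints):

```python
import torch
from diffusers import LatentConsistencyModelPipeline

pipe = LatentConsistencyModelPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float16
).to("cuda")

# One guidance value per generated sample.
w = torch.tensor([8.0 - 1.0])
cond = pipe.get_guidance_scale_embedding(
    w, embedding_dim=pipe.unet.config.time_cond_proj_dim
)
print(cond.shape)  # (1, time_cond_proj_dim)
```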