diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py
index 31c62f27a75a..063ff4c8b05d 100644
--- a/tests/lora/test_lora_layers_sd3.py
+++ b/tests/lora/test_lora_layers_sd3.py
@@ -32,7 +32,7 @@
 @require_peft_backend
 class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = StableDiffusion3Pipeline
-    scheduler_cls = FlowMatchEulerDiscreteScheduler()
+    scheduler_cls = FlowMatchEulerDiscreteScheduler
     scheduler_kwargs = {}
     uses_flow_matching = True
     transformer_kwargs = {
@@ -80,8 +80,7 @@ def test_sd3_lora(self):
         Related PR: https://github.com/huggingface/diffusers/pull/8584
         """
         components = self.get_dummy_components()
-
-        pipe = self.pipeline_class(**components)
+        pipe = self.pipeline_class(**components[0])
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

diff --git a/tests/lora/test_lora_layers_sdxl.py b/tests/lora/test_lora_layers_sdxl.py
index f00f7b193abf..4ec7ef897485 100644
--- a/tests/lora/test_lora_layers_sdxl.py
+++ b/tests/lora/test_lora_layers_sdxl.py
@@ -124,71 +124,6 @@ def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()

-    def test_sdxl_0_9_lora_one(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
-        lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora"
-        lora_filename = "daiton-xl-lora-test.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.enable_model_cpu_offload()
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213])
-
-        max_diff = numpy_cosine_similarity_distance(expected, images)
-        assert max_diff < 1e-3
-        pipe.unload_lora_weights()
-        release_memory(pipe)
-
-    def test_sdxl_0_9_lora_two(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
-        lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora"
-        lora_filename = "saijo.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.enable_model_cpu_offload()
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626])
-
-        max_diff = numpy_cosine_similarity_distance(expected, images)
-        assert max_diff < 1e-3
-
-        pipe.unload_lora_weights()
-        release_memory(pipe)
-
-    def test_sdxl_0_9_lora_three(self):
-        generator = torch.Generator().manual_seed(0)
-
-        pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
-        lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora"
-        lora_filename = "kame_sdxl_v2-000020-16rank.safetensors"
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-        pipe.enable_model_cpu_offload()
-
-        images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
-        ).images
-
-        images = images[0, -3:, -3:, -1].flatten()
-        expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468])
-
-        max_diff = numpy_cosine_similarity_distance(expected, images)
-        assert max_diff < 5e-3
-
-        pipe.unload_lora_weights()
-        release_memory(pipe)
-
     def test_sdxl_1_0_lora(self):
         generator = torch.Generator("cpu").manual_seed(0)

diff --git a/tests/models/transformers/test_models_transformer_aura_flow.py b/tests/models/transformers/test_models_transformer_aura_flow.py
index 51075b2b4cc1..376d8b57da4d 100644
--- a/tests/models/transformers/test_models_transformer_aura_flow.py
+++ b/tests/models/transformers/test_models_transformer_aura_flow.py
@@ -26,7 +26,7 @@
 enable_full_determinism()


-class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
+class AuraFlowTransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = AuraFlowTransformer2DModel
     main_input_name = "hidden_states"
     # We override the items here because the transformer under consideration is small.
@@ -73,3 +73,7 @@ def prepare_init_args_and_inputs_for_common(self):
         }
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict
+
+    @unittest.skip("AuraFlowTransformer2DModel uses its own dedicated attention processor. This test does not apply")
+    def test_set_attn_processor_for_determinism(self):
+        pass
diff --git a/tests/models/transformers/test_models_transformer_sd3.py b/tests/models/transformers/test_models_transformer_sd3.py
index 9c927287cb8d..2b9084327289 100644
--- a/tests/models/transformers/test_models_transformer_sd3.py
+++ b/tests/models/transformers/test_models_transformer_sd3.py
@@ -76,3 +76,7 @@ def prepare_init_args_and_inputs_for_common(self):
         }
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict
+
+    @unittest.skip("SD3Transformer2DModel uses a dedicated attention processor. This test doesn't apply")
+    def test_set_attn_processor_for_determinism(self):
+        pass
diff --git a/tests/pipelines/aura_flow/test_pipeline_aura_flow.py b/tests/pipelines/aura_flow/test_pipeline_aura_flow.py
index 3694a733163c..14bc588df905 100644
--- a/tests/pipelines/aura_flow/test_pipeline_aura_flow.py
+++ b/tests/pipelines/aura_flow/test_pipeline_aura_flow.py
@@ -163,3 +163,7 @@ def test_fused_qkv_projections(self):
         assert np.allclose(
             original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
         ), "Original outputs should match when fused QKV projections are disabled."
+
+    @unittest.skip("xformers attention processor does not exist for AuraFlow")
+    def test_xformers_attention_forwardGenerator_pass(self):
+        pass
diff --git a/tests/pipelines/lumina/test_lumina_nextdit.py b/tests/pipelines/lumina/test_lumina_nextdit.py
index a53758ce2808..d6aeb57b80a1 100644
--- a/tests/pipelines/lumina/test_lumina_nextdit.py
+++ b/tests/pipelines/lumina/test_lumina_nextdit.py
@@ -119,6 +119,10 @@ def test_lumina_prompt_embeds(self):
         max_diff = np.abs(output_with_prompt - output_with_embeds).max()
         assert max_diff < 1e-4

+    @unittest.skip("xformers attention processor does not exist for Lumina")
+    def test_xformers_attention_forwardGenerator_pass(self):
+        pass
+

 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
index 79e3a7f9b736..033addd51c3d 100644
--- a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
+++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py
@@ -20,12 +20,7 @@
 import torch
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

-from diffusers import (
-    AutoencoderKL,
-    DDIMScheduler,
-    TextToVideoSDPipeline,
-    UNet3DConditionModel,
-)
+from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoSDPipeline, UNet3DConditionModel
 from diffusers.utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
@@ -64,7 +59,7 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin,
     def get_dummy_components(self):
         torch.manual_seed(0)
         unet = UNet3DConditionModel(
-            block_out_channels=(4, 8),
+            block_out_channels=(8, 8),
             layers_per_block=1,
             sample_size=32,
             in_channels=4,
@@ -134,10 +129,7 @@ def get_dummy_inputs(self, device, seed=0):
         return inputs

     def test_dict_tuple_outputs_equivalent(self):
-        expected_slice = None
-        if torch_device == "cpu":
-            expected_slice = np.array([0.4903, 0.5649, 0.5504, 0.5179, 0.4821, 0.5466, 0.4131, 0.5052, 0.5077])
-        return super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+        return super().test_dict_tuple_outputs_equivalent()

     def test_text_to_video_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -151,9 +143,8 @@
         frames = sd_pipe(**inputs).frames
         image_slice = frames[0][0][-3:, -3:, -1]

-        assert frames[0][0].shape == (32, 32, 3)
-        expected_slice = np.array([0.7537, 0.1752, 0.6157, 0.5508, 0.4240, 0.4110, 0.4838, 0.5648, 0.5094])
+        expected_slice = np.array([0.8093, 0.2751, 0.6976, 0.5927, 0.4616, 0.4336, 0.5094, 0.5683, 0.4796])

         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2