From 258e2b755b2245cccad2d6d5cf24abe1126dda2f Mon Sep 17 00:00:00 2001
From: YAO Matrix
Date: Wed, 23 Apr 2025 18:39:56 -0700
Subject: [PATCH 1/3] enable group_offload cases and quanto cases on XPU

Signed-off-by: YAO Matrix
---
 tests/pipelines/test_pipelines_common.py |  3 ++-
 tests/quantization/quanto/test_quanto.py | 25 +++++++++++++++---------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index a950de142740..ab74a8bf51c6 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -53,6 +53,7 @@
     require_accelerator,
     require_hf_hub_version_greater,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_transformers_version_greater,
     skip_mps,
@@ -2210,7 +2211,7 @@ def test_layerwise_casting_inference(self):
         inputs = self.get_dummy_inputs(torch_device)
         _ = pipe(**inputs)[0]

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_group_offloading_inference(self):
         if not self.test_group_offloading:
             return
diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 9eb6958d2183..9903e582e3ac 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,9 +6,11 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
+    enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate,
+    require_big_accelerator,
     require_big_gpu_with_torch_cuda,
     require_torch_cuda_compatibility,
     torch_device,
 )
@@ -23,9 +25,11 @@
 from ..utils import LoRALayer, get_memory_consumption_stat

+enable_full_determinism()
+

 @nightly
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @require_accelerate
 class QuantoBaseTesterMixin:
     model_id = None
@@ -37,15 +41,17 @@ class QuantoBaseTesterMixin:
     keep_in_fp32_module = ""
     modules_to_not_convert = ""
     _test_torch_compile = False
+    torch_accelerator_module = None

     def setUp(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        self.torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
+        self.torch_accelerator_module.reset_peak_memory_stats()
+        self.torch_accelerator_module.empty_cache()
         gc.collect()

     def tearDown(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        self.torch_accelerator_module.reset_peak_memory_stats()
+        self.torch_accelerator_module.empty_cache()
         gc.collect()

     def get_dummy_init_kwargs(self):
@@ -89,7 +95,7 @@ def test_keep_modules_in_fp32(self):
         self.model_cls._keep_in_fp32_modules = self.keep_in_fp32_module

         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
-        model.to("cuda")
+        model.to(torch_device)

         for name, module in model.named_modules():
             if isinstance(module, torch.nn.Linear):
@@ -107,7 +113,7 @@ def test_modules_to_not_convert(self):
         init_kwargs.update({"quantization_config": quantization_config})

         model = self.model_cls.from_pretrained(**init_kwargs)
-        model.to("cuda")
+        model.to(torch_device)

         for name, module in model.named_modules():
             if name in self.modules_to_not_convert:
@@ -122,7 +128,8 @@ def test_dtype_assignment(self):

         with self.assertRaises(ValueError):
             # Tries with a `device` and `dtype`
-            model.to(device="cuda:0", dtype=torch.float16)
+            device_0 = f"{torch_device}:0"
+            model.to(device=device_0, dtype=torch.float16)

         with self.assertRaises(ValueError):
             # Tries with a cast
@@ -133,7 +140,7 @@ def test_dtype_assignment(self):
             model.half()

         # This should work
-        model.to("cuda")
+        model.to(torch_device)

     def test_serialization(self):
         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())

From 5e049f1743571cc011bf6e27ba6a860930a78103 Mon Sep 17 00:00:00 2001
From: Yao Matrix
Date: Thu, 24 Apr 2025 02:33:56 +0000
Subject: [PATCH 2/3] use backend APIs

Signed-off-by: Yao Matrix
---
 tests/quantization/quanto/test_quanto.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 9903e582e3ac..1cc53634bb46 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,6 +6,8 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
+    backend_reset_peak_memory_stats,
+    backend_empty_cache,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
@@ -41,17 +43,15 @@ class QuantoBaseTesterMixin:
     keep_in_fp32_module = ""
     modules_to_not_convert = ""
     _test_torch_compile = False
-    torch_accelerator_module = None

     def setUp(self):
-        self.torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
-        self.torch_accelerator_module.reset_peak_memory_stats()
-        self.torch_accelerator_module.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()

     def tearDown(self):
-        self.torch_accelerator_module.reset_peak_memory_stats()
-        self.torch_accelerator_module.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()

     def get_dummy_init_kwargs(self):

From 0be45892b088ebb3e95fcd1fae0fe872fcd406cf Mon Sep 17 00:00:00 2001
From: Yao Matrix
Date: Fri, 25 Apr 2025 06:23:32 +0000
Subject: [PATCH 3/3] fix style

Signed-off-by: Yao Matrix
---
 tests/pipelines/test_pipelines_common.py | 1 -
 tests/quantization/quanto/test_quanto.py | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index ab74a8bf51c6..617147ccaf66 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -54,7 +54,6 @@
     require_hf_hub_version_greater,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     require_transformers_version_greater,
     skip_mps,
     torch_device,
diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 1cc53634bb46..d7bde6591dcf 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,14 +6,13 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
-    backend_reset_peak_memory_stats,
     backend_empty_cache,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate,
     require_big_accelerator,
-    require_big_gpu_with_torch_cuda,
     require_torch_cuda_compatibility,
     torch_device,
 )
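
Note on the backend helpers used in PATCH 2: `backend_reset_peak_memory_stats` and
`backend_empty_cache` from `diffusers.utils.testing_utils` dispatch on the
`torch_device` string, replacing the hand-rolled
`getattr(torch, torch_device, torch.cuda)` lookup from PATCH 1 so the same
setUp/tearDown body drives CUDA, XPU, or any other accelerator backend. A
minimal sketch of that dispatch pattern follows; it is illustrative only, and
assumes each torch backend module (torch.cuda, torch.xpu, ...) exposes matching
method names. It is not diffusers' exact implementation.

    import torch


    def _backend_module(device: str):
        # "cuda:0" -> "cuda"; fall back to torch.cuda when this torch build
        # has no module named after the backend.
        return getattr(torch, device.split(":")[0], torch.cuda)


    def backend_empty_cache(device: str) -> None:
        # Release cached allocator blocks on the given backend, if supported.
        module = _backend_module(device)
        if hasattr(module, "empty_cache"):
            module.empty_cache()


    def backend_reset_peak_memory_stats(device: str) -> None:
        # Zero the peak-memory counters on the given backend, if supported.
        module = _backend_module(device)
        if hasattr(module, "reset_peak_memory_stats"):
            module.reset_peak_memory_stats()

With helpers in this style the tests stay branch-free:
backend_empty_cache(torch_device) resolves to torch.cuda.empty_cache() when
torch_device is "cuda" and to torch.xpu.empty_cache() when it is "xpu", and
degrades to a no-op on backends that lack the call.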