From 258e2b755b2245cccad2d6d5cf24abe1126dda2f Mon Sep 17 00:00:00 2001
From: YAO Matrix
Date: Wed, 23 Apr 2025 18:39:56 -0700
Subject: [PATCH 1/3] enable group_offload cases and quanto cases on XPU

Signed-off-by: YAO Matrix
---
 tests/pipelines/test_pipelines_common.py |  3 ++-
 tests/quantization/quanto/test_quanto.py | 25 +++++++++++++++---------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index a950de142740..ab74a8bf51c6 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -53,6 +53,7 @@
     require_accelerator,
     require_hf_hub_version_greater,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_transformers_version_greater,
     skip_mps,
@@ -2210,7 +2211,7 @@ def test_layerwise_casting_inference(self):
         inputs = self.get_dummy_inputs(torch_device)
         _ = pipe(**inputs)[0]

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_group_offloading_inference(self):
         if not self.test_group_offloading:
             return
diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 9eb6958d2183..9903e582e3ac 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,9 +6,11 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
+    enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate,
+    require_big_accelerator,
     require_big_gpu_with_torch_cuda,
     require_torch_cuda_compatibility,
     torch_device,
 )
@@ -23,9 +25,11 @@
 from ..utils import LoRALayer, get_memory_consumption_stat

+enable_full_determinism()
+

 @nightly
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @require_accelerate
 class QuantoBaseTesterMixin:
     model_id = None
@@ -37,15 +41,17 @@ class QuantoBaseTesterMixin:
     keep_in_fp32_module = ""
     modules_to_not_convert = ""
     _test_torch_compile = False
+    torch_accelerator_module = None

     def setUp(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        self.torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
+        self.torch_accelerator_module.reset_peak_memory_stats()
+        self.torch_accelerator_module.empty_cache()
         gc.collect()

     def tearDown(self):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        self.torch_accelerator_module.reset_peak_memory_stats()
+        self.torch_accelerator_module.empty_cache()
         gc.collect()

     def get_dummy_init_kwargs(self):
@@ -89,7 +95,7 @@ def test_keep_modules_in_fp32(self):
         self.model_cls._keep_in_fp32_modules = self.keep_in_fp32_module

         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
-        model.to("cuda")
+        model.to(torch_device)

         for name, module in model.named_modules():
             if isinstance(module, torch.nn.Linear):
@@ -107,7 +113,7 @@ def test_modules_to_not_convert(self):
         init_kwargs.update({"quantization_config": quantization_config})

         model = self.model_cls.from_pretrained(**init_kwargs)
-        model.to("cuda")
+        model.to(torch_device)

         for name, module in model.named_modules():
             if name in self.modules_to_not_convert:
@@ -122,7 +128,8 @@ def test_dtype_assignment(self):

         with self.assertRaises(ValueError):
             # Tries with a `device` and `dtype`
-            model.to(device="cuda:0", dtype=torch.float16)
+            device_0 = f"{torch_device}:0"
+            model.to(device=device_0, dtype=torch.float16)

         with self.assertRaises(ValueError):
             # Tries with a cast
@@ -133,7 +140,7 @@ def test_dtype_assignment(self):
             model.half()

         # This should work
-        model.to("cuda")
+        model.to(torch_device)

     def test_serialization(self):
         model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())

From 5e049f1743571cc011bf6e27ba6a860930a78103 Mon Sep 17 00:00:00 2001
From: Yao Matrix
Date: Thu, 24 Apr 2025 02:33:56 +0000
Subject: [PATCH 2/3] use backend APIs

Signed-off-by: Yao Matrix
---
 tests/quantization/quanto/test_quanto.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 9903e582e3ac..1cc53634bb46 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,6 +6,8 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
+    backend_reset_peak_memory_stats,
+    backend_empty_cache,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
@@ -41,17 +43,15 @@ class QuantoBaseTesterMixin:
     keep_in_fp32_module = ""
     modules_to_not_convert = ""
     _test_torch_compile = False
-    torch_accelerator_module = None

     def setUp(self):
-        self.torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
-        self.torch_accelerator_module.reset_peak_memory_stats()
-        self.torch_accelerator_module.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()

     def tearDown(self):
-        self.torch_accelerator_module.reset_peak_memory_stats()
-        self.torch_accelerator_module.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         gc.collect()

     def get_dummy_init_kwargs(self):

From 0be45892b088ebb3e95fcd1fae0fe872fcd406cf Mon Sep 17 00:00:00 2001
From: Yao Matrix
Date: Fri, 25 Apr 2025 06:23:32 +0000
Subject: [PATCH 3/3] fix style

Signed-off-by: Yao Matrix
---
 tests/pipelines/test_pipelines_common.py | 1 -
 tests/quantization/quanto/test_quanto.py | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index ab74a8bf51c6..617147ccaf66 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -54,7 +54,6 @@
     require_hf_hub_version_greater,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     require_transformers_version_greater,
     skip_mps,
     torch_device,
diff --git a/tests/quantization/quanto/test_quanto.py b/tests/quantization/quanto/test_quanto.py
index 1cc53634bb46..d7bde6591dcf 100644
--- a/tests/quantization/quanto/test_quanto.py
+++ b/tests/quantization/quanto/test_quanto.py
@@ -6,14 +6,13 @@
 from diffusers.models.attention_processor import Attention
 from diffusers.utils import is_optimum_quanto_available, is_torch_available
 from diffusers.utils.testing_utils import (
-    backend_reset_peak_memory_stats,
     backend_empty_cache,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate,
     require_big_accelerator,
-    require_big_gpu_with_torch_cuda,
     require_torch_cuda_compatibility,
     torch_device,
 )
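
Note on the backend helpers used in PATCH 2: `backend_reset_peak_memory_stats` and
`backend_empty_cache` from `diffusers.utils.testing_utils` dispatch on the
`torch_device` string, replacing the hand-rolled
`getattr(torch, torch_device, torch.cuda)` lookup from PATCH 1 so the same
setUp/tearDown body drives CUDA, XPU, or any other accelerator backend. A
minimal sketch of that dispatch pattern follows; it is illustrative only, and
assumes each torch backend module (torch.cuda, torch.xpu, ...) exposes matching
method names. It is not diffusers' exact implementation.

    import torch


    def _backend_module(device: str):
        # "cuda:0" -> "cuda"; fall back to torch.cuda when this torch build
        # has no module named after the backend.
        return getattr(torch, device.split(":")[0], torch.cuda)


    def backend_empty_cache(device: str) -> None:
        # Release cached allocator blocks on the given backend, if supported.
        module = _backend_module(device)
        if hasattr(module, "empty_cache"):
            module.empty_cache()


    def backend_reset_peak_memory_stats(device: str) -> None:
        # Zero the peak-memory counters on the given backend, if supported.
        module = _backend_module(device)
        if hasattr(module, "reset_peak_memory_stats"):
            module.reset_peak_memory_stats()

With helpers in this style the tests stay branch-free:
backend_empty_cache(torch_device) resolves to torch.cuda.empty_cache() when
torch_device is "cuda" and to torch.xpu.empty_cache() when it is "xpu", and
degrades to a no-op on backends that lack the call.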