Reduce Memory Cost in Flux Training (huggingface#9829)

* Improve NPU performance
* Improve NPU performance
* Improve NPU performance
* Improve NPU performance
* [bugfix] bugfix for npu free memory
* [bugfix] bugfix for npu free memory
* [bugfix] bugfix for npu free memory
* Reduce memory cost for flux training process

---------

Co-authored-by: 蒋硕 <[email protected]>
Co-authored-by: Sayak Paul <[email protected]>
tolgacangoz · Nov 1, 2024 · a98a839
1 parent 3deed72
commit a98a839
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 0 deletions.
diff --git a/examples/dreambooth/train_dreambooth_flux.py b/examples/dreambooth/train_dreambooth_flux.py
@@ -1740,6 +1740,9 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                         torch_npu.npu.empty_cache()
                     gc.collect()
 
+                images = None
+                del pipeline
+
     # Save the lora layers
     accelerator.wait_for_everyone()
     if accelerator.is_main_process:
@@ -1798,6 +1801,9 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                 ignore_patterns=["step_*", "epoch_*"],
             )
 
+        images = None
+        del pipeline
+
     accelerator.end_training()
 
 

diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py
@@ -1844,6 +1844,9 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                     del text_encoder_one, text_encoder_two
                     free_memory()
 
+                images = None
+                del pipeline
+
     # Save the lora layers
     accelerator.wait_for_everyone()
     if accelerator.is_main_process:
@@ -1908,6 +1911,9 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                 ignore_patterns=["step_*", "epoch_*"],
             )
 
+        images = None
+        del pipeline
+
     accelerator.end_training()