diff --git a/src/diffusers/models/adapter.py b/src/diffusers/models/adapter.py
index 0f4b2ec03371..677a991f055e 100644
--- a/src/diffusers/models/adapter.py
+++ b/src/diffusers/models/adapter.py
@@ -30,10 +30,10 @@ class MultiAdapter(ModelMixin):
     MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to
     user-assigned weighting.
 
-    This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
-    implements for all the model (such as downloading or saving, etc.)
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading
+    or saving.
 
-    Parameters:
+    Args:
         adapters (`List[T2IAdapter]`, *optional*, defaults to None):
             A list of `T2IAdapter` model instances.
     """
@@ -77,11 +77,13 @@ def forward(self, xs: torch.Tensor, adapter_weights: Optional[List[float]] = Non
         r"""
         Args:
             xs (`torch.Tensor`):
-                (batch, channel, height, width) input images for multiple adapter models concated along dimension 1,
-                `channel` should equal to `num_adapter` * "number of channel of image".
+                A tensor of shape (batch, channel, height, width) representing input images for multiple adapter
+                models, concatenated along dimension 1 (the channel dimension). The `channel` dimension should equal
+                `num_adapter` * the number of channels per image.
+
             adapter_weights (`List[float]`, *optional*, defaults to None):
-                List of floats representing the weight which will be multiply to each adapter's output before adding
-                them together.
+                A list of floats representing the weights which will be multiplied by each adapter's output before
+                summing them together. If `None`, equal weights will be used for all adapters.
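+
+        Example (a minimal sketch, not from the library docs; assumes two default `T2IAdapter` instances, so
+        `xs` carries two 3-channel control images concatenated along dimension 1):
+
+        ```py
+        >>> import torch
+        >>> from diffusers.models.adapter import MultiAdapter, T2IAdapter
+
+        >>> multi_adapter = MultiAdapter([T2IAdapter(), T2IAdapter()])
+        >>> xs = torch.randn(1, 6, 64, 64)  # batch 1, 2 adapters * 3 channels each
+        >>> features = multi_adapter(xs, adapter_weights=[0.7, 0.3])  # weighted sum of the two adapters' outputs
+        ```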
         """
         if adapter_weights is None:
             adapter_weights = torch.tensor([1 / self.num_adapter] * self.num_adapter)
@@ -109,24 +111,24 @@ def save_pretrained(
         variant: Optional[str] = None,
     ):
         """
-        Save a model and its configuration file to a directory, so that it can be re-loaded using the
+        Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the
         `[`~models.adapter.MultiAdapter.from_pretrained`]` class method.
 
-        Arguments:
+        Args:
             save_directory (`str` or `os.PathLike`):
-                Directory to which to save. Will be created if it doesn't exist.
+                The directory where the model will be saved. If the directory does not exist, it will be created.
             is_main_process (`bool`, *optional*, defaults to `True`):
-                Whether the process calling this is the main process or not. Useful when in distributed training like
-                TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
-                the main process to avoid race conditions.
+                Indicates whether the process calling this is the main process or not. Useful in distributed training
+                (e.g., on TPUs) when this function needs to be called on all processes. In this case, set
+                `is_main_process=True` only on the main process to avoid race conditions.
             save_function (`Callable`):
-                The function to use to save the state dictionary. Useful on distributed training like TPUs when one
-                need to replace `torch.save` by another method. Can be configured with the environment variable
-                `DIFFUSERS_SAVE_MODE`.
+                The function used to save the state dictionary. Useful in distributed training (e.g., on TPUs) when
+                `torch.save` needs to be replaced with another method. Can also be configured using the
+                `DIFFUSERS_SAVE_MODE` environment variable.
             safe_serialization (`bool`, *optional*, defaults to `True`):
-                Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
+                If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`.
             variant (`str`, *optional*):
-                If specified, weights are saved in the format pytorch_model..bin.
+                If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
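+
+        Example (a minimal sketch, not from the library docs; `multi_adapter` is assumed to be an existing
+        `MultiAdapter` instance and the path is illustrative):
+
+        ```py
+        >>> multi_adapter.save_pretrained("./my_model_directory/adapter")
+        >>> # reload the saved sub-adapters with the matching class method
+        >>> multi_adapter = MultiAdapter.from_pretrained("./my_model_directory/adapter")
+        ```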
         """
         idx = 0
         model_path_to_save = save_directory
@@ -145,19 +147,17 @@ def save_pretrained(
     @classmethod
     def from_pretrained(cls, pretrained_model_path: Optional[Union[str, os.PathLike]], **kwargs):
         r"""
-        Instantiate a pretrained MultiAdapter model from multiple pre-trained adapter models.
+        Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models.
 
         The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
-        the model, you should first set it back in training mode with `model.train()`.
+        the model, set it back to training mode using `model.train()`.
 
-        The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
-        pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
-        task.
+        Warnings:
+            *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained
+            with the rest of the model; it is up to you to train those weights with a downstream fine-tuning task.
+            *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded.
 
-        The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
-        weights are discarded.
-
-        Parameters:
+        Args:
             pretrained_model_path (`os.PathLike`):
                 A path to a *directory* containing model weights saved using
                 [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`.
@@ -175,20 +175,20 @@ def from_pretrained(cls, pretrained_model_path: Optional[Union[str, os.PathLike]
                 more information about each option see [designing a device
                 map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
             max_memory (`Dict`, *optional*):
-                A dictionary device identifier to maximum memory. Will default to the maximum memory available for each
-                GPU and the available CPU RAM if unset.
+                A dictionary mapping device identifiers to their maximum memory. Defaults to the maximum memory
+                available for each GPU and the available CPU RAM if unset.
             low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
                 Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
                 also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
                 model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
                 setting this argument to `True` will raise an error.
             variant (`str`, *optional*):
-                If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is
-                ignored when using `from_flax`.
+                If specified, load weights from a `variant` file (*e.g.* `pytorch_model.<variant>.bin`). `variant` is
+                ignored when using `from_flax`.
             use_safetensors (`bool`, *optional*, defaults to `None`):
-                If set to `None`, the `safetensors` weights will be downloaded if they're available **and** if the
-                `safetensors` library is installed. If set to `True`, the model will be forcibly loaded from
-                `safetensors` weights. If set to `False`, loading will *not* use `safetensors`.
+                If `None`, the `safetensors` weights will be downloaded if available **and** if the `safetensors`
+                library is installed. If `True`, the model will be forcibly loaded from `safetensors` weights. If
+                `False`, `safetensors` weights will not be used.
         """
         idx = 0
         adapters = []
@@ -223,22 +223,22 @@ class T2IAdapter(ModelMixin, ConfigMixin):
     and
     [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235).
 
-    This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
-    implements for all the model (such as downloading or saving, etc.)
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as
+    downloading or saving.
 
-    Parameters:
-        in_channels (`int`, *optional*, defaults to 3):
-            Number of channels of Aapter's input(*control image*). Set this parameter to 1 if you're using gray scale
-            image as *control image*.
+    Args:
+        in_channels (`int`, *optional*, defaults to `3`):
+            The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a grayscale
+            image.
         channels (`List[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
-            The number of channel of each downsample block's output hidden state. The `len(block_out_channels)` will
-            also determine the number of downsample blocks in the Adapter.
-        num_res_blocks (`int`, *optional*, defaults to 2):
+            The number of channels in each downsample block's output hidden state. `len(channels)` also determines
+            the number of downsample blocks in the adapter.
+        num_res_blocks (`int`, *optional*, defaults to `2`):
             Number of ResNet blocks in each downsample block.
-        downscale_factor (`int`, *optional*, defaults to 8):
+        downscale_factor (`int`, *optional*, defaults to `8`):
            A factor that determines the total downscale factor of the Adapter.
         adapter_type (`str`, *optional*, defaults to `full_adapter`):
-            The type of Adapter to use. Choose either `full_adapter` or `full_adapter_xl` or `light_adapter`.
+            The type of adapter to use: `full_adapter`, `full_adapter_xl`, or `light_adapter`.
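+
+    Example (a minimal sketch, not from the library docs; input height/width are assumed divisible by the total
+    downscale factor):
+
+    ```py
+    >>> import torch
+    >>> from diffusers.models.adapter import T2IAdapter
+
+    >>> adapter = T2IAdapter(in_channels=3, channels=(320, 640, 1280, 1280), num_res_blocks=2, downscale_factor=8)
+    >>> control_image = torch.randn(1, 3, 512, 512)
+    >>> features = adapter(control_image)  # one feature map per downsample block, at decreasing resolutions
+    ```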
""" @@ -518,7 +518,7 @@ class LightAdapterBlock(nn.Module): A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the `LightAdapter` model. - Parameters: + Args: in_channels (`int`): Number of channels of LightAdapterBlock's input. out_channels (`int`): @@ -526,7 +526,7 @@ class LightAdapterBlock(nn.Module): num_res_blocks (`int`): Number of LightAdapterResnetBlocks in the LightAdapterBlock. down (`bool`, *optional*, defaults to `False`): - Whether to perform downsampling on LightAdapterBlock's input. + If `True`, perform downsampling on LightAdapterBlock's input. """ def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False): @@ -561,7 +561,7 @@ class LightAdapterResnetBlock(nn.Module): A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different architecture than `AdapterResnetBlock`. - Parameters: + Args: channels (`int`): Number of channels of LightAdapterResnetBlock's input and output. """