diff --git a/src/diffusers/models/adapter.py b/src/diffusers/models/adapter.py
index 0f4b2ec03371..677a991f055e 100644
--- a/src/diffusers/models/adapter.py
+++ b/src/diffusers/models/adapter.py
@@ -30,10 +30,10 @@ class MultiAdapter(ModelMixin):
     MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to
     user-assigned weighting.
 
-    This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
-    implements for all the model (such as downloading or saving, etc.)
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading
+    or saving.
 
-    Parameters:
+    Args:
         adapters (`List[T2IAdapter]`, *optional*, defaults to None):
             A list of `T2IAdapter` model instances.
     """
@@ -77,11 +77,13 @@ def forward(self, xs: torch.Tensor, adapter_weights: Optional[List[float]] = Non
         r"""
         Args:
             xs (`torch.Tensor`):
-                (batch, channel, height, width) input images for multiple adapter models concated along dimension 1,
-                `channel` should equal to `num_adapter` * "number of channel of image".
+                A tensor of shape (batch, channel, height, width) representing input images for multiple adapter
+                models, concatenated along dimension 1 (the channel dimension). The `channel` dimension should equal
+                `num_adapter` * the number of channels per image.
+
             adapter_weights (`List[float]`, *optional*, defaults to None):
-                List of floats representing the weight which will be multiply to each adapter's output before adding
-                them together.
+                A list of floats representing the weights which will be multiplied by each adapter's output before
+                summing them together. If `None`, equal weights will be used for all adapters.
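+
+        Example (a minimal sketch, not from the library docs; assumes two default `T2IAdapter` instances, so
+        `xs` carries two 3-channel control images concatenated along dimension 1):
+
+        ```py
+        >>> import torch
+        >>> from diffusers.models.adapter import MultiAdapter, T2IAdapter
+
+        >>> multi_adapter = MultiAdapter([T2IAdapter(), T2IAdapter()])
+        >>> xs = torch.randn(1, 6, 64, 64)  # batch 1, 2 adapters * 3 channels each
+        >>> features = multi_adapter(xs, adapter_weights=[0.7, 0.3])  # weighted sum of the two adapters' outputs
+        ```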
         """
         if adapter_weights is None:
             adapter_weights = torch.tensor([1 / self.num_adapter] * self.num_adapter)
@@ -109,24 +111,24 @@ def save_pretrained(
         variant: Optional[str] = None,
     ):
         """
-        Save a model and its configuration file to a directory, so that it can be re-loaded using the
+        Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the
         `[`~models.adapter.MultiAdapter.from_pretrained`]` class method.
 
-        Arguments:
+        Args:
             save_directory (`str` or `os.PathLike`):
-                Directory to which to save. Will be created if it doesn't exist.
+                The directory where the model will be saved. If the directory does not exist, it will be created.
             is_main_process (`bool`, *optional*, defaults to `True`):
-                Whether the process calling this is the main process or not. Useful when in distributed training like
-                TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
-                the main process to avoid race conditions.
+                Indicates whether the process calling this is the main process or not. Useful in distributed training
+                (e.g., on TPUs) when this function needs to be called on all processes. In this case, set
+                `is_main_process=True` only on the main process to avoid race conditions.
             save_function (`Callable`):
-                The function to use to save the state dictionary. Useful on distributed training like TPUs when one
-                need to replace `torch.save` by another method. Can be configured with the environment variable
-                `DIFFUSERS_SAVE_MODE`.
+                The function used to save the state dictionary. Useful in distributed training (e.g., on TPUs) when
+                `torch.save` needs to be replaced with another method. Can also be configured using the
+                `DIFFUSERS_SAVE_MODE` environment variable.
             safe_serialization (`bool`, *optional*, defaults to `True`):
-                Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
+                If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`.
             variant (`str`, *optional*):
-                If specified, weights are saved in the format pytorch_model..bin.
+                If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
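+
+        Example (a minimal sketch, not from the library docs; `multi_adapter` is assumed to be an existing
+        `MultiAdapter` instance and the path is illustrative):
+
+        ```py
+        >>> multi_adapter.save_pretrained("./my_model_directory/adapter")
+        >>> # reload the saved sub-adapters with the matching class method
+        >>> multi_adapter = MultiAdapter.from_pretrained("./my_model_directory/adapter")
+        ```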
         """
         idx = 0
         model_path_to_save = save_directory
@@ -145,19 +147,17 @@ def save_pretrained(
     @classmethod
     def from_pretrained(cls, pretrained_model_path: Optional[Union[str, os.PathLike]], **kwargs):
         r"""
-        Instantiate a pretrained MultiAdapter model from multiple pre-trained adapter models.
+        Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models.
 
         The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
-        the model, you should first set it back in training mode with `model.train()`.
+        the model, set it back to training mode using `model.train()`.
 
-        The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
-        pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
-        task.
+        Warnings:
+            *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained
+            with the rest of the model; it is up to you to train those weights with a downstream fine-tuning task.
+            *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded.
 
-        The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
-        weights are discarded.
-
-        Parameters:
+        Args:
             pretrained_model_path (`os.PathLike`):
                 A path to a *directory* containing model weights saved using
                 [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`.
@@ -175,20 +175,20 @@ def from_pretrained(cls, pretrained_model_path: Optional[Union[str, os.PathLike]
                 more information about each option see [designing a device
                 map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
             max_memory (`Dict`, *optional*):
-                A dictionary device identifier to maximum memory. Will default to the maximum memory available for each
-                GPU and the available CPU RAM if unset.
+                A dictionary mapping device identifiers to their maximum memory. Defaults to the maximum memory
+                available for each GPU and the available CPU RAM if unset.
             low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
                 Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
                 also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
                 model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
                 setting this argument to `True` will raise an error.
             variant (`str`, *optional*):
-                If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is
-                ignored when using `from_flax`.
+                If specified, load weights from a `variant` file (*e.g.* `pytorch_model.<variant>.bin`). `variant` is
+                ignored when using `from_flax`.
             use_safetensors (`bool`, *optional*, defaults to `None`):
-                If set to `None`, the `safetensors` weights will be downloaded if they're available **and** if the
-                `safetensors` library is installed. If set to `True`, the model will be forcibly loaded from
-                `safetensors` weights. If set to `False`, loading will *not* use `safetensors`.
+                If `None`, the `safetensors` weights will be downloaded if available **and** if the `safetensors`
+                library is installed. If `True`, the model will be forcibly loaded from `safetensors` weights. If
+                `False`, `safetensors` weights will not be used.
         """
         idx = 0
         adapters = []
@@ -223,22 +223,22 @@ class T2IAdapter(ModelMixin, ConfigMixin):
     and
     [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235).
 
-    This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
-    implements for all the model (such as downloading or saving, etc.)
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as
+    downloading or saving.
 
-    Parameters:
-        in_channels (`int`, *optional*, defaults to 3):
-            Number of channels of Aapter's input(*control image*). Set this parameter to 1 if you're using gray scale
-            image as *control image*.
+    Args:
+        in_channels (`int`, *optional*, defaults to `3`):
+            The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a grayscale
+            image.
         channels (`List[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
-            The number of channel of each downsample block's output hidden state. The `len(block_out_channels)` will
-            also determine the number of downsample blocks in the Adapter.
-        num_res_blocks (`int`, *optional*, defaults to 2):
+            The number of channels in each downsample block's output hidden state. `len(channels)` also determines
+            the number of downsample blocks in the adapter.
+        num_res_blocks (`int`, *optional*, defaults to `2`):
             Number of ResNet blocks in each downsample block.
-        downscale_factor (`int`, *optional*, defaults to 8):
+        downscale_factor (`int`, *optional*, defaults to `8`):
            A factor that determines the total downscale factor of the Adapter.
         adapter_type (`str`, *optional*, defaults to `full_adapter`):
-            The type of Adapter to use. Choose either `full_adapter` or `full_adapter_xl` or `light_adapter`.
+            The type of adapter to use: `full_adapter`, `full_adapter_xl`, or `light_adapter`.
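+
+    Example (a minimal sketch, not from the library docs; input height/width are assumed divisible by the total
+    downscale factor):
+
+    ```py
+    >>> import torch
+    >>> from diffusers.models.adapter import T2IAdapter
+
+    >>> adapter = T2IAdapter(in_channels=3, channels=(320, 640, 1280, 1280), num_res_blocks=2, downscale_factor=8)
+    >>> control_image = torch.randn(1, 3, 512, 512)
+    >>> features = adapter(control_image)  # one feature map per downsample block, at decreasing resolutions
+    ```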
""" @@ -518,7 +518,7 @@ class LightAdapterBlock(nn.Module): A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the `LightAdapter` model. - Parameters: + Args: in_channels (`int`): Number of channels of LightAdapterBlock's input. out_channels (`int`): @@ -526,7 +526,7 @@ class LightAdapterBlock(nn.Module): num_res_blocks (`int`): Number of LightAdapterResnetBlocks in the LightAdapterBlock. down (`bool`, *optional*, defaults to `False`): - Whether to perform downsampling on LightAdapterBlock's input. + If `True`, perform downsampling on LightAdapterBlock's input. """ def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False): @@ -561,7 +561,7 @@ class LightAdapterResnetBlock(nn.Module): A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different architecture than `AdapterResnetBlock`. - Parameters: + Args: channels (`int`): Number of channels of LightAdapterResnetBlock's input and output. """