diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py
index a836d136ec96..ba2340789970 100644
--- a/src/transformers/models/bit/image_processing_bit.py
+++ b/src/transformers/models/bit/image_processing_bit.py
@@ -294,31 +294,27 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)
+
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/chameleon/image_processing_chameleon.py b/src/transformers/models/chameleon/image_processing_chameleon.py
index a23fdbed0288..46d081973bb4 100644
--- a/src/transformers/models/chameleon/image_processing_chameleon.py
+++ b/src/transformers/models/chameleon/image_processing_chameleon.py
@@ -311,32 +311,26 @@ def preprocess(
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)

+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+
+            all_images.append(image)
         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
index b93bb81606a9..515c2de0cfc3 100644
--- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
@@ -280,31 +280,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)

-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)

-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py
index bc545e08e20e..fa398821ca61 100644
--- a/src/transformers/models/clip/image_processing_clip.py
+++ b/src/transformers/models/clip/image_processing_clip.py
@@ -319,31 +319,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)
+
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py
index d5dfb211e03c..bafb5f6e71ad 100644
--- a/src/transformers/models/deit/image_processing_deit.py
+++ b/src/transformers/models/deit/image_processing_deit.py
@@ -270,31 +270,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)

-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)

-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py
index 89a8f9e676e8..e7c3193ceab4 100644
--- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py
+++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py
@@ -312,31 +312,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)

-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)

-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py
index f744b9fcf9c1..579e6d44c143 100644
--- a/src/transformers/models/llava_next/image_processing_llava_next.py
+++ b/src/transformers/models/llava_next/image_processing_llava_next.py
@@ -409,31 +409,26 @@ def _preprocess(
         """
         images = make_list_of_images(images)

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)
+
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         return images
diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py
index e16e71875bb2..59d0d9d94472 100644
--- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py
+++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py
@@ -272,31 +272,26 @@ def _preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)
+
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         return images
diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
index 967d17929f82..7981947307fd 100644
--- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
+++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
@@ -276,31 +276,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)

-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)

-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
index 072295a4ff77..25d227bd582f 100644
--- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
+++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
@@ -279,31 +279,26 @@ def preprocess(
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])

-        if do_resize:
-            images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_center_crop:
-            images = [
-                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
-            ]
-
-        if do_rescale:
-            images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
-                for image in images
-            ]
-
-        if do_normalize:
-            images = [
-                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
-                for image in images
-            ]
+        all_images = []
+        for image in images:
+            if do_resize:
+                image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+            if do_center_crop:
+                image = self.center_crop(image=image, size=crop_size, input_data_format=input_data_format)
+
+            if do_rescale:
+                image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+
+            if do_normalize:
+                image = self.normalize(
+                    image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
+                )
+
+            all_images.append(image)

         images = [
-            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
+            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+            for image in all_images
         ]

         data = {"pixel_values": images}
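Note on the pattern: every hunk above applies the same mechanical refactor. The four per-option list comprehensions (resize, center crop, rescale, normalize) are fused into one loop that carries each image through all enabled steps before appending it to `all_images`; the outputs are unchanged, only the iteration shape differs. A minimal self-contained sketch of the before/after equivalence, where `scale` and `shift` are hypothetical stand-in transforms (not code from this diff) playing the role of `self.resize`, `self.rescale`, etc.:

# Hypothetical stand-in transforms; the real code applies self.resize,
# self.center_crop, self.rescale and self.normalize per image.
def scale(image: float) -> float:
    return image / 255.0

def shift(image: float) -> float:
    return image - 0.5

images = [0.0, 127.5, 255.0]
do_scale, do_shift = True, True

# Before: one list comprehension per option, building a full
# intermediate list after every step.
out = images
if do_scale:
    out = [scale(image) for image in out]
if do_shift:
    out = [shift(image) for image in out]

# After: a single loop per image; each image flows through all
# enabled steps once, and only one intermediate value is alive.
all_images = []
for image in images:
    if do_scale:
        image = scale(image)
    if do_shift:
        image = shift(image)
    all_images.append(image)

assert out == all_images  # the refactor is behavior-preserving

Since the per-image order of operations is identical in both forms, the results match exactly; the loop form simply avoids allocating a new list of all images after each optional step.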