Describe the bug
Does WanVACEPipeline not support prompt_embeds? I precompute the embeddings with encode_prompt and pass them to the pipeline with no prompt string, but the call fails with:
Traceback (most recent call last):
  File "C:\aiOWN\diffuser_webui\WanVace_GGUF_BROKEN.py", line 122, in <module>
    result = pipe(
             ^^^^^
  File "C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\diffusers\pipelines\wan\pipeline_wan_vace.py", line 779, in __call__
    raise ValueError("Passing a list of prompts is not yet supported. This may be supported in the future.")
ValueError: Passing a list of prompts is not yet supported. This may be supported in the future.
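As far as I can tell this reduces to just passing precomputed embeddings (a minimal sketch using the same model as the full script below, with shortened prompts):

import torch
from diffusers import WanVACEPipeline

pipe = WanVACEPipeline.from_pretrained(
    "Wan-AI/Wan2.1-VACE-1.3B-diffusers", torch_dtype=torch.bfloat16
)
prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(
    prompt="a robot in a warehouse",
    negative_prompt="blurry, low quality",
)
# encode_prompt itself succeeds; this call raises the ValueError above
pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    output_type="latent",
)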
Reproduction
import gc
import time

import torch
from diffusers import AutoencoderKLWan, WanVACEPipeline, WanVACETransformer3DModel
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
from diffusers.utils import export_to_video
from transformers import UMT5EncoderModel
def flush_memory(pipe=None, pipe_component=None):
    """Clean up GPU memory by offloading and deleting pipeline components."""
    if pipe is not None:
        if hasattr(pipe, 'remove_all_hooks'):
            pipe.remove_all_hooks()
        if hasattr(pipe, '__dict__'):
            for attr_name, attr_value in list(pipe.__dict__.items()):
                if hasattr(attr_value, 'to'):
                    try:
                        attr_value.to("cpu")
                    except Exception:
                        pass
                delattr(pipe, attr_name)
        del pipe
        pipe = None
    del pipe_component
    pipe_component = None
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    torch.cuda.synchronize()
def print_memory_usage(stage=""):
    """Print current GPU memory usage"""
    if stage:
        print(f"=== {stage} ===")
    print(f"CUDA allocated: {torch.cuda.memory_allocated() / 1e6:.1f} MB")
    print(f"CUDA reserved: {torch.cuda.memory_reserved() / 1e6:.1f} MB")
    print()
# Check if CUDA is available
if not torch.cuda.is_available():
print("Warning: CUDA is not available. This will run very slowly on CPU.")
model_id = "Wan-AI/Wan2.1-VACE-1.3B-diffusers"
# Step 1: Load text encoder and encode prompts
print("Step 1: Loading text encoder and encoding prompts...")
text_encoder = UMT5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    torch_dtype=torch.bfloat16,
)
pipe = WanVACEPipeline.from_pretrained(
    model_id,
    text_encoder=text_encoder,
    transformer=None,
    vae=None,
    torch_dtype=torch.bfloat16,
)
pipe.enable_sequential_cpu_offload()
# Configure scheduler
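# flow_shift=3.0 is the shift suggested for 480p output (5.0 for 720p)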
flow_shift = 3.0
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)
# Define prompts
prompt = "A sleek, humanoid robot stands in a vast warehouse filled with neatly stacked cardboard boxes on industrial shelves. The robot's metallic body gleams under the bright, even lighting, highlighting its futuristic design and intricate joints. A glowing blue light emanates from its chest, adding a touch of advanced technology. The background is dominated by rows of boxes, suggesting a highly organized storage system. The floor is lined with wooden pallets, enhancing the industrial setting. The camera remains static, capturing the robot's poised stance amidst the orderly environment, with a shallow depth of field that keeps the focus on the robot while subtly blurring the background for a cinematic effect."
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
# Encode prompts
with torch.no_grad():
    prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(
        prompt=prompt,
        negative_prompt=negative_prompt,
    )
print("Text encoding complete!")
print(f"Prompt embeds shape: {prompt_embeds.shape if hasattr(prompt_embeds, 'shape') else type(prompt_embeds)}")
print(f"Negative prompt embeds shape: {negative_prompt_embeds.shape if hasattr(negative_prompt_embeds, 'shape') else type(negative_prompt_embeds)}")
print_memory_usage("After text encoding")
flush_memory(pipe, text_encoder)
time.sleep(1)
print_memory_usage("After cleanup")
# Step 2: Load transformer and generate latents
print("Step 2: Loading transformer and generating latents...")
transformer = WanVACETransformer3DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
)
pipe = WanVACEPipeline.from_pretrained(
    model_id,
    text_encoder=None,
    tokenizer=None,
    transformer=transformer,
    vae=None,
    torch_dtype=torch.bfloat16,
)
pipe.enable_sequential_cpu_offload()
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)
# Generate latents
print("Generating latents...")
print(f"Input prompt embeds shape: {prompt_embeds.shape}")
print(f"Input negative prompt embeds shape: {negative_prompt_embeds.shape}")
latents = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    width=832,
    height=480,
    num_frames=81,
    num_inference_steps=30,
    guidance_scale=5.0,
    conditioning_scale=0.0,
    generator=torch.Generator().manual_seed(0),
    output_type="latent",
).frames  # with output_type="latent", the returned .frames is the latent tensor
print("Latent generation complete!")
print_memory_usage("After latent generation")
flush_memory(pipe, transformer)
time.sleep(1)
print_memory_usage("After transformer cleanup")
# Step 3: Load VAE and decode latents to video
print("Step 3: Loading VAE and decoding to video...")
vae = AutoencoderKLWan.from_pretrained(
    model_id,
    subfolder="vae",
    torch_dtype=torch.float32,
)
pipe = WanVACEPipeline.from_pretrained(
    model_id,
    text_encoder=None,
    tokenizer=None,
    transformer=None,
    vae=vae,
    torch_dtype=torch.bfloat16,
).to("cuda")
# Decode latents to video frames
print("Decoding latents to video frames...")
with torch.no_grad():
    # AutoencoderKLWan normalizes latents with per-channel latents_mean /
    # latents_std rather than a single scaling_factor, so undo that here,
    # mirroring the pipeline's own decode path.
    latents = latents.to(pipe.vae.dtype)
    latents_mean = torch.tensor(pipe.vae.config.latents_mean).view(1, pipe.vae.config.z_dim, 1, 1, 1).to(latents.device, latents.dtype)
    latents_std = 1.0 / torch.tensor(pipe.vae.config.latents_std).view(1, pipe.vae.config.z_dim, 1, 1, 1).to(latents.device, latents.dtype)
    latents = latents / latents_std + latents_mean
    video_frames = pipe.vae.decode(latents, return_dict=False)[0]
# Convert to proper format for video export
video_frames = (video_frames / 2 + 0.5).clamp(0, 1)
video_frames = video_frames.cpu().float().numpy()
# Convert from (batch, channel, time, height, width) to (time, height, width, channel)
video_frames = video_frames.transpose(0, 2, 3, 4, 1)[0]
video_frames = (video_frames * 255).astype('uint8')
print("Video decoding complete!")
print_memory_usage("After video decoding")
# Step 4: Export video
print("Step 4: Exporting video...")
output_path = "generated_robot_video.mp4"
try:
    # Use diffusers' export_to_video function
    export_to_video(video_frames, output_path, fps=8)
    print(f"Video successfully saved to: {output_path}")
except Exception as e:
    print(f"Error exporting video: {e}")
flush_memory(pipe, vae)
print_memory_usage("Final cleanup")
print("Video generation complete!", output_path)
System Info
Not required; using the latest diffusers built from source.