
Commit 59b35d7

Fix-3
Signed-off-by: Amit Raj <[email protected]>
1 parent 2779920 commit 59b35d7

File tree

9 files changed: +234 −140 lines changed


QEfficient/diffusers/models/attention_processor.py

Lines changed: 0 additions & 1 deletion
@@ -123,7 +123,6 @@ def __call__(
             hidden_states = torch.bmm(attention_probs, value)
         else:  # self-attention, use blocked attention
             # QKV done with block-attention (a la FlashAttentionV2)
-            print(f"{query.shape = }, {key.shape = }, {value.shape = }")
             query_block_size = self.query_block_size
             query_seq_len = query.size(-2)
             num_blocks = (query_seq_len + query_block_size - 1) // query_block_size
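The deleted line was a leftover debug print of the Q/K/V shapes; the surrounding code splits the query sequence into fixed-size blocks and attends one block at a time (FlashAttentionV2-style). A rough, hypothetical sketch of that pattern follows, assuming (batch * heads, seq_len, head_dim) tensors; the function name and block size are illustrative, not the repository's actual implementation:

    import torch

    def blocked_self_attention(query, key, value, query_block_size=128):
        # query, key, value: (batch * heads, seq_len, head_dim)
        scale = query.size(-1) ** -0.5
        query_seq_len = query.size(-2)
        # Ceiling division so a partial trailing block is still processed.
        num_blocks = (query_seq_len + query_block_size - 1) // query_block_size
        outputs = []
        for i in range(num_blocks):
            start = i * query_block_size
            end = min(start + query_block_size, query_seq_len)
            q_block = query[:, start:end, :]
            # Each query block attends over the full key/value sequence.
            scores = torch.bmm(q_block, key.transpose(-1, -2)) * scale
            probs = scores.softmax(dim=-1)
            outputs.append(torch.bmm(probs, value))
        return torch.cat(outputs, dim=-2)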

QEfficient/diffusers/models/pytorch_transforms.py

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,11 @@
 class CustomOpsTransform(ModuleMappingTransform):
     _module_mapping = {RMSNorm: CustomRMSNormAIC}
 
+    @classmethod
+    def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
+        model, transformed = super().apply(model)
+        return model, transformed
+
 
 class AttentionTransform(ModuleMappingTransform):
     _module_mapping = {
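The new apply classmethod simply delegates to the base class. For readers unfamiliar with the pattern, a minimal sketch of what a module-mapping transform typically does is shown below; the base-class internals here are an assumption for illustration, not QEfficient's actual ModuleMappingTransform code:

    from typing import Dict, Tuple, Type

    import torch.nn as nn

    class ModuleMappingTransformSketch:
        # Hypothetical base class: swap each module whose type appears in
        # _module_mapping for its replacement implementation.
        _module_mapping: Dict[Type[nn.Module], Type[nn.Module]] = {}

        @classmethod
        def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
            transformed = False
            for module in model.modules():
                if type(module) in cls._module_mapping:
                    # Re-point the instance at the replacement class; parameters
                    # and buffers are left untouched, only the forward changes.
                    module.__class__ = cls._module_mapping[type(module)]
                    transformed = True
            return model, transformed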

QEfficient/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion3.py

Lines changed: 145 additions & 122 deletions
Large diffs are not rendered by default.

QEfficient/transformers/models/modeling_auto.py

Lines changed: 9 additions & 9 deletions
@@ -14,6 +14,15 @@
 import numpy as np
 import torch
 import torch.nn as nn
+from transformers import (
+    AutoModel,
+    AutoModelForCausalLM,
+    AutoModelForImageTextToText,
+    AutoModelForSpeechSeq2Seq,
+    PreTrainedTokenizer,
+    PreTrainedTokenizerFast,
+    TextStreamer,
+)
 
 import QEfficient
 from QEfficient.base.modeling_qeff import QEFFBaseModel
@@ -49,15 +58,6 @@
 )
 from QEfficient.utils.cache import to_hashable
 from QEfficient.utils.logging_utils import logger
-from transformers import (
-    AutoModel,
-    AutoModelForCausalLM,
-    AutoModelForImageTextToText,
-    AutoModelForSpeechSeq2Seq,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-    TextStreamer,
-)
 
 
 class QEFFTransformersBase(QEFFBaseModel):

examples/diffusers/__init__.py

Whitespace-only changes.

examples/diffusers/stable_diffusion_3/__init__.py

Whitespace-only changes.
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+from QEfficient import QEFFStableDiffusion3Pipeline
+
+pipeline = QEFFStableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large")
+pipeline.compile(num_devices_text_encoder=1, num_devices_transformer=4, num_devices_vae_decoder=1)
+image = pipeline("A girl laughing", num_inference_steps=1, guidance_scale=0.0).images[0]
+image.save("new_testing.png")
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+{
+    "Compile": {
+
+        "text_encoder_compile_config": {
+            "num_devices": 1,
+            "num_cores": 16,
+            "mxfp6_matmul": false,
+            "batch_size": 1,
+            "onnx_path": null,
+            "compile_dir": null
+        },
+
+        "text_encoder_compile_config_2": {
+            "num_devices": 1,
+            "num_cores": 16,
+            "mxfp6_matmul": false,
+            "batch_size": 1,
+            "onnx_path": null,
+            "compile_dir": null
+        },
+
+        "text_encoder_compile_config_3": {
+            "num_devices": 1,
+            "num_cores": 16,
+            "mxfp6_matmul": false,
+            "batch_size": 1,
+            "onnx_path": null,
+            "compile_dir": null
+        },
+
+        "transformer_config": {
+            "num_devices": 4,
+            "num_cores": 16,
+            "mxfp6_matmul": false,
+            "fp16": true,
+            "batch_size": 1,
+            "onnx_path": null,
+            "compile_dir": null
+        },
+
+        "vae_compile_config": {
+            "num_devices": 1,
+            "num_cores": 16,
+            "mxfp6_matmul": false,
+            "batch_size": 1,
+            "onnx_path": null,
+            "compile_dir": null
+        }
+    },
+
+
+    "generate": {
+        "text_encoder_generate_config": {
+            "device_ids": [0]
+        },
+        "text_encoder_generate_config_2": {
+            "device_ids": [1]
+        },
+        "text_encoder_generate_config_3": {
+            "device_ids": [2]
+        },
+        "transformer_generate_config": {
+            "device_ids": [4,5,6,7]
+        },
+        "vae_generate_config": {
+            "device_ids": [3]
+        }
+    }
+}
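
How this JSON is consumed is not shown in this commit; one plausible way to wire it up (purely an assumption: the file path is hypothetical, and only the compile keyword arguments already seen in the example script above are used) would be:

    import json

    from QEfficient import QEFFStableDiffusion3Pipeline

    # Hypothetical path to the config file added in this commit.
    with open("sd3_compile_config.json") as f:
        cfg = json.load(f)

    pipeline = QEFFStableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large")

    # Map the "Compile" section onto the per-component device counts that
    # pipeline.compile() is shown accepting in the example script.
    compile_cfg = cfg["Compile"]
    pipeline.compile(
        num_devices_text_encoder=compile_cfg["text_encoder_compile_config"]["num_devices"],
        num_devices_transformer=compile_cfg["transformer_config"]["num_devices"],
        num_devices_vae_decoder=compile_cfg["vae_compile_config"]["num_devices"],
    )

    # The "generate" section pins each component to specific device ids;
    # whether they can be passed to the pipeline call directly is not
    # confirmed by this diff, so the call below mirrors the example script.
    image = pipeline("A girl laughing", num_inference_steps=1, guidance_scale=0.0).images[0]
    image.save("from_config.png")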

s3_testing.py

Lines changed: 0 additions & 8 deletions
This file was deleted.
