Replies: 2 comments
-
Another question: how do I save the resulting plot?
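For what it's worth, triton's benchmark runner handles saving directly; a minimal sketch, assuming the `benchmark` object defined in the reply below:

```python
# Passing `save_path` makes run() write the plot (named after `plot_name`)
# and a CSV of the measurements into the given directory.
benchmark.run(show_plots=True, print_data=True, save_path=".")
```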
-
Okay, so with the following code, I think I now have a better handle on it:

```python
from diffusers import DiffusionPipeline
import torch
import triton

pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    safety_checker=None,
)
pipeline = pipeline.to("cuda")
pipeline.set_progress_bar_config(disable=True)


def run_inference(num_inference_steps=25):
    _ = pipeline("a picture of a cat", num_inference_steps=num_inference_steps)


# Note: these assignments swap the compiled modules into the shared `pipeline`,
# so from here on `run_inference` also exercises the compiled UNet/VAE.
pipeline.unet = torch.compile(pipeline.unet, mode="max-autotune", fullgraph=True)
pipeline.vae.decode = torch.compile(pipeline.vae.decode, mode="max-autotune", fullgraph=True)


def run_inference_with_compile(num_inference_steps=25):
    _ = pipeline("a picture of a cat", num_inference_steps=num_inference_steps)


@triton.testing.perf_report(
    triton.testing.Benchmark(
        x_names=['Steps'],  # argument names to use as an x-axis for the plot
        x_vals=list(range(10, 60, 10)),  # different possible values for `x_names`
        line_arg='do_compile',  # argument name whose value corresponds to a different line in the plot
        line_vals=[
            'no-compile',
            'compiled',
        ],  # possible values for `line_arg`
        line_names=[
            "Not Compiled",
            "Compiled",
        ],  # label name for the lines
        styles=[('blue', '-'), ('green', '-')],  # line styles
        ylabel="Total Time (s)",  # label name for the y-axis
        plot_name="torch.compile_performance",  # name for the plot; also used as the file name when saving it
        args={},
    )
)
def benchmark(Steps, do_compile):
    if do_compile == 'no-compile':
        ms = triton.testing.do_bench(lambda: run_inference(num_inference_steps=Steps))
    elif do_compile == 'compiled':
        ms = triton.testing.do_bench(lambda: run_inference_with_compile(num_inference_steps=Steps))
    return ms / 1e3  # do_bench reports milliseconds; convert to seconds
benchmark.run(show_plots=True, print_data=True, save_path=".")
```

With `save_path="."`, the plot (and a CSV of the measured data) also gets written to the current directory.
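One caveat about the script above: the `torch.compile` assignments replace the modules on the shared `pipeline`, so by the time `benchmark` runs, the 'no-compile' line is timing the compiled UNet/VAE too. A minimal sketch of one way to keep the baseline separate, with illustrative variable names (`eager_unet`, etc.) that are not in the original script:

```python
import torch

# Keep handles to the eager modules before compiling, so the
# "no-compile" path really measures the uncompiled pipeline.
eager_unet = pipeline.unet
eager_vae_decode = pipeline.vae.decode

compiled_unet = torch.compile(pipeline.unet, mode="max-autotune", fullgraph=True)
compiled_vae_decode = torch.compile(pipeline.vae.decode, mode="max-autotune", fullgraph=True)


def run_inference(num_inference_steps=25):
    # Swap the eager modules back in for the baseline measurement.
    pipeline.unet = eager_unet
    pipeline.vae.decode = eager_vae_decode
    _ = pipeline("a picture of a cat", num_inference_steps=num_inference_steps)


def run_inference_with_compile(num_inference_steps=25):
    pipeline.unet = compiled_unet
    pipeline.vae.decode = compiled_vae_decode
    _ = pipeline("a picture of a cat", num_inference_steps=num_inference_steps)
```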
-
I love the benchmarking process as shown here: https://triton-lang.org/main/getting-started/tutorials/02-fused-softmax.html.
I was trying to put together a script to benchmark a diffusion pipeline with the utilities shown in that tutorial. My script is like so:
This is the result I am getting:
This seems odd, as `torch.compile()` should improve the performance (which I have tested in isolation).
For the `diffusers` dependency, run `pip install diffusers accelerate transformers`.
Is this the right way to do it?
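For the isolation check mentioned above, here is a sketch of timing just the UNet with `triton.testing.do_bench`; the checkpoint layout and input shapes (4x64x64 latents, 77x768 text embeddings) are assumptions matching SD v1.5's defaults:

```python
import torch
import triton.testing
from diffusers import UNet2DConditionModel

# Load only the UNet from the checkpoint used in the pipeline above.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float16
).to("cuda")

# Dummy inputs shaped for SD v1.5: 64x64 latents and CLIP text embeddings.
latents = torch.randn(1, 4, 64, 64, dtype=torch.float16, device="cuda")
timestep = torch.tensor(10, device="cuda")
text_emb = torch.randn(1, 77, 768, dtype=torch.float16, device="cuda")

ms_eager = triton.testing.do_bench(
    lambda: unet(latents, timestep, encoder_hidden_states=text_emb)
)

compiled = torch.compile(unet, mode="max-autotune", fullgraph=True)
compiled(latents, timestep, encoder_hidden_states=text_emb)  # warm-up: triggers compilation
ms_compiled = triton.testing.do_bench(
    lambda: compiled(latents, timestep, encoder_hidden_states=text_emb)
)

print(f"eager: {ms_eager:.2f} ms, compiled: {ms_compiled:.2f} ms")
```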