Add support for "benchmarking scenarios" #99

Open · wants to merge 17 commits into base: main
3 changes: 3 additions & 0 deletions .gitignore
@@ -179,6 +179,9 @@ cython_debug/
*.json
*.yaml

# But not scenarios
!src/guidellm/benchmark/scenarios/*.json
!src/guidellm/benchmark/scenarios/*.yaml

# UI Section - Next.js/React application under src/ui/
# dependencies
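These negation patterns keep the bundled scenario definitions under version control even though `*.json` and `*.yaml` are ignored globally. To illustrate what such a scenario carries, here is a rough sketch; the field names are inferred from the CLI options added in this PR, while the example values (dataset spec, rate type) are assumptions, so treat it as illustrative rather than the actual schema:

```python
# Illustrative sketch only: field names mirror the CLI flags in this PR; the
# authoritative schema is GenerativeTextScenario in guidellm.benchmark.scenario.
from guidellm.benchmark.scenario import GenerativeTextScenario

scenario_data = {
    "target": "http://localhost:8000",  # backend endpoint to benchmark
    "backend_type": "openai_http",
    "data": "my_dataset.json",          # hypothetical dataset path or ID
    "rate_type": "sweep",
    "max_seconds": 120,
}

# Any field left out falls back to the scenario defaults, the same way the CLI
# uses GenerativeTextScenario.get_default(...) for unset options.
scenario = GenerativeTextScenario.model_validate(scenario_data)
```

A JSON or YAML file with the same keys, placed under `src/guidellm/benchmark/scenarios/`, is exactly what the new `!` rules keep out of the ignore list.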
155 changes: 90 additions & 65 deletions src/guidellm/__main__.py
@@ -1,56 +1,55 @@
import asyncio
import codecs
import json
from pathlib import Path
from typing import get_args

import click
from pydantic import ValidationError

from guidellm.backend import BackendType
from guidellm.benchmark import ProfileType, benchmark_generative_text
from guidellm.benchmark import ProfileType
from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
from guidellm.config import print_config
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
from guidellm.utils import cli as cli_tools

STRATEGY_PROFILE_CHOICES = set(
list(get_args(ProfileType)) + list(get_args(StrategyType))
)


def parse_json(ctx, param, value): # noqa: ARG001
if value is None:
return None
try:
return json.loads(value)
except json.JSONDecodeError as err:
raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err


def parse_number_str(ctx, param, value): # noqa: ARG001
if value is None:
return None

values = value.split(",") if "," in value else [value]

try:
return [float(val) for val in values]
except ValueError as err:
raise click.BadParameter(
f"{param.name} must be a number or comma-separated list of numbers."
) from err


@click.group()
def cli():
pass


@cli.command(
help="Run a benchmark against a generative model using the specified arguments."
help="Run a benchmark against a generative model using the specified arguments.",
context_settings={"auto_envvar_prefix": "GUIDELLM"},
)
@click.option(
"--scenario",
type=cli_tools.Union(
click.Path(
exists=True,
readable=True,
file_okay=True,
dir_okay=False,
path_type=Path, # type: ignore[type-var]
),
click.Choice(get_builtin_scenarios()),
),
default=None,
help=(
"The name of a builtin scenario or path to a config file. "
"Missing values from the config will use defaults. "
"Options specified on the commandline will override the scenario."
),
)
@click.option(
"--target",
required=True,
type=str,
help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
)
@@ -61,20 +60,20 @@ def cli():
"The type of backend to use to run requests against. Defaults to 'openai_http'."
f" Supported types: {', '.join(get_args(BackendType))}"
),
default="openai_http",
default=GenerativeTextScenario.get_default("backend_type"),
)
@click.option(
"--backend-args",
callback=parse_json,
default=None,
callback=cli_tools.parse_json,
default=GenerativeTextScenario.get_default("backend_args"),
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs."
),
)
@click.option(
"--model",
default=None,
default=GenerativeTextScenario.get_default("model"),
type=str,
help=(
"The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ )
)
@click.option(
"--processor",
default=None,
default=GenerativeTextScenario.get_default("processor"),
type=str,
help=(
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,16 +92,15 @@ )
)
@click.option(
"--processor-args",
default=None,
callback=parse_json,
default=GenerativeTextScenario.get_default("processor_args"),
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the processor constructor "
"as a dict with **kwargs."
),
)
@click.option(
"--data",
required=True,
type=str,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,15 +110,16 @@ )
)
@click.option(
"--data-args",
callback=parse_json,
default=GenerativeTextScenario.get_default("data_args"),
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
"as a dict with **kwargs."
),
)
@click.option(
"--data-sampler",
default=None,
default=GenerativeTextScenario.get_default("data_sampler"),
type=click.Choice(["random"]),
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ )
)
@click.option(
"--rate-type",
required=True,
type=click.Choice(STRATEGY_PROFILE_CHOICES),
help=(
"The type of benchmark to run. "
@@ -138,8 +136,7 @@ )
)
@click.option(
"--rate",
default=None,
callback=parse_number_str,
default=GenerativeTextScenario.get_default("rate"),
help=(
"The rates to run the benchmark at. "
"Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ )
@click.option(
"--max-seconds",
type=float,
default=GenerativeTextScenario.get_default("max_seconds"),
help=(
"The maximum number of seconds each benchmark can run for. "
"If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ )
@click.option(
"--max-requests",
type=int,
default=GenerativeTextScenario.get_default("max_requests"),
help=(
"The maximum number of requests each benchmark can run for. "
"If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ )
@click.option(
"--warmup-percent",
type=float,
default=None,
default=GenerativeTextScenario.get_default("warmup_percent"),
help=(
"The percent of the benchmark (based on max-seconds, max-requets, "
"or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ )
@click.option(
"--cooldown-percent",
type=float,
default=GenerativeTextScenario.get_default("cooldown_percent"),
help=(
"The percent of the benchmark (based on max-seconds, max-requets, or lenth "
"of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ )
)
@click.option(
"--output-extras",
callback=parse_json,
callback=cli_tools.parse_json,
help="A JSON string of extra data to save with the output benchmarks",
)
@click.option(
@@ -222,15 +222,16 @@ )
"The number of samples to save in the output file. "
"If None (default), will save all samples."
),
default=None,
default=GenerativeTextScenario.get_default("output_sampling"),
)
@click.option(
"--random-seed",
default=42,
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
scenario,
target,
backend_type,
backend_args,
@@ -254,30 +255,53 @@ def benchmark(
output_sampling,
random_seed,
):
click_ctx = click.get_current_context()

overrides = cli_tools.set_if_not_default(
click_ctx,
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
output_sampling=output_sampling,
random_seed=random_seed,
)

try:
# Build the scenario from CLI overrides alone, from a scenario file, or from a builtin
if scenario is None:
_scenario = GenerativeTextScenario.model_validate(overrides)
elif isinstance(scenario, Path):
_scenario = GenerativeTextScenario.from_file(scenario, overrides)
else: # Only builtins can make it here; click will catch anything else
_scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
except ValidationError as e:
# Translate the pydantic validation error into a click argument error
errs = e.errors(include_url=False, include_context=True, include_input=True)
param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
raise click.BadParameter(
errs[0]["msg"], ctx=click_ctx, param_hint=param_name
) from e

asyncio.run(
benchmark_generative_text(
target=target,
backend_type=backend_type,
backend_args=backend_args,
model=model,
processor=processor,
processor_args=processor_args,
data=data,
data_args=data_args,
data_sampler=data_sampler,
rate_type=rate_type,
rate=rate,
max_seconds=max_seconds,
max_requests=max_requests,
warmup_percent=warmup_percent,
cooldown_percent=cooldown_percent,
benchmark_with_scenario(
scenario=_scenario,
show_progress=not disable_progress,
show_progress_scheduler_stats=display_scheduler_stats,
output_console=not disable_console_outputs,
output_path=output_path,
output_extras=output_extras,
output_sampling=output_sampling,
random_seed=random_seed,
)
)
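The precedence here is: explicit CLI flags (or `GUIDELLM_*` environment variables, enabled by `auto_envvar_prefix`) override values from the scenario, which in turn override the model defaults, because only parameters the user actually set end up in `overrides`. A minimal sketch of how a helper like `cli_tools.set_if_not_default` could do that with click's parameter-source tracking follows; it is an assumption about the implementation, not a copy of it:

```python
from typing import Any

import click
from click.core import ParameterSource


def set_if_not_default(ctx: click.Context, **kwargs: Any) -> dict[str, Any]:
    """Sketch: keep only the values the user supplied explicitly."""
    overrides: dict[str, Any] = {}
    for name, value in kwargs.items():
        # ParameterSource.DEFAULT means click filled the value in itself, so it
        # must not shadow a value coming from the scenario file.
        if ctx.get_parameter_source(name) != ParameterSource.DEFAULT:
            overrides[name] = value
    return overrides
```

In practice that means an invocation along the lines of `guidellm benchmark --scenario <builtin-or-path> --max-seconds 60` runs the scenario with only `max_seconds` overridden.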

@@ -316,7 +340,8 @@ def preprocess():
"Convert a dataset to have specific prompt and output token sizes.\n"
"DATA: Path to the input dataset or dataset ID.\n"
"OUTPUT_PATH: Path to save the converted dataset, including file suffix."
)
),
context_settings={"auto_envvar_prefix": "GUIDELLM"},
)
@click.argument(
"data",
@@ -340,15 +365,15 @@ def preprocess():
@click.option(
"--processor-args",
default=None,
callback=parse_json,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the processor constructor "
"as a dict with **kwargs."
),
)
@click.option(
"--data-args",
callback=parse_json,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
"as a dict with **kwargs."
18 changes: 15 additions & 3 deletions src/guidellm/benchmark/entrypoints.py
@@ -15,10 +15,22 @@
)
from guidellm.benchmark.profile import ProfileType, create_profile
from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
from guidellm.request import GenerativeRequestLoader
from guidellm.scheduler import StrategyType


async def benchmark_with_scenario(scenario: Scenario, **kwargs):
"""
Run a benchmark using a scenario, forwarding any extra keyword arguments
"""

if isinstance(scenario, GenerativeTextScenario):
return await benchmark_generative_text(**vars(scenario), **kwargs)
else:
raise ValueError(f"Unsupported Scenario type {type(scenario)}")


async def benchmark_generative_text(
target: str,
backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
max_requests: Optional[int],
warmup_percent: Optional[float],
cooldown_percent: Optional[float],
show_progress: bool,
show_progress_scheduler_stats: bool,
output_console: bool,
output_path: Optional[Union[str, Path]],
output_extras: Optional[dict[str, Any]],
output_sampling: Optional[int],
random_seed: int,
show_progress: bool = True,
show_progress_scheduler_stats: bool = False,
output_console: bool = True,
) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
console = GenerativeBenchmarksConsole(enabled=show_progress)
console.print_line("Creating backend...")