Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/prompt management #109

Merged
merged 19 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions autointent/generation/utterances/basic/chat_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""Chat template for evolution augmentation via abstractization."""

import random
from abc import ABC, abstractmethod
from typing import ClassVar

from autointent import Dataset
from autointent.generation.utterances.schemas import Message, Role
from autointent.schemas import Intent


class BaseSynthesizer(ABC):
    """Interface for callables that turn an intent into a chat prompt.

    Concrete subclasses implement ``__call__`` and return the list of chat
    messages to send to the generator.
    """

    @abstractmethod
    def __call__(self, intent_data: Intent, n_examples: int) -> list[Message]:
        """Build the chat messages requesting new utterances for an intent.

        Args:
            intent_data: Intent for which utterances should be generated.
            n_examples: Number of utterances to ask for.

        Returns:
            Chat messages making up the prompt.
        """


class SynthesizerChatTemplate(BaseSynthesizer):
"""Chat template for generating additional examples for a given intent class."""

_messages: ClassVar[list[Message]] = [
Message(
role=Role.USER,
content=(
"You will be provided with a set of example utterances and the name "
"of the common topic (intent name) of these utterances. "
"Your task is to generate more examples that fit within the same intent name.\n\n"
"Note:\n"
"- You can generate similar utterances with only slot values changed\n"
"- You can generate completely different utterance from the same intent name\n"
"- Intent name can be missed, then you should infer from example utterances only\n"
"- Example utterances can be missed, then you should infer from intent name only\n"
"{extra_instructions}\n\n"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"{extra_instructions}\n\n"
"{extra_instructions}\n\n"

Кажется, тут часть темплейта

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

не, это норм, в конструкторе эта часть заполняется

"Intent name: ordering_pizza\n\n"
"Example Utterances:\n"
"1. I want to order a large pepperoni pizza.\n"
"2. Can I get a medium cheese pizza with extra olives?\n"
"3. Please deliver a small veggie pizza to my address.\n\n"
"Please generate 3 more examples for the provided intent name."
),
),
Message(
role=Role.ASSISTANT,
content=(
"1. I'd like to order a large margherita pizza.\n"
"2. Can you deliver a medium Hawaiian pizza with extra pineapple?\n"
"3. Please send a small BBQ chicken pizza to my home."
),
),
Message(
role=Role.USER,
content=(
"Intent name: booking a hotel\n\n"
"Example Utterances:\n"
"1. I need to book a room for two nights in New York.\n\n"
"Please generate 2 more examples for the provided intent name."
),
),
Message(
role=Role.ASSISTANT,
content=(
"1. Can you reserve a deluxe room for my trip to Tokyo?\n"
"2. I need to book a hotel room with a mountain view in Denver."
),
),
Message(
role=Role.USER,
content=(
"Intent name:\n\n"
"Example Utterances:\n"
"1. What is the weather like today?\n\n"
"Please generate 2 more examples for the provided intent class."
),
),
Message(
role=Role.ASSISTANT,
content=("1. Can you tell me the forecast for tomorrow?\n" "2. Is it going to rain this weekend?"),
),
Message(
role=Role.USER,
content=(
"Intent name: Scheduling a Meeting\n\n"
"Example Utterances:\n\n"
"Please generate 3 more examples for the provided intent class."
),
),
Message(
role=Role.ASSISTANT,
content=(
"1. I need to schedule a meeting for next Tuesday.\n"
"2. Can you set up a conference call for tomorrow afternoon?\n"
"3. Please arrange a meeting with the marketing team next week."
),
),
]

def __init__(
    self,
    dataset: Dataset,
    split: str,
    extra_instructions: str | None = None,
    max_sample_utterances: int | None = None,
) -> None:
    """Bind the template to a dataset split and fill in the instruction slot.

    Args:
        dataset: Dataset the example utterances are taken from.
        split: Name of the split of ``dataset`` to take utterances from.
        extra_instructions: Text substituted for the ``{extra_instructions}``
            placeholder in the first few-shot message. ``None`` is treated
            as an empty string.
        max_sample_utterances: Upper bound on the number of example
            utterances placed into the prompt; ``None`` keeps all of them.
    """
    if extra_instructions is None:
        extra_instructions = ""

    # Build a per-instance message list instead of formatting the class-level
    # template in place. Mutating the shared ClassVar would consume the
    # ``{extra_instructions}`` placeholder on the first construction, so every
    # later instance would silently inherit the first instance's instructions.
    head, *tail = type(self)._messages
    self._messages = [  # type: ignore[misc]
        Message(
            role=head["role"],
            content=head["content"].format(extra_instructions=extra_instructions),
        ),
        *tail,
    ]

    self.dataset = dataset
    self.split = split
    self.max_sample_utterances = max_sample_utterances

def __call__(self, intent_data: Intent, n_examples: int) -> list[Message]:
    """Build the prompt asking for more utterances of the given intent.

    Rows of the configured split whose label equals ``intent_data.id`` are
    used as example utterances. When ``max_sample_utterances`` is set, a
    random subset of at most that many utterances is used.

    Args:
        intent_data: Intent to generate utterances for; its ``id`` selects
            the examples and its ``name`` is inserted into the prompt.
        n_examples: Number of new utterances to request.

    Returns:
        The few-shot messages followed by one new user message describing
        the requested intent.
    """
    filtered_split = self.dataset[self.split].filter(lambda sample: sample[Dataset.label_feature] == intent_data.id)
    # Materialise as a plain list so sampling and formatting see the same type.
    sample_utterances = list(filtered_split[Dataset.utterance_feature])
    if self.max_sample_utterances is not None:
        # random.sample raises ValueError when k exceeds the population size,
        # which happens for intents with only a few utterances; clamp instead.
        k = min(self.max_sample_utterances, len(sample_utterances))
        sample_utterances = random.sample(sample_utterances, k=k)
    # NOTE(review): the utterances are inserted as a Python list repr, whereas
    # the few-shot examples use a numbered list - confirm this is intended.
    return [
        *self._messages,
        Message(
            role=Role.USER,
            content=f"Intent name: {intent_data.name}\n\n"
            f"Example Utterances:\n{sample_utterances}\n\n"
            f"Please generate {n_examples} more examples for the provided intent class.\n",
        ),
    ]
119 changes: 0 additions & 119 deletions autointent/generation/utterances/basic/chat_template.yaml

This file was deleted.

35 changes: 6 additions & 29 deletions autointent/generation/utterances/basic/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from argparse import ArgumentParser

from autointent import load_dataset
from autointent.generation.utterances.basic.utterance_generator import LengthType, StyleType, UtteranceGenerator
from autointent.generation.utterances.basic.utterance_generator import UtteranceGenerator
from autointent.generation.utterances.generator import Generator

from .chat_template import SynthesizerChatTemplate


def main() -> None:
"""ClI endpoint."""
Expand Down Expand Up @@ -41,37 +43,12 @@ def main() -> None:
default=5,
help="Number of utterances to use as an example for augmentation",
)
parser.add_argument(
"--custom-instruction",
type=str,
action="append",
help="Add extra instructions to default prompt."
"You can use this argument multiple times to add multiple instructions",
)
parser.add_argument(
"--length",
choices=LengthType.__args__, # type: ignore[attr-defined]
default="none",
help="How to extend the prompt with length instruction",
)
parser.add_argument(
"--style",
choices=StyleType.__args__, # type: ignore[attr-defined]
default="none",
help="How to extend the prompt with style instruction",
)
parser.add_argument(
"--same-punctuation",
action="store_true",
help="Whether to extend the prompt with punctuation instruction",
)
args = parser.parse_args()

dataset = load_dataset(args.input_path)
generator = UtteranceGenerator(
Generator(), args.custom_instruction or [], args.length, args.style, args.same_punctuation
)
generator.augment(dataset, n_generations=args.n_generations, max_sample_utterances=args.n_sample_utterances)
template = SynthesizerChatTemplate(dataset, "train", max_sample_utterances=args.n_sample_utterances)
generator = UtteranceGenerator(Generator(), template)
generator.augment(dataset, n_generations=args.n_generations)

dataset.to_json(args.output_path)

Expand Down
14 changes: 0 additions & 14 deletions autointent/generation/utterances/basic/extra_instructions.json

This file was deleted.

Loading