From 481fd5591b4eb7368652fa02168fed2c7167fae6 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 02:02:24 +0300 Subject: [PATCH 01/38] qol: add attachments to cli interface --- dff/messengers/common/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dff/messengers/common/interface.py b/dff/messengers/common/interface.py index ce6a506f6..58d62ac04 100644 --- a/dff/messengers/common/interface.py +++ b/dff/messengers/common/interface.py @@ -167,7 +167,7 @@ def _request(self) -> List[Tuple[Message, Any]]: return [(Message(input(self._prompt_request)), self._ctx_id)] def _respond(self, responses: List[Context]): - print(f"{self._prompt_response}{responses[0].last_response.text}", file=self._descriptor) + print(f"{self._prompt_response}{responses[0].last_response.text}; attachments: {responses[0].last_response.attachments}", file=self._descriptor) async def connect(self, pipeline_runner: PipelineRunnerFunction, **kwargs): """ From 973abaa345b02769265b9bb55d248c4d7d153679 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 02:03:44 +0300 Subject: [PATCH 02/38] qol: make keywords upper case This allows using `Keywords.GLOBAL` and literal `"GLOBAL"` interchangeably. --- dff/script/core/keywords.py | 15 +++++++-------- dff/script/core/script.py | 12 ++++++------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dff/script/core/keywords.py b/dff/script/core/keywords.py index 805c10db5..8a573cd6f 100644 --- a/dff/script/core/keywords.py +++ b/dff/script/core/keywords.py @@ -80,14 +80,13 @@ class Keywords(str, Enum): """ - GLOBAL = "global" - LOCAL = "local" - TRANSITIONS = "transitions" - RESPONSE = "response" - MISC = "misc" - PRE_RESPONSE_PROCESSING = "pre_response_processing" - PRE_TRANSITIONS_PROCESSING = "pre_transitions_processing" - PROCESSING = "pre_transitions_processing" + GLOBAL = "GLOBAL" + LOCAL = "LOCAL" + TRANSITIONS = "TRANSITIONS" + RESPONSE = "RESPONSE" + MISC = "MISC" + PRE_RESPONSE_PROCESSING = "PRE_RESPONSE_PROCESSING" + PRE_TRANSITIONS_PROCESSING = "PRE_TRANSITIONS_PROCESSING" # Redefine shortcuts diff --git a/dff/script/core/script.py b/dff/script/core/script.py index 25c60cc5c..ccf326058 100644 --- a/dff/script/core/script.py +++ b/dff/script/core/script.py @@ -10,7 +10,7 @@ import logging from typing import Callable, Optional, Any, Dict, Union, TYPE_CHECKING -from pydantic import BaseModel, field_validator, validate_call +from pydantic import BaseModel, field_validator, validate_call, Field from .types import LabelType, NodeLabelType, ConditionType, NodeLabel3Type from .message import Message @@ -29,11 +29,11 @@ class Node(BaseModel, extra="forbid", validate_assignment=True): The class for the `Node` object. 
""" - transitions: Dict[NodeLabelType, ConditionType] = {} - response: Optional[Union[Message, Callable[[Context, Pipeline], Message]]] = None - pre_transitions_processing: Dict[Any, Callable] = {} - pre_response_processing: Dict[Any, Callable] = {} - misc: dict = {} + transitions: Dict[NodeLabelType, ConditionType] = Field(default_factory=dict, alias="TRANSITIONS") + response: Union[Message, Callable[[Context, Pipeline], Message]] = Field(default_factory=Message, alias="RESPONSE") + pre_transitions_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_TRANSITIONS_PROCESSING") + pre_response_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_RESPONSE_PROCESSING") + misc: dict = Field(default_factory=dict, alias="MISC") @field_validator("transitions", mode="before") @classmethod From 98692f96f19610e570612448e574a9cf8ac12480 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 02:04:46 +0300 Subject: [PATCH 03/38] qol: accept list of attachments as `Message.attachment` This commit will conflict with #328 and can be dropped. --- dff/script/core/message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dff/script/core/message.py b/dff/script/core/message.py index 9f530968b..1d3f859d2 100644 --- a/dff/script/core/message.py +++ b/dff/script/core/message.py @@ -192,7 +192,7 @@ class level variables to store message information. text: Optional[str] = None commands: Optional[List[Command]] = None - attachments: Optional[Attachments] = None + attachments: Optional[List[DataModel]] = None annotations: Optional[dict] = None misc: Optional[dict] = None # commands and state options are required for integration with services @@ -204,7 +204,7 @@ def __init__( self, text: Optional[str] = None, commands: Optional[List[Command]] = None, - attachments: Optional[Attachments] = None, + attachments: Optional[List[DataModel]] = None, annotations: Optional[dict] = None, misc: Optional[dict] = None, ): From a9bbbb9fe9a76389d4bbfcd0e9d513ef167561ef Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 02:07:26 +0300 Subject: [PATCH 04/38] qol: make choice function accept responses as args --- dff/script/responses/std_responses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dff/script/responses/std_responses.py b/dff/script/responses/std_responses.py index 5bd2d2576..d8c509369 100644 --- a/dff/script/responses/std_responses.py +++ b/dff/script/responses/std_responses.py @@ -15,7 +15,7 @@ from dff.script import Context, Message -def choice(responses: List[Message]): +def choice(*responses): """ Function wrapper that takes the list of responses as an input and returns handler which outputs a response randomly chosen from that list. From d25fd3086f2a12646dd41e5f9e9356114203750c Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 02:08:39 +0300 Subject: [PATCH 05/38] qol: import script objects inside `dff/__init__.py` This allows importing `Message` directly from `dff`, among other import improvements. 
--- dff/__init__.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dff/__init__.py b/dff/__init__.py index f3cfbd963..bd06ef21b 100644 --- a/dff/__init__.py +++ b/dff/__init__.py @@ -13,4 +13,22 @@ from dff.pipeline import Pipeline from dff.script import Context, Script +from dff.script.core.message import ( + Location, + Attachment, + Audio, + Video, + Image, + Document, + Attachments, + Link, + Button, + Keyboard, + Message, + MultiMessage +) +import dff.script.responses as rsp +import dff.script.labels as lbl +import dff.script.conditions as cnd + Script.model_rebuild() From e8fc8f6e41d5e50aa6638801067fcef2fec7c789 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 03:04:18 +0300 Subject: [PATCH 06/38] json import draft --- dff/pipeline/pipeline/pipeline.py | 38 ++++++ dff/pipeline/pipeline/script_parsing.py | 152 ++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 dff/pipeline/pipeline/script_parsing.py diff --git a/dff/pipeline/pipeline/pipeline.py b/dff/pipeline/pipeline/pipeline.py index fb548e9f5..1484f6d16 100644 --- a/dff/pipeline/pipeline/pipeline.py +++ b/dff/pipeline/pipeline/pipeline.py @@ -35,6 +35,7 @@ from ..types import PIPELINE_STATE_KEY from .utils import finalize_service_group, pretty_format_component_info_dict from dff.pipeline.pipeline.actor import Actor +from dff.pipeline.pipeline.script_parsing import JSONImporter, Path logger = logging.getLogger(__name__) @@ -274,6 +275,43 @@ def from_script( components=[*pre_services, ACTOR, *post_services], ) + @classmethod + def from_file( + cls, + file: Union[str, Path], + validation_stage: Optional[bool] = None, + condition_handler: Optional[Callable] = None, + verbose: bool = True, + parallelize_processing: bool = False, + handlers: Optional[Dict[ActorStage, List[Callable]]] = None, + context_storage: Optional[Union[DBContextStorage, Dict]] = None, + messenger_interface: Optional[MessengerInterface] = None, + pre_services: Optional[List[Union[ServiceBuilder, ServiceGroupBuilder]]] = None, + post_services: Optional[List[Union[ServiceBuilder, ServiceGroupBuilder]]] = None, + ): + pre_services = [] if pre_services is None else pre_services + post_services = [] if post_services is None else post_services + script = JSONImporter.from_file(file).import_script() + + def to_tuple(i): + if isinstance(i, list): + return tuple(i) + return i + params = {param: to_tuple(script["CONFIG"].get(param)) for param in ("start_label", "fallback_label", "label_priority")} + del script["CONFIG"] # todo: add support for CONFIG + return cls( + script=script, + **params, + validation_stage=validation_stage, + condition_handler=condition_handler, + verbose=verbose, + parallelize_processing=parallelize_processing, + handlers=handlers, + messenger_interface=messenger_interface, + context_storage=context_storage, + components=[*pre_services, ACTOR, *post_services], + ) + def set_actor( self, script: Union[Script, Dict], diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py new file mode 100644 index 000000000..be1de9772 --- /dev/null +++ b/dff/pipeline/pipeline/script_parsing.py @@ -0,0 +1,152 @@ +from typing import Union, Dict +import importlib +import logging +from pathlib import Path +import json + +from pydantic import JsonValue +import yaml +try: + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader + + +logger = logging.getLogger(__name__) + + +class JSONImportError(Exception): + __notes__ = ["Please read the 
guide on YAML-formatted scripts: url here"] # todo: update placeholder string + + +class JSONImporter: + DFF_NAMESPACE_PREFIX = "dff." + CUSTOM_DIR_CONFIG_OPTION = "custom_dir" + TRANSITIONS_KEY = "TRANSITIONS" + CONFIG_KEY = "CONFIG" + TRANSITION_ITEM_KEYS = {"lbl", "cnd"} + + def __init__(self, script: Dict[str, JsonValue]): + self.script = script + config = self.script.get(self.CONFIG_KEY) + if config is None: + raise JSONImportError("config is not found") + self.config = config + custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION, "custom_dir") + if "." in custom_dir: + raise JSONImportError("custom dir cannot contain `.`") + if not Path(custom_dir).exists(): + raise JSONImportError(f"could not find directory {custom_dir}") + self.custom_dir_prefix = custom_dir + "." + + def resolve_target_object(self, obj: str): + obj_parts = obj.split(".") + if obj_parts[0] == self.DFF_NAMESPACE_PREFIX[:-1]: + module = importlib.import_module(obj_parts[0]) + new_obj = module + for part in obj_parts[1:]: + new_obj = new_obj.__getattribute__(part) + return new_obj + elif obj_parts[0] == self.custom_dir_prefix[:-1]: + module = importlib.import_module(".".join(obj_parts[:-1])) + return module.__getattribute__(obj_parts[-1]) + else: + raise RuntimeError(obj_parts) + + def import_script(self): + return self.replace_script_objects(self.script) + + def replace_obj(self, obj: JsonValue): + if not isinstance(obj, dict): + raise JSONImportError(f"obj {obj} has to be a dictionary") + keys = obj.keys() + if len(keys) != 1: + raise JSONImportError("obj has to have only 1 key") + key = keys.__iter__().__next__() + logger.debug(f"obj: {key}") + target_obj = self.resolve_target_object(key) + + if target_obj is None: + raise ImportError(f"Could not find object {key}") + + if not callable(target_obj): + raise JSONImportError(f"object `{key}` has to be callable") + + args = [] + kwargs = {} + if isinstance(obj[key], dict): + for k, v in obj[key].items(): + kwargs[k] = self.replace_script_objects(v) + elif isinstance(obj[key], list): + for item in obj[key]: + args.append(self.replace_script_objects(item)) + elif obj[key] is not None: + args.append(self.replace_script_objects(obj[key])) + + return target_obj(*args, **kwargs) + + def process_transitions(self, transition_list: list): + if not isinstance(transition_list, list): + raise JSONImportError(f"transitions value should be a list of dictionaries, not {transition_list}") + + transitions = {} + for item in transition_list: + if not isinstance(item, dict): + raise JSONImportError(f"transition items have to be dictionaries, not {item}") + if item.keys() != self.TRANSITION_ITEM_KEYS: + raise JSONImportError(f"transition items' keys have to be `lbl` and `cnd`, not {item.keys()}") + + lbl = self.replace_script_objects(item["lbl"]) + if isinstance(lbl, list): + lbl = tuple(lbl) + cnd = self.replace_script_objects(item["cnd"]) + + if isinstance(lbl, tuple) and lbl in transitions: + raise JSONImportError(f"label {lbl} already exists in {transitions}") + + transitions[lbl] = cnd + return transitions + + def replace_string_values(self, obj: JsonValue): + if not isinstance(obj, str): + raise JSONImportError(f"obj {obj} has to be a string") + if obj.startswith(self.DFF_NAMESPACE_PREFIX) or obj.startswith(self.custom_dir_prefix): + target_obj = self.resolve_target_object(obj) + + if target_obj is None: + raise JSONImportError(f"Could not find object {obj}") + + return target_obj + raise RuntimeError() + + def replace_script_objects(self, obj: JsonValue): + if isinstance(obj, 
dict): + keys = obj.keys() + if len(keys) == 1: + key = keys.__iter__().__next__() + if key.startswith(self.DFF_NAMESPACE_PREFIX) or key.startswith(self.custom_dir_prefix): + return self.replace_obj(obj) + + return {k: ( + self.replace_script_objects(v) if k != self.TRANSITIONS_KEY else self.process_transitions(v) + ) for k, v in obj.items()} + elif isinstance(obj, list): + return [self.replace_script_objects(item) for item in obj] + elif isinstance(obj, str): + if obj.startswith(self.DFF_NAMESPACE_PREFIX) or obj.startswith(self.custom_dir_prefix): + return self.replace_string_values(obj) + return obj + + @classmethod + def from_file(cls, file: Union[str, Path]): + if isinstance(file, str): + file = Path(file) + + if file.suffix == ".json": + with open(file, "r") as fd: + return cls(json.load(fd)) + elif file.suffix in (".yaml", ".yml"): + with open(file, "r") as fd: + return cls(yaml.load(fd, Loader=Loader)) + else: + raise JSONImportError("file should have a `.json`, `.yaml` or `.yml` extension") From dfd61b5ed406effea06e8da2805aa809984bc515 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 12 Feb 2024 03:04:37 +0300 Subject: [PATCH 07/38] add json import examples --- json_import_examples/README.md | 21 ++++++++++ json_import_examples/json/custom/__init__.py | 0 json_import_examples/json/custom/cnd.py | 2 + json_import_examples/json/pipeline.py | 12 ++++++ json_import_examples/json/script.json | 43 ++++++++++++++++++++ json_import_examples/yaml/custom/__init__.py | 0 json_import_examples/yaml/custom/cnd.py | 2 + json_import_examples/yaml/pipeline.py | 12 ++++++ json_import_examples/yaml/script.yaml | 19 +++++++++ 9 files changed, 111 insertions(+) create mode 100644 json_import_examples/README.md create mode 100644 json_import_examples/json/custom/__init__.py create mode 100644 json_import_examples/json/custom/cnd.py create mode 100644 json_import_examples/json/pipeline.py create mode 100644 json_import_examples/json/script.json create mode 100644 json_import_examples/yaml/custom/__init__.py create mode 100644 json_import_examples/yaml/custom/cnd.py create mode 100644 json_import_examples/yaml/pipeline.py create mode 100644 json_import_examples/yaml/script.yaml diff --git a/json_import_examples/README.md b/json_import_examples/README.md new file mode 100644 index 000000000..879116675 --- /dev/null +++ b/json_import_examples/README.md @@ -0,0 +1,21 @@ +# JSON Import examples + +This directory is temporary and will be removed. + +The purpose of this directory is to showcase JSON Import capabilities of the DFF Pipeline. + +Currently, it has two examples: +- JSON script +- YAML script + +Both scripts are equal and are very basic. + +In order to run them: +1. Install dff via `poetry install`. +2. Run pipeline via `python json_import_examples/yaml/pipeline.py`. + +This will run a CLI interface. For information on how to set up a web interface +for the DFF Pipeline see the corresponding tutorials: +https://deeppavlov.github.io/dialog_flow_framework/tutorials/index_interfaces.html#web-api + +Not Yet Implemented: an object containing a list of all object names from DFF. 
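For reference, a rough Python equivalent of the script that `JSONImporter.import_script()` builds from `script.yaml` below — a sketch, relying on the uppercase keyword aliases from the earlier `Node` patch and on `choice` accepting responses as positional arguments:

    from dff import Message, Link, rsp
    from custom import cnd as custom_cnd  # the example's custom_dir package

    script = {
        "flow": {
            "node": {
                "RESPONSE": rsp.choice(
                    Message(
                        text="Hi",
                        attachments=[
                            Link(
                                source="https://github.com/deeppavlov/dialog_flow_framework",
                                title="DFF",
                            )
                        ],
                    ),
                    Message("message"),
                ),
                "TRANSITIONS": {("flow", "node", 3): custom_cnd.condition},
            }
        }
    }

Transition labels arrive as YAML lists and are converted to tuples by `process_transitions`, which is why the key above is `("flow", "node", 3)`.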
\ No newline at end of file diff --git a/json_import_examples/json/custom/__init__.py b/json_import_examples/json/custom/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/json_import_examples/json/custom/cnd.py b/json_import_examples/json/custom/cnd.py new file mode 100644 index 000000000..fb228857d --- /dev/null +++ b/json_import_examples/json/custom/cnd.py @@ -0,0 +1,2 @@ +def condition(ctx, pipeline): + return True diff --git a/json_import_examples/json/pipeline.py b/json_import_examples/json/pipeline.py new file mode 100644 index 000000000..3b1e39968 --- /dev/null +++ b/json_import_examples/json/pipeline.py @@ -0,0 +1,12 @@ +from pathlib import Path + +from dff import Pipeline + + +SCRIPT_FILE = Path(__file__).parent / "script.json" + +pipeline = Pipeline.from_file(SCRIPT_FILE) + + +if __name__ == "__main__": + pipeline.run() diff --git a/json_import_examples/json/script.json b/json_import_examples/json/script.json new file mode 100644 index 000000000..a78436a15 --- /dev/null +++ b/json_import_examples/json/script.json @@ -0,0 +1,43 @@ +{ + "CONFIG": { + "custom_dir": "custom", + "start_label": [ + "flow", + "node" + ] + }, + "flow": { + "node": { + "RESPONSE": { + "dff.rsp.choice": [ + { + "dff.Message": { + "text": "Hi", + "attachments": [ + { + "dff.Link": { + "source": "https://github.com/deeppavlov/dialog_flow_framework", + "title": "DFF" + } + } + ] + } + }, + { + "dff.Message": "message" + } + ] + }, + "TRANSITIONS": [ + { + "lbl": [ + "flow", + "node", + 3 + ], + "cnd": "custom.cnd.condition" + } + ] + } + } +} diff --git a/json_import_examples/yaml/custom/__init__.py b/json_import_examples/yaml/custom/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/json_import_examples/yaml/custom/cnd.py b/json_import_examples/yaml/custom/cnd.py new file mode 100644 index 000000000..fb228857d --- /dev/null +++ b/json_import_examples/yaml/custom/cnd.py @@ -0,0 +1,2 @@ +def condition(ctx, pipeline): + return True diff --git a/json_import_examples/yaml/pipeline.py b/json_import_examples/yaml/pipeline.py new file mode 100644 index 000000000..af2267707 --- /dev/null +++ b/json_import_examples/yaml/pipeline.py @@ -0,0 +1,12 @@ +from pathlib import Path + +from dff import Pipeline + + +SCRIPT_FILE = Path(__file__).parent / "script.yaml" + +pipeline = Pipeline.from_file(SCRIPT_FILE) + + +if __name__ == "__main__": + pipeline.run() diff --git a/json_import_examples/yaml/script.yaml b/json_import_examples/yaml/script.yaml new file mode 100644 index 000000000..cc869d811 --- /dev/null +++ b/json_import_examples/yaml/script.yaml @@ -0,0 +1,19 @@ +CONFIG: + custom_dir: custom + start_label: [flow, node] +flow: + node: + RESPONSE: + dff.rsp.choice: + - dff.Message: + text: "Hi" + attachments: + - dff.Link: + source: https://github.com/deeppavlov/dialog_flow_framework + title: DFF + - dff.Message: "message" + TRANSITIONS: + - lbl: + [flow, node, 3] + cnd: + custom.cnd.condition From 66b3bb054c5d3bc4f8194859c1a9836f567da5c5 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Tue, 20 Feb 2024 12:55:47 +0300 Subject: [PATCH 08/38] add submodule aliases --- dff/__init__.py | 18 +----------------- dff/cnd.py | 1 + dff/lbl.py | 1 + dff/msg.py | 14 ++++++++++++++ dff/rsp.py | 1 + 5 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 dff/cnd.py create mode 100644 dff/lbl.py create mode 100644 dff/msg.py create mode 100644 dff/rsp.py diff --git a/dff/__init__.py b/dff/__init__.py index bd06ef21b..6f58f2a4a 100644 --- a/dff/__init__.py +++ b/dff/__init__.py 
@@ -13,22 +13,6 @@ from dff.pipeline import Pipeline from dff.script import Context, Script -from dff.script.core.message import ( - Location, - Attachment, - Audio, - Video, - Image, - Document, - Attachments, - Link, - Button, - Keyboard, - Message, - MultiMessage -) -import dff.script.responses as rsp -import dff.script.labels as lbl -import dff.script.conditions as cnd +from dff.msg import * Script.model_rebuild() diff --git a/dff/cnd.py b/dff/cnd.py new file mode 100644 index 000000000..c6cbbd966 --- /dev/null +++ b/dff/cnd.py @@ -0,0 +1 @@ +from dff.script.conditions import * diff --git a/dff/lbl.py b/dff/lbl.py new file mode 100644 index 000000000..e1ac05d88 --- /dev/null +++ b/dff/lbl.py @@ -0,0 +1 @@ +from dff.script.labels import * diff --git a/dff/msg.py b/dff/msg.py new file mode 100644 index 000000000..ee953a136 --- /dev/null +++ b/dff/msg.py @@ -0,0 +1,14 @@ +from dff.script.core.message import ( + Location, + Attachment, + Audio, + Video, + Image, + Document, + Attachments, + Link, + Button, + Keyboard, + Message, + MultiMessage +) diff --git a/dff/rsp.py b/dff/rsp.py new file mode 100644 index 000000000..04137084b --- /dev/null +++ b/dff/rsp.py @@ -0,0 +1 @@ +from dff.script.responses import * From 9637110168ad4729107291ed6072894babda5bee Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Tue, 20 Feb 2024 12:56:22 +0300 Subject: [PATCH 09/38] update resolve_target_object to use submodule aliases --- dff/pipeline/pipeline/script_parsing.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index be1de9772..db475704a 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -39,19 +39,11 @@ def __init__(self, script: Dict[str, JsonValue]): raise JSONImportError(f"could not find directory {custom_dir}") self.custom_dir_prefix = custom_dir + "." 
- def resolve_target_object(self, obj: str): - obj_parts = obj.split(".") - if obj_parts[0] == self.DFF_NAMESPACE_PREFIX[:-1]: - module = importlib.import_module(obj_parts[0]) - new_obj = module - for part in obj_parts[1:]: - new_obj = new_obj.__getattribute__(part) - return new_obj - elif obj_parts[0] == self.custom_dir_prefix[:-1]: - module = importlib.import_module(".".join(obj_parts[:-1])) - return module.__getattribute__(obj_parts[-1]) - else: - raise RuntimeError(obj_parts) + @staticmethod + def resolve_target_object(obj: str): + module_name, object_name = obj.rsplit(".", maxsplit=1) + module = importlib.import_module(module_name) + return module.__getattribute__(object_name) def import_script(self): return self.replace_script_objects(self.script) From c4f04ba8bbe3271fbcebe5b0685fd55bbe513285 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Tue, 20 Feb 2024 12:56:41 +0300 Subject: [PATCH 10/38] add function to retrieve dff object mapping --- dff/pipeline/pipeline/script_parsing.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index db475704a..cd59f990e 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -142,3 +142,20 @@ def from_file(cls, file: Union[str, Path]): return cls(yaml.load(fd, Loader=Loader)) else: raise JSONImportError("file should have a `.json`, `.yaml` or `.yml` extension") + + +def get_dff_objects(): + def get_objects_from_submodule(submodule_name: str, alias: Optional[str] = None): + module = importlib.import_module(submodule_name) + + return { + ".".join([alias or submodule_name, name]): obj + for name, obj in module.__dict__.items() if not name.startswith("_") and not ismodule(obj) + } + + return { + **get_objects_from_submodule("dff.cnd"), + **get_objects_from_submodule("dff.rsp"), + **get_objects_from_submodule("dff.lbl"), + **get_objects_from_submodule("dff.msg", "dff") + } From cfda4f7242eb04d0abd2c9cbd0651bc89a6e4793 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Thu, 22 Feb 2024 13:23:50 +0300 Subject: [PATCH 11/38] small fixes --- dff/pipeline/pipeline/script_parsing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index cd59f990e..6f9fe9077 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -1,8 +1,9 @@ -from typing import Union, Dict +from typing import Union, Dict, Optional import importlib import logging from pathlib import Path import json +from inspect import ismodule from pydantic import JsonValue import yaml @@ -29,8 +30,8 @@ class JSONImporter: def __init__(self, script: Dict[str, JsonValue]): self.script = script config = self.script.get(self.CONFIG_KEY) - if config is None: - raise JSONImportError("config is not found") + if not isinstance(config, dict): + raise JSONImportError("config is not found -- your script has to define a CONFIG dictionary") self.config = config custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION, "custom_dir") if "." 
in custom_dir: @@ -157,5 +158,5 @@ def get_objects_from_submodule(submodule_name: str, alias: Optional[str] = None) **get_objects_from_submodule("dff.cnd"), **get_objects_from_submodule("dff.rsp"), **get_objects_from_submodule("dff.lbl"), - **get_objects_from_submodule("dff.msg", "dff") + **get_objects_from_submodule("dff.msg", "dff"), } From 0e5f72a312eda0f8a303ef234c8e854bd275bd39 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Thu, 21 Mar 2024 02:03:26 +0300 Subject: [PATCH 12/38] capitalize messages --- dff/pipeline/pipeline/script_parsing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index 6f9fe9077..a78ae4a8c 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -31,7 +31,7 @@ def __init__(self, script: Dict[str, JsonValue]): self.script = script config = self.script.get(self.CONFIG_KEY) if not isinstance(config, dict): - raise JSONImportError("config is not found -- your script has to define a CONFIG dictionary") + raise JSONImportError("Config is not found -- your script has to define a CONFIG dictionary") self.config = config custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION, "custom_dir") if "." in custom_dir: @@ -51,19 +51,19 @@ def import_script(self): def replace_obj(self, obj: JsonValue): if not isinstance(obj, dict): - raise JSONImportError(f"obj {obj} has to be a dictionary") + raise JSONImportError(f"DFF object has to be a dictionary: {obj}") keys = obj.keys() if len(keys) != 1: - raise JSONImportError("obj has to have only 1 key") + raise JSONImportError(f"DFF object has to have only 1 key: {obj.keys()}") key = keys.__iter__().__next__() - logger.debug(f"obj: {key}") + logger.debug(f"Replacing object: {key}") target_obj = self.resolve_target_object(key) if target_obj is None: raise ImportError(f"Could not find object {key}") if not callable(target_obj): - raise JSONImportError(f"object `{key}` has to be callable") + raise JSONImportError(f"Object `{key}` has to be callable") args = [] kwargs = {} @@ -80,14 +80,14 @@ def replace_obj(self, obj: JsonValue): def process_transitions(self, transition_list: list): if not isinstance(transition_list, list): - raise JSONImportError(f"transitions value should be a list of dictionaries, not {transition_list}") + raise JSONImportError(f"Transitions value should be a list of dictionaries, not {transition_list}") transitions = {} for item in transition_list: if not isinstance(item, dict): - raise JSONImportError(f"transition items have to be dictionaries, not {item}") + raise JSONImportError(f"Transition items have to be dictionaries, not {item}") if item.keys() != self.TRANSITION_ITEM_KEYS: - raise JSONImportError(f"transition items' keys have to be `lbl` and `cnd`, not {item.keys()}") + raise JSONImportError(f"Transition items' keys have to be `lbl` and `cnd`, not {item.keys()}") lbl = self.replace_script_objects(item["lbl"]) if isinstance(lbl, list): @@ -95,14 +95,14 @@ def process_transitions(self, transition_list: list): cnd = self.replace_script_objects(item["cnd"]) if isinstance(lbl, tuple) and lbl in transitions: - raise JSONImportError(f"label {lbl} already exists in {transitions}") + raise JSONImportError(f"Label {lbl} already exists in {transitions}") transitions[lbl] = cnd return transitions def replace_string_values(self, obj: JsonValue): if not isinstance(obj, str): - raise JSONImportError(f"obj {obj} has to be a string") + raise 
JSONImportError(f"Obj {obj} has to be a string") if obj.startswith(self.DFF_NAMESPACE_PREFIX) or obj.startswith(self.custom_dir_prefix): target_obj = self.resolve_target_object(obj) From 56755b9ecac4facf16edcff573c1a5cc132df2f7 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Thu, 21 Mar 2024 02:05:10 +0300 Subject: [PATCH 13/38] improve import system for custom objects --- dff/pipeline/pipeline/pipeline.py | 2 +- dff/pipeline/pipeline/script_parsing.py | 95 ++++++++++++++++++------- 2 files changed, 69 insertions(+), 28 deletions(-) diff --git a/dff/pipeline/pipeline/pipeline.py b/dff/pipeline/pipeline/pipeline.py index 1484f6d16..6ddddeb94 100644 --- a/dff/pipeline/pipeline/pipeline.py +++ b/dff/pipeline/pipeline/pipeline.py @@ -291,7 +291,7 @@ def from_file( ): pre_services = [] if pre_services is None else pre_services post_services = [] if post_services is None else post_services - script = JSONImporter.from_file(file).import_script() + script = JSONImporter(file).import_script() def to_tuple(i): if isinstance(i, list): diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index a78ae4a8c..db81bfc8f 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -1,5 +1,8 @@ -from typing import Union, Dict, Optional +from typing import Union, Optional, Sequence import importlib +import importlib.util +import importlib.machinery +import sys import logging from pathlib import Path import json @@ -27,24 +30,76 @@ class JSONImporter: CONFIG_KEY = "CONFIG" TRANSITION_ITEM_KEYS = {"lbl", "cnd"} - def __init__(self, script: Dict[str, JsonValue]): + def __init__(self, file: Union[str, Path]): + if isinstance(file, str): + file = Path(file) + + if file.suffix == ".json": + with open(file, "r") as fd: + script = json.load(fd) + elif file.suffix in (".yaml", ".yml"): + with open(file, "r") as fd: + script = yaml.load(fd, Loader=Loader) + else: + raise JSONImportError("File should have a `.json`, `.yaml` or `.yml` extension") + logger.info(f"Loaded file {file}") + self.script = script config = self.script.get(self.CONFIG_KEY) if not isinstance(config, dict): raise JSONImportError("Config is not found -- your script has to define a CONFIG dictionary") self.config = config + custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION, "custom_dir") - if "." in custom_dir: - raise JSONImportError("custom dir cannot contain `.`") - if not Path(custom_dir).exists(): - raise JSONImportError(f"could not find directory {custom_dir}") - self.custom_dir_prefix = custom_dir + "." - - @staticmethod - def resolve_target_object(obj: str): - module_name, object_name = obj.rsplit(".", maxsplit=1) - module = importlib.import_module(module_name) - return module.__getattribute__(object_name) + if not isinstance(custom_dir, str): + raise JSONImportError("CUSTOM_DIR must be a string") + custom_dir_path = Path(custom_dir) + if not custom_dir_path.is_absolute(): + custom_dir_path = (file.parent / custom_dir_path).resolve(strict=False) + + if not custom_dir_path.exists(): + raise JSONImportError(f"Could not find directory {custom_dir_path}. custom_dir: {custom_dir}") + + logger.info(f"CUSTOM_DIR set to {custom_dir_path}") + self.custom_dir_prefix = custom_dir_path.stem + "." 
+ + self._custom_dir_location = str(custom_dir_path.parent) + self._custom_modules = {} + + def import_custom_module(self, module_name: str, paths: Optional[Sequence[str]] = None): + if module_name in self._custom_modules: + return self._custom_modules[module_name] + + if paths is None: + paths = [self._custom_dir_location] + + parent_name, _, child_name = module_name.rpartition(".") + + if parent_name: + parent_module = self.import_custom_module(parent_name, paths) + + paths = parent_module.__spec__.submodule_search_locations + + for finder in sys.meta_path: + spec = finder.find_spec(child_name, paths) + if spec is not None: + break + else: + raise ModuleNotFoundError(f"No module named {child_name!r} at {paths!r}") + + module = importlib.util.module_from_spec(spec) + self._custom_modules[module_name] = module + spec.loader.exec_module(module) + return module + + def resolve_target_object(self, obj: str): + module_name, _, obj_name = obj.rpartition(".") + + if obj.startswith(self.DFF_NAMESPACE_PREFIX): + module = importlib.import_module(module_name) + else: + module = self.import_custom_module(module_name) + return getattr(module, obj_name) def import_script(self): return self.replace_script_objects(self.script) @@ -130,20 +185,6 @@ def replace_script_objects(self, obj: JsonValue): return self.replace_string_values(obj) return obj - @classmethod - def from_file(cls, file: Union[str, Path]): - if isinstance(file, str): - file = Path(file) - - if file.suffix == ".json": - with open(file, "r") as fd: - return cls(json.load(fd)) - elif file.suffix in (".yaml", ".yml"): - with open(file, "r") as fd: - return cls(yaml.load(fd, Loader=Loader)) - else: - raise JSONImportError("file should have a `.json`, `.yaml` or `.yml` extension") - def get_dff_objects(): def get_objects_from_submodule(submodule_name: str, alias: Optional[str] = None): From 4b4cf69dd96e1ed21ec3bf9d5950c010616b24cc Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Fri, 22 Mar 2024 14:29:20 +0300 Subject: [PATCH 14/38] make custom_dir standard way to address custom dir --- dff/pipeline/pipeline/script_parsing.py | 46 +++++++++++++++++-------- json_import_examples/json/script.json | 2 +- json_import_examples/yaml/script.yaml | 2 +- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/dff/pipeline/pipeline/script_parsing.py b/dff/pipeline/pipeline/script_parsing.py index db81bfc8f..23119b99b 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/dff/pipeline/pipeline/script_parsing.py @@ -24,7 +24,8 @@ class JSONImportError(Exception): class JSONImporter: - DFF_NAMESPACE_PREFIX = "dff." 
+ DFF_NAMESPACE_PREFIX = "dff" + CUSTOM_DIR_NAMESPACE_PREFIX = "custom_dir" CUSTOM_DIR_CONFIG_OPTION = "custom_dir" TRANSITIONS_KEY = "TRANSITIONS" CONFIG_KEY = "CONFIG" @@ -50,27 +51,37 @@ def __init__(self, file: Union[str, Path]): raise JSONImportError("Config is not found -- your script has to define a CONFIG dictionary") self.config = config - custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION, "custom_dir") - if not isinstance(custom_dir, str): - raise JSONImportError("CUSTOM_DIR must be a string") - custom_dir_path = Path(custom_dir) - if not custom_dir_path.is_absolute(): - custom_dir_path = (file.parent / custom_dir_path).resolve(strict=False) + custom_dir = config.get(self.CUSTOM_DIR_CONFIG_OPTION) + if custom_dir is not None: + if not isinstance(custom_dir, str): + raise JSONImportError("custom_dir must be a string") + custom_dir_path = Path(custom_dir) + if not custom_dir_path.is_absolute(): + custom_dir_path = (file.parent / custom_dir_path).resolve(strict=False) - if not custom_dir_path.exists(): - raise JSONImportError(f"Could not find directory {custom_dir_path}. custom_dir: {custom_dir}") + if not custom_dir_path.exists(): + raise JSONImportError(f"Could not find directory {custom_dir_path}. custom_dir: {custom_dir}") - logger.info(f"CUSTOM_DIR set to {custom_dir_path}") - self.custom_dir_prefix = custom_dir_path.stem + "." + logger.info(f"custom_dir set to {custom_dir_path}") - self._custom_dir_location = str(custom_dir_path.parent) + self._custom_dir_stem = str(custom_dir_path.stem) + self._custom_dir_location = str(custom_dir_path.parent) + else: + self._custom_dir_location = None self._custom_modules = {} + @staticmethod + def is_resolvable(value: str) -> bool: + return value.startswith(JSONImporter.DFF_NAMESPACE_PREFIX + ".") or\ + value.startswith(JSONImporter.CUSTOM_DIR_NAMESPACE_PREFIX + ".") + def import_custom_module(self, module_name: str, paths: Optional[Sequence[str]] = None): if module_name in self._custom_modules: return self._custom_modules[module_name] if paths is None: + if self._custom_dir_location is None: + raise JSONImportError("custom_dir option must be set in order to use objects from it") paths = [self._custom_dir_location] parent_name, _, child_name = module_name.rpartition(".") @@ -79,6 +90,11 @@ def import_custom_module(self, module_name: str, paths: Optional[Sequence[str]] parent_module = self.import_custom_module(parent_name, paths) paths = parent_module.__spec__.submodule_search_locations + else: + # root level import; replace `custom_dir` with actual module name + if child_name != self.CUSTOM_DIR_NAMESPACE_PREFIX: + raise RuntimeError(f"Trying to import from custom_dir while using wrong module_name: {child_name!r}") + child_name = self._custom_dir_stem for finder in sys.meta_path: spec = finder.find_spec(child_name, paths) @@ -158,7 +174,7 @@ def process_transitions(self, transition_list: list): def replace_string_values(self, obj: JsonValue): if not isinstance(obj, str): raise JSONImportError(f"Obj {obj} has to be a string") - if obj.startswith(self.DFF_NAMESPACE_PREFIX) or obj.startswith(self.custom_dir_prefix): + if self.is_resolvable(obj): target_obj = self.resolve_target_object(obj) if target_obj is None: @@ -172,7 +188,7 @@ def replace_script_objects(self, obj: JsonValue): keys = obj.keys() if len(keys) == 1: key = keys.__iter__().__next__() - if key.startswith(self.DFF_NAMESPACE_PREFIX) or key.startswith(self.custom_dir_prefix): + if self.is_resolvable(key): return self.replace_obj(obj) return {k: ( @@ -181,7 +197,7 @@ def 
replace_script_objects(self, obj: JsonValue): elif isinstance(obj, list): return [self.replace_script_objects(item) for item in obj] elif isinstance(obj, str): - if obj.startswith(self.DFF_NAMESPACE_PREFIX) or obj.startswith(self.custom_dir_prefix): + if self.is_resolvable(obj): return self.replace_string_values(obj) return obj diff --git a/json_import_examples/json/script.json b/json_import_examples/json/script.json index a78436a15..6eda7bf20 100644 --- a/json_import_examples/json/script.json +++ b/json_import_examples/json/script.json @@ -35,7 +35,7 @@ "node", 3 ], - "cnd": "custom.cnd.condition" + "cnd": "custom_dir.cnd.condition" } ] } diff --git a/json_import_examples/yaml/script.yaml b/json_import_examples/yaml/script.yaml index cc869d811..6f8978a78 100644 --- a/json_import_examples/yaml/script.yaml +++ b/json_import_examples/yaml/script.yaml @@ -16,4 +16,4 @@ flow: - lbl: [flow, node, 3] cnd: - custom.cnd.condition + custom_dir.cnd.condition From ce18579946895d4c7c565a98a1a9f4e70ffd66df Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Mon, 27 May 2024 13:17:55 +0300 Subject: [PATCH 15/38] prepare parser for dev release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e071ae56e..e6494a014 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dff" -version = "0.6.4" +version = "0.6.4.dev0" description = "Dialog Flow Framework is a free and open-source software stack for creating chatbots, released under the terms of Apache License 2.0." license = "Apache-2.0" authors = [ From 702ddc80a492f94d112a1875735132da2e8ca45d Mon Sep 17 00:00:00 2001 From: Ramimashkouk Date: Thu, 18 Jul 2024 14:07:44 +0300 Subject: [PATCH 16/38] Merge branch 'master' into 'chore/slots2parser' --- .dockerignore | 2 + .github/dependabot.yml | 2 +- .github/process_github_events.py | 4 +- .github/workflows/build_and_publish_docs.yml | 2 - .github/workflows/test_coverage.yml | 2 - .github/workflows/test_full.yml | 4 - .github/workflows/update_dashboard.yml | 6 +- .gitignore | 2 + CONTRIBUTING.md | 39 +- README.md | 82 +- {dff => chatsky}/__init__.py | 8 +- chatsky/__rebuild_pydantic_models__.py | 9 + chatsky/cnd.py | 1 + {dff => chatsky}/config/README.md | 4 +- .../Current_topic_slot_bar_chart_4.yaml | 0 ...Current_topic_time_series_bar_chart_2.yaml | 0 .../charts/Flow_visit_ratio_monitor_13.yaml | 0 .../charts/Node_Visits_7.yaml | 0 .../charts/Node_counts_3.yaml | 0 .../charts/Node_visit_ratio_monitor_8.yaml | 0 .../charts/Node_visits_ratio_6.yaml | 0 .../charts/Node_visits_sunburst_5.yaml | 0 .../charts/Rating_slot_line_chart_1.yaml | 0 .../charts/Requests_17.yaml | 0 .../charts/Responses_16.yaml | 0 .../charts/Service_load_users_9.yaml | 0 .../superset_dashboard/charts/Table_14.yaml | 0 .../charts/Terminal_labels_15.yaml | 0 .../charts/Transition_counts_12.yaml | 0 .../charts/Transition_layout_10.yaml | 0 .../charts/Transition_ratio_chord_11.yaml | 0 .../chatsky_statistics_dashboard_1.yaml | 10 +- .../databases/chatsky_database.yaml | 2 +- .../chatsky_database/chatsky_final_nodes.yaml | 2 +- .../chatsky_database/chatsky_node_stats.yaml | 2 +- .../chatsky_database/chatsky_stats.yaml | 2 +- .../config/superset_dashboard/metadata.yaml | 0 {dff => chatsky}/context_storages/__init__.py | 0 {dff => chatsky}/context_storages/database.py | 5 +- {dff => chatsky}/context_storages/json.py | 5 +- {dff => chatsky}/context_storages/mongo.py | 5 +- {dff => chatsky}/context_storages/pickle.py | 5 +- {dff => 
chatsky}/context_storages/protocol.py | 9 +- .../context_storages/protocols.json | 0 {dff => chatsky}/context_storages/redis.py | 5 +- {dff => chatsky}/context_storages/shelve.py | 5 +- {dff => chatsky}/context_storages/sql.py | 5 +- {dff => chatsky}/context_storages/ydb.py | 5 +- chatsky/lbl.py | 1 + {dff => chatsky}/messengers/__init__.py | 0 chatsky/messengers/common/__init__.py | 9 + .../messengers/common/interface.py | 78 +- {dff => chatsky}/messengers/common/types.py | 3 +- chatsky/messengers/console.py | 49 + chatsky/messengers/telegram/__init__.py | 4 + chatsky/messengers/telegram/abstract.py | 660 +++ chatsky/messengers/telegram/interface.py | 65 + chatsky/msg.py | 14 + {dff => chatsky}/pipeline/__init__.py | 3 - {dff => chatsky}/pipeline/conditions.py | 8 +- .../pipeline/pipeline/__init__.py | 0 {dff => chatsky}/pipeline/pipeline/actor.py | 207 +- .../pipeline/pipeline/component.py | 25 +- .../pipeline/pipeline/pipeline.py | 103 +- .../pipeline/pipeline/script_parsing.py | 20 +- {dff => chatsky}/pipeline/pipeline/utils.py | 5 +- {dff => chatsky}/pipeline/service/__init__.py | 0 {dff => chatsky}/pipeline/service/extra.py | 13 +- {dff => chatsky}/pipeline/service/group.py | 23 +- {dff => chatsky}/pipeline/service/service.py | 21 +- {dff => chatsky}/pipeline/service/utils.py | 20 +- {dff => chatsky}/pipeline/types.py | 80 +- chatsky/rsp.py | 1 + {dff => chatsky}/script/__init__.py | 6 +- .../script/conditions/__init__.py | 2 + .../script/conditions/std_conditions.py | 61 +- {dff => chatsky}/script/core/__init__.py | 0 {dff => chatsky}/script/core/context.py | 76 +- {dff => chatsky}/script/core/keywords.py | 9 +- chatsky/script/core/message.py | 316 ++ {dff => chatsky}/script/core/normalization.py | 20 +- chatsky/script/core/script.py | 267 + {dff => chatsky}/script/core/types.py | 18 +- {dff => chatsky}/script/extras/__init__.py | 0 .../script/extras/conditions/__init__.py | 0 .../script/extras/slots/__init__.py | 0 {dff => chatsky}/script/labels/__init__.py | 0 {dff => chatsky}/script/labels/std_labels.py | 73 +- {dff => chatsky}/script/responses/__init__.py | 0 .../script/responses/std_responses.py | 5 +- chatsky/slots/__init__.py | 7 + chatsky/slots/conditions.py | 32 + chatsky/slots/processing.py | 98 + chatsky/slots/response.py | 34 + chatsky/slots/slots.py | 418 ++ {dff => chatsky}/stats/__init__.py | 2 +- {dff => chatsky}/stats/__main__.py | 5 +- {dff => chatsky}/stats/cli.py | 29 +- {dff => chatsky}/stats/default_extractors.py | 26 +- {dff => chatsky}/stats/instrumentor.py | 27 +- {dff => chatsky}/stats/utils.py | 12 +- {dff => chatsky}/utils/__init__.py | 0 chatsky/utils/db_benchmark/__init__.py | 11 + .../utils/db_benchmark/basic_config.py | 5 +- .../utils/db_benchmark/benchmark.py | 17 +- {dff => chatsky}/utils/db_benchmark/report.py | 11 +- chatsky/utils/devel/__init__.py | 14 + chatsky/utils/devel/async_helpers.py | 24 + chatsky/utils/devel/extra_field_helpers.py | 22 + chatsky/utils/devel/json_serialization.py | 193 + chatsky/utils/docker/README.md | 11 + .../utils/docker/dockerfile_stats | 0 .../utils/docker/entrypoint_stats.sh | 0 .../utils/docker/superset_config_docker.py | 0 .../utils/otel/otelcol-config-extras.yml | 0 .../utils/otel/otelcol-config.yml | 0 {dff => chatsky}/utils/parser/__init__.py | 0 {dff => chatsky}/utils/testing/__init__.py | 2 +- {dff => chatsky}/utils/testing/cleanup_db.py | 3 +- {dff => chatsky}/utils/testing/common.py | 17 +- .../utils/testing/response_comparers.py | 3 +- {dff => chatsky}/utils/testing/toy_script.py | 81 +- 
.../utils/turn_caching/__init__.py | 0 .../turn_caching/singleton_turn_caching.py | 1 + {dff => chatsky}/utils/viewer/__init__.py | 0 compose.yml | 20 +- dff/cnd.py | 1 - dff/lbl.py | 1 - dff/messengers/common/__init__.py | 4 - dff/messengers/telegram/__init__.py | 14 - dff/messengers/telegram/interface.py | 222 - dff/messengers/telegram/message.py | 105 - dff/messengers/telegram/messenger.py | 247 - dff/messengers/telegram/utils.py | 54 - dff/msg.py | 14 - dff/rsp.py | 1 - dff/script/core/message.py | 232 - dff/script/core/script.py | 99 - dff/utils/db_benchmark/__init__.py | 11 - dff/utils/docker/README.md | 11 - dff/utils/testing/telegram.py | 278 - docs/source/_templates/example-links.html | 4 +- docs/source/_templates/source-links.html | 2 +- docs/source/about_us.rst | 2 +- docs/source/community.rst | 16 +- docs/source/conf.py | 33 +- docs/source/development.rst | 12 +- .../{dfe => core}/user_actor.drawio | 0 docs/source/get_started.rst | 42 +- docs/source/index.rst | 18 +- docs/source/tutorials.rst | 12 +- docs/source/user_guides.rst | 15 +- docs/source/user_guides/basic_conceptions.rst | 62 +- docs/source/user_guides/context_guide.rst | 44 +- .../source/user_guides/optimization_guide.rst | 8 +- docs/source/user_guides/slot_extraction.rst | 176 + docs/source/user_guides/superset_guide.rst | 34 +- docs/source/utils/notebook.py | 10 +- json_import_examples/README.md | 8 +- json_import_examples/json/pipeline.py | 2 +- json_import_examples/json/script.json | 10 +- json_import_examples/yaml/pipeline.py | 2 +- json_import_examples/yaml/script.yaml | 10 +- poetry.lock | 4759 +++++++++-------- pyproject.toml | 31 +- scripts/codestyle.py | 2 +- scripts/doc.py | 2 +- scripts/misc.py | 2 +- scripts/test.py | 35 +- tests/conftest.py | 2 +- tests/context_storages/conftest.py | 2 +- tests/context_storages/test_dbs.py | 14 +- tests/messengers/telegram/conftest.py | 78 - .../messengers/telegram/test_happy_paths.json | 958 ++++ tests/messengers/telegram/test_tutorials.py | 76 +- tests/messengers/telegram/test_types.py | 222 - tests/messengers/telegram/utils.py | 130 + tests/pipeline/test_messenger_interface.py | 17 +- tests/pipeline/test_parallel_processing.py | 4 +- tests/pipeline/test_pipeline.py | 27 +- tests/pipeline/test_tutorials.py | 31 - tests/pipeline/test_update_ctx_misc.py | 4 +- tests/script/conditions/test_conditions.py | 35 +- tests/script/core/test_actor.py | 27 +- tests/script/core/test_context.py | 4 +- tests/script/core/test_message.py | 178 +- tests/script/core/test_normalization.py | 16 +- tests/script/core/test_script.py | 44 +- tests/script/core/test_tutorials.py | 30 - tests/script/core/test_validation.py | 215 + tests/script/labels/test_labels.py | 6 +- tests/script/responses/test_responses.py | 6 +- tests/script/responses/test_tutorials.py | 17 - tests/slots/__init__.py | 0 tests/slots/conftest.py | 28 + tests/slots/test_slot_manager.py | 262 + tests/slots/test_slot_types.py | 161 + tests/slots/test_tutorials.py | 20 + tests/stats/conftest.py | 4 +- tests/stats/test_defaults.py | 16 +- tests/stats/test_instrumentation.py | 4 +- tests/stats/test_main.py | 24 +- tests/stats/test_tutorials.py | 14 +- tests/tutorials/test_format.py | 25 +- tests/tutorials/test_tutorials.py | 10 +- tests/tutorials/test_utils.py | 5 +- tests/utils/test_benchmark.py | 10 +- tests/utils/test_serialization.py | 134 + tests/utils/test_tutorials.py | 17 - tutorials/context_storages/1_basics.py | 10 +- tutorials/context_storages/2_postgresql.py | 12 +- tutorials/context_storages/3_mongodb.py | 12 +- 
tutorials/context_storages/4_redis.py | 12 +- tutorials/context_storages/5_mysql.py | 12 +- tutorials/context_storages/6_sqlite.py | 12 +- .../context_storages/7_yandex_database.py | 12 +- .../context_storages/8_db_benchmarking.py | 10 +- tutorials/messengers/telegram/1_basic.py | 75 +- .../messengers/telegram/2_attachments.py | 273 + tutorials/messengers/telegram/2_buttons.py | 193 - tutorials/messengers/telegram/3_advanced.py | 262 + .../telegram/3_buttons_with_callback.py | 181 - tutorials/messengers/telegram/4_conditions.py | 147 - .../telegram/5_conditions_with_media.py | 204 - .../telegram/6_conditions_extras.py | 126 - .../messengers/telegram/7_polling_setup.py | 64 - .../messengers/telegram/8_webhook_setup.py | 61 - .../messengers/web_api_interface/1_fastapi.py | 12 +- .../web_api_interface/2_websocket_chat.py | 10 +- .../3_load_testing_with_locust.py | 8 +- .../web_api_interface/4_streamlit_chat.py | 10 +- tutorials/pipeline/1_basics.py | 12 +- .../pipeline/2_pre_and_post_processors.py | 12 +- .../3_pipeline_dict_with_services_basic.py | 16 +- .../3_pipeline_dict_with_services_full.py | 40 +- .../pipeline/4_groups_and_conditions_basic.py | 12 +- .../pipeline/4_groups_and_conditions_full.py | 23 +- ..._asynchronous_groups_and_services_basic.py | 8 +- ...5_asynchronous_groups_and_services_full.py | 10 +- tutorials/pipeline/6_extra_handlers_basic.py | 15 +- tutorials/pipeline/6_extra_handlers_full.py | 10 +- .../7_extra_handlers_and_extensions.py | 8 +- tutorials/script/core/1_basics.py | 68 +- tutorials/script/core/2_conditions.py | 62 +- tutorials/script/core/3_responses.py | 64 +- tutorials/script/core/4_transitions.py | 150 +- tutorials/script/core/5_global_transitions.py | 101 +- .../script/core/6_context_serialization.py | 20 +- .../script/core/7_pre_response_processing.py | 29 +- tutorials/script/core/8_misc.py | 79 +- .../core/9_pre_transitions_processing.py | 24 +- tutorials/script/responses/1_basics.py | 28 +- tutorials/script/responses/2_buttons.py | 259 - .../responses/{3_media.py => 2_media.py} | 24 +- ...{4_multi_message.py => 3_multi_message.py} | 100 +- tutorials/slots/1_basic_example.py | 236 + tutorials/stats/1_extractor_functions.py | 22 +- tutorials/stats/2_pipeline_integration.py | 26 +- tutorials/utils/1_cache.py | 24 +- tutorials/utils/2_lru_cache.py | 24 +- utils/db_benchmark/benchmark_dbs.py | 3 +- utils/db_benchmark/benchmark_streamlit.py | 11 +- utils/stats/sample_data_provider.py | 16 +- utils/test_data_generators/__init__.py | 0 .../telegram_tutorial_data.py | 67 + 265 files changed, 9596 insertions(+), 6873 deletions(-) rename {dff => chatsky}/__init__.py (55%) create mode 100644 chatsky/__rebuild_pydantic_models__.py create mode 100644 chatsky/cnd.py rename {dff => chatsky}/config/README.md (73%) rename {dff => chatsky}/config/superset_dashboard/charts/Current_topic_slot_bar_chart_4.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Current_topic_time_series_bar_chart_2.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Flow_visit_ratio_monitor_13.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Node_Visits_7.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Node_counts_3.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Node_visit_ratio_monitor_8.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Node_visits_ratio_6.yaml (100%) rename {dff => chatsky}/config/superset_dashboard/charts/Node_visits_sunburst_5.yaml (100%) rename {dff => 
chatsky}/config/superset_dashboard/charts/Rating_slot_line_chart_1.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Requests_17.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Responses_16.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Service_load_users_9.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Table_14.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Terminal_labels_15.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Transition_counts_12.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Transition_layout_10.yaml (100%)
 rename {dff => chatsky}/config/superset_dashboard/charts/Transition_ratio_chord_11.yaml (100%)
 rename dff/config/superset_dashboard/dashboards/DFF_statistics_dashboard_1.yaml => chatsky/config/superset_dashboard/dashboards/chatsky_statistics_dashboard_1.yaml (99%)
 rename dff/config/superset_dashboard/databases/dff_database.yaml => chatsky/config/superset_dashboard/databases/chatsky_database.yaml (91%)
 rename dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml => chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_final_nodes.yaml (98%)
 rename dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml => chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_node_stats.yaml (98%)
 rename dff/config/superset_dashboard/datasets/dff_database/dff_stats.yaml => chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_stats.yaml (99%)
 rename {dff => chatsky}/config/superset_dashboard/metadata.yaml (100%)
 rename {dff => chatsky}/context_storages/__init__.py (100%)
 rename {dff => chatsky}/context_storages/database.py (98%)
 rename {dff => chatsky}/context_storages/json.py (97%)
 rename {dff => chatsky}/context_storages/mongo.py (97%)
 rename {dff => chatsky}/context_storages/pickle.py (96%)
 rename {dff => chatsky}/context_storages/protocol.py (86%)
 rename {dff => chatsky}/context_storages/protocols.json (100%)
 rename {dff => chatsky}/context_storages/redis.py (96%)
 rename {dff => chatsky}/context_storages/shelve.py (94%)
 rename {dff => chatsky}/context_storages/sql.py (98%)
 rename {dff => chatsky}/context_storages/ydb.py (98%)
 create mode 100644 chatsky/lbl.py
 rename {dff => chatsky}/messengers/__init__.py (100%)
 create mode 100644 chatsky/messengers/common/__init__.py
 rename {dff => chatsky}/messengers/common/interface.py (67%)
 rename {dff => chatsky}/messengers/common/types.py (93%)
 create mode 100644 chatsky/messengers/console.py
 create mode 100644 chatsky/messengers/telegram/__init__.py
 create mode 100644 chatsky/messengers/telegram/abstract.py
 create mode 100644 chatsky/messengers/telegram/interface.py
 create mode 100644 chatsky/msg.py
 rename {dff => chatsky}/pipeline/__init__.py (92%)
 rename {dff => chatsky}/pipeline/conditions.py (93%)
 rename {dff => chatsky}/pipeline/pipeline/__init__.py (100%)
 rename {dff => chatsky}/pipeline/pipeline/actor.py (62%)
 rename {dff => chatsky}/pipeline/pipeline/component.py (92%)
 rename {dff => chatsky}/pipeline/pipeline/pipeline.py (83%)
 rename {dff => chatsky}/pipeline/pipeline/script_parsing.py (92%)
 rename {dff => chatsky}/pipeline/pipeline/utils.py (96%)
 rename {dff => chatsky}/pipeline/service/__init__.py (100%)
 rename {dff => chatsky}/pipeline/service/extra.py (96%)
 rename {dff => chatsky}/pipeline/service/group.py (93%)
 rename {dff => chatsky}/pipeline/service/service.py (91%)
 rename {dff => chatsky}/pipeline/service/utils.py (77%)
 rename {dff => chatsky}/pipeline/types.py (75%)
 create mode 100644 chatsky/rsp.py
 rename {dff => chatsky}/script/__init__.py (83%)
 rename {dff => chatsky}/script/conditions/__init__.py (84%)
 rename {dff => chatsky}/script/conditions/std_conditions.py (76%)
 rename {dff => chatsky}/script/core/__init__.py (100%)
 rename {dff => chatsky}/script/core/context.py (82%)
 rename {dff => chatsky}/script/core/keywords.py (93%)
 create mode 100644 chatsky/script/core/message.py
 rename {dff => chatsky}/script/core/normalization.py (88%)
 create mode 100644 chatsky/script/core/script.py
 rename {dff => chatsky}/script/core/types.py (89%)
 rename {dff => chatsky}/script/extras/__init__.py (100%)
 rename {dff => chatsky}/script/extras/conditions/__init__.py (100%)
 rename {dff => chatsky}/script/extras/slots/__init__.py (100%)
 rename {dff => chatsky}/script/labels/__init__.py (100%)
 rename {dff => chatsky}/script/labels/std_labels.py (73%)
 rename {dff => chatsky}/script/responses/__init__.py (100%)
 rename {dff => chatsky}/script/responses/std_responses.py (90%)
 create mode 100644 chatsky/slots/__init__.py
 create mode 100644 chatsky/slots/conditions.py
 create mode 100644 chatsky/slots/processing.py
 create mode 100644 chatsky/slots/response.py
 create mode 100644 chatsky/slots/slots.py
 rename {dff => chatsky}/stats/__init__.py (83%)
 rename {dff => chatsky}/stats/__main__.py (98%)
 rename {dff => chatsky}/stats/cli.py (93%)
 rename {dff => chatsky}/stats/default_extractors.py (75%)
 rename {dff => chatsky}/stats/instrumentor.py (92%)
 rename {dff => chatsky}/stats/utils.py (96%)
 rename {dff => chatsky}/utils/__init__.py (100%)
 create mode 100644 chatsky/utils/db_benchmark/__init__.py
 rename {dff => chatsky}/utils/db_benchmark/basic_config.py (98%)
 rename {dff => chatsky}/utils/db_benchmark/benchmark.py (97%)
 rename {dff => chatsky}/utils/db_benchmark/report.py (85%)
 create mode 100644 chatsky/utils/devel/__init__.py
 create mode 100644 chatsky/utils/devel/async_helpers.py
 create mode 100644 chatsky/utils/devel/extra_field_helpers.py
 create mode 100644 chatsky/utils/devel/json_serialization.py
 create mode 100644 chatsky/utils/docker/README.md
 rename {dff => chatsky}/utils/docker/dockerfile_stats (100%)
 rename {dff => chatsky}/utils/docker/entrypoint_stats.sh (100%)
 rename {dff => chatsky}/utils/docker/superset_config_docker.py (100%)
 rename {dff => chatsky}/utils/otel/otelcol-config-extras.yml (100%)
 rename {dff => chatsky}/utils/otel/otelcol-config.yml (100%)
 rename {dff => chatsky}/utils/parser/__init__.py (100%)
 rename {dff => chatsky}/utils/testing/__init__.py (79%)
 rename {dff => chatsky}/utils/testing/cleanup_db.py (98%)
 rename {dff => chatsky}/utils/testing/common.py (85%)
 rename {dff => chatsky}/utils/testing/response_comparers.py (93%)
 rename {dff => chatsky}/utils/testing/toy_script.py (62%)
 rename {dff => chatsky}/utils/turn_caching/__init__.py (100%)
 rename {dff => chatsky}/utils/turn_caching/singleton_turn_caching.py (99%)
 rename {dff => chatsky}/utils/viewer/__init__.py (100%)
 delete mode 100644 dff/cnd.py
 delete mode 100644 dff/lbl.py
 delete mode 100644 dff/messengers/common/__init__.py
 delete mode 100644 dff/messengers/telegram/__init__.py
 delete mode 100644 dff/messengers/telegram/interface.py
 delete mode 100644 dff/messengers/telegram/message.py
 delete mode 100644 dff/messengers/telegram/messenger.py
 delete mode 100644 dff/messengers/telegram/utils.py
 delete mode 100644 dff/msg.py
 delete mode 100644 dff/rsp.py
 delete mode 100644 dff/script/core/message.py
 delete mode 100644 dff/script/core/script.py
 delete mode 100644 dff/utils/db_benchmark/__init__.py
 delete mode 100644 dff/utils/docker/README.md
 delete mode 100644 dff/utils/testing/telegram.py
 rename docs/source/drawio_src/{dfe => core}/user_actor.drawio (100%)
 create mode 100644 docs/source/user_guides/slot_extraction.rst
 delete mode 100644 tests/messengers/telegram/conftest.py
 create mode 100644 tests/messengers/telegram/test_happy_paths.json
 delete mode 100644 tests/messengers/telegram/test_types.py
 create mode 100644 tests/messengers/telegram/utils.py
 delete mode 100644 tests/pipeline/test_tutorials.py
 delete mode 100644 tests/script/core/test_tutorials.py
 create mode 100644 tests/script/core/test_validation.py
 delete mode 100644 tests/script/responses/test_tutorials.py
 create mode 100644 tests/slots/__init__.py
 create mode 100644 tests/slots/conftest.py
 create mode 100644 tests/slots/test_slot_manager.py
 create mode 100644 tests/slots/test_slot_types.py
 create mode 100644 tests/slots/test_tutorials.py
 create mode 100644 tests/utils/test_serialization.py
 delete mode 100644 tests/utils/test_tutorials.py
 create mode 100644 tutorials/messengers/telegram/2_attachments.py
 delete mode 100644 tutorials/messengers/telegram/2_buttons.py
 create mode 100644 tutorials/messengers/telegram/3_advanced.py
 delete mode 100644 tutorials/messengers/telegram/3_buttons_with_callback.py
 delete mode 100644 tutorials/messengers/telegram/4_conditions.py
 delete mode 100644 tutorials/messengers/telegram/5_conditions_with_media.py
 delete mode 100644 tutorials/messengers/telegram/6_conditions_extras.py
 delete mode 100644 tutorials/messengers/telegram/7_polling_setup.py
 delete mode 100644 tutorials/messengers/telegram/8_webhook_setup.py
 delete mode 100644 tutorials/script/responses/2_buttons.py
 rename tutorials/script/responses/{3_media.py => 2_media.py} (83%)
 rename tutorials/script/responses/{4_multi_message.py => 3_multi_message.py} (55%)
 create mode 100644 tutorials/slots/1_basic_example.py
 create mode 100644 utils/test_data_generators/__init__.py
 create mode 100644 utils/test_data_generators/telegram_tutorial_data.py

diff --git a/.dockerignore b/.dockerignore
index 7f6151613..20720fea6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,6 +1,7 @@
 *.DS_Store*
 *.egg-info/
 dist/
+venv/
 build/
 docs/source/apiref
 docs/source/_misc
@@ -21,6 +22,7 @@ GlobalUserTableAccessor*
 memory_debugging*
 opening_database*
 _globals.py
+venv*
 .vscode
 .coverage
 .coverage.*
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 383a17e31..9c050cafb 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -29,7 +29,7 @@ updates:
     directory: "/"
     target-branch: dev
     schedule:
-      interval: "daily"
+      interval: "weekly"
     ignore:
       - dependency-name: "*"
         update-types: ["version-update:semver-patch"]
diff --git a/.github/process_github_events.py b/.github/process_github_events.py
index d288e8939..3f4331aeb 100644
--- a/.github/process_github_events.py
+++ b/.github/process_github_events.py
@@ -48,9 +48,7 @@ def post_comment_on_pr(comment: str, pr_number: int):
 - [ ] Update package version
 - [ ] Update `poetry.lock`
 - [ ] Change PR merge option
-- [ ] Test modules without automated testing:
-  - [ ] Requiring telegram `api_id` and `api_hash`
-  - [ ] Requiring `HF_API_KEY`
+- [ ] Update template repo
 - [ ] Search for objects to be deprecated
diff --git a/.github/workflows/build_and_publish_docs.yml b/.github/workflows/build_and_publish_docs.yml
index 5ea3fd66e..a74150e5e 100644
--- a/.github/workflows/build_and_publish_docs.yml
+++ b/.github/workflows/build_and_publish_docs.yml @@ -39,8 +39,6 @@ jobs: - name: build documentation env: TG_BOT_TOKEN: ${{ secrets.TG_BOT_TOKEN }} - TG_API_ID: ${{ secrets.TG_API_ID }} - TG_API_HASH: ${{ secrets.TG_API_HASH }} TG_BOT_USERNAME: ${{ secrets.TG_BOT_USERNAME }} run: | python -m poetry run poe docs diff --git a/.github/workflows/test_coverage.yml b/.github/workflows/test_coverage.yml index d8712705a..0251907ec 100644 --- a/.github/workflows/test_coverage.yml +++ b/.github/workflows/test_coverage.yml @@ -38,8 +38,6 @@ jobs: - name: run tests env: TG_BOT_TOKEN: ${{ secrets.TG_BOT_TOKEN }} - TG_API_ID: ${{ secrets.TG_API_ID }} - TG_API_HASH: ${{ secrets.TG_API_HASH }} TG_BOT_USERNAME: ${{ secrets.TG_BOT_USERNAME }} run: | python -m poetry run poe test_all diff --git a/.github/workflows/test_full.yml b/.github/workflows/test_full.yml index 8644e9036..4cda7fb37 100644 --- a/.github/workflows/test_full.yml +++ b/.github/workflows/test_full.yml @@ -40,8 +40,6 @@ jobs: - name: run pytest env: TG_BOT_TOKEN: ${{ secrets.TG_BOT_TOKEN }} - TG_API_ID: ${{ secrets.TG_API_ID }} - TG_API_HASH: ${{ secrets.TG_API_HASH }} TG_BOT_USERNAME: ${{ secrets.TG_BOT_USERNAME }} run: | python -m poetry run poe test_no_cov @@ -64,8 +62,6 @@ jobs: - name: run pytest env: TG_BOT_TOKEN: ${{ secrets.TG_BOT_TOKEN }} - TG_API_ID: ${{ secrets.TG_API_ID }} - TG_API_HASH: ${{ secrets.TG_API_HASH }} TG_BOT_USERNAME: ${{ secrets.TG_BOT_USERNAME }} run: | python -m poetry run poe test_no_deps diff --git a/.github/workflows/update_dashboard.yml b/.github/workflows/update_dashboard.yml index 72042885f..34e447967 100644 --- a/.github/workflows/update_dashboard.yml +++ b/.github/workflows/update_dashboard.yml @@ -5,7 +5,7 @@ on: branches: - 'master' paths: - - 'dff/utils/docker/**' + - 'chatsky/utils/docker/**' workflow_dispatch: concurrency: @@ -41,7 +41,7 @@ jobs: - name: Build and upload image uses: docker/build-push-action@v5 with: - context: dff/utils/docker - file: dff/utils/docker/dockerfile_stats + context: chatsky/utils/docker + file: chatsky/utils/docker/dockerfile_stats tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7f6151613..20720fea6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.DS_Store* *.egg-info/ dist/ +venv/ build/ docs/source/apiref docs/source/_misc @@ -21,6 +22,7 @@ GlobalUserTableAccessor* memory_debugging* opening_database* _globals.py +venv* .vscode .coverage .coverage.* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9939a5dc8..80cfe0fe9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,10 @@ ## Introduction We will be glad to receive your pull requests (PRs) and issues for adding new features if you are missing something. -We always look forward to your contributions to the Dialog Flow Framework (DFF). +We always look forward to your contributions to Chatsky. ## Rules for submitting a PR -All PRs are reviewed by DFF developers team. +All PRs are reviewed by Chatsky developers team. In order to make the job of reviewing easier and increase the chance that your PR will be accepted, please add a short description with information about why this PR is needed and what changes will be made. Please use the following rules to write the names of branches and commit messages. 
@@ -69,27 +69,9 @@ by activating the virtual environment and then running poetry run poe docs ``` -After that `docs/build` dir will be created and you can open index file `docs/build/index.html` in your browser of choice. - -#### Documentation links - -In your tutorials, you can use special expanding directives in markdown cells. -They can help shorten the comments and avoid boilerplate code. -The documentation links generated by the directives are always relative -to the local documentation and verified during build. +Note that you'll need `pandoc` installed on your system in order to build docs. -- `%pip install {args}` - This directive generates dependency installation cell, adds a comment and sets up "quiet" flag. - - It should be used in tutorials, like this: `# %pip install dff[...]`. -- `%doclink({args})` - This directive generates a documentation link. It supports 2 or three arguments and the generated link will look like: `ARG1/ARG2#ARG3`. - - The first argument can be either `api` for DFF codebase, `tutorial` for tutorials or `guide` for user guides. -- `%mddoclink({args})` - This directive is a shortcut for `%doclink` that generates a markdown format link instead. - - The generated link will be either `[ARG2](%doclink(ARG1,ARG2))` or `[ARG3](%doclink(ARG1,ARG2,ARG3))`. +After that `docs/build` dir will be created and you can open index file `docs/build/index.html` in your browser of choice. #### Documentation links @@ -101,11 +83,11 @@ to the local documentation and verified during build. - `%pip install {args}` This directive generates dependency installation cell, adds a comment and sets up "quiet" flag. - It should be used in tutorials, like this: `# %pip install dff[...]`. + It should be used in tutorials, like this: `# %pip install chatsky[...]`. - `%doclink({args})` This directive generates a documentation link. It supports 2 or three arguments and the generated link will look like: `ARG1/ARG2#ARG3`. - The first argument can be either `api` for DFF codebase, `tutorial` for tutorials or `guide` for user guides. + The first argument can be either `api` for Chatsky codebase, `tutorial` for tutorials or `guide` for user guides. - `%mddoclink({args})` This directive is a shortcut for `%doclink` that generates a markdown format link instead. @@ -132,6 +114,13 @@ poetry run poe test_all ``` for successful execution of this command `Docker` and `docker compose` are required. +To do a quick sanity check without the need to up docker containers or wait for long tests, run +```bash +poetry run poe quick_test +``` + +_There's also quick_test_coverage for quick htmlcov generation, though it is very likely to be incomplete due to deselection of some tests._ + To make sure that the code satisfies only the style requirements, run ```bash poetry run poe lint @@ -144,7 +133,7 @@ poetry run poe format Tests are configured via [`.env_file`](.env_file). ### Docker -DFF uses docker images for two purposes: +Chatsky uses docker images for two purposes: 1. Database images for integration testing. 2. Images for statistics collection. 
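To make the directive semantics quoted in the CONTRIBUTING.md hunk above concrete, here is a minimal sketch of the expansion rules for `%doclink` and `%mddoclink`. The function names below are hypothetical and merely restate the documented "ARG1/ARG2#ARG3" contract; the real implementation lives in the documentation build tooling, not in this patch.

```python
# Hypothetical sketch of the %doclink / %mddoclink expansion rules described
# in CONTRIBUTING.md. Function names are illustrative only; the actual
# directive handling is part of the docs build scripts.

def expand_doclink(*args: str) -> str:
    """Expand %doclink(ARG1,ARG2) to 'ARG1/ARG2' and %doclink(ARG1,ARG2,ARG3) to 'ARG1/ARG2#ARG3'."""
    if len(args) == 2:
        return f"{args[0]}/{args[1]}"
    if len(args) == 3:
        return f"{args[0]}/{args[1]}#{args[2]}"
    raise ValueError("%doclink expects two or three arguments")


def expand_mddoclink(*args: str) -> str:
    """Markdown shortcut: [ARG2](%doclink(ARG1,ARG2)) or [ARG3](%doclink(ARG1,ARG2,ARG3))."""
    label = args[-1]  # ARG2 for the two-argument form, ARG3 for the three-argument form
    return f"[{label}]({expand_doclink(*args)})"


# Example: expand_mddoclink("api", "script.core.message", "Message")
# returns "[Message](api/script.core.message#Message)"
```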
diff --git a/README.md b/README.md index 2afa8b3ae..946d7feed 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,35 @@ -# Dialog Flow Framework +# Chatsky -[![Documentation Status](https://github.com/deeppavlov/dialog_flow_framework/workflows/build_and_publish_docs/badge.svg?branch=dev)](https://deeppavlov.github.io/dialog_flow_framework) -[![Codestyle](https://github.com/deeppavlov/dialog_flow_framework/workflows/codestyle/badge.svg?branch=dev)](https://github.com/deeppavlov/dialog_flow_framework/actions/workflows/codestyle.yml) -[![Tests](https://github.com/deeppavlov/dialog_flow_framework/workflows/test_coverage/badge.svg?branch=dev)](https://github.com/deeppavlov/dialog_flow_framework/actions/workflows/test_coverage.yml) -[![License Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/deeppavlov/dialog_flow_framework/blob/master/LICENSE) +[![Documentation Status](https://github.com/deeppavlov/chatsky/workflows/build_and_publish_docs/badge.svg?branch=dev)](https://deeppavlov.github.io/chatsky) +[![Codestyle](https://github.com/deeppavlov/chatsky/workflows/codestyle/badge.svg?branch=dev)](https://github.com/deeppavlov/chatsky/actions/workflows/codestyle.yml) +[![Tests](https://github.com/deeppavlov/chatsky/workflows/test_coverage/badge.svg?branch=dev)](https://github.com/deeppavlov/chatsky/actions/workflows/test_coverage.yml) +[![License Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/deeppavlov/chatsky/blob/master/LICENSE) ![Python 3.8, 3.9, 3.10, 3.11](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-green.svg) -[![PyPI](https://img.shields.io/pypi/v/dff)](https://pypi.org/project/dff/) -[![Downloads](https://pepy.tech/badge/dff)](https://pepy.tech/project/dff) +[![PyPI](https://img.shields.io/pypi/v/chatsky)](https://pypi.org/project/chatsky/) +[![Downloads](https://static.pepy.tech/badge/chatsky)](https://pepy.tech/project/chatsky) -The Dialog Flow Framework (DFF) allows you to develop conversational services. -DFF offers a specialized domain-specific language (DSL) for quickly writing dialogs in pure Python. The service is created by defining a special dialog graph that determines the behavior of the dialog agent. The latter is then leveraged in the DFF pipeline. +Chatsky allows you to develop conversational services. +Chatsky offers a specialized domain-specific language (DSL) for quickly writing dialogs in pure Python. The service is created by defining a special dialog graph that determines the behavior of the dialog agent. The latter is then leveraged in the Chatsky pipeline. You can use the framework in various services such as social networks, call centers, websites, personal assistants, etc. -DFF, a versatile Python-based conversational service framework, can be deployed across a spectrum of platforms, +Chatsky, a versatile Python-based conversational service framework, can be deployed across a spectrum of platforms, ensuring flexibility for both novice and seasoned developers: -- Cloud platforms like AWS, Azure, and GCP offer scalable environments for DFF, +- Cloud platforms like AWS, Azure, and GCP offer scalable environments for Chatsky, with options such as AWS Lambda and Azure Functions providing serverless execution. -- For containerized deployment, Docker and Kubernetes streamline the orchestration of DFF applications. +- For containerized deployment, Docker and Kubernetes streamline the orchestration of Chatsky applications. 
 - Furthermore, the framework's adaptability extends to IoT ecosystems, making it suitable for integration with edge devices in scenarios like smart homes or industrial automation.
 Whether deployed on cloud platforms, containerized environments, or directly on IoT devices,
-DFF's accessibility and customization options make it a robust choice for developing conversational services
+Chatsky's accessibility and customization options make it a robust choice for developing conversational services
 in the evolving landscape of Python applications and IoT connectivity.
-## Why choose DFF
+## Why choose Chatsky
 * Written in pure Python, the framework is easily accessible for both beginners and experienced developers.
-* For the same reason, all the abstractions used in DFF can be easily customized and extended using regular language synthax.
-* DFF offers easy and straightforward tools for state management which is as easy as setting values of a Python dictionary.
+* For the same reason, all the abstractions used in Chatsky can be easily customized and extended using regular language syntax.
+* Chatsky offers easy and straightforward tools for state management which is as easy as setting values of a Python dictionary.
 * The framework is being actively maintained and thoroughly tested. The team is open to suggestions and quickly reacts to bug reports.
 # Quick Start
@@ -44,31 +44,31 @@ in the evolving landscape of Python applications and IoT connectivity.
 ## Installation
-DFF can be installed via pip:
+Chatsky can be installed via pip:
 ```bash
-pip install dff
+pip install chatsky
 ```
-The above command will set the minimum dependencies to start working with DFF.
+The above command will set the minimum dependencies to start working with Chatsky.
 The installation process allows the user to choose from different packages based on their dependencies, which are:
 ```bash
-pip install dff[json] # dependencies for using JSON
-pip install dff[pickle] # dependencies for using Pickle
-pip install dff[redis] # dependencies for using Redis
-pip install dff[mongodb] # dependencies for using MongoDB
-pip install dff[mysql] # dependencies for using MySQL
-pip install dff[postgresql] # dependencies for using PostgreSQL
-pip install dff[sqlite] # dependencies for using SQLite
-pip install dff[ydb] # dependencies for using Yandex Database
-pip install dff[telegram] # dependencies for using Telegram
-pip install dff[benchmark] # dependencies for benchmarking
+pip install chatsky[json] # dependencies for using JSON
+pip install chatsky[pickle] # dependencies for using Pickle
+pip install chatsky[redis] # dependencies for using Redis
+pip install chatsky[mongodb] # dependencies for using MongoDB
+pip install chatsky[mysql] # dependencies for using MySQL
+pip install chatsky[postgresql] # dependencies for using PostgreSQL
+pip install chatsky[sqlite] # dependencies for using SQLite
+pip install chatsky[ydb] # dependencies for using Yandex Database
+pip install chatsky[telegram] # dependencies for using Telegram
+pip install chatsky[benchmark] # dependencies for benchmarking
 ```
 For example, if you are going to use one of the database backends,
 you can specify the corresponding requirements yourself. Multiple dependencies can be installed at once, e.g.
```bash -pip install dff[postgresql,mysql] +pip install chatsky[postgresql,mysql] ``` ## Basic example @@ -76,18 +76,18 @@ pip install dff[postgresql,mysql] The following code snippet builds a simplistic chat bot that replies with messages ``Hi!`` and ``OK`` depending on user input, which only takes a few lines of code. All the abstractions used in this example are thoroughly explained in the dedicated -[user guide](https://deeppavlov.github.io/dialog_flow_framework/user_guides/basic_conceptions.html). +[user guide](https://deeppavlov.github.io/chatsky/user_guides/basic_conceptions.html). ```python -from dff.script import GLOBAL, TRANSITIONS, RESPONSE, Message -from dff.pipeline import Pipeline -import dff.script.conditions.std_conditions as cnd +from chatsky.script import GLOBAL, TRANSITIONS, RESPONSE, Message +from chatsky.pipeline import Pipeline +import chatsky.script.conditions.std_conditions as cnd # create a dialog script script = { GLOBAL: { TRANSITIONS: { - ("flow", "node_hi"): cnd.exact_match(Message("Hi")), + ("flow", "node_hi"): cnd.exact_match("Hi"), ("flow", "node_ok"): cnd.true() } }, @@ -129,19 +129,19 @@ Response: OK ``` More advanced examples are available as a part of documentation: -[tutorials](https://deeppavlov.github.io/dialog_flow_framework/tutorials.html). +[tutorials](https://deeppavlov.github.io/chatsky/tutorials.html). ## Further steps -To further explore the API of the framework, you can make use of the [detailed documentation](https://deeppavlov.github.io/dialog_flow_framework/index.html). -Broken down into several sections to highlight all the aspects of development with DFF, +To further explore the API of the framework, you can make use of the [detailed documentation](https://deeppavlov.github.io/chatsky/index.html). +Broken down into several sections to highlight all the aspects of development with Chatsky, the documentation for the library is constantly available online. -# Contributing to the Dialog Flow Framework +# Contributing to Chatsky We are open to accepting pull requests and bug reports. -Please refer to [CONTRIBUTING.md](https://github.com/deeppavlov/dialog_flow_framework/blob/master/CONTRIBUTING.md). +Please refer to [CONTRIBUTING.md](https://github.com/deeppavlov/chatsky/blob/master/CONTRIBUTING.md). # License -DFF is distributed under the terms of the [Apache License 2.0](https://github.com/deeppavlov/dialog_flow_framework/blob/master/LICENSE). +Chatsky is distributed under the terms of the [Apache License 2.0](https://github.com/deeppavlov/chatsky/blob/master/LICENSE). 
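Since the README hunk above only shows the `GLOBAL` section of the basic example, here is the snippet assembled into a single runnable sketch. The `"flow"` nodes, the `Pipeline.from_script(...)` call, and `pipeline.run()` are reconstructed from the surrounding README context of the pre-rename project, so treat details such as the start label as assumptions rather than part of this patch.

```python
from chatsky.script import GLOBAL, TRANSITIONS, RESPONSE, Message
from chatsky.pipeline import Pipeline
import chatsky.script.conditions.std_conditions as cnd

# Dialog graph: transitions under GLOBAL are checked from every node.
script = {
    GLOBAL: {
        TRANSITIONS: {
            # After this patch series, conditions accept plain strings
            # in place of Message("Hi").
            ("flow", "node_hi"): cnd.exact_match("Hi"),
            ("flow", "node_ok"): cnd.true(),
        }
    },
    "flow": {
        "node_hi": {RESPONSE: Message("Hi!")},
        "node_ok": {RESPONSE: Message("OK")},
    },
}

# start_label points at the node the dialog starts from (assumed here).
pipeline = Pipeline.from_script(script, start_label=("flow", "node_hi"))

if __name__ == "__main__":
    pipeline.run()  # starts the default CLI messenger interface
```

Running this reproduces the `Request: Hi` / `Response: Hi!` / `Response: OK` exchange shown in the README.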
diff --git a/dff/__init__.py b/chatsky/__init__.py similarity index 55% rename from dff/__init__.py rename to chatsky/__init__.py index 6f58f2a4a..7239850f3 100644 --- a/dff/__init__.py +++ b/chatsky/__init__.py @@ -10,9 +10,11 @@ nest_asyncio.apply() -from dff.pipeline import Pipeline -from dff.script import Context, Script +from chatsky.pipeline import Pipeline +from chatsky.script import Context, Script -from dff.msg import * +from chatsky.msg import * Script.model_rebuild() + +import chatsky.__rebuild_pydantic_models__ diff --git a/chatsky/__rebuild_pydantic_models__.py b/chatsky/__rebuild_pydantic_models__.py new file mode 100644 index 000000000..6d4c5dd92 --- /dev/null +++ b/chatsky/__rebuild_pydantic_models__.py @@ -0,0 +1,9 @@ +# flake8: noqa: F401 + +from chatsky.pipeline import Pipeline +from chatsky.pipeline.types import ExtraHandlerRuntimeInfo +from chatsky.script import Context, Script + +Script.model_rebuild() +Context.model_rebuild() +ExtraHandlerRuntimeInfo.model_rebuild() diff --git a/chatsky/cnd.py b/chatsky/cnd.py new file mode 100644 index 000000000..4625e467c --- /dev/null +++ b/chatsky/cnd.py @@ -0,0 +1 @@ +from chatsky.script.conditions import * diff --git a/dff/config/README.md b/chatsky/config/README.md similarity index 73% rename from dff/config/README.md rename to chatsky/config/README.md index f060b14d6..4a4c9f5e0 100644 --- a/dff/config/README.md +++ b/chatsky/config/README.md @@ -5,6 +5,6 @@ This directory provides yaml files for Superset dashboard configuration. The files inside are not supposed to be edited manually for lest of compatibility breaks. Placeholders inside the files will be filled automatically when you use the -`dff.stats` CLI command to generate a configuration archive. +`chatsky.stats` CLI command to generate a configuration archive. -Use `dff.stats -h` for more info. \ No newline at end of file +Use `chatsky.stats -h` for more info. 
\ No newline at end of file diff --git a/dff/config/superset_dashboard/charts/Current_topic_slot_bar_chart_4.yaml b/chatsky/config/superset_dashboard/charts/Current_topic_slot_bar_chart_4.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Current_topic_slot_bar_chart_4.yaml rename to chatsky/config/superset_dashboard/charts/Current_topic_slot_bar_chart_4.yaml diff --git a/dff/config/superset_dashboard/charts/Current_topic_time_series_bar_chart_2.yaml b/chatsky/config/superset_dashboard/charts/Current_topic_time_series_bar_chart_2.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Current_topic_time_series_bar_chart_2.yaml rename to chatsky/config/superset_dashboard/charts/Current_topic_time_series_bar_chart_2.yaml diff --git a/dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_13.yaml b/chatsky/config/superset_dashboard/charts/Flow_visit_ratio_monitor_13.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Flow_visit_ratio_monitor_13.yaml rename to chatsky/config/superset_dashboard/charts/Flow_visit_ratio_monitor_13.yaml diff --git a/dff/config/superset_dashboard/charts/Node_Visits_7.yaml b/chatsky/config/superset_dashboard/charts/Node_Visits_7.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Node_Visits_7.yaml rename to chatsky/config/superset_dashboard/charts/Node_Visits_7.yaml diff --git a/dff/config/superset_dashboard/charts/Node_counts_3.yaml b/chatsky/config/superset_dashboard/charts/Node_counts_3.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Node_counts_3.yaml rename to chatsky/config/superset_dashboard/charts/Node_counts_3.yaml diff --git a/dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_8.yaml b/chatsky/config/superset_dashboard/charts/Node_visit_ratio_monitor_8.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Node_visit_ratio_monitor_8.yaml rename to chatsky/config/superset_dashboard/charts/Node_visit_ratio_monitor_8.yaml diff --git a/dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml b/chatsky/config/superset_dashboard/charts/Node_visits_ratio_6.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Node_visits_ratio_6.yaml rename to chatsky/config/superset_dashboard/charts/Node_visits_ratio_6.yaml diff --git a/dff/config/superset_dashboard/charts/Node_visits_sunburst_5.yaml b/chatsky/config/superset_dashboard/charts/Node_visits_sunburst_5.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Node_visits_sunburst_5.yaml rename to chatsky/config/superset_dashboard/charts/Node_visits_sunburst_5.yaml diff --git a/dff/config/superset_dashboard/charts/Rating_slot_line_chart_1.yaml b/chatsky/config/superset_dashboard/charts/Rating_slot_line_chart_1.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Rating_slot_line_chart_1.yaml rename to chatsky/config/superset_dashboard/charts/Rating_slot_line_chart_1.yaml diff --git a/dff/config/superset_dashboard/charts/Requests_17.yaml b/chatsky/config/superset_dashboard/charts/Requests_17.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Requests_17.yaml rename to chatsky/config/superset_dashboard/charts/Requests_17.yaml diff --git a/dff/config/superset_dashboard/charts/Responses_16.yaml b/chatsky/config/superset_dashboard/charts/Responses_16.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Responses_16.yaml rename to 
chatsky/config/superset_dashboard/charts/Responses_16.yaml diff --git a/dff/config/superset_dashboard/charts/Service_load_users_9.yaml b/chatsky/config/superset_dashboard/charts/Service_load_users_9.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Service_load_users_9.yaml rename to chatsky/config/superset_dashboard/charts/Service_load_users_9.yaml diff --git a/dff/config/superset_dashboard/charts/Table_14.yaml b/chatsky/config/superset_dashboard/charts/Table_14.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Table_14.yaml rename to chatsky/config/superset_dashboard/charts/Table_14.yaml diff --git a/dff/config/superset_dashboard/charts/Terminal_labels_15.yaml b/chatsky/config/superset_dashboard/charts/Terminal_labels_15.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Terminal_labels_15.yaml rename to chatsky/config/superset_dashboard/charts/Terminal_labels_15.yaml diff --git a/dff/config/superset_dashboard/charts/Transition_counts_12.yaml b/chatsky/config/superset_dashboard/charts/Transition_counts_12.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Transition_counts_12.yaml rename to chatsky/config/superset_dashboard/charts/Transition_counts_12.yaml diff --git a/dff/config/superset_dashboard/charts/Transition_layout_10.yaml b/chatsky/config/superset_dashboard/charts/Transition_layout_10.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Transition_layout_10.yaml rename to chatsky/config/superset_dashboard/charts/Transition_layout_10.yaml diff --git a/dff/config/superset_dashboard/charts/Transition_ratio_chord_11.yaml b/chatsky/config/superset_dashboard/charts/Transition_ratio_chord_11.yaml similarity index 100% rename from dff/config/superset_dashboard/charts/Transition_ratio_chord_11.yaml rename to chatsky/config/superset_dashboard/charts/Transition_ratio_chord_11.yaml diff --git a/dff/config/superset_dashboard/dashboards/DFF_statistics_dashboard_1.yaml b/chatsky/config/superset_dashboard/dashboards/chatsky_statistics_dashboard_1.yaml similarity index 99% rename from dff/config/superset_dashboard/dashboards/DFF_statistics_dashboard_1.yaml rename to chatsky/config/superset_dashboard/dashboards/chatsky_statistics_dashboard_1.yaml index ce8e32496..a32c1c91d 100644 --- a/dff/config/superset_dashboard/dashboards/DFF_statistics_dashboard_1.yaml +++ b/chatsky/config/superset_dashboard/dashboards/chatsky_statistics_dashboard_1.yaml @@ -1,7 +1,7 @@ -dashboard_title: DFF statistics dashboard +dashboard_title: Chatsky statistics dashboard description: null css: '' -slug: dff-stats +slug: chatsky-stats uuid: 68bce374-99bc-4890-b8c2-cb172409b894 position: CHART-91whs_IaiF: @@ -297,7 +297,7 @@ position: HEADER_ID: id: HEADER_ID meta: - text: DFF statistics dashboard + text: Chatsky statistics dashboard type: HEADER MARKDOWN-8Q9BhcEwva: children: [] @@ -378,7 +378,7 @@ position: To make them available in the dashboard, you need to define a custom extractor - function for them (see the [Extractor functions](https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.stats.1_extractor_functions.html) ). + function for them (see the [Extractor functions](https://deeppavlov.github.io/chatsky/tutorials/tutorials.stats.1_extractor_functions.html) ). The output of that function will then be persisted to the `data` column of the logs table, while the name of the function will be available in the `data key` column. 
That makes it easy to filter the relevant log entries and use @@ -621,7 +621,7 @@ position: ## Service users - This plot aggregates the count of unique users querying the DFF service at + This plot aggregates the count of unique users querying the Chatsky service at any given point in time. The time periods to aggregate over can be changed using the filter on the left. diff --git a/dff/config/superset_dashboard/databases/dff_database.yaml b/chatsky/config/superset_dashboard/databases/chatsky_database.yaml similarity index 91% rename from dff/config/superset_dashboard/databases/dff_database.yaml rename to chatsky/config/superset_dashboard/databases/chatsky_database.yaml index b178a0f80..851fa9d33 100644 --- a/dff/config/superset_dashboard/databases/dff_database.yaml +++ b/chatsky/config/superset_dashboard/databases/chatsky_database.yaml @@ -1,4 +1,4 @@ -database_name: dff_database +database_name: chatsky_database sqlalchemy_uri: clickhousedb+connect://username:XXXXXXXXXX@clickhouse:8123/test cache_timeout: null expose_in_sqllab: true diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_final_nodes.yaml similarity index 98% rename from dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml rename to chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_final_nodes.yaml index 4a8628298..8cce16f6a 100644 --- a/dff/config/superset_dashboard/datasets/dff_database/dff_final_nodes.yaml +++ b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_final_nodes.yaml @@ -1,4 +1,4 @@ -table_name: dff_final_nodes +table_name: chatsky_final_nodes main_dttm_col: null description: null default_endpoint: null diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_node_stats.yaml similarity index 98% rename from dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml rename to chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_node_stats.yaml index 4e2cc5792..88196bf50 100644 --- a/dff/config/superset_dashboard/datasets/dff_database/dff_node_stats.yaml +++ b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_node_stats.yaml @@ -1,4 +1,4 @@ -table_name: dff_node_stats +table_name: chatsky_node_stats main_dttm_col: null description: null default_endpoint: null diff --git a/dff/config/superset_dashboard/datasets/dff_database/dff_stats.yaml b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_stats.yaml similarity index 99% rename from dff/config/superset_dashboard/datasets/dff_database/dff_stats.yaml rename to chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_stats.yaml index 6f1efe683..a33d97261 100644 --- a/dff/config/superset_dashboard/datasets/dff_database/dff_stats.yaml +++ b/chatsky/config/superset_dashboard/datasets/chatsky_database/chatsky_stats.yaml @@ -1,4 +1,4 @@ -table_name: dff_stats +table_name: chatsky_stats main_dttm_col: null description: null default_endpoint: null diff --git a/dff/config/superset_dashboard/metadata.yaml b/chatsky/config/superset_dashboard/metadata.yaml similarity index 100% rename from dff/config/superset_dashboard/metadata.yaml rename to chatsky/config/superset_dashboard/metadata.yaml diff --git a/dff/context_storages/__init__.py b/chatsky/context_storages/__init__.py similarity index 100% rename from dff/context_storages/__init__.py rename to 
chatsky/context_storages/__init__.py diff --git a/dff/context_storages/database.py b/chatsky/context_storages/database.py similarity index 98% rename from dff/context_storages/database.py rename to chatsky/context_storages/database.py index 0427a1e7d..3d3a857d1 100644 --- a/dff/context_storages/database.py +++ b/chatsky/context_storages/database.py @@ -7,6 +7,7 @@ that developers can inherit from in order to create their own context storage solutions. This class implements the basic functionality and can be extended to add additional features as needed. """ + import asyncio import importlib import threading @@ -20,7 +21,7 @@ class DBContextStorage(ABC): r""" - An abstract interface for `dff` DB context storages. + An abstract interface for `chatsky` DB context storages. It includes the most essential methods of the python `dict` class. Can not be instantiated. @@ -223,5 +224,5 @@ def context_storage_factory(path: str, **kwargs) -> DBContextStorage: For more information, see the function doc:\n{context_storage_factory.__doc__} """ _class, module = PROTOCOLS[prefix]["class"], PROTOCOLS[prefix]["module"] - target_class = getattr(importlib.import_module(f".{module}", package="dff.context_storages"), _class) + target_class = getattr(importlib.import_module(f".{module}", package="chatsky.context_storages"), _class) return target_class(path, **kwargs) diff --git a/dff/context_storages/json.py b/chatsky/context_storages/json.py similarity index 97% rename from dff/context_storages/json.py rename to chatsky/context_storages/json.py index 4a56b7ff5..9ecc44b63 100644 --- a/dff/context_storages/json.py +++ b/chatsky/context_storages/json.py @@ -2,9 +2,10 @@ JSON ---- The JSON module provides a json-based version of the :py:class:`.DBContextStorage` class. -This class is used to store and retrieve context data in a JSON. It allows the DFF to easily +This class is used to store and retrieve context data in a JSON. It allows Chatsky to easily store and retrieve context data. """ + import asyncio from typing import Hashable @@ -19,7 +20,7 @@ from pydantic import BaseModel, model_validator from .database import DBContextStorage, threadsafe_method -from dff.script import Context +from chatsky.script import Context class SerializableStorage(BaseModel, extra="allow"): diff --git a/dff/context_storages/mongo.py b/chatsky/context_storages/mongo.py similarity index 97% rename from dff/context_storages/mongo.py rename to chatsky/context_storages/mongo.py index 1afeeda84..166045a12 100644 --- a/dff/context_storages/mongo.py +++ b/chatsky/context_storages/mongo.py @@ -3,7 +3,7 @@ ----- The Mongo module provides a MongoDB-based version of the :py:class:`.DBContextStorage` class. This class is used to store and retrieve context data in a MongoDB. -It allows the DFF to easily store and retrieve context data in a format that is highly scalable +It allows Chatsky to easily store and retrieve context data in a format that is highly scalable and easy to work with. MongoDB is a widely-used, open-source NoSQL database that is known for its scalability and performance. @@ -11,6 +11,7 @@ and environments. Additionally, MongoDB is highly scalable and can handle large amounts of data and high levels of read and write traffic. 
""" + from typing import Hashable, Dict, Any try: @@ -25,7 +26,7 @@ import json -from dff.script import Context +from chatsky.script import Context from .database import DBContextStorage, threadsafe_method from .protocol import get_protocol_install_suggestion diff --git a/dff/context_storages/pickle.py b/chatsky/context_storages/pickle.py similarity index 96% rename from dff/context_storages/pickle.py rename to chatsky/context_storages/pickle.py index aaa9b2c93..9f72a22c3 100644 --- a/dff/context_storages/pickle.py +++ b/chatsky/context_storages/pickle.py @@ -3,13 +3,14 @@ ------ The Pickle module provides a pickle-based version of the :py:class:`.DBContextStorage` class. This class is used to store and retrieve context data in a pickle format. -It allows the DFF to easily store and retrieve context data in a format that is efficient +It allows Chatsky to easily store and retrieve context data in a format that is efficient for serialization and deserialization and can be easily used in python. Pickle is a python library that allows to serialize and deserialize python objects. It is efficient and fast, but it is not recommended to use it to transfer data across different languages or platforms because it's not cross-language compatible. """ + import asyncio import pickle from typing import Hashable @@ -23,7 +24,7 @@ pickle_available = False from .database import DBContextStorage, threadsafe_method -from dff.script import Context +from chatsky.script import Context class PickleContextStorage(DBContextStorage): diff --git a/dff/context_storages/protocol.py b/chatsky/context_storages/protocol.py similarity index 86% rename from dff/context_storages/protocol.py rename to chatsky/context_storages/protocol.py index a1e1ddf1c..05f04f5b9 100644 --- a/dff/context_storages/protocol.py +++ b/chatsky/context_storages/protocol.py @@ -1,17 +1,18 @@ """ Protocol -------- -The Protocol module contains the base code for the different communication protocols used in the DFF. -It defines the :py:data:`.PROTOCOLS` constant, which lists all the supported protocols in the DFF. +The Protocol module contains the base code for the different communication protocols used in Chatsky. +It defines the :py:data:`.PROTOCOLS` constant, which lists all the supported protocols in Chatsky. The module also includes a function :py:func:`.get_protocol_install_suggestion()` that is used to provide suggestions for installing the necessary dependencies for a specific protocol. This function takes the name of the desired protocol as an argument and returns a string containing the necessary installation commands for that protocol. -The DFF supports a variety of communication protocols, +Chatsky supports a variety of communication protocols, which allows it to communicate with different types of databases. 
""" + import json import pathlib @@ -30,5 +31,5 @@ def get_protocol_install_suggestion(protocol_name: str) -> str: protocol = PROTOCOLS.get(protocol_name, {}) slug = protocol.get("slug") if slug: - return f"Try to run `pip install dff[{slug}]`" + return f"Try to run `pip install chatsky[{slug}]`" return "" diff --git a/dff/context_storages/protocols.json b/chatsky/context_storages/protocols.json similarity index 100% rename from dff/context_storages/protocols.json rename to chatsky/context_storages/protocols.json diff --git a/dff/context_storages/redis.py b/chatsky/context_storages/redis.py similarity index 96% rename from dff/context_storages/redis.py rename to chatsky/context_storages/redis.py index d506a1d47..7334097c7 100644 --- a/dff/context_storages/redis.py +++ b/chatsky/context_storages/redis.py @@ -3,7 +3,7 @@ ----- The Redis module provides a Redis-based version of the :py:class:`.DBContextStorage` class. This class is used to store and retrieve context data in a Redis. -It allows the DFF to easily store and retrieve context data in a format that is highly scalable +It allows Chatsky to easily store and retrieve context data in a format that is highly scalable and easy to work with. Redis is an open-source, in-memory data structure store that is known for its @@ -12,6 +12,7 @@ Additionally, Redis can be used as a cache, message broker, and database, making it a versatile and powerful choice for data storage and management. """ + import json from typing import Hashable @@ -22,7 +23,7 @@ except ImportError: redis_available = False -from dff.script import Context +from chatsky.script import Context from .database import DBContextStorage, threadsafe_method from .protocol import get_protocol_install_suggestion diff --git a/dff/context_storages/shelve.py b/chatsky/context_storages/shelve.py similarity index 94% rename from dff/context_storages/shelve.py rename to chatsky/context_storages/shelve.py index 728ddc5dc..de2e97ea5 100644 --- a/dff/context_storages/shelve.py +++ b/chatsky/context_storages/shelve.py @@ -3,7 +3,7 @@ ------ The Shelve module provides a shelve-based version of the :py:class:`.DBContextStorage` class. This class is used to store and retrieve context data in a shelve format. -It allows the DFF to easily store and retrieve context data in a format that is efficient +It allows Chatsky to easily store and retrieve context data in a format that is efficient for serialization and deserialization and can be easily used in python. Shelve is a python library that allows to store and retrieve python objects. @@ -12,11 +12,12 @@ It stores data in a dbm-style format in the file system, which is not as fast as the other serialization libraries like pickle or JSON. """ + import pickle from shelve import DbfilenameShelf from typing import Hashable -from dff.script import Context +from chatsky.script import Context from .database import DBContextStorage diff --git a/dff/context_storages/sql.py b/chatsky/context_storages/sql.py similarity index 98% rename from dff/context_storages/sql.py rename to chatsky/context_storages/sql.py index 9018d5da7..677c1648d 100644 --- a/dff/context_storages/sql.py +++ b/chatsky/context_storages/sql.py @@ -3,7 +3,7 @@ --- The SQL module provides a SQL-based version of the :py:class:`.DBContextStorage` class. This class is used to store and retrieve context data from SQL databases. 
-It allows the DFF to easily store and retrieve context data in a format that is highly scalable +It allows Chatsky to easily store and retrieve context data in a format that is highly scalable and easy to work with. The SQL module provides the ability to choose the backend of your choice from @@ -12,12 +12,13 @@ reliability and scalability. SQLite is a self-contained, high-reliability, embedded, full-featured, public-domain, SQL database engine. """ + import asyncio import importlib import json from typing import Hashable -from dff.script import Context +from chatsky.script import Context from .database import DBContextStorage, threadsafe_method from .protocol import get_protocol_install_suggestion diff --git a/dff/context_storages/ydb.py b/chatsky/context_storages/ydb.py similarity index 98% rename from dff/context_storages/ydb.py rename to chatsky/context_storages/ydb.py index f499592c6..ff50f5b7b 100644 --- a/dff/context_storages/ydb.py +++ b/chatsky/context_storages/ydb.py @@ -6,16 +6,17 @@ operate, and scale high-performance and high-availability databases for your applications. The Yandex DB module uses the Yandex Cloud SDK, which is a python library that allows you to work -with Yandex Cloud services using python. This allows the DFF to easily integrate with the Yandex DataBase and +with Yandex Cloud services using python. This allows Chatsky to easily integrate with the Yandex DataBase and take advantage of the scalability and high-availability features provided by the service. """ + import asyncio import os from typing import Hashable from urllib.parse import urlsplit -from dff.script import Context +from chatsky.script import Context from .database import DBContextStorage from .protocol import get_protocol_install_suggestion diff --git a/chatsky/lbl.py b/chatsky/lbl.py new file mode 100644 index 000000000..ea70a6d40 --- /dev/null +++ b/chatsky/lbl.py @@ -0,0 +1 @@ +from chatsky.script.labels import * diff --git a/dff/messengers/__init__.py b/chatsky/messengers/__init__.py similarity index 100% rename from dff/messengers/__init__.py rename to chatsky/messengers/__init__.py diff --git a/chatsky/messengers/common/__init__.py b/chatsky/messengers/common/__init__.py new file mode 100644 index 000000000..713974c16 --- /dev/null +++ b/chatsky/messengers/common/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .interface import ( + MessengerInterface, + MessengerInterfaceWithAttachments, + PollingMessengerInterface, + CallbackMessengerInterface, +) +from .types import PollingInterfaceLoopFunction diff --git a/dff/messengers/common/interface.py b/chatsky/messengers/common/interface.py similarity index 67% rename from dff/messengers/common/interface.py rename to chatsky/messengers/common/interface.py index 58d62ac04..0635e0af7 100644 --- a/dff/messengers/common/interface.py +++ b/chatsky/messengers/common/interface.py @@ -2,20 +2,23 @@ Message Interfaces ------------------ The Message Interfaces module contains several basic classes that define the message interfaces. -These classes provide a way to define the structure of the messengers that are used to communicate with the DFF. +These classes provide a way to define the structure of the messengers that are used to communicate with Chatsky. 
""" + from __future__ import annotations import abc import asyncio import logging import uuid -from typing import Optional, Any, List, Tuple, TextIO, Hashable, TYPE_CHECKING - -from dff.script import Context, Message -from dff.messengers.common.types import PollingInterfaceLoopFunction +from pathlib import Path +from tempfile import gettempdir +from typing import Optional, Any, List, Tuple, TextIO, Hashable, TYPE_CHECKING, Type if TYPE_CHECKING: - from dff.pipeline.types import PipelineRunnerFunction + from chatsky.script import Context, Message + from chatsky.pipeline.types import PipelineRunnerFunction + from chatsky.messengers.common.types import PollingInterfaceLoopFunction + from chatsky.script.core.message import Attachment logger = logging.getLogger(__name__) @@ -33,7 +36,56 @@ async def connect(self, pipeline_runner: PipelineRunnerFunction): May be used for sending an introduction message or displaying general bot information. :param pipeline_runner: A function that should process user request and return context; - usually it's a :py:meth:`~dff.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. + usually it's a :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. + """ + raise NotImplementedError + + +class MessengerInterfaceWithAttachments(MessengerInterface, abc.ABC): + """ + MessengerInterface subclass that has methods for attachment handling. + + :param attachments_directory: Directory where attachments will be stored. + If not specified, the temporary directory will be used. + """ + + supported_request_attachment_types: set[Type[Attachment]] = set() + """ + Types of attachment that this messenger interface can receive. + Attachments not in this list will be neglected. + """ + + supported_response_attachment_types: set[Type[Attachment]] = set() + """ + Types of attachment that this messenger interface can send. + Attachments not in this list will be neglected. + """ + + def __init__(self, attachments_directory: Optional[Path] = None) -> None: + tempdir = gettempdir() + if attachments_directory is not None and not str(attachments_directory.absolute()).startswith(tempdir): + self.attachments_directory = attachments_directory + else: + warning_start = f"Attachments directory for {type(self).__name__} messenger interface" + warning_end = "attachment data won't be cached locally!" + if attachments_directory is None: + self.attachments_directory = Path(tempdir) / f"chatsky-cache-{type(self).__name__}" + logger.info(f"{warning_start} is None, so will be set to tempdir and {warning_end}") + else: + self.attachments_directory = attachments_directory + logger.info(f"{warning_start} is in tempdir, so {warning_end}") + self.attachments_directory.mkdir(parents=True, exist_ok=True) + + @abc.abstractmethod + async def get_attachment_bytes(self, source: str) -> bytes: + """ + Get attachment bytes from file source. + + E.g. if a file attachment consists of a URL of the file uploaded to the messenger servers, + this method is the right place to call the messenger API for the file downloading. + + :param source: Identifying string for the file. + :return: The attachment bytes. """ raise NotImplementedError @@ -70,7 +122,7 @@ def _on_exception(self, e: BaseException): :param e: The exception. 
""" if isinstance(e, Exception): - logger.error(f"Exception in {type(self).__name__} loop!\n{str(e)}") + logger.error(f"Exception in {type(self).__name__} loop!", exc_info=e) else: logger.info(f"{type(self).__name__} has stopped polling.") @@ -99,7 +151,7 @@ async def connect( for most cases the loop itself shouldn't be overridden. :param pipeline_runner: A function that should process user request and return context; - usually it's a :py:meth:`~dff.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. + usually it's a :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. :param loop: a function that determines whether polling should be continued; called in each cycle, should return `True` to continue polling or `False` to stop. :param timeout: a time interval between polls (in seconds). @@ -118,7 +170,7 @@ class CallbackMessengerInterface(MessengerInterface): Callback message interface is waiting for user input and answers once it gets one. """ - def __init__(self): + def __init__(self) -> None: self._pipeline_runner: Optional[PipelineRunnerFunction] = None async def connect(self, pipeline_runner: PipelineRunnerFunction): @@ -129,7 +181,7 @@ async def on_request_async( ) -> Context: """ Method that should be invoked on user input. - This method has the same signature as :py:class:`~dff.pipeline.types.PipelineRunnerFunction`. + This method has the same signature as :py:class:`~chatsky.pipeline.types.PipelineRunnerFunction`. """ return await self._pipeline_runner(request, ctx_id, update_ctx_misc) @@ -138,7 +190,7 @@ def on_request( ) -> Context: """ Method that should be invoked on user input. - This method has the same signature as :py:class:`~dff.pipeline.types.PipelineRunnerFunction`. + This method has the same signature as :py:class:`~chatsky.pipeline.types.PipelineRunnerFunction`. """ return asyncio.run(self.on_request_async(request, ctx_id, update_ctx_misc)) @@ -174,7 +226,7 @@ async def connect(self, pipeline_runner: PipelineRunnerFunction, **kwargs): The CLIProvider generates new dialog id used to user identification on each `connect` call. :param pipeline_runner: A function that should process user request and return context; - usually it's a :py:meth:`~dff.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. + usually it's a :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. :param \\**kwargs: argument, added for compatibility with super class, it shouldn't be used normally. """ self._ctx_id = uuid.uuid4() diff --git a/dff/messengers/common/types.py b/chatsky/messengers/common/types.py similarity index 93% rename from dff/messengers/common/types.py rename to chatsky/messengers/common/types.py index e43769551..35696805e 100644 --- a/dff/messengers/common/types.py +++ b/chatsky/messengers/common/types.py @@ -1,8 +1,9 @@ """ Types ----- -The Types module contains special types that are used throughout the `DFF Messengers`. +The Types module contains special types that are used throughout `Chatsky Messengers`. 
""" + from typing import Callable from typing_extensions import TypeAlias diff --git a/chatsky/messengers/console.py b/chatsky/messengers/console.py new file mode 100644 index 000000000..a0fe8c690 --- /dev/null +++ b/chatsky/messengers/console.py @@ -0,0 +1,49 @@ +from typing import Any, Hashable, List, Optional, TextIO, Tuple +from uuid import uuid4 +from chatsky.messengers.common.interface import PollingMessengerInterface +from chatsky.pipeline.types import PipelineRunnerFunction +from chatsky.script.core.context import Context +from chatsky.script.core.message import Message + + +class CLIMessengerInterface(PollingMessengerInterface): + """ + Command line message interface is the default message interface, communicating with user via `STDIN/STDOUT`. + This message interface can maintain dialog with one user at a time only. + """ + + supported_request_attachment_types = set() + supported_response_attachment_types = set() + + def __init__( + self, + intro: Optional[str] = None, + prompt_request: str = "request: ", + prompt_response: str = "response: ", + out_descriptor: Optional[TextIO] = None, + ): + super().__init__() + self._ctx_id: Optional[Hashable] = None + self._intro: Optional[str] = intro + self._prompt_request: str = prompt_request + self._prompt_response: str = prompt_response + self._descriptor: Optional[TextIO] = out_descriptor + + def _request(self) -> List[Tuple[Message, Any]]: + return [(Message(input(self._prompt_request)), self._ctx_id)] + + def _respond(self, responses: List[Context]): + print(f"{self._prompt_response}{responses[0].last_response.text}", file=self._descriptor) + + async def connect(self, pipeline_runner: PipelineRunnerFunction, **kwargs): + """ + The CLIProvider generates new dialog id used to user identification on each `connect` call. + + :param pipeline_runner: A function that should process user request and return context; + usually it's a :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function. + :param \\**kwargs: argument, added for compatibility with super class, it shouldn't be used normally. + """ + self._ctx_id = uuid4() + if self._intro is not None: + print(self._intro) + await super().connect(pipeline_runner, **kwargs) diff --git a/chatsky/messengers/telegram/__init__.py b/chatsky/messengers/telegram/__init__.py new file mode 100644 index 000000000..771e96332 --- /dev/null +++ b/chatsky/messengers/telegram/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from .abstract import telegram_available +from .interface import LongpollingInterface, WebhookInterface diff --git a/chatsky/messengers/telegram/abstract.py b/chatsky/messengers/telegram/abstract.py new file mode 100644 index 000000000..30742579d --- /dev/null +++ b/chatsky/messengers/telegram/abstract.py @@ -0,0 +1,660 @@ +""" +Telegram Base +------------- +This module implements a base interface for interactions with the +Telegram API. 
+""" + +from pathlib import Path +from typing import Any, Callable, Optional + +from chatsky.utils.devel.extra_field_helpers import grab_extra_fields + +from chatsky.messengers.common import MessengerInterfaceWithAttachments +from chatsky.pipeline.types import PipelineRunnerFunction +from chatsky.script.core.message import ( + Animation, + Audio, + CallbackQuery, + Contact, + Document, + Image, + Invoice, + Location, + Message, + Poll, + PollOption, + Sticker, + Video, + VideoMessage, + VoiceMessage, + MediaGroup, +) + +try: + from telegram import ( + InputMediaAnimation, + InputMediaAudio, + InputMediaDocument, + InputMediaPhoto, + InputMediaVideo, + Update, + Message as TelegramMessage, + ) + from telegram.ext import Application, ExtBot, MessageHandler, CallbackQueryHandler + from telegram.ext.filters import ALL + + telegram_available = True +except ImportError: + ExtBot = Any + Update = Any + TelegramMessage = Any + + telegram_available = False + + +class _AbstractTelegramInterface(MessengerInterfaceWithAttachments): + """ + Messenger interface mixin for Telegram API usage. + """ + + supported_request_attachment_types = { + Location, + Contact, + Poll, + Sticker, + Audio, + Video, + Animation, + Image, + Document, + VoiceMessage, + VideoMessage, + Invoice, + } + supported_response_attachment_types = { + Location, + Contact, + Poll, + Sticker, + Audio, + Video, + Animation, + Image, + Document, + VoiceMessage, + VideoMessage, + MediaGroup, + } + + def __init__(self, token: str, attachments_directory: Optional[Path] = None) -> None: + super().__init__(attachments_directory) + if not telegram_available: + raise ImportError("`python-telegram-bot` package is missing.\nTry to run `pip install chatsky[telegram]`.") + + self.application = Application.builder().token(token).build() + self.application.add_handler(MessageHandler(ALL, self.on_message)) + self.application.add_handler(CallbackQueryHandler(self.on_callback)) + + async def get_attachment_bytes(self, source: str) -> bytes: + file = await self.application.bot.get_file(source) + data = await file.download_as_bytearray() + return bytes(data) + + def extract_message_from_telegram(self, update: TelegramMessage) -> Message: + """ + Convert Telegram update to Chatsky message. + Extract text and supported attachments. + + :param update: Telegram update object. + :return: Chatsky message object. 
+ """ + + message = Message() + message.attachments = list() + + message.text = update.text or update.caption + if update.location is not None: + message.attachments += [Location(latitude=update.location.latitude, longitude=update.location.longitude)] + if update.contact is not None: + message.attachments += [ + Contact( + phone_number=update.contact.phone_number, + first_name=update.contact.first_name, + last_name=update.contact.last_name, + user_id=update.contact.user_id, + ) + ] + if update.invoice is not None: + message.attachments += [ + Invoice( + title=update.invoice.title, + description=update.invoice.description, + currency=update.invoice.currency, + amount=update.invoice.total_amount, + ) + ] + if update.poll is not None: + message.attachments += [ + Poll( + question=update.poll.question, + options=[PollOption(text=option.text, votes=option.voter_count) for option in update.poll.options], + is_closed=update.poll.is_closed, + is_anonymous=update.poll.is_anonymous, + type=update.poll.type, + multiple_answers=update.poll.allows_multiple_answers, + correct_option_id=update.poll.correct_option_id, + explanation=update.poll.explanation, + open_period=update.poll.open_period, + ) + ] + if update.sticker is not None: + message.attachments += [ + Sticker( + id=update.sticker.file_id, + is_animated=update.sticker.is_animated, + is_video=update.sticker.is_video, + type=update.sticker.type, + ) + ] + if update.audio is not None: + thumbnail = ( + Image(id=update.audio.thumbnail.file_id, file_unique_id=update.audio.thumbnail.file_unique_id) + if update.audio.thumbnail is not None + else None + ) + message.attachments += [ + Audio( + id=update.audio.file_id, + file_unique_id=update.audio.file_unique_id, + duration=update.audio.duration, + performer=update.audio.performer, + file_name=update.audio.file_name, + mime_type=update.audio.mime_type, + thumbnail=thumbnail, + ) + ] + if update.video is not None: + thumbnail = ( + Image(id=update.video.thumbnail.file_id, file_unique_id=update.video.thumbnail.file_unique_id) + if update.video.thumbnail is not None + else None + ) + message.attachments += [ + Video( + id=update.video.file_id, + file_unique_id=update.video.file_unique_id, + width=update.video.width, + height=update.video.height, + duration=update.video.duration, + file_name=update.video.file_name, + mime_type=update.video.mime_type, + thumbnail=thumbnail, + ) + ] + if update.animation is not None: + thumbnail = ( + Image(id=update.animation.thumbnail.file_id, file_unique_id=update.animation.thumbnail.file_unique_id) + if update.animation.thumbnail is not None + else None + ) + message.attachments += [ + Animation( + id=update.animation.file_id, + file_unique_id=update.animation.file_unique_id, + width=update.animation.width, + height=update.animation.height, + duration=update.animation.duration, + file_name=update.animation.file_name, + mime_type=update.animation.mime_type, + thumbnail=thumbnail, + ) + ] + if len(update.photo) > 0: + message.attachments += [ + Image( + id=picture.file_id, + file_unique_id=picture.file_unique_id, + width=picture.width, + height=picture.height, + ) + for picture in update.photo + ] + if update.document is not None: + thumbnail = ( + Image(id=update.document.thumbnail.file_id, file_unique_id=update.document.thumbnail.file_unique_id) + if update.document.thumbnail is not None + else None + ) + message.attachments += [ + Document( + id=update.document.file_id, + file_unique_id=update.document.file_unique_id, + file_name=update.document.file_name, + 
mime_type=update.document.mime_type, + thumbnail=thumbnail, + ) + ] + if update.voice is not None: + message.attachments += [ + VoiceMessage( + id=update.voice.file_id, + file_unique_id=update.voice.file_unique_id, + mime_type=update.voice.mime_type, + ) + ] + if update.video_note is not None: + thumbnail = ( + Image(id=update.video_note.thumbnail.file_id, file_unique_id=update.video_note.thumbnail.file_unique_id) + if update.video_note.thumbnail is not None + else None + ) + message.attachments += [ + VideoMessage( + id=update.video_note.file_id, + file_unique_id=update.video_note.file_unique_id, + thumbnail=thumbnail, + ) + ] + + return message + + async def cast_message_to_telegram_and_send(self, bot: ExtBot, chat_id: int, message: Message) -> None: + """ + Send Chatsky message to Telegram. + Sometimes, if several attachments included into message can not be sent as one update, + several Telegram updates will be produced. + Sometimes, if no text and none of the supported attachments are included, + nothing will happen. + + :param bot: Telegram bot, that is used for connection to Telegram API. + :param chat_id: Telegram dialog ID that the message will be sent to. + :param message: Chatsky message that will be processed into Telegram updates. + """ + + if message.text is not None: + await bot.send_message( + chat_id, + message.text, + **grab_extra_fields( + message, + [ + "parse_mode", + "disable_notification", + "protect_content", + "reply_markup", + "message_effect_id", + "reply_to_message_id", + "disable_web_page_preview", + ], + ), + ) + if message.attachments is not None: + for attachment in message.attachments: + if isinstance(attachment, Location): + await bot.send_location( + chat_id, + attachment.latitude, + attachment.longitude, + **grab_extra_fields( + attachment, + [ + "horizontal_accuracy", + "disable_notification", + "protect_content", + "reply_markup", + "message_effect_id", + "reply_to_message_id", + ], + ), + ) + elif isinstance(attachment, Contact): + await bot.send_contact( + chat_id, + attachment.phone_number, + attachment.first_name, + attachment.last_name, + **grab_extra_fields( + attachment, + [ + "vcard", + "disable_notification", + "protect_content", + "reply_markup", + "message_effect_id", + "reply_to_message_id", + ], + ), + ) + elif isinstance(attachment, Poll): + await bot.send_poll( + chat_id, + attachment.question, + [option.text for option in attachment.options], + **grab_extra_fields( + attachment, + [ + "is_anonymous", + "type", + "allows_multiple_answers", + "correct_option_id", + "explanation", + "explanation_parse_mode", + "open_period", + "is_closed", + "disable_notification", + "protect_content", + "reply_markup", + "question_parse_mode", + "message_effect_id", + "reply_to_message_id", + ], + ), + ) + elif isinstance(attachment, Audio): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_audio( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "performer", + "title", + "disable_notification", + "protect_content", + "reply_markup", + "thumbnail", + "message_effect_id", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, Video): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_video( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "supports_streaming", + "disable_notification", + "protect_content", + 
"reply_markup", + "has_spoiler", + "thumbnail", + "message_effect_id", + "show_caption_above_media", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, Animation): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_animation( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "disable_notification", + "protect_content", + "reply_markup", + "has_spoiler", + "thumbnail", + "message_effect_id", + "show_caption_above_media", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, Image): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_photo( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "disable_notification", + "protect_content", + "reply_markup", + "has_spoiler", + "message_effect_id", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, Sticker): + sticker = await attachment.get_bytes(self) if attachment.id is None else attachment.id + if sticker is not None: + await bot.send_sticker( + chat_id, + sticker, + **grab_extra_fields( + attachment, + [ + "emoji", + "disable_notification", + "protect_content", + "reply_markup", + "message_effect_id", + "reply_to_message_id", + ], + ), + ) + elif isinstance(attachment, Document): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_document( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "disable_notification", + "protect_content", + "reply_markup", + "thumbnail", + "message_effect_id", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, VoiceMessage): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_voice( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "caption", + "parse_mode", + "disable_notification", + "protect_content", + "reply_markup", + "message_effect_id", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, VideoMessage): + attachment_bytes = await attachment.get_bytes(self) + if attachment_bytes is not None: + await bot.send_video_note( + chat_id, + attachment_bytes, + **grab_extra_fields( + attachment, + [ + "disable_notification", + "protect_content", + "reply_markup", + "thumbnail", + "message_effect_id", + "reply_to_message_id", + "filename", + ], + ), + ) + elif isinstance(attachment, MediaGroup): + files = list() + for media in attachment.group: + if isinstance(media, Image): + media_bytes = await media.get_bytes(self) + files += [ + InputMediaPhoto( + media_bytes, + **grab_extra_fields( + media, + [ + "filename", + "caption", + "parse_mode", + "has_spoiler", + "show_caption_above_media", + ], + ), + ), + ] + elif isinstance(media, Video): + media_bytes = await media.get_bytes(self) + files += [ + InputMediaVideo( + media_bytes, + **grab_extra_fields( + media, + [ + "filename", + "caption", + "parse_mode", + "supports_streaming", + "has_spoiler", + "thumbnail", + "show_caption_above_media", + ], + ), + ), + ] + elif isinstance(media, Animation): + media_bytes = await media.get_bytes(self) + files += [ + InputMediaAnimation( + media_bytes, + **grab_extra_fields( + media, + [ + "filename", + "caption", + "parse_mode", + "has_spoiler", + "thumbnail", + "show_caption_above_media", 
+ ], + ), + ), + ] + elif isinstance(media, Audio): + media_bytes = await media.get_bytes(self) + files += [ + InputMediaAudio( + media_bytes, + **grab_extra_fields( + media, + ["filename", "caption", "parse_mode", "performer", "title", "thumbnail"], + ), + ), + ] + elif isinstance(media, Document): + media_bytes = await media.get_bytes(self) + files += [ + InputMediaDocument( + media_bytes, + **grab_extra_fields(media, ["filename", "caption", "parse_mode", "thumbnail"]), + ), + ] + else: + raise ValueError(f"Attachment {type(media).__name__} can not be sent in a media group!") + await bot.send_media_group( + chat_id, + files, + **grab_extra_fields( + attachment, + [ + "caption", + "disable_notification", + "protect_content", + "message_effect_id", + "reply_to_message_id", + "parse_mode", + ], + ), + ) + else: + raise ValueError(f"Attachment {type(attachment).__name__} is not supported!") + + async def _on_event(self, update: Update, _: Any, create_message: Callable[[Update], Message]) -> None: + """ + Process Telegram update, run pipeline and send response to Telegram. + + :param update: Telegram update that will be processed. + :param create_message: function that converts Telegram update to Chatsky message. + """ + + data_available = update.message is not None or update.callback_query is not None + if update.effective_chat is not None and data_available: + message = create_message(update) + message.original_message = update + resp = await self._pipeline_runner(message, update.effective_chat.id) + if resp.last_response is not None: + await self.cast_message_to_telegram_and_send( + self.application.bot, update.effective_chat.id, resp.last_response + ) + + async def on_message(self, update: Update, _: Any) -> None: + """ + Process normal Telegram update, extracting Chatsky message from it + using :py:meth:`~._AbstractTelegramInterface.extract_message_from_telegram`. + + :param update: Telegram update that will be processed. + """ + + await self._on_event(update, _, lambda s: self.extract_message_from_telegram(s.message)) + + async def on_callback(self, update: Update, _: Any) -> None: + """ + Process Telegram callback update, creating empty Chatsky message + with only one callback query attachment from `callback_query.data` field. + + :param update: Telegram update that will be processed. + """ + + await self._on_event( + update, _, lambda s: Message(attachments=[CallbackQuery(query_string=s.callback_query.data)]) + ) + + async def connect(self, pipeline_runner: PipelineRunnerFunction, *args, **kwargs): + self._pipeline_runner = pipeline_runner diff --git a/chatsky/messengers/telegram/interface.py b/chatsky/messengers/telegram/interface.py new file mode 100644 index 000000000..5015fbf2f --- /dev/null +++ b/chatsky/messengers/telegram/interface.py @@ -0,0 +1,65 @@ +""" +Telegram Interfaces +------------------- +This module provides concrete implementations of the +:py:class:`~._AbstractTelegramInterface`. +""" + +from pathlib import Path +from typing import Any, Optional + +from chatsky.pipeline.types import PipelineRunnerFunction + +from .abstract import _AbstractTelegramInterface + +try: + from telegram import Update +except ImportError: + Update = Any + + +class LongpollingInterface(_AbstractTelegramInterface): + """ + Telegram messenger interface, that requests Telegram API in a loop. + + :param token: The Telegram bot token. + :param attachments_directory: The directory for storing attachments. + :param interval: A time interval between polls (in seconds). 
+    :param timeout: Timeout in seconds for long polling.
+    """
+
+    def __init__(
+        self, token: str, attachments_directory: Optional[Path] = None, interval: int = 2, timeout: int = 20
+    ) -> None:
+        super().__init__(token, attachments_directory)
+        self.interval = interval
+        self.timeout = timeout
+
+    async def connect(self, pipeline_runner: PipelineRunnerFunction, *args, **kwargs):
+        await super().connect(pipeline_runner, *args, **kwargs)
+        self.application.run_polling(
+            poll_interval=self.interval, timeout=self.timeout, allowed_updates=Update.ALL_TYPES
+        )
+
+
+class WebhookInterface(_AbstractTelegramInterface):
+    """
+    Telegram messenger interface that brings up a special webserver
+    and registers a webhook to listen for Telegram updates.
+
+    :param token: The Telegram bot token.
+    :param attachments_directory: The directory for storing attachments.
+    :param host: Local host name (or IP address).
+    :param port: Local port for running Telegram webhook.
+    """
+
+    def __init__(
+        self, token: str, attachments_directory: Optional[Path] = None, host: str = "localhost", port: int = 844
+    ):
+        super().__init__(token, attachments_directory)
+        self.listen = host
+        self.port = port
+
+    async def connect(self, pipeline_runner: PipelineRunnerFunction, *args, **kwargs):
+        await super().connect(pipeline_runner, *args, **kwargs)
+        self.application.run_webhook(listen=self.listen, port=self.port, allowed_updates=Update.ALL_TYPES)
diff --git a/chatsky/msg.py b/chatsky/msg.py
new file mode 100644
index 000000000..4fbb3ba5f
--- /dev/null
+++ b/chatsky/msg.py
@@ -0,0 +1,14 @@
+from chatsky.script.core.message import (
+    Location,
+    Attachment,
+    Audio,
+    Video,
+    Image,
+    Document,
+    # Attachments,
+    # Link,
+    # Button,
+    # Keyboard,
+    Message,
+    # MultiMessage
+)
diff --git a/dff/pipeline/__init__.py b/chatsky/pipeline/__init__.py
similarity index 92%
rename from dff/pipeline/__init__.py
rename to chatsky/pipeline/__init__.py
index 1b345f647..4fbe2286f 100644
--- a/dff/pipeline/__init__.py
+++ b/chatsky/pipeline/__init__.py
@@ -13,7 +13,6 @@
     ComponentExecutionState,
     GlobalExtraHandlerType,
     ExtraHandlerType,
-    PIPELINE_STATE_KEY,
     StartConditionCheckerFunction,
     StartConditionCheckerAggregationFunction,
     ExtraHandlerConditionFunction,
@@ -32,5 +31,3 @@
 from .service.extra import BeforeHandler, AfterHandler
 from .service.group import ServiceGroup
 from .service.service import Service, to_service
-
-ExtraHandlerRuntimeInfo.model_rebuild()
diff --git a/dff/pipeline/conditions.py b/chatsky/pipeline/conditions.py
similarity index 93%
rename from dff/pipeline/conditions.py
rename to chatsky/pipeline/conditions.py
index 6b10d7099..01a5acb45 100644
--- a/dff/pipeline/conditions.py
+++ b/chatsky/pipeline/conditions.py
@@ -5,20 +5,20 @@
 are attached should be executed or not.
 The standard set of them allows user to setup
 dependencies between pipeline components.
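A minimal usage sketch for this module, assuming `service_successful_condition` remains re-exported from the package root as it was in `dff.pipeline`; the component path string is a hypothetical example:

    from chatsky.pipeline import Service, service_successful_condition

    # This service starts only if the component at the given path
    # finished its part of the turn successfully.
    guarded = Service(
        handler=lambda ctx, _pipeline: None,
        start_condition=service_successful_condition("pipeline.pre.prepare"),
    )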
""" + from __future__ import annotations from typing import Optional, TYPE_CHECKING -from dff.script import Context +from chatsky.script import Context from .types import ( - PIPELINE_STATE_KEY, StartConditionCheckerFunction, ComponentExecutionState, StartConditionCheckerAggregationFunction, ) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline def always_start_condition(_: Context, __: Pipeline) -> bool: @@ -40,7 +40,7 @@ def service_successful_condition(path: Optional[str] = None) -> StartConditionCh """ def check_service_state(ctx: Context, _: Pipeline): - state = ctx.framework_states[PIPELINE_STATE_KEY].get(path, ComponentExecutionState.NOT_RUN) + state = ctx.framework_data.service_states.get(path, ComponentExecutionState.NOT_RUN) return ComponentExecutionState[state] == ComponentExecutionState.FINISHED return check_service_state diff --git a/dff/pipeline/pipeline/__init__.py b/chatsky/pipeline/pipeline/__init__.py similarity index 100% rename from dff/pipeline/pipeline/__init__.py rename to chatsky/pipeline/pipeline/__init__.py diff --git a/dff/pipeline/pipeline/actor.py b/chatsky/pipeline/pipeline/actor.py similarity index 62% rename from dff/pipeline/pipeline/actor.py rename to chatsky/pipeline/pipeline/actor.py index fdddf542a..6f0256885 100644 --- a/dff/pipeline/pipeline/actor.py +++ b/chatsky/pipeline/pipeline/actor.py @@ -20,65 +20,50 @@ Below you can see a diagram of user request processing with Actor. Both `request` and `response` are saved to :py:class:`.Context`. -.. figure:: /_static/drawio/dfe/user_actor.png +.. figure:: /_static/drawio/core/user_actor.png """ + from __future__ import annotations import logging import asyncio from typing import Union, Callable, Optional, Dict, List, TYPE_CHECKING import copy -from dff.utils.turn_caching import cache_clear -from dff.script.core.types import ActorStage, NodeLabel2Type, NodeLabel3Type, LabelType -from dff.script.core.message import Message +from chatsky.utils.turn_caching import cache_clear +from chatsky.script.core.types import ActorStage, NodeLabel2Type, NodeLabel3Type, LabelType +from chatsky.script.core.message import Message -from dff.script.core.context import Context -from dff.script.core.script import Script, Node -from dff.script.core.normalization import normalize_label, normalize_response -from dff.script.core.keywords import GLOBAL, LOCAL -from dff.pipeline.service.utils import wrap_sync_function_in_async +from chatsky.script.core.context import Context +from chatsky.script.core.script import Script, Node +from chatsky.script.core.normalization import normalize_label, normalize_response +from chatsky.script.core.keywords import GLOBAL, LOCAL +from chatsky.utils.devel.async_helpers import wrap_sync_function_in_async logger = logging.getLogger(__name__) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline - - -def error_handler(error_msgs: list, msg: str, exception: Optional[Exception] = None, logging_flag: bool = True): - """ - This function handles errors during :py:class:`~dff.script.Script` validation. - - :param error_msgs: List that contains error messages. :py:func:`~dff.script.error_handler` - adds every next error message to that list. - :param msg: Error message which is to be added into `error_msgs`. - :param exception: Invoked exception. If it has been set, it is used to obtain logging traceback. - Defaults to `None`. - :param logging_flag: The flag which defines whether logging is necessary. 
Defaults to `True`. - """ - error_msgs.append(msg) - if logging_flag: - logger.error(msg, exc_info=exception) + from chatsky.pipeline.pipeline.pipeline import Pipeline class Actor: """ - The class which is used to process :py:class:`~dff.script.Context` - according to the :py:class:`~dff.script.Script`. + The class which is used to process :py:class:`~chatsky.script.Context` + according to the :py:class:`~chatsky.script.Script`. :param script: The dialog scenario: a graph described by the :py:class:`.Keywords`. While the graph is being initialized, it is validated and then used for the dialog. - :param start_label: The start node of :py:class:`~dff.script.Script`. The execution begins with it. - :param fallback_label: The label of :py:class:`~dff.script.Script`. + :param start_label: The start node of :py:class:`~chatsky.script.Script`. The execution begins with it. + :param fallback_label: The label of :py:class:`~chatsky.script.Script`. Dialog comes into that label if all other transitions failed, or there was an error while executing the scenario. Defaults to `None`. - :param label_priority: Default priority value for all :py:const:`labels ` + :param label_priority: Default priority value for all :py:const:`labels ` where there is no priority. Defaults to `1.0`. :param condition_handler: Handler that processes a call of condition functions. Defaults to `None`. :param handlers: This variable is responsible for the usage of external handlers on - the certain stages of work of :py:class:`~dff.script.Actor`. + the certain stages of work of :py:class:`~chatsky.script.Actor`. - - key (:py:class:`~dff.script.ActorStage`) - Stage in which the handler is called. + - key (:py:class:`~chatsky.script.ActorStage`) - Stage in which the handler is called. - value (List[Callable]) - The list of called handlers for each stage. Defaults to an empty `dict`. 
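In practice the `handlers` argument looks like the sketch below; the handler signature is assumed to mirror the other actor callbacks, receiving the context and the pipeline:

    from chatsky.script import ActorStage, Context

    def on_finish_turn(ctx: Context, _pipeline):
        # Runs once per turn, after the response has been added to the context.
        print(f"turn finished, last label: {ctx.last_label}")

    handlers = {ActorStage.FINISH_TURN: [on_finish_turn]}
    # Pass as `handlers=handlers` to Actor or Pipeline.from_script(...).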
""" @@ -91,11 +76,9 @@ def __init__( condition_handler: Optional[Callable] = None, handlers: Optional[Dict[ActorStage, List[Callable]]] = None, ): - # script validation self.script = script if isinstance(script, Script) else Script(script=script) self.label_priority = label_priority - # node labels validation self.start_label = normalize_label(start_label) if self.script.get(self.start_label[0], {}).get(self.start_label[1]) is None: raise ValueError(f"Unknown start_label={self.start_label}") @@ -114,8 +97,6 @@ def __init__( self._clean_turn_cache = True async def __call__(self, pipeline: Pipeline, ctx: Context): - # context init - self._context_init(ctx) await self._run_handlers(ctx, pipeline, ActorStage.CONTEXT_INIT) # get previous node @@ -138,7 +119,7 @@ async def __call__(self, pipeline: Pipeline, ctx: Context): self._get_next_node(ctx) await self._run_handlers(ctx, pipeline, ActorStage.GET_NEXT_NODE) - ctx.add_label(ctx.framework_states["actor"]["next_label"][:2]) + ctx.add_label(ctx.framework_data.actor_data["next_label"][:2]) # rewrite next node self._rewrite_next_node(ctx) @@ -149,89 +130,85 @@ async def __call__(self, pipeline: Pipeline, ctx: Context): await self._run_handlers(ctx, pipeline, ActorStage.RUN_PRE_RESPONSE_PROCESSING) # create response - ctx.framework_states["actor"]["response"] = await self.run_response( - ctx.framework_states["actor"]["pre_response_processed_node"].response, ctx, pipeline + ctx.framework_data.actor_data["response"] = await self.run_response( + ctx.framework_data.actor_data["pre_response_processed_node"].response, ctx, pipeline ) await self._run_handlers(ctx, pipeline, ActorStage.CREATE_RESPONSE) - ctx.add_response(ctx.framework_states["actor"]["response"]) + ctx.add_response(ctx.framework_data.actor_data["response"]) await self._run_handlers(ctx, pipeline, ActorStage.FINISH_TURN) if self._clean_turn_cache: cache_clear() - del ctx.framework_states["actor"] - - @staticmethod - def _context_init(ctx: Optional[Union[Context, dict, str]] = None): - ctx.framework_states["actor"] = {} + ctx.framework_data.actor_data.clear() def _get_previous_node(self, ctx: Context): - ctx.framework_states["actor"]["previous_label"] = ( + ctx.framework_data.actor_data["previous_label"] = ( normalize_label(ctx.last_label) if ctx.last_label else self.start_label ) - ctx.framework_states["actor"]["previous_node"] = self.script.get( - ctx.framework_states["actor"]["previous_label"][0], {} - ).get(ctx.framework_states["actor"]["previous_label"][1], Node()) + ctx.framework_data.actor_data["previous_node"] = self.script.get( + ctx.framework_data.actor_data["previous_label"][0], {} + ).get(ctx.framework_data.actor_data["previous_label"][1], Node()) async def _get_true_labels(self, ctx: Context, pipeline: Pipeline): # GLOBAL - ctx.framework_states["actor"]["global_transitions"] = ( + ctx.framework_data.actor_data["global_transitions"] = ( self.script.get(GLOBAL, {}).get(GLOBAL, Node()).transitions ) - ctx.framework_states["actor"]["global_true_label"] = await self._get_true_label( - ctx.framework_states["actor"]["global_transitions"], ctx, pipeline, GLOBAL, "global" + ctx.framework_data.actor_data["global_true_label"] = await self._get_true_label( + ctx.framework_data.actor_data["global_transitions"], ctx, pipeline, GLOBAL, "global" ) # LOCAL - ctx.framework_states["actor"]["local_transitions"] = ( - self.script.get(ctx.framework_states["actor"]["previous_label"][0], {}).get(LOCAL, Node()).transitions + ctx.framework_data.actor_data["local_transitions"] = ( + 
self.script.get(ctx.framework_data.actor_data["previous_label"][0], {}).get(LOCAL, Node()).transitions ) - ctx.framework_states["actor"]["local_true_label"] = await self._get_true_label( - ctx.framework_states["actor"]["local_transitions"], + ctx.framework_data.actor_data["local_true_label"] = await self._get_true_label( + ctx.framework_data.actor_data["local_transitions"], ctx, pipeline, - ctx.framework_states["actor"]["previous_label"][0], + ctx.framework_data.actor_data["previous_label"][0], "local", ) # NODE - ctx.framework_states["actor"]["node_transitions"] = ctx.framework_states["actor"][ + ctx.framework_data.actor_data["node_transitions"] = ctx.framework_data.actor_data[ "pre_transitions_processed_node" ].transitions - ctx.framework_states["actor"]["node_true_label"] = await self._get_true_label( - ctx.framework_states["actor"]["node_transitions"], + ctx.framework_data.actor_data["node_true_label"] = await self._get_true_label( + ctx.framework_data.actor_data["node_transitions"], ctx, pipeline, - ctx.framework_states["actor"]["previous_label"][0], + ctx.framework_data.actor_data["previous_label"][0], "node", ) def _get_next_node(self, ctx: Context): # choose next label - ctx.framework_states["actor"]["next_label"] = self._choose_label( - ctx.framework_states["actor"]["node_true_label"], ctx.framework_states["actor"]["local_true_label"] + ctx.framework_data.actor_data["next_label"] = self._choose_label( + ctx.framework_data.actor_data["node_true_label"], ctx.framework_data.actor_data["local_true_label"] ) - ctx.framework_states["actor"]["next_label"] = self._choose_label( - ctx.framework_states["actor"]["next_label"], ctx.framework_states["actor"]["global_true_label"] + ctx.framework_data.actor_data["next_label"] = self._choose_label( + ctx.framework_data.actor_data["next_label"], ctx.framework_data.actor_data["global_true_label"] ) # get next node - ctx.framework_states["actor"]["next_node"] = self.script.get( - ctx.framework_states["actor"]["next_label"][0], {} - ).get(ctx.framework_states["actor"]["next_label"][1]) + ctx.framework_data.actor_data["next_node"] = self.script.get( + ctx.framework_data.actor_data["next_label"][0], {} + ).get(ctx.framework_data.actor_data["next_label"][1]) def _rewrite_previous_node(self, ctx: Context): - node = ctx.framework_states["actor"]["previous_node"] - flow_label = ctx.framework_states["actor"]["previous_label"][0] - ctx.framework_states["actor"]["previous_node"] = self._overwrite_node( + node = ctx.framework_data.actor_data["previous_node"] + flow_label = ctx.framework_data.actor_data["previous_label"][0] + ctx.framework_data.actor_data["previous_node"] = self._overwrite_node( node, flow_label, only_current_node_transitions=True, ) def _rewrite_next_node(self, ctx: Context): - node = ctx.framework_states["actor"]["next_node"] - flow_label = ctx.framework_states["actor"]["next_label"][0] - ctx.framework_states["actor"]["next_node"] = self._overwrite_node(node, flow_label) + node = ctx.framework_data.actor_data["next_node"] + flow_label = ctx.framework_data.actor_data["next_label"][0] + ctx.framework_data.actor_data["next_node"] = self._overwrite_node(node, flow_label) def _overwrite_node( self, @@ -307,18 +284,18 @@ async def _run_pre_transitions_processing(self, ctx: Context, pipeline: Pipeline The execution order depends on the value of the :py:class:`.Pipeline`'s `parallelize_processing` flag. 
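The `_get_true_labels` and `_choose_label` chain above gives node-level transitions precedence over LOCAL ones, and LOCAL over GLOBAL, at equal priority. A toy script exercising that order, sketched under the assumption that the standard condition helpers live at `chatsky.script.conditions` after the rename:

    from chatsky.script import GLOBAL, LOCAL, TRANSITIONS, RESPONSE, Message
    import chatsky.script.conditions as cnd

    script = {
        GLOBAL: {TRANSITIONS: {("flow", "help"): cnd.exact_match(Message("help"))}},
        "flow": {
            LOCAL: {TRANSITIONS: {("flow", "fallback"): cnd.true()}},
            "start": {
                RESPONSE: Message("hi"),
                # At equal priority this node transition wins over LOCAL and GLOBAL.
                TRANSITIONS: {("flow", "next"): cnd.exact_match(Message("help"))},
            },
            "next": {RESPONSE: Message("node transition won")},
            "fallback": {RESPONSE: Message("local transition won")},
        },
    }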
""" - ctx.framework_states["actor"]["processed_node"] = copy.deepcopy(ctx.framework_states["actor"]["previous_node"]) - pre_transitions_processing = ctx.framework_states["actor"]["previous_node"].pre_transitions_processing + ctx.framework_data.actor_data["processed_node"] = copy.deepcopy(ctx.framework_data.actor_data["previous_node"]) + pre_transitions_processing = ctx.framework_data.actor_data["previous_node"].pre_transitions_processing if pipeline.parallelize_processing: await self._run_processing_parallel(pre_transitions_processing, ctx, pipeline) else: await self._run_processing_sequential(pre_transitions_processing, ctx, pipeline) - ctx.framework_states["actor"]["pre_transitions_processed_node"] = ctx.framework_states["actor"][ + ctx.framework_data.actor_data["pre_transitions_processed_node"] = ctx.framework_data.actor_data[ "processed_node" ] - del ctx.framework_states["actor"]["processed_node"] + del ctx.framework_data.actor_data["processed_node"] async def _run_pre_response_processing(self, ctx: Context, pipeline: Pipeline) -> None: """ @@ -329,16 +306,16 @@ async def _run_pre_response_processing(self, ctx: Context, pipeline: Pipeline) - The execution order depends on the value of the :py:class:`.Pipeline`'s `parallelize_processing` flag. """ - ctx.framework_states["actor"]["processed_node"] = copy.deepcopy(ctx.framework_states["actor"]["next_node"]) - pre_response_processing = ctx.framework_states["actor"]["next_node"].pre_response_processing + ctx.framework_data.actor_data["processed_node"] = copy.deepcopy(ctx.framework_data.actor_data["next_node"]) + pre_response_processing = ctx.framework_data.actor_data["next_node"].pre_response_processing if pipeline.parallelize_processing: await self._run_processing_parallel(pre_response_processing, ctx, pipeline) else: await self._run_processing_sequential(pre_response_processing, ctx, pipeline) - ctx.framework_states["actor"]["pre_response_processed_node"] = ctx.framework_states["actor"]["processed_node"] - del ctx.framework_states["actor"]["processed_node"] + ctx.framework_data.actor_data["pre_response_processed_node"] = ctx.framework_data.actor_data["processed_node"] + del ctx.framework_data.actor_data["processed_node"] async def _get_true_label( self, @@ -388,70 +365,6 @@ def _choose_label( chosen_label = self.fallback_label return chosen_label - def validate_script(self, pipeline: Pipeline, verbose: bool = True): - # TODO: script has to not contain priority == -inf, because it uses for miss values - flow_labels = [] - node_labels = [] - labels = [] - conditions = [] - for flow_name, flow in self.script.items(): - for node_name, node in flow.items(): - flow_labels += [flow_name] * len(node.transitions) - node_labels += [node_name] * len(node.transitions) - labels += list(node.transitions.keys()) - conditions += list(node.transitions.values()) - - error_msgs = [] - for flow_label, node_label, label, condition in zip(flow_labels, node_labels, labels, conditions): - ctx = Context() - ctx.validation = True - ctx.add_request(Message("text")) - - label = label(ctx, pipeline) if callable(label) else normalize_label(label, flow_label) - - # validate labeling - try: - node = self.script[label[0]][label[1]] - except Exception as exc: - msg = ( - f"Could not find node with label={label}, " - f"error was found in (flow_label, node_label)={(flow_label, node_label)}" - ) - error_handler(error_msgs, msg, exc, verbose) - break - - # validate responsing - response_func = normalize_response(node.response) - try: - response_result = 
asyncio.run(wrap_sync_function_in_async(response_func, ctx, pipeline)) - if not isinstance(response_result, Message): - msg = ( - "Expected type of response_result is `Message`.\n" - + f"Got type(response_result)={type(response_result)}" - f" for label={label} , error was found in (flow_label, node_label)={(flow_label, node_label)}" - ) - error_handler(error_msgs, msg, None, verbose) - continue - except Exception as exc: - msg = ( - f"Got exception '''{exc}''' during response execution " - f"for label={label} and node.response={node.response}" - f", error was found in (flow_label, node_label)={(flow_label, node_label)}" - ) - error_handler(error_msgs, msg, exc, verbose) - continue - - # validate conditioning - try: - condition_result = condition(ctx, pipeline) - if not isinstance(condition(ctx, pipeline), bool): - raise Exception(f"Returned condition_result={condition_result}, but expected bool type") - except Exception as exc: - msg = f"Got exception '''{exc}''' during condition execution for label={label}" - error_handler(error_msgs, msg, exc, verbose) - continue - return error_msgs - async def default_condition_handler( condition: Callable, ctx: Context, pipeline: Pipeline diff --git a/dff/pipeline/pipeline/component.py b/chatsky/pipeline/pipeline/component.py similarity index 92% rename from dff/pipeline/pipeline/component.py rename to chatsky/pipeline/pipeline/component.py index 587b39c80..ab37426ea 100644 --- a/dff/pipeline/pipeline/component.py +++ b/chatsky/pipeline/pipeline/component.py @@ -8,19 +8,18 @@ The PipelineComponent class can be a group or a service. It is designed to be reusable and composable, allowing developers to create complex processing pipelines by combining multiple components. """ + from __future__ import annotations import logging import abc import asyncio -import copy from typing import Optional, Awaitable, TYPE_CHECKING -from dff.script import Context +from chatsky.script import Context from ..service.extra import BeforeHandler, AfterHandler from ..conditions import always_start_condition from ..types import ( - PIPELINE_STATE_KEY, StartConditionCheckerFunction, ComponentExecutionState, ServiceRuntimeInfo, @@ -33,7 +32,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline class PipelineComponent(abc.ABC): @@ -94,7 +93,7 @@ def __init__( self.path = path """ Dot-separated path to component (is universally unique). - This attribute is set in :py:func:`~dff.pipeline.pipeline.utils.finalize_service_group`. + This attribute is set in :py:func:`~chatsky.pipeline.pipeline.utils.finalize_service_group`. """ self.before_handler = BeforeHandler([] if before_handler is None else before_handler) @@ -108,28 +107,24 @@ def __init__( def _set_state(self, ctx: Context, value: ComponentExecutionState): """ - Method for component runtime state setting, state is preserved in `ctx.framework_states` dict, - in subdict, dedicated to this library. + Method for component runtime state setting, state is preserved in `ctx.framework_data`. :param ctx: :py:class:`~.Context` to keep state in. :param value: State to set. 
:return: `None` """ - if PIPELINE_STATE_KEY not in ctx.framework_states: - ctx.framework_states[PIPELINE_STATE_KEY] = {} - ctx.framework_states[PIPELINE_STATE_KEY][self.path] = value + ctx.framework_data.service_states[self.path] = value def get_state(self, ctx: Context, default: Optional[ComponentExecutionState] = None) -> ComponentExecutionState: """ - Method for component runtime state getting, state is preserved in `ctx.framework_states` dict, - in subdict, dedicated to this library. + Method for component runtime state getting, state is preserved in `ctx.framework_data`. :param ctx: :py:class:`~.Context` to get state from. :param default: Default to return if no record found (usually it's :py:attr:`~.pipeline.types.ComponentExecutionState.NOT_RUN`). :return: :py:class:`~pipeline.types.ComponentExecutionState` of this service or default if not found. """ - return ctx.framework_states[PIPELINE_STATE_KEY].get(self.path, default if default is not None else None) + return ctx.framework_data.service_states.get(self.path, default if default is not None else None) @property def asynchronous(self) -> bool: @@ -209,7 +204,7 @@ def _get_runtime_info(self, ctx: Context) -> ServiceRuntimeInfo: Method for retrieving runtime info about this component. :param ctx: Current dialog :py:class:`~.Context`. - :return: :py:class:`~.dff.script.typing.ServiceRuntimeInfo` + :return: :py:class:`~.chatsky.script.typing.ServiceRuntimeInfo` object where all not set fields are replaced with `[None]`. """ return ServiceRuntimeInfo( @@ -217,7 +212,7 @@ def _get_runtime_info(self, ctx: Context) -> ServiceRuntimeInfo: path=self.path if self.path is not None else "[None]", timeout=self.timeout, asynchronous=self.asynchronous, - execution_state=copy.deepcopy(ctx.framework_states[PIPELINE_STATE_KEY]), + execution_state=ctx.framework_data.service_states.copy(), ) @property diff --git a/dff/pipeline/pipeline/pipeline.py b/chatsky/pipeline/pipeline/pipeline.py similarity index 83% rename from dff/pipeline/pipeline/pipeline.py rename to chatsky/pipeline/pipeline/pipeline.py index 6ddddeb94..20ec93b99 100644 --- a/dff/pipeline/pipeline/pipeline.py +++ b/chatsky/pipeline/pipeline/pipeline.py @@ -2,7 +2,7 @@ Pipeline -------- The Pipeline module contains the :py:class:`.Pipeline` class, -which is a fundamental element of the DFF. The Pipeline class is responsible +which is a fundamental element of Chatsky. The Pipeline class is responsible for managing and executing the various components (:py:class:`.PipelineComponent`)which make up the processing of messages from and to users. It provides a way to organize and structure the messages processing flow. @@ -13,16 +13,19 @@ class, which is defined in the Component module. Together, these classes provide a powerful and flexible way to structure and manage the messages processing flow. 
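Because component state now lives in `ctx.framework_data.service_states` rather than a reserved `framework_states` key (see the component.py hunks above), downstream code can query it directly. A sketch mirroring `check_service_state` from the conditions module; the path string is a hypothetical example:

    from chatsky.script import Context
    from chatsky.pipeline import ComponentExecutionState

    def component_finished(ctx: Context, path: str) -> bool:
        # NOT_RUN is the documented fallback for components with no record yet.
        state = ctx.framework_data.service_states.get(path, ComponentExecutionState.NOT_RUN)
        return ComponentExecutionState[state] == ComponentExecutionState.FINISHED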
""" + import asyncio import logging from typing import Union, List, Dict, Optional, Hashable, Callable -from dff.context_storages import DBContextStorage -from dff.script import Script, Context, ActorStage -from dff.script import NodeLabel2Type, Message -from dff.utils.turn_caching import cache_clear +from chatsky.context_storages import DBContextStorage +from chatsky.script import Script, Context, ActorStage +from chatsky.script import NodeLabel2Type, Message +from chatsky.utils.turn_caching import cache_clear -from dff.messengers.common import MessengerInterface, CLIMessengerInterface +from chatsky.messengers.console import CLIMessengerInterface +from chatsky.messengers.common import MessengerInterface +from chatsky.slots.slots import GroupSlot from ..service.group import ServiceGroup from ..types import ( ServiceBuilder, @@ -32,10 +35,9 @@ ExtraHandlerFunction, ExtraHandlerBuilder, ) -from ..types import PIPELINE_STATE_KEY from .utils import finalize_service_group, pretty_format_component_info_dict -from dff.pipeline.pipeline.actor import Actor -from dff.pipeline.pipeline.script_parsing import JSONImporter, Path +from chatsky.pipeline.pipeline.actor import Actor +from chatsky.pipeline.pipeline.script_parsing import JSONImporter, Path logger = logging.getLogger(__name__) @@ -47,28 +49,29 @@ class Pipeline: Class that automates service execution and creates service pipeline. It accepts constructor parameters: + :param components: (required) A :py:data:`~.ServiceGroupBuilder` object, + that will be transformed to root service group. It should include :py:class:`~.Actor`, + but only once (raises exception otherwise). It will always be named pipeline. :param script: (required) A :py:class:`~.Script` instance (object or dict). :param start_label: (required) Actor start label. :param fallback_label: Actor fallback label. - :param label_priority: Default priority value for all actor :py:const:`labels ` + :param label_priority: Default priority value for all actor :py:const:`labels ` where there is no priority. Defaults to `1.0`. - :param validation_stage: This flag sets whether the validation stage is executed after actor creation. - It is executed by default. Defaults to `None`. :param condition_handler: Handler that processes a call of actor condition functions. Defaults to `None`. - :param verbose: If it is `True`, logging is used in actor. Defaults to `True`. + :param slots: Slots configuration. :param handlers: This variable is responsible for the usage of external handlers on - the certain stages of work of :py:class:`~dff.script.Actor`. + the certain stages of work of :py:class:`~chatsky.script.Actor`. - - key: :py:class:`~dff.script.ActorStage` - Stage in which the handler is called. + - key: :py:class:`~chatsky.script.ActorStage` - Stage in which the handler is called. - value: List[Callable] - The list of called handlers for each stage. Defaults to an empty `dict`. :param messenger_interface: An `AbsMessagingInterface` instance for this pipeline. :param context_storage: An :py:class:`~.DBContextStorage` instance for this pipeline or a dict to store dialog :py:class:`~.Context`. - :param services: (required) A :py:data:`~.ServiceGroupBuilder` object, - that will be transformed to root service group. It should include :py:class:`~.Actor`, - but only once (raises exception otherwise). It will always be named pipeline. - :param wrappers: List of wrappers to add to pipeline root service group. + :param before_handler: List of `ExtraHandlerBuilder` to add to the group. 
+ :type before_handler: Optional[:py:data:`~.ExtraHandlerBuilder`] + :param after_handler: List of `ExtraHandlerBuilder` to add to the group. + :type after_handler: Optional[:py:data:`~.ExtraHandlerBuilder`] :param timeout: Timeout to add to pipeline root service group. :param optimization_warnings: Asynchronous pipeline optimization check request flag; warnings will be sent to logs. Additionally it has some calculated fields: @@ -88,9 +91,8 @@ def __init__( start_label: NodeLabel2Type, fallback_label: Optional[NodeLabel2Type] = None, label_priority: float = 1.0, - validation_stage: Optional[bool] = None, condition_handler: Optional[Callable] = None, - verbose: bool = True, + slots: Optional[Union[GroupSlot, Dict]] = None, handlers: Optional[Dict[ActorStage, List[Callable]]] = None, messenger_interface: Optional[MessengerInterface] = None, context_storage: Optional[Union[DBContextStorage, Dict]] = None, @@ -103,6 +105,7 @@ def __init__( self.actor: Actor = None self.messenger_interface = CLIMessengerInterface() if messenger_interface is None else messenger_interface self.context_storage = {} if context_storage is None else context_storage + self.slots = GroupSlot.model_validate(slots) if slots is not None else None self._services_pipeline = ServiceGroup( components, before_handler=before_handler, @@ -121,9 +124,7 @@ def __init__( start_label, fallback_label, label_priority, - validation_stage, condition_handler, - verbose, handlers, ) if self.actor is None: @@ -211,9 +212,8 @@ def from_script( start_label: NodeLabel2Type, fallback_label: Optional[NodeLabel2Type] = None, label_priority: float = 1.0, - validation_stage: Optional[bool] = None, condition_handler: Optional[Callable] = None, - verbose: bool = True, + slots: Optional[Union[GroupSlot, Dict]] = None, parallelize_processing: bool = False, handlers: Optional[Dict[ActorStage, List[Callable]]] = None, context_storage: Optional[Union[DBContextStorage, Dict]] = None, @@ -232,19 +232,17 @@ def from_script( :param script: (required) A :py:class:`~.Script` instance (object or dict). :param start_label: (required) Actor start label. :param fallback_label: Actor fallback label. - :param label_priority: Default priority value for all actor :py:const:`labels ` + :param label_priority: Default priority value for all actor :py:const:`labels ` where there is no priority. Defaults to `1.0`. - :param validation_stage: This flag sets whether the validation stage is executed after actor creation. - It is executed by default. Defaults to `None`. :param condition_handler: Handler that processes a call of actor condition functions. Defaults to `None`. - :param verbose: If it is `True`, logging is used in actor. Defaults to `True`. + :param slots: Slots configuration. :param parallelize_processing: This flag determines whether or not the functions defined in the ``PRE_RESPONSE_PROCESSING`` and ``PRE_TRANSITIONS_PROCESSING`` sections of the script should be parallelized over respective groups. :param handlers: This variable is responsible for the usage of external handlers on - the certain stages of work of :py:class:`~dff.script.Actor`. + the certain stages of work of :py:class:`~chatsky.script.Actor`. - - key: :py:class:`~dff.script.ActorStage` - Stage in which the handler is called. + - key: :py:class:`~chatsky.script.ActorStage` - Stage in which the handler is called. - value: List[Callable] - The list of called handlers for each stage. Defaults to an empty `dict`. 
:param context_storage: An :py:class:`~.DBContextStorage` instance for this pipeline @@ -265,9 +263,8 @@ def from_script( start_label=start_label, fallback_label=fallback_label, label_priority=label_priority, - validation_stage=validation_stage, condition_handler=condition_handler, - verbose=verbose, + slots=slots, parallelize_processing=parallelize_processing, handlers=handlers, messenger_interface=messenger_interface, @@ -279,9 +276,9 @@ def from_script( def from_file( cls, file: Union[str, Path], - validation_stage: Optional[bool] = None, + # validation_stage: Optional[bool] = None, condition_handler: Optional[Callable] = None, - verbose: bool = True, + # verbose: bool = True, parallelize_processing: bool = False, handlers: Optional[Dict[ActorStage, List[Callable]]] = None, context_storage: Optional[Union[DBContextStorage, Dict]] = None, @@ -302,9 +299,9 @@ def to_tuple(i): return cls( script=script, **params, - validation_stage=validation_stage, + # validation_stage=validation_stage, condition_handler=condition_handler, - verbose=verbose, + # verbose=verbose, parallelize_processing=parallelize_processing, handlers=handlers, messenger_interface=messenger_interface, @@ -318,9 +315,7 @@ def set_actor( start_label: NodeLabel2Type, fallback_label: Optional[NodeLabel2Type] = None, label_priority: float = 1.0, - validation_stage: Optional[bool] = None, condition_handler: Optional[Callable] = None, - verbose: bool = True, handlers: Optional[Dict[ActorStage, List[Callable]]] = None, ): """ @@ -329,30 +324,20 @@ def set_actor( :param script: (required) A :py:class:`~.Script` instance (object or dict). :param start_label: (required) Actor start label. - The start node of :py:class:`~dff.script.Script`. The execution begins with it. - :param fallback_label: Actor fallback label. The label of :py:class:`~dff.script.Script`. + The start node of :py:class:`~chatsky.script.Script`. The execution begins with it. + :param fallback_label: Actor fallback label. The label of :py:class:`~chatsky.script.Script`. Dialog comes into that label if all other transitions failed, or there was an error while executing the scenario. - :param label_priority: Default priority value for all actor :py:const:`labels ` + :param label_priority: Default priority value for all actor :py:const:`labels ` where there is no priority. Defaults to `1.0`. - :param validation_stage: This flag sets whether the validation stage is executed in actor. - It is executed by default. Defaults to `None`. :param condition_handler: Handler that processes a call of actor condition functions. Defaults to `None`. - :param verbose: If it is `True`, logging is used in actor. Defaults to `True`. :param handlers: This variable is responsible for the usage of external handlers on - the certain stages of work of :py:class:`~dff.script.Actor`. + the certain stages of work of :py:class:`~chatsky.script.Actor`. - - key :py:class:`~dff.script.ActorStage` - Stage in which the handler is called. + - key :py:class:`~chatsky.script.ActorStage` - Stage in which the handler is called. - value List[Callable] - The list of called handlers for each stage. Defaults to an empty `dict`. 
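A condensed sketch of the `from_script` signature above in use, with a toy script and the standard condition helpers assumed available as in dff:

    from chatsky.pipeline import Pipeline
    from chatsky.script import RESPONSE, TRANSITIONS, Message
    import chatsky.script.conditions as cnd

    toy_script = {
        "flow": {
            "start": {RESPONSE: Message(), TRANSITIONS: {"hi": cnd.true()}},
            "hi": {RESPONSE: Message("Hello!"), TRANSITIONS: {"hi": cnd.true()}},
        }
    }

    pipeline = Pipeline.from_script(
        toy_script,
        start_label=("flow", "start"),
        fallback_label=("flow", "start"),
    )
    ctx = pipeline(Message("anything"), ctx_id=0)  # one synchronous turn
    print(ctx.last_response.text)  # "Hello!"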
""" - old_actor = self.actor self.actor = Actor(script, start_label, fallback_label, label_priority, condition_handler, handlers) - errors = self.actor.validate_script(self, verbose) if validation_stage is not False else [] - if errors: - self.actor = old_actor - raise ValueError( - f"Found {len(errors)} errors: " + " ".join([f"{i}) {er}" for i, er in enumerate(errors, 1)]) - ) @classmethod def from_dict(cls, dictionary: PipelineBuilder) -> "Pipeline": @@ -368,7 +353,7 @@ async def _run_pipeline( ) -> Context: """ Method that should be invoked on user input. - This method has the same signature as :py:class:`~dff.pipeline.types.PipelineRunnerFunction`. + This method has the same signature as :py:class:`~chatsky.pipeline.types.PipelineRunnerFunction`. """ if ctx_id is None: ctx = Context() @@ -380,14 +365,16 @@ async def _run_pipeline( if update_ctx_misc is not None: ctx.misc.update(update_ctx_misc) - ctx.framework_states[PIPELINE_STATE_KEY] = {} + if self.slots is not None: + ctx.framework_data.slot_manager.set_root_slot(self.slots) + ctx.add_request(request) result = await self._services_pipeline(ctx, self) if asyncio.iscoroutine(result): await result - del ctx.framework_states[PIPELINE_STATE_KEY] + ctx.framework_data.service_states.clear() if isinstance(self.context_storage, DBContextStorage): await self.context_storage.set_item_async(ctx_id, ctx) @@ -416,7 +403,7 @@ def __call__( Basically, it is a shortcut for `_run_pipeline`. NB! When pipeline is executed this way, `messenger_interface` won't be initiated nor connected. - This method has the same signature as :py:class:`~dff.pipeline.types.PipelineRunnerFunction`. + This method has the same signature as :py:class:`~chatsky.pipeline.types.PipelineRunnerFunction`. """ return asyncio.run(self._run_pipeline(request, ctx_id, update_ctx_misc)) diff --git a/dff/pipeline/pipeline/script_parsing.py b/chatsky/pipeline/pipeline/script_parsing.py similarity index 92% rename from dff/pipeline/pipeline/script_parsing.py rename to chatsky/pipeline/pipeline/script_parsing.py index 23119b99b..35bc35adc 100644 --- a/dff/pipeline/pipeline/script_parsing.py +++ b/chatsky/pipeline/pipeline/script_parsing.py @@ -24,7 +24,7 @@ class JSONImportError(Exception): class JSONImporter: - DFF_NAMESPACE_PREFIX = "dff" + CHATSKY_NAMESPACE_PREFIX = "chatsky" CUSTOM_DIR_NAMESPACE_PREFIX = "custom_dir" CUSTOM_DIR_CONFIG_OPTION = "custom_dir" TRANSITIONS_KEY = "TRANSITIONS" @@ -72,7 +72,7 @@ def __init__(self, file: Union[str, Path]): @staticmethod def is_resolvable(value: str) -> bool: - return value.startswith(JSONImporter.DFF_NAMESPACE_PREFIX + ".") or\ + return value.startswith(JSONImporter.CHATSKY_NAMESPACE_PREFIX + ".") or\ value.startswith(JSONImporter.CUSTOM_DIR_NAMESPACE_PREFIX + ".") def import_custom_module(self, module_name: str, paths: Optional[Sequence[str]] = None): @@ -111,7 +111,7 @@ def import_custom_module(self, module_name: str, paths: Optional[Sequence[str]] def resolve_target_object(self, obj: str): module_name, _, obj_name = obj.rpartition(".") - if obj.startswith(self.DFF_NAMESPACE_PREFIX): + if obj.startswith(self.CHATSKY_NAMESPACE_PREFIX): module = importlib.import_module(module_name) else: module = self.import_custom_module(module_name) @@ -122,10 +122,10 @@ def import_script(self): def replace_obj(self, obj: JsonValue): if not isinstance(obj, dict): - raise JSONImportError(f"DFF object has to be a dictionary: {obj}") + raise JSONImportError(f"Chatsky object has to be a dictionary: {obj}") keys = obj.keys() if len(keys) != 1: - raise 
JSONImportError(f"DFF object has to have only 1 key: {obj.keys()}") + raise JSONImportError(f"Chatsky object has to have only 1 key: {obj.keys()}") key = keys.__iter__().__next__() logger.debug(f"Replacing object: {key}") target_obj = self.resolve_target_object(key) @@ -202,7 +202,7 @@ def replace_script_objects(self, obj: JsonValue): return obj -def get_dff_objects(): +def get_chatsky_objects(): def get_objects_from_submodule(submodule_name: str, alias: Optional[str] = None): module = importlib.import_module(submodule_name) @@ -212,8 +212,8 @@ def get_objects_from_submodule(submodule_name: str, alias: Optional[str] = None) } return { - **get_objects_from_submodule("dff.cnd"), - **get_objects_from_submodule("dff.rsp"), - **get_objects_from_submodule("dff.lbl"), - **get_objects_from_submodule("dff.msg", "dff"), + **get_objects_from_submodule("chatsky.cnd"), + **get_objects_from_submodule("chatsky.rsp"), + **get_objects_from_submodule("chatsky.lbl"), + **get_objects_from_submodule("chatsky.msg", "chatsky"), } diff --git a/dff/pipeline/pipeline/utils.py b/chatsky/pipeline/pipeline/utils.py similarity index 96% rename from dff/pipeline/pipeline/utils.py rename to chatsky/pipeline/pipeline/utils.py index a778c67fa..752bde18c 100644 --- a/dff/pipeline/pipeline/utils.py +++ b/chatsky/pipeline/pipeline/utils.py @@ -4,6 +4,7 @@ The Utils module contains several service functions that are commonly used throughout the framework. These functions provide a variety of utility functionality. """ + import collections from typing import Union, List from inspect import isfunction @@ -27,9 +28,9 @@ def pretty_format_component_info_dict( However, most preferable usage is via `pipeline.pretty_format`. :param service: (required) Pipeline components info dictionary. - :param show_wrappers: (required) Whether to include Wrappers or not (could be many and/or generated). + :param show_extra_handlers: (required) Whether to include Extra Handlers or not (could be many and/or generated). :param offset: Current level new line offset. - :param wrappers_key: Key that is mapped to Wrappers lists. + :param extra_handlers_key: Key that is mapped to Extra Handlers lists. :param type_key: Key that is mapped to components type name. :param name_key: Key that is mapped to components name. :param indent: Current level new line offset (whitespace number). diff --git a/dff/pipeline/service/__init__.py b/chatsky/pipeline/service/__init__.py similarity index 100% rename from dff/pipeline/service/__init__.py rename to chatsky/pipeline/service/__init__.py diff --git a/dff/pipeline/service/extra.py b/chatsky/pipeline/service/extra.py similarity index 96% rename from dff/pipeline/service/extra.py rename to chatsky/pipeline/service/extra.py index 1212743e6..8a8a65a9b 100644 --- a/dff/pipeline/service/extra.py +++ b/chatsky/pipeline/service/extra.py @@ -5,15 +5,17 @@ beyond the core functionality. Extra handlers is an input converting addition to :py:class:`.PipelineComponent`. For example, it is used to grep statistics from components, timing, logging, etc. 
""" + from __future__ import annotations import asyncio import logging import inspect from typing import Optional, List, TYPE_CHECKING -from dff.script import Context +from chatsky.script import Context -from .utils import collect_defined_constructor_parameters_to_dict, _get_attrs_with_updates, wrap_sync_function_in_async +from .utils import collect_defined_constructor_parameters_to_dict, _get_attrs_with_updates +from chatsky.utils.devel.async_helpers import wrap_sync_function_in_async from ..types import ( ServiceRuntimeInfo, ExtraHandlerType, @@ -25,7 +27,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline class _ComponentExtraHandler: @@ -113,10 +115,9 @@ async def _run_function( async def _run(self, ctx: Context, pipeline: Pipeline, component_info: ServiceRuntimeInfo): """ - Method for executing one of the wrapper functions (before or after). + Method for executing one of the extra handler functions (before or after). If the function is not set, nothing happens. - :param stage: current `WrapperStage` (before or after). :param ctx: current dialog context. :param pipeline: the current pipeline. :param component_info: associated component's info dictionary. @@ -154,7 +155,7 @@ async def __call__(self, ctx: Context, pipeline: Pipeline, component_info: Servi @property def info_dict(self) -> dict: """ - Property for retrieving info dictionary about this wrapper. + Property for retrieving info dictionary about this extra handler. :return: Info dict, containing its fields as well as its type. All not set fields there are replaced with `None`. diff --git a/dff/pipeline/service/group.py b/chatsky/pipeline/service/group.py similarity index 93% rename from dff/pipeline/service/group.py rename to chatsky/pipeline/service/group.py index 2d977103a..22b8bae0d 100644 --- a/dff/pipeline/service/group.py +++ b/chatsky/pipeline/service/group.py @@ -7,12 +7,13 @@ allowing for easier management and organization of the services within the pipeline. The :py:class:`~.ServiceGroup` serves the important function of grouping services to work together in parallel. """ + from __future__ import annotations import asyncio import logging from typing import Optional, List, Union, Awaitable, TYPE_CHECKING -from dff.script import Context +from chatsky.script import Context from .utils import collect_defined_constructor_parameters_to_dict, _get_attrs_with_updates from ..pipeline.component import PipelineComponent @@ -31,7 +32,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline class ServiceGroup(PipelineComponent): @@ -101,7 +102,7 @@ def __init__( async def _run_services_group(self, ctx: Context, pipeline: Pipeline) -> None: """ Method for running this service group. - It doesn't include wrappers execution, start condition checking or error handling - pure execution only. + It doesn't include extra handlers execution, start condition checking or error handling - pure execution only. Executes components inside the group based on its `asynchronous` property. Collects information about their execution state - group is finished successfully only if all components in it finished successfully. @@ -136,7 +137,7 @@ async def _run( ) -> None: """ Method for handling this group execution. - Executes before and after execution wrappers, checks start condition and catches runtime exceptions. 
+ Executes extra handlers before and after execution, checks start condition and catches runtime exceptions. :param ctx: Current dialog context. :param pipeline: The current pipeline. @@ -149,9 +150,9 @@ async def _run( else: self._set_state(ctx, ComponentExecutionState.NOT_RUN) - except Exception as e: + except Exception as exc: self._set_state(ctx, ComponentExecutionState.FAILED) - logger.error(f"ServiceGroup '{self.name}' execution failed!\n{e}") + logger.error(f"ServiceGroup '{self.name}' execution failed!", exc_info=exc) await self.run_extra_handler(ExtraHandlerType.AFTER, ctx, pipeline) @@ -197,13 +198,13 @@ def add_extra_handler( condition: ExtraHandlerConditionFunction = lambda _: True, ): """ - Method for adding a global wrapper to this group. - Adds wrapper to itself and propagates it to all inner components. - Uses a special condition function to determine whether to add wrapper to any particular inner component or not. + Method for adding a global extra handler to this group. + Adds extra handler to itself and propagates it to all inner components. + Uses a special condition function to determine whether to add extra handler to any particular inner component. Condition checks components path to be in whitelist (if defined) and not to be in blacklist (if defined). - :param global_extra_handler_type: A type of wrapper to add. - :param extra_handler: A `WrapperFunction` to add as a wrapper. + :param global_extra_handler_type: A type of extra handler to add. + :param extra_handler: A `ExtraHandlerFunction` to add as an extra handler. :type extra_handler: :py:data:`~.ExtraHandlerFunction` :param condition: A condition function. :return: `None` diff --git a/dff/pipeline/service/service.py b/chatsky/pipeline/service/service.py similarity index 91% rename from dff/pipeline/service/service.py rename to chatsky/pipeline/service/service.py index 0895380f8..fdf43f0bb 100644 --- a/dff/pipeline/service/service.py +++ b/chatsky/pipeline/service/service.py @@ -9,14 +9,16 @@ Service can be asynchronous only if its handler is a coroutine. Actor wrapping service is asynchronous. """ + from __future__ import annotations import logging import inspect from typing import Optional, TYPE_CHECKING -from dff.script import Context +from chatsky.script import Context -from .utils import wrap_sync_function_in_async, collect_defined_constructor_parameters_to_dict, _get_attrs_with_updates +from .utils import collect_defined_constructor_parameters_to_dict, _get_attrs_with_updates +from chatsky.utils.devel.async_helpers import wrap_sync_function_in_async from ..types import ( ServiceBuilder, StartConditionCheckerFunction, @@ -29,7 +31,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline class Service(PipelineComponent): @@ -41,7 +43,10 @@ class Service(PipelineComponent): :param handler: A service function or an actor. :type handler: :py:data:`~.ServiceBuilder` - :param wrappers: List of Wrappers to add to the service. + :param before_handler: List of `ExtraHandlerBuilder` to add to the group. + :type before_handler: Optional[:py:data:`~.ExtraHandlerBuilder`] + :param after_handler: List of `ExtraHandlerBuilder` to add to the group. + :type after_handler: Optional[:py:data:`~.ExtraHandlerBuilder`] :param timeout: Timeout to add to the group. :param asynchronous: Requested asynchronous property. 
:param start_condition: StartConditionCheckerFunction that is invoked before each service execution; @@ -134,7 +139,7 @@ async def _run_as_actor(self, ctx: Context, pipeline: Pipeline) -> None: self._set_state(ctx, ComponentExecutionState.FINISHED) except Exception as exc: self._set_state(ctx, ComponentExecutionState.FAILED) - logger.error(f"Actor '{self.name}' execution failed!\n{exc}") + logger.error(f"Actor '{self.name}' execution failed!", exc_info=exc) async def _run_as_service(self, ctx: Context, pipeline: Pipeline) -> None: """ @@ -151,14 +156,14 @@ async def _run_as_service(self, ctx: Context, pipeline: Pipeline) -> None: self._set_state(ctx, ComponentExecutionState.FINISHED) else: self._set_state(ctx, ComponentExecutionState.NOT_RUN) - except Exception as e: + except Exception as exc: self._set_state(ctx, ComponentExecutionState.FAILED) - logger.error(f"Service '{self.name}' execution failed!\n{e}") + logger.error(f"Service '{self.name}' execution failed!", exc_info=exc) async def _run(self, ctx: Context, pipeline: Pipeline) -> None: """ Method for handling this service execution. - Executes before and after execution wrappers, launches `_run_as_actor` or `_run_as_service` method. + Executes extra handlers before and after execution, launches `_run_as_actor` or `_run_as_service` method. :param ctx: (required) Current dialog context. :param pipeline: the current pipeline. diff --git a/dff/pipeline/service/utils.py b/chatsky/pipeline/service/utils.py similarity index 77% rename from dff/pipeline/service/utils.py rename to chatsky/pipeline/service/utils.py index e3162418c..651f89b92 100644 --- a/dff/pipeline/service/utils.py +++ b/chatsky/pipeline/service/utils.py @@ -1,27 +1,11 @@ """ Utility Functions ----------------- -The Utility Functions module contains several utility functions that are commonly used throughout the DFF. +The Utility Functions module contains several utility functions that are commonly used throughout Chatsky. These functions provide a variety of utility functionality. """ -import asyncio -from typing import Callable, Any, Optional, Tuple, Mapping - -async def wrap_sync_function_in_async(func: Callable, *args, **kwargs) -> Any: - """ - Utility function, that wraps both functions and coroutines in coroutines. - Invokes `func` if it is just a callable and awaits, if this is a coroutine. - - :param func: Callable to wrap. - :param \\*args: Function args. - :param \\**kwargs: Function kwargs. - :return: What function returns. - """ - if asyncio.iscoroutinefunction(func): - return await func(*args, **kwargs) - else: - return func(*args, **kwargs) +from typing import Any, Optional, Tuple, Mapping def _get_attrs_with_updates( diff --git a/dff/pipeline/types.py b/chatsky/pipeline/types.py similarity index 75% rename from dff/pipeline/types.py rename to chatsky/pipeline/types.py index 4409cc87b..118532559 100644 --- a/dff/pipeline/types.py +++ b/chatsky/pipeline/types.py @@ -1,25 +1,26 @@ """ Types ----- -The Types module contains several classes and special types that are used throughout the `DFF Pipeline`. +The Types module contains several classes and special types that are used throughout `Chatsky Pipeline`. The classes and special types in this module can include data models, data structures, and other types that are defined for type hinting. 
""" + from __future__ import annotations from enum import unique, Enum from typing import Callable, Union, Awaitable, Dict, List, Optional, Iterable, Any, Protocol, Hashable, TYPE_CHECKING - -from dff.context_storages import DBContextStorage -from dff.script import Context, ActorStage, NodeLabel2Type, Script, Message from typing_extensions import NotRequired, TypedDict, TypeAlias from pydantic import BaseModel + if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline - from dff.pipeline.service.service import Service - from dff.pipeline.service.group import ServiceGroup - from dff.pipeline.service.extra import _ComponentExtraHandler - from dff.messengers.common.interface import MessengerInterface + from chatsky.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.service.service import Service + from chatsky.pipeline.service.group import ServiceGroup + from chatsky.pipeline.service.extra import _ComponentExtraHandler + from chatsky.messengers.common.interface import MessengerInterface + from chatsky.context_storages import DBContextStorage + from chatsky.script import Context, ActorStage, NodeLabel2Type, Script, Message class PipelineRunnerFunction(Protocol): @@ -29,7 +30,7 @@ class PipelineRunnerFunction(Protocol): def __call__( self, message: Message, ctx_id: Optional[Hashable] = None, update_ctx_misc: Optional[dict] = None - ) -> Context: + ) -> Awaitable[Context]: """ :param message: User request for pipeline to process. :param ctx_id: @@ -52,7 +53,7 @@ def __call__( class ComponentExecutionState(str, Enum): """ Enum, representing pipeline component execution state. - These states are stored in `ctx.framework_keys[PIPELINE_STATE_KEY]`, + These states are stored in `ctx.framework_keys.service_states`, that should always be requested with `NOT_RUN` being default fallback. Following states are supported: @@ -71,7 +72,7 @@ class ComponentExecutionState(str, Enum): @unique class GlobalExtraHandlerType(str, Enum): """ - Enum, representing types of global wrappers, that can be set applied for a pipeline. + Enum, representing types of global extra handlers, that can be set applied for a pipeline. The following types are supported: - BEFORE_ALL: function called before each pipeline call, @@ -92,9 +93,9 @@ class ExtraHandlerType(str, Enum): Enum, representing wrapper execution stage: before or after the wrapped function. The following types are supported: - - UNDEFINED: wrapper function with undetermined execution stage, - - BEFORE: wrapper function called before component, - - AFTER: wrapper function called after component. + - UNDEFINED: extra handler function with undetermined execution stage, + - BEFORE: extra handler function called before component, + - AFTER: extra handler function called after component. """ UNDEFINED = "UNDEFINED" @@ -102,14 +103,7 @@ class ExtraHandlerType(str, Enum): AFTER = "AFTER" -PIPELINE_STATE_KEY = "PIPELINE" -""" -PIPELINE: storage for services and groups execution status. -Should be used in `ctx.framework_keys[PIPELINE_STATE_KEY]`. -""" - - -StartConditionCheckerFunction: TypeAlias = Callable[[Context, "Pipeline"], bool] +StartConditionCheckerFunction: TypeAlias = Callable[["Context", "Pipeline"], bool] """ A function type for components `start_conditions`. Accepts context and pipeline, returns boolean (whether service can be launched). 
@@ -125,10 +119,10 @@ class ExtraHandlerType(str, Enum):
 
 ExtraHandlerConditionFunction: TypeAlias = Callable[[str], bool]
"""
-A function type used during global wrappers initialization to determine
-whether wrapper should be applied to component with given path or not.
+A function type used during global extra handler initialization to determine
+whether an extra handler should be applied to a component with the given path.
Checks components path to be in whitelist (if defined) and not to be in blacklist (if defined).
-Accepts str (component path), returns boolean (whether wrapper should be applied).
+Accepts str (component path), returns boolean (whether the extra handler should be applied).
"""
 
 
@@ -148,13 +142,13 @@ class ServiceRuntimeInfo(BaseModel):
 
 ExtraHandlerFunction: TypeAlias = Union[
-    Callable[[Context], Any],
-    Callable[[Context, "Pipeline"], Any],
-    Callable[[Context, "Pipeline", "ExtraHandlerRuntimeInfo"], Any],
+    Callable[["Context"], Any],
+    Callable[["Context", "Pipeline"], Any],
+    Callable[["Context", "Pipeline", "ExtraHandlerRuntimeInfo"], Any],
 ]
"""
-A function type for creating wrappers (before and after functions).
-Can accept current dialog context, pipeline, and current wrapper info.
+A function type for creating extra handlers (before and after functions).
+Can accept the current dialog context, pipeline, and current extra handler info.
"""
 
 
@@ -172,12 +166,12 @@ class ExtraHandlerRuntimeInfo(BaseModel):
 
 ServiceFunction: TypeAlias = Union[
-    Callable[[Context], None],
-    Callable[[Context], Awaitable[None]],
-    Callable[[Context, "Pipeline"], None],
-    Callable[[Context, "Pipeline"], Awaitable[None]],
-    Callable[[Context, "Pipeline", ServiceRuntimeInfo], None],
-    Callable[[Context, "Pipeline", ServiceRuntimeInfo], Awaitable[None]],
+    Callable[["Context"], None],
+    Callable[["Context"], Awaitable[None]],
+    Callable[["Context", "Pipeline"], None],
+    Callable[["Context", "Pipeline"], Awaitable[None]],
+    Callable[["Context", "Pipeline", ServiceRuntimeInfo], None],
+    Callable[["Context", "Pipeline", ServiceRuntimeInfo], Awaitable[None]],
 ]
"""
A function type for creating service handlers.
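(As a sketch, a handler matching the three-argument asynchronous signature could look
as follows; the logging body is an assumption, and `ServiceRuntimeInfo` is presumed to
stay re-exported from `chatsky.pipeline`.)

    import logging

    from chatsky.script import Context
    from chatsky.pipeline import Pipeline, ServiceRuntimeInfo

    async def log_requests(ctx: Context, pipeline: Pipeline, info: ServiceRuntimeInfo) -> None:
        # Hypothetical service handler: log the last user request along with the service path.
        logging.info("%s received request: %r", info.path, ctx.last_request)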
@@ -252,20 +246,18 @@ class ExtraHandlerRuntimeInfo(BaseModel): "PipelineBuilder", { "messenger_interface": NotRequired[Optional["MessengerInterface"]], - "context_storage": NotRequired[Optional[Union[DBContextStorage, Dict]]], + "context_storage": NotRequired[Optional[Union["DBContextStorage", Dict]]], "components": ServiceGroupBuilder, "before_handler": NotRequired[Optional[ExtraHandlerBuilder]], "after_handler": NotRequired[Optional[ExtraHandlerBuilder]], "optimization_warnings": NotRequired[bool], "parallelize_processing": NotRequired[bool], - "script": Union[Script, Dict], - "start_label": NodeLabel2Type, - "fallback_label": NotRequired[Optional[NodeLabel2Type]], + "script": Union["Script", Dict], + "start_label": "NodeLabel2Type", + "fallback_label": NotRequired[Optional["NodeLabel2Type"]], "label_priority": NotRequired[float], - "validation_stage": NotRequired[Optional[bool]], "condition_handler": NotRequired[Optional[Callable]], - "verbose": NotRequired[bool], - "handlers": NotRequired[Optional[Dict[ActorStage, List[Callable]]]], + "handlers": NotRequired[Optional[Dict["ActorStage", List[Callable]]]], }, ) """ diff --git a/chatsky/rsp.py b/chatsky/rsp.py new file mode 100644 index 000000000..bb9ad03d8 --- /dev/null +++ b/chatsky/rsp.py @@ -0,0 +1 @@ +from chatsky.script.responses import * diff --git a/dff/script/__init__.py b/chatsky/script/__init__.py similarity index 83% rename from dff/script/__init__.py rename to chatsky/script/__init__.py index 04afef572..942d9441d 100644 --- a/dff/script/__init__.py +++ b/chatsky/script/__init__.py @@ -18,9 +18,9 @@ NodeLabel2Type, NodeLabel3Type, NodeLabelTupledType, - NodeLabelType, + ConstLabel, + Label, ConditionType, - ModuleName, ActorStage, ) -from .core.message import Message, MultiMessage +from .core.message import Message diff --git a/dff/script/conditions/__init__.py b/chatsky/script/conditions/__init__.py similarity index 84% rename from dff/script/conditions/__init__.py rename to chatsky/script/conditions/__init__.py index 49f17e8c3..9b5fe812f 100644 --- a/dff/script/conditions/__init__.py +++ b/chatsky/script/conditions/__init__.py @@ -2,6 +2,7 @@ from .std_conditions import ( exact_match, + has_text, regexp, check_cond_seq, aggregate, @@ -13,4 +14,5 @@ false, agg, neg, + has_callback_query, ) diff --git a/dff/script/conditions/std_conditions.py b/chatsky/script/conditions/std_conditions.py similarity index 76% rename from dff/script/conditions/std_conditions.py rename to chatsky/script/conditions/std_conditions.py index bd4186aa9..7a5479f9a 100644 --- a/dff/script/conditions/std_conditions.py +++ b/chatsky/script/conditions/std_conditions.py @@ -8,31 +8,37 @@ These conditions can be used to check the current context, the user's input, or other factors that may affect the conversation flow. """ + from typing import Callable, Pattern, Union, List, Optional import logging import re from pydantic import validate_call -from dff.pipeline import Pipeline -from dff.script import NodeLabel2Type, Context, Message +from chatsky.pipeline import Pipeline +from chatsky.script import NodeLabel2Type, Context, Message +from chatsky.script.core.message import CallbackQuery logger = logging.getLogger(__name__) @validate_call -def exact_match(match: Message, skip_none: bool = True) -> Callable[[Context, Pipeline], bool]: +def exact_match(match: Union[str, Message], skip_none: bool = True) -> Callable[[Context, Pipeline], bool]: """ Return function handler. 
This handler returns `True` only if the last user phrase - is the same Message as the :py:const:`match`. - If :py:const:`skip_none` the handler will not compare `None` fields of :py:const:`match`. + is the same `Message` as the `match`. + If `skip_none` the handler will not compare `None` fields of `match`. - :param match: A Message variable to compare user request with. + :param match: A `Message` variable to compare user request with. + Can also accept `str`, which will be converted into a `Message` with its text field equal to `match`. :param skip_none: Whether fields should be compared if they are `None` in :py:const:`match`. """ def exact_match_condition_handler(ctx: Context, pipeline: Pipeline) -> bool: request = ctx.last_request + nonlocal match + if isinstance(match, str): + match = Message(text=match) if request is None: return False for field in match.model_fields: @@ -49,11 +55,27 @@ def exact_match_condition_handler(ctx: Context, pipeline: Pipeline) -> bool: return exact_match_condition_handler +@validate_call +def has_text(text: str) -> Callable[[Context, Pipeline], bool]: + """ + Return function handler. This handler returns `True` only if the last user phrase + contains the phrase specified in `text`. + + :param text: A `str` variable to look for within the user request. + """ + + def has_text_condition_handler(ctx: Context, pipeline: Pipeline) -> bool: + request = ctx.last_request + return text in request.text + + return has_text_condition_handler + + @validate_call def regexp(pattern: Union[str, Pattern], flags: Union[int, re.RegexFlag] = 0) -> Callable[[Context, Pipeline], bool]: """ Return function handler. This handler returns `True` only if the last user phrase contains - :py:const:`pattern ` with :py:const:`flags `. + `pattern` with `flags`. :param pattern: The `RegExp` pattern. :param flags: Flags for this pattern. Defaults to 0. @@ -170,14 +192,15 @@ def has_last_labels( ) -> Callable[[Context, Pipeline], bool]: """ Return condition handler. This handler returns `True` if any label from - last :py:const:`last_n_indices` context labels is in - the :py:const:`flow_labels` list or in - the :py:const:`~dff.script.NodeLabel2Type` list. + last `last_n_indices` context labels is in + the `flow_labels` list or in + the `labels` list. :param flow_labels: List of labels to check. Every label has type `str`. Empty if not set. :param labels: List of labels corresponding to the nodes. Empty if not set. :param last_n_indices: Number of last utterances to check. """ + # todo: rewrite docs & function itself flow_labels = [] if flow_labels is None else flow_labels labels = [] if labels is None else labels @@ -225,3 +248,21 @@ def false_handler(ctx: Context, pipeline: Pipeline) -> bool: """ :py:func:`~neg` is an alias for :py:func:`~negation`. """ + + +def has_callback_query(expected_query_string: str) -> Callable[[Context, Pipeline], bool]: + """ + Condition that checks if :py:attr:`~.CallbackQuery.query_string` + of the last message matches `expected_query_string`. + + :param expected_query_string: The expected query string to compare with. + :return: The callback query comparator function. 
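+
+    A hypothetical usage sketch (the flow/node names and the query string are
+    assumptions; ``cnd`` stands for ``chatsky.script.conditions``):
+
+    .. code-block:: python
+
+        TRANSITIONS: {("menu", "settings"): cnd.has_callback_query("settings_button")}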
+ """ + + def has_callback_query_handler(ctx: Context, _: Pipeline) -> bool: + last_request = ctx.last_request + if last_request is None or last_request.attachments is None: + return False + return CallbackQuery(query_string=expected_query_string) in last_request.attachments + + return has_callback_query_handler diff --git a/dff/script/core/__init__.py b/chatsky/script/core/__init__.py similarity index 100% rename from dff/script/core/__init__.py rename to chatsky/script/core/__init__.py diff --git a/dff/script/core/context.py b/chatsky/script/core/context.py similarity index 82% rename from dff/script/core/context.py rename to chatsky/script/core/context.py index 849a0651d..30c589a96 100644 --- a/dff/script/core/context.py +++ b/chatsky/script/core/context.py @@ -16,6 +16,7 @@ The context can be easily serialized to a format that can be stored or transmitted, such as JSON. This allows developers to save the context data and resume the conversation later. """ + from __future__ import annotations import logging from uuid import UUID, uuid4 @@ -23,11 +24,13 @@ from pydantic import BaseModel, Field, field_validator -from .types import NodeLabel2Type, ModuleName -from .message import Message +from chatsky.script.core.message import Message +from chatsky.script.core.types import NodeLabel2Type +from chatsky.pipeline.types import ComponentExecutionState +from chatsky.slots.slots import SlotManager if TYPE_CHECKING: - from dff.script.core.script import Node + from chatsky.script.core.script import Node logger = logging.getLogger(__name__) @@ -43,6 +46,21 @@ def get_last_index(dictionary: dict) -> int: return indices[-1] if indices else -1 +class FrameworkData(BaseModel): + """ + Framework uses this to store data related to any of its modules. + """ + + service_states: Dict[str, ComponentExecutionState] = Field(default_factory=dict, exclude=True) + "Statuses of all the pipeline services. Cleared at the end of every turn." + actor_data: Dict[str, Any] = Field(default_factory=dict, exclude=True) + "Actor service data. Cleared at the end of every turn." + stats: Dict[str, Any] = Field(default_factory=dict) + "Enables complex stats collection across multiple turns." + slot_manager: SlotManager = Field(default_factory=SlotManager) + "Stores extracted slots." + + class Context(BaseModel): """ A structure that is used to store data about the context of a dialog. @@ -56,57 +74,41 @@ class Context(BaseModel): `id` is the unique context identifier. By default, randomly generated using `uuid4` `id` is used. `id` can be used to trace the user behavior, e.g while collecting the statistical data. """ - labels: Dict[int, NodeLabel2Type] = {} + labels: Dict[int, NodeLabel2Type] = Field(default_factory=dict) """ `labels` stores the history of all passed `labels` - key - `id` of the turn. - value - `label` on this turn. """ - requests: Dict[int, Message] = {} + requests: Dict[int, Message] = Field(default_factory=dict) """ `requests` stores the history of all `requests` received by the agent - key - `id` of the turn. - value - `request` on this turn. """ - responses: Dict[int, Message] = {} + responses: Dict[int, Message] = Field(default_factory=dict) """ `responses` stores the history of all agent `responses` - key - `id` of the turn. - value - `response` on this turn. """ - misc: Dict[str, Any] = {} + misc: Dict[str, Any] = Field(default_factory=dict) """ `misc` stores any custom data. 
The scripting doesn't use this dictionary by default, - so storage of any data won't reflect on the work on the internal Dialog Flow Scripting functions. + so storage of any data won't reflect on the work on the internal Chatsky Scripting functions. Avoid storing unserializable data in order for context storages to work. - key - Arbitrary data name. - value - Arbitrary data. """ - validation: bool = False - """ - `validation` is a flag that signals that :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`, - while being initialized, checks the :py:class:`~dff.script.core.script.Script`. - The functions that can give not valid data - while being validated must use this flag to take the validation mode into account. - Otherwise the validation will not be passed. - """ - framework_states: Dict[ModuleName, Dict[str, Any]] = {} + framework_data: FrameworkData = Field(default_factory=FrameworkData) """ - `framework_states` is used for addons states or for - :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`'s states. - :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` - records all its intermediate conditions into the `framework_states`. - After :py:class:`~.Context` processing is finished, - :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` resets `framework_states` and - returns :py:class:`~.Context`. - - - key - Temporary variable name. - - value - Temporary variable data. + This attribute is used for storing custom data required for pipeline execution. + It is meant to be used by the framework only. Accessing it may result in pipeline breakage. """ @field_validator("labels", "requests", "responses") @@ -141,7 +143,7 @@ def cast(cls, ctx: Optional[Union[Context, dict, str]] = None, *args, **kwargs) ctx = Context.model_validate(ctx) elif isinstance(ctx, str): ctx = Context.model_validate_json(ctx) - elif not issubclass(type(ctx), Context): + elif not isinstance(ctx, Context): raise ValueError( f"Context expected to be an instance of the Context class " f"or an instance of the dict/str(json) type. Got: {type(ctx)}" @@ -206,8 +208,8 @@ def clear( if "labels" in field_names: for index in list(self.labels)[:-hold_last_n_indices]: del self.labels[index] - if "framework_states" in field_names: - self.framework_states.clear() + if "framework_data" in field_names: + self.framework_data = FrameworkData() @property def last_label(self) -> Optional[NodeLabel2Type]: @@ -261,15 +263,15 @@ def last_request(self, request: Optional[Message]): @property def current_node(self) -> Optional[Node]: """ - Return current :py:class:`~dff.script.core.script.Node`. + Return current :py:class:`~chatsky.script.core.script.Node`. 
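+
+        A hypothetical processing function that relies on this property
+        (it assumes the actor has already set a node for the current turn):
+
+        .. code-block:: python
+
+            def mark_visited(ctx: Context, pipeline: Pipeline):
+                ctx.current_node.misc["visited"] = True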
""" - actor = self.framework_states.get("actor", {}) + actor_data = self.framework_data.actor_data node = ( - actor.get("processed_node") - or actor.get("pre_response_processed_node") - or actor.get("next_node") - or actor.get("pre_transitions_processed_node") - or actor.get("previous_node") + actor_data.get("processed_node") + or actor_data.get("pre_response_processed_node") + or actor_data.get("next_node") + or actor_data.get("pre_transitions_processed_node") + or actor_data.get("previous_node") ) if node is None: logger.warning( diff --git a/dff/script/core/keywords.py b/chatsky/script/core/keywords.py similarity index 93% rename from dff/script/core/keywords.py rename to chatsky/script/core/keywords.py index 8a573cd6f..8f4bc2399 100644 --- a/dff/script/core/keywords.py +++ b/chatsky/script/core/keywords.py @@ -5,12 +5,13 @@ They are used to determine all nodes in the script and to assign python objects and python functions for nodes. """ + from enum import Enum class Keywords(str, Enum): """ - Keywords used to define the dialog script (:py:class:`~dff.script.Script`). + Keywords used to define the dialog script (:py:class:`~chatsky.script.Script`). The data type `dict` is used to describe the scenario. `Enums` of this class are used as keys in this `dict`. Different keys correspond to the different value types aimed at different purposes. @@ -22,7 +23,7 @@ class Keywords(str, Enum): The value that corresponds to this key has the `dict` type with keywords: `{TRANSITIONS:..., RESPONSE:..., PRE_RESPONSE_PROCESSING:..., MISC:...}`. - There can be only one global node in a script :py:class:`~dff.script.Script`. + There can be only one global node in a script :py:class:`~chatsky.script.Script`. The global node is defined at the flow level as opposed to regular nodes. This node allows to define default global values for all nodes. @@ -64,7 +65,7 @@ class Keywords(str, Enum): `{"PRE_RESPONSE_PROC_0": pre_response_proc_func_0, ..., "PRE_RESPONSE_PROC_N": pre_response_proc__func_N}`, where `"PRE_RESPONSE_PROC_i"` is an arbitrary name of the preprocessing stage in the pipeline. - Unless the :py:class:`~dff.pipeline.pipeline.Pipeline`'s `parallelize_processing` flag + Unless the :py:class:`~chatsky.pipeline.pipeline.Pipeline`'s `parallelize_processing` flag is set to `True`, calls to `pre_response_proc__func_i` are made in-order. PRE_TRANSITIONS_PROCESSING: Enum(auto) @@ -75,7 +76,7 @@ class Keywords(str, Enum): "PRE_TRANSITIONS_PROC_N": pre_transitions_proc_func_N}`, where `"PRE_TRANSITIONS_PROC_i"` is an arbitrary name of the preprocessing stage in the pipeline. - Unless the :py:class:`~dff.pipeline.pipeline.Pipeline`'s `parallelize_processing` flag + Unless the :py:class:`~chatsky.pipeline.pipeline.Pipeline`'s `parallelize_processing` flag is set to `True`, calls to `pre_transitions_proc_func_i` are made in-order. """ diff --git a/chatsky/script/core/message.py b/chatsky/script/core/message.py new file mode 100644 index 000000000..61cfbbfc4 --- /dev/null +++ b/chatsky/script/core/message.py @@ -0,0 +1,316 @@ +""" +Message +------- +The :py:class:`.Message` class is a universal data model for representing a message that should be supported by +Chatsky. It only contains types and properties that are compatible with most messaging services. 
+""" + +from typing import Literal, Optional, List, Union +from pathlib import Path +from urllib.request import urlopen +import uuid +import abc + +from pydantic import Field, FilePath, HttpUrl, model_validator +from pydantic_core import Url + +from chatsky.messengers.common.interface import MessengerInterfaceWithAttachments +from chatsky.utils.devel import JSONSerializableDict, PickleEncodedValue, JSONSerializableExtras + + +class DataModel(JSONSerializableExtras): + """ + This class is a Pydantic BaseModel that can have any type and number of extras. + """ + + pass + + +class Attachment(DataModel, abc.ABC): + """ + Chatsky Message attachment base class. + It is capable of serializing and validating all the model fields to JSON. + """ + + chatsky_attachment_type: str + + +class CallbackQuery(Attachment): + """ + This class is a data model that represents a callback query attachment. + It is sent as a response to non-message events, e.g. keyboard UI interactions. + It has query string attribute, that represents the response data string. + """ + + query_string: Optional[str] + chatsky_attachment_type: Literal["callback_query"] = "callback_query" + + +class Location(Attachment): + """ + This class is a data model that represents a geographical + location on the Earth's surface. + It has two attributes, longitude and latitude, both of which are float values. + If the absolute difference between the latitude and longitude values of the two + locations is less than 0.00004, they are considered equal. + """ + + longitude: float + latitude: float + chatsky_attachment_type: Literal["location"] = "location" + + +class Contact(Attachment): + """ + This class is a data model that represents a contact. + It includes phone number, and user first and last name. + """ + + phone_number: str + first_name: str + last_name: Optional[str] + chatsky_attachment_type: Literal["contact"] = "contact" + + +class Invoice(Attachment): + """ + This class is a data model that represents an invoice. + It includes title, description, currency name and amount. + """ + + title: str + description: str + currency: str + amount: int + chatsky_attachment_type: Literal["invoice"] = "invoice" + + +class PollOption(DataModel): + """ + This class is a data model that represents a poll option. + It includes the option name and votes number. + """ + + text: str + votes: int = Field(default=0) + chatsky_attachment_type: Literal["poll_option"] = "poll_option" + + +class Poll(Attachment): + """ + This class is a data model that represents a poll. + It includes a list of poll options. + """ + + question: str + options: List[PollOption] + chatsky_attachment_type: Literal["poll"] = "poll" + + +class DataAttachment(Attachment): + """ + This class represents an attachment that can be either + a local file, a URL to a file or a ID of a file on a certain server (such as telegram). + This attachment can also be optionally cached for future use. + """ + + source: Optional[Union[HttpUrl, FilePath]] = None + """Attachment source -- either a URL to a file or a local filepath.""" + use_cache: bool = True + """ + Whether to cache the file (only for URL and ID files). + Disable this if you want to always respond with the most up-to-date version of the file. + """ + cached_filename: Optional[Path] = None + """ + This field is used to store a path to cached version of this file (retrieved from id or URL). + This field is managed by framework. + """ + id: Optional[str] = None + """ + ID of the file on a file server (e.g. 
file_id for telegram attachments). + :py:meth:`~.MessengerInterfaceWithAttachments.get_attachment_bytes` is used to retrieve bytes from ID. + """ + + async def _cache_attachment(self, data: bytes, directory: Path) -> None: + """ + Cache attachment, save bytes into a file. + File has a UUID name based on its `self.source` or `self.id`. + + :param data: attachment data bytes. + :param directory: cache directory where attachment will be saved. + """ + + filename = str(uuid.uuid5(uuid.NAMESPACE_URL, str(self.source or self.id))) + self.cached_filename = directory / filename + self.cached_filename.write_bytes(data) + + async def get_bytes(self, from_interface: MessengerInterfaceWithAttachments) -> Optional[bytes]: + """ + Retrieve attachment bytes. + If the attachment is represented by URL or saved in a file, + it will be downloaded or read automatically. + If cache use is allowed and the attachment is cached, cached file will be used. + Otherwise, a :py:meth:`~.MessengerInterfaceWithAttachments.get_attachment_bytes` + will be used for receiving attachment bytes via ID. + + If cache use is allowed and the attachment is a URL or an ID, bytes will be cached locally. + + :param from_interface: messenger interface the attachment was received from. + """ + + if isinstance(self.source, Path): + with open(self.source, "rb") as file: + return file.read() + elif self.use_cache and self.cached_filename is not None and self.cached_filename.exists(): + with open(self.cached_filename, "rb") as file: + return file.read() + elif isinstance(self.source, Url): + with urlopen(self.source.unicode_string()) as url: + attachment_data = url.read() + else: + attachment_data = await from_interface.get_attachment_bytes(self.id) + if self.use_cache: + await self._cache_attachment(attachment_data, from_interface.attachments_directory) + return attachment_data + + @model_validator(mode="before") + @classmethod + def validate_source_or_id(cls, values: dict): + if not isinstance(values, dict): + raise AssertionError(f"Invalid constructor parameters: {str(values)}") + if bool(values.get("source")) == bool(values.get("id")): + raise AssertionError("Attachment type requires exactly one parameter, `source` or `id`, to be set.") + return values + + +class Audio(DataAttachment): + """Represents an audio file attachment.""" + + chatsky_attachment_type: Literal["audio"] = "audio" + + +class Video(DataAttachment): + """Represents a video file attachment.""" + + chatsky_attachment_type: Literal["video"] = "video" + + +class Animation(DataAttachment): + """Represents an animation file attachment.""" + + chatsky_attachment_type: Literal["animation"] = "animation" + + +class Image(DataAttachment): + """Represents an image file attachment.""" + + chatsky_attachment_type: Literal["image"] = "image" + + +class Sticker(DataAttachment): + """Represents a sticker as a file attachment.""" + + chatsky_attachment_type: Literal["sticker"] = "sticker" + + +class Document(DataAttachment): + """Represents a document file attachment.""" + + chatsky_attachment_type: Literal["document"] = "document" + + +class VoiceMessage(DataAttachment): + """Represents a voice message.""" + + chatsky_attachment_type: Literal["voice_message"] = "voice_message" + + +class VideoMessage(DataAttachment): + """Represents a video message.""" + + chatsky_attachment_type: Literal["video_message"] = "video_message" + + +class MediaGroup(Attachment): + """ + Represents a group of media attachments. + Without this class attachments are sent one-by-one. 
+ + Be mindful of limitations that certain services apply + (e.g. Telegram does not allow audio or document files to be mixed with other types when using media groups, + so you should send them separately by putting them directly in :py:attr:`~.Message.attachments`). + """ + + group: List[Union[Audio, Video, Image, Document, DataAttachment]] = Field(default_factory=list) + chatsky_attachment_type: Literal["media_group"] = "media_group" + + +class Message(DataModel): + """ + Class representing a message and contains several + class level variables to store message information. + + It includes message text, list of attachments, annotations, + MISC dictionary (that consists of user-defined parameters) + and original message field that represents + the update received from messenger interface API. + """ + + text: Optional[str] = None + attachments: Optional[ + List[ + Union[ + CallbackQuery, + Location, + Contact, + Invoice, + Poll, + Audio, + Video, + Animation, + Image, + Sticker, + Document, + VoiceMessage, + VideoMessage, + MediaGroup, + DataModel, + ] + ] + ] = None + annotations: Optional[JSONSerializableDict] = None + misc: Optional[JSONSerializableDict] = None + original_message: Optional[PickleEncodedValue] = None + + def __init__( + self, + text: Optional[str] = None, + attachments: Optional[ + List[ + Union[ + CallbackQuery, + Location, + Contact, + Invoice, + Poll, + Audio, + Video, + Animation, + Image, + Sticker, + Document, + VoiceMessage, + VideoMessage, + MediaGroup, + ] + ] + ] = None, + annotations: Optional[JSONSerializableDict] = None, + misc: Optional[JSONSerializableDict] = None, + **kwargs, + ): + super().__init__(text=text, attachments=attachments, annotations=annotations, misc=misc, **kwargs) + + def __repr__(self) -> str: + return " ".join([f"{key}='{value}'" for key, value in self.model_dump(exclude_none=True).items()]) diff --git a/dff/script/core/normalization.py b/chatsky/script/core/normalization.py similarity index 88% rename from dff/script/core/normalization.py rename to chatsky/script/core/normalization.py index 302b1f33b..39b7dde8c 100644 --- a/dff/script/core/normalization.py +++ b/chatsky/script/core/normalization.py @@ -5,39 +5,39 @@ that is suitable for script and actor execution process. This module contains a basic set of functions for normalizing data in a dialog script. """ + from __future__ import annotations import logging from typing import Union, Callable, Optional, TYPE_CHECKING from .keywords import Keywords from .context import Context -from .types import NodeLabel3Type, NodeLabelType, ConditionType, LabelType +from .types import ConstLabel, ConditionType, Label, LabelType from .message import Message if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline logger = logging.getLogger(__name__) -def normalize_label( - label: NodeLabelType, default_flow_label: LabelType = "" -) -> Union[Callable[[Context, Pipeline], NodeLabel3Type], NodeLabel3Type]: +def normalize_label(label: Label, default_flow_label: LabelType = "") -> Label: """ The function that is used for normalization of - :py:const:`default_flow_label `. + :py:const:`label `. :param label: If label is Callable the function is wrapped into try/except and normalization is used on the result of the function call with the name label. :param default_flow_label: flow_label is used if label does not contain flow_label. 
- :return: Result of the label normalization, - if Callable is returned, the normalized result is returned. + :return: Result of the label normalization """ if callable(label): - def get_label_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def get_label_handler(ctx: Context, pipeline: Pipeline) -> Optional[ConstLabel]: try: new_label = label(ctx, pipeline) + if new_label is None: + return None new_label = normalize_label(new_label, default_flow_label) flow_label, node_label, _ = new_label node = pipeline.script.get(flow_label, {}).get(node_label) @@ -61,6 +61,8 @@ def get_label_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: elif isinstance(label, tuple) and len(label) == 3: flow_label = label[0] or default_flow_label return (flow_label, label[1], label[2]) + else: + raise TypeError(f"Label '{label!r}' is of incorrect type. It has to follow the `Label`:\n" f"{Label!r}") def normalize_condition(condition: ConditionType) -> Callable[[Context, Pipeline], bool]: diff --git a/chatsky/script/core/script.py b/chatsky/script/core/script.py new file mode 100644 index 000000000..0422170c0 --- /dev/null +++ b/chatsky/script/core/script.py @@ -0,0 +1,267 @@ +""" +Script +------ +The Script module provides a set of `pydantic` models for representing the dialog graph. +These models are used to define the conversation flow, and to determine the appropriate response based on +the user's input and the current state of the conversation. +""" + +# %% +from __future__ import annotations +from enum import Enum +import inspect +import logging +from typing import Callable, List, Optional, Any, Dict, Tuple, Union, TYPE_CHECKING + +from pydantic import BaseModel, field_validator, validate_call, Field + +from .types import Label, LabelType, ConditionType, ConstLabel # noqa: F401 +from .message import Message +from .keywords import Keywords +from .normalization import normalize_condition, normalize_label + +if TYPE_CHECKING: + from chatsky.script.core.context import Context + from chatsky.pipeline.pipeline.pipeline import Pipeline + +logger = logging.getLogger(__name__) + + +class UserFunctionType(str, Enum): + LABEL = "label" + RESPONSE = "response" + CONDITION = "condition" + TRANSITION_PROCESSING = "pre_transitions_processing" + RESPONSE_PROCESSING = "pre_response_processing" + + +USER_FUNCTION_TYPES: Dict[UserFunctionType, Tuple[Tuple[str, ...], str]] = { + UserFunctionType.LABEL: (("Context", "Pipeline"), "ConstLabel"), + UserFunctionType.RESPONSE: (("Context", "Pipeline"), "Message"), + UserFunctionType.CONDITION: (("Context", "Pipeline"), "bool"), + UserFunctionType.RESPONSE_PROCESSING: (("Context", "Pipeline"), "None"), + UserFunctionType.TRANSITION_PROCESSING: (("Context", "Pipeline"), "None"), +} + + +def _types_equal(signature_type: Any, expected_type: str) -> bool: + """ + This function checks equality of signature type with expected type. + Three cases are handled. If no signature is present, it is presumed that types are equal. + If signature is a type, it is compared with expected type as is. + If signature is a string, it is compared with expected type name. + + :param signature_type: type received from function signature. + :param expected_type: expected type - a class. + :return: true if types are equal, false otherwise. 
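+
+    For example (illustrative only), ``_types_equal(str, "str")`` and
+    ``_types_equal(inspect.Parameter.empty, "Context")`` both return ``True``.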
+ """ + signature_str = signature_type.__name__ if hasattr(signature_type, "__name__") else str(signature_type) + signature_empty = signature_type == inspect.Parameter.empty + expected_string = signature_str == expected_type + expected_global = str(signature_type) == str(globals().get(expected_type)) + return signature_empty or expected_string or expected_global + + +def _validate_callable(callable: Callable, func_type: UserFunctionType, flow_label: str, node_label: str) -> List: + """ + This function validates a function during :py:class:`~chatsky.script.Script` validation. + It checks parameter number (unconditionally), parameter types (if specified) and return type (if specified). + + :param callable: Function to validate. + :param func_type: Type of the function (label, condition, response, etc.). + :param flow_label: Flow label this function is related to (used for error localization only). + :param node_label: Node label this function is related to (used for error localization only). + :return: list of produced error messages. + """ + + error_msgs = list() + signature = inspect.signature(callable) + arguments_type, return_type = USER_FUNCTION_TYPES[func_type] + params = list(signature.parameters.values()) + if len(params) != len(arguments_type): + msg = ( + f"Incorrect parameter number for {callable.__name__!r}: " + f"should be {len(arguments_type)}, not {len(params)}. " + f"Error found at {(flow_label, node_label)!r}." + ) + error_msgs.append(msg) + for idx, param in enumerate(params): + if not _types_equal(param.annotation, arguments_type[idx]): + msg = ( + f"Incorrect parameter annotation for parameter #{idx + 1} " + f" of {callable.__name__!r}: " + f"should be {arguments_type[idx]}, not {param.annotation}. " + f"Error found at {(flow_label, node_label)!r}." + ) + error_msgs.append(msg) + if not _types_equal(signature.return_annotation, return_type): + msg = ( + f"Incorrect return type annotation of {callable.__name__!r}: " + f"should be {return_type!r}, not {signature.return_annotation}. " + f"Error found at {(flow_label, node_label)!r}." + ) + error_msgs.append(msg) + return error_msgs + + +class Node(BaseModel, extra="forbid", validate_assignment=True): + """ + The class for the `Node` object. + """ + + transitions: Dict[Label, ConditionType] = Field(default_factory=dict, alias="TRANSITIONS") + response: Union[Message, Callable[[Context, Pipeline], Message]] = Field(default_factory=Message, alias="RESPONSE") + pre_transitions_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_TRANSITIONS_PROCESSING") + pre_response_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_RESPONSE_PROCESSING") + misc: dict = Field(default_factory=dict, alias="MISC") + + @field_validator("transitions", mode="before") + @classmethod + @validate_call + def normalize_transitions(cls, transitions: Dict[Label, ConditionType]) -> Dict[Label, Callable]: + """ + The function which is used to normalize transitions and returns normalized dict. + + :param transitions: Transitions to normalize. + :return: Transitions with normalized label and condition. + """ + transitions = { + normalize_label(label): normalize_condition(condition) for label, condition in transitions.items() + } + return transitions + + +class Script(BaseModel, extra="forbid"): + """ + The class for the `Script` object. 
+ """ + + script: Dict[LabelType, Dict[LabelType, Node]] + + @field_validator("script", mode="before") + @classmethod + @validate_call + def normalize_script(cls, script: Dict[LabelType, Any]) -> Dict[LabelType, Dict[LabelType, Dict[str, Any]]]: + """ + This function normalizes :py:class:`.Script`: it returns dict where the GLOBAL node is moved + into the flow with the GLOBAL name. The function returns the structure + + `{GLOBAL: {...NODE...}, ...}` -> `{GLOBAL: {GLOBAL: {...NODE...}}, ...}`. + + :param script: :py:class:`.Script` that describes the dialog scenario. + :return: Normalized :py:class:`.Script`. + """ + if isinstance(script, dict): + if Keywords.GLOBAL in script and all( + [isinstance(item, Keywords) for item in script[Keywords.GLOBAL].keys()] + ): + script[Keywords.GLOBAL] = {Keywords.GLOBAL: script[Keywords.GLOBAL]} + return script + + @field_validator("script", mode="before") + @classmethod + @validate_call + def validate_script_before(cls, script: Dict[LabelType, Any]) -> Dict[LabelType, Dict[LabelType, Dict[str, Any]]]: + error_msgs = [] + for flow_name, flow in script.items(): + for node_name, node in flow.items(): + # validate labeling + transitions = node.get("transitions", dict()) + for label in transitions.keys(): + if callable(label): + error_msgs += _validate_callable(label, UserFunctionType.LABEL, flow_name, node_name) + + # validate responses + response = node.get("response", None) + if callable(response): + error_msgs += _validate_callable( + response, + UserFunctionType.RESPONSE, + flow_name, + node_name, + ) + + # validate conditions + for label, condition in transitions.items(): + if callable(condition): + error_msgs += _validate_callable( + condition, + UserFunctionType.CONDITION, + flow_name, + node_name, + ) + + # validate pre_transitions- and pre_response_processing + pre_transitions_processing = node.get("pre_transitions_processing", dict()) + pre_response_processing = node.get("pre_response_processing", dict()) + for place, functions in zip( + (UserFunctionType.TRANSITION_PROCESSING, UserFunctionType.RESPONSE_PROCESSING), + (pre_transitions_processing, pre_response_processing), + ): + for function in functions.values(): + if callable(function): + error_msgs += _validate_callable( + function, + place, + flow_name, + node_name, + ) + if error_msgs: + error_number_string = "1 error" if len(error_msgs) == 1 else f"{len(error_msgs)} errors" + raise ValueError( + f"Found {error_number_string}:\n" + "\n".join([f"{i}) {er}" for i, er in enumerate(error_msgs, 1)]) + ) + else: + return script + + @field_validator("script", mode="after") + @classmethod + @validate_call + def validate_script_after(cls, script: Dict[LabelType, Any]) -> Dict[LabelType, Dict[LabelType, Dict[str, Any]]]: + error_msgs = [] + for flow_name, flow in script.items(): + for node_name, node in flow.items(): + # validate labeling + for label in node.transitions.keys(): + if not callable(label): + norm_flow_label, norm_node_label, _ = normalize_label(label, flow_name) + if norm_flow_label not in script.keys(): + msg = ( + f"Flow {norm_flow_label!r} cannot be found for label={label}. " + f"Error found at {(flow_name, node_name)!r}." + ) + elif norm_node_label not in script[norm_flow_label].keys(): + msg = ( + f"Node {norm_node_label!r} cannot be found for label={label}. " + f"Error found at {(flow_name, node_name)!r}." 
+ ) + else: + msg = None + if msg is not None: + error_msgs.append(msg) + + if error_msgs: + error_number_string = "1 error" if len(error_msgs) == 1 else f"{len(error_msgs)} errors" + raise ValueError( + f"Found {error_number_string}:\n" + "\n".join([f"{i}) {er}" for i, er in enumerate(error_msgs, 1)]) + ) + else: + return script + + def __getitem__(self, key): + return self.script[key] + + def get(self, key, value=None): + return self.script.get(key, value) + + def keys(self): + return self.script.keys() + + def items(self): + return self.script.items() + + def values(self): + return self.script.values() + + def __iter__(self): + return self.script.__iter__() diff --git a/dff/script/core/types.py b/chatsky/script/core/types.py similarity index 89% rename from dff/script/core/types.py rename to chatsky/script/core/types.py index a3054ed57..8655c96ad 100644 --- a/dff/script/core/types.py +++ b/chatsky/script/core/types.py @@ -6,6 +6,7 @@ The types defined in this module include basic data types such as strings and lists, as well as more complex types that are specific to the framework. """ + from typing import Union, Callable, Tuple from enum import Enum, auto from typing_extensions import TypeAlias @@ -13,7 +14,8 @@ from .keywords import Keywords LabelType: TypeAlias = Union[str, Keywords] -"""Label can be a casual string or :py:class:`~dff.script.Keywords`.""" +"""Label can be a casual string or :py:class:`~chatsky.script.Keywords`.""" +# todo: rename these to identifiers NodeLabel1Type: TypeAlias = Tuple[str, float] """Label type for transitions can be `[node_name, transition_priority]`.""" @@ -26,19 +28,17 @@ NodeLabelTupledType: TypeAlias = Union[NodeLabel1Type, NodeLabel2Type, NodeLabel3Type] """Label type for transitions can be one of three different types.""" +# todo: group all these types into a class -NodeLabelType: TypeAlias = Union[Callable, NodeLabelTupledType, str] -"""Label type for transitions can be one of three different types.""" +ConstLabel: TypeAlias = Union[NodeLabelTupledType, str] +"""Label functions should be annotated with this type only.""" + +Label: TypeAlias = Union[ConstLabel, Callable] +"""Label type for transitions should be of this type only.""" ConditionType: TypeAlias = Callable """Condition type can be only `Callable`.""" -ModuleName: TypeAlias = "str" -""" -Module name names addon state, or your own module state. For example module name can be `"dff_context_storages"`. 
-""" -# TODO: change example - class ActorStage(Enum): """ diff --git a/dff/script/extras/__init__.py b/chatsky/script/extras/__init__.py similarity index 100% rename from dff/script/extras/__init__.py rename to chatsky/script/extras/__init__.py diff --git a/dff/script/extras/conditions/__init__.py b/chatsky/script/extras/conditions/__init__.py similarity index 100% rename from dff/script/extras/conditions/__init__.py rename to chatsky/script/extras/conditions/__init__.py diff --git a/dff/script/extras/slots/__init__.py b/chatsky/script/extras/slots/__init__.py similarity index 100% rename from dff/script/extras/slots/__init__.py rename to chatsky/script/extras/slots/__init__.py diff --git a/dff/script/labels/__init__.py b/chatsky/script/labels/__init__.py similarity index 100% rename from dff/script/labels/__init__.py rename to chatsky/script/labels/__init__.py diff --git a/dff/script/labels/std_labels.py b/chatsky/script/labels/std_labels.py similarity index 73% rename from dff/script/labels/std_labels.py rename to chatsky/script/labels/std_labels.py index 7409fc21b..a52aa37fc 100644 --- a/dff/script/labels/std_labels.py +++ b/chatsky/script/labels/std_labels.py @@ -1,35 +1,36 @@ """ Labels ------ -:py:const:`Labels ` are one of the important components of the dialog graph, +:py:const:`Labels ` are one of the important components of the dialog graph, which determine the targeted node name of the transition. They are used to identify the next step in the conversation. Labels can also be used in combination with other conditions, such as the current context or user data, to create more complex and dynamic conversations. -This module contains a standard set of scripting :py:const:`labels ` that +This module contains a standard set of scripting :py:const:`labels ` that can be used by developers to define the conversation flow. """ + from __future__ import annotations from typing import Optional, Callable, TYPE_CHECKING -from dff.script import Context, NodeLabel3Type +from chatsky.script import Context, ConstLabel if TYPE_CHECKING: - from dff.pipeline.pipeline.pipeline import Pipeline + from chatsky.pipeline.pipeline.pipeline import Pipeline -def repeat(priority: Optional[float] = None) -> Callable: +def repeat(priority: Optional[float] = None) -> Callable[[Context, Pipeline], ConstLabel]: """ Returns transition handler that takes :py:class:`.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the last node with a given :py:const:`priority `. If the priority is not given, `Pipeline.actor.label_priority` is used as default. :param priority: Priority of transition. Uses `Pipeline.actor.label_priority` if priority not defined. 
""" - def repeat_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def repeat_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: current_priority = pipeline.actor.label_priority if priority is None else priority if len(ctx.labels) >= 1: flow_label, label = list(ctx.labels.values())[-1] @@ -40,11 +41,11 @@ def repeat_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Typ return repeat_transition_handler -def previous(priority: Optional[float] = None) -> Callable: +def previous(priority: Optional[float] = None) -> Callable[[Context, Pipeline], ConstLabel]: """ - Returns transition handler that takes :py:class:`~dff.script.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + Returns transition handler that takes :py:class:`~chatsky.script.Context`, + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the previous node with a given :py:const:`priority `. If the priority is not given, `Pipeline.actor.label_priority` is used as default. If the current node is the start node, fallback is returned. @@ -52,7 +53,7 @@ def previous(priority: Optional[float] = None) -> Callable: :param priority: Priority of transition. Uses `Pipeline.actor.label_priority` if priority not defined. """ - def previous_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def previous_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: current_priority = pipeline.actor.label_priority if priority is None else priority if len(ctx.labels) >= 2: flow_label, label = list(ctx.labels.values())[-2] @@ -65,36 +66,36 @@ def previous_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3T return previous_transition_handler -def to_start(priority: Optional[float] = None) -> Callable: +def to_start(priority: Optional[float] = None) -> Callable[[Context, Pipeline], ConstLabel]: """ - Returns transition handler that takes :py:class:`~dff.script.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + Returns transition handler that takes :py:class:`~chatsky.script.Context`, + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the start node with a given :py:const:`priority `. If the priority is not given, `Pipeline.actor.label_priority` is used as default. :param priority: Priority of transition. Uses `Pipeline.actor.label_priority` if priority not defined. """ - def to_start_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def to_start_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: current_priority = pipeline.actor.label_priority if priority is None else priority return (*pipeline.actor.start_label[:2], current_priority) return to_start_transition_handler -def to_fallback(priority: Optional[float] = None) -> Callable: +def to_fallback(priority: Optional[float] = None) -> Callable[[Context, Pipeline], ConstLabel]: """ - Returns transition handler that takes :py:class:`~dff.script.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + Returns transition handler that takes :py:class:`~chatsky.script.Context`, + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the fallback node with a given :py:const:`priority `. 
If the priority is not given, `Pipeline.actor.label_priority` is used as default. :param priority: Priority of transition. Uses `Pipeline.actor.label_priority` if priority not defined. """ - def to_fallback_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def to_fallback_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: current_priority = pipeline.actor.label_priority if priority is None else priority return (*pipeline.actor.fallback_label[:2], current_priority) @@ -107,7 +108,7 @@ def _get_label_by_index_shifting( priority: Optional[float] = None, increment_flag: bool = True, cyclicality_flag: bool = True, -) -> NodeLabel3Type: +) -> ConstLabel: """ Function that returns node label from the context and pipeline after shifting the index. @@ -136,11 +137,13 @@ def _get_label_by_index_shifting( return (flow_label, labels[label_index], current_priority) -def forward(priority: Optional[float] = None, cyclicality_flag: bool = True) -> Callable: +def forward( + priority: Optional[float] = None, cyclicality_flag: bool = True +) -> Callable[[Context, Pipeline], ConstLabel]: """ - Returns transition handler that takes :py:class:`~dff.script.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + Returns transition handler that takes :py:class:`~chatsky.script.Context`, + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the forward node with a given :py:const:`priority ` and :py:const:`cyclicality_flag `. If the priority is not given, `Pipeline.actor.label_priority` is used as default. @@ -149,7 +152,7 @@ def forward(priority: Optional[float] = None, cyclicality_flag: bool = True) -> (e.g the element with `index = len(labels)` has `index = 0`). Defaults to `True`. """ - def forward_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def forward_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: return _get_label_by_index_shifting( ctx, pipeline, priority, increment_flag=True, cyclicality_flag=cyclicality_flag ) @@ -157,11 +160,13 @@ def forward_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Ty return forward_transition_handler -def backward(priority: Optional[float] = None, cyclicality_flag: bool = True) -> Callable: +def backward( + priority: Optional[float] = None, cyclicality_flag: bool = True +) -> Callable[[Context, Pipeline], ConstLabel]: """ - Returns transition handler that takes :py:class:`~dff.script.Context`, - :py:class:`~dff.pipeline.Pipeline` and :py:const:`priority `. - This handler returns a :py:const:`label ` + Returns transition handler that takes :py:class:`~chatsky.script.Context`, + :py:class:`~chatsky.pipeline.Pipeline` and :py:const:`priority `. + This handler returns a :py:const:`label ` to the backward node with a given :py:const:`priority ` and :py:const:`cyclicality_flag `. If the priority is not given, `Pipeline.actor.label_priority` is used as default. @@ -170,7 +175,7 @@ def backward(priority: Optional[float] = None, cyclicality_flag: bool = True) -> (e.g the element with `index = len(labels)` has `index = 0`). Defaults to `True`. 
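+
+    A hypothetical usage sketch (the condition is an assumption):
+
+    .. code-block:: python
+
+        TRANSITIONS: {lbl.backward(cyclicality_flag=False): cnd.has_text("back")}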
""" - def back_transition_handler(ctx: Context, pipeline: Pipeline) -> NodeLabel3Type: + def back_transition_handler(ctx: Context, pipeline: Pipeline) -> ConstLabel: return _get_label_by_index_shifting( ctx, pipeline, priority, increment_flag=False, cyclicality_flag=cyclicality_flag ) diff --git a/dff/script/responses/__init__.py b/chatsky/script/responses/__init__.py similarity index 100% rename from dff/script/responses/__init__.py rename to chatsky/script/responses/__init__.py diff --git a/dff/script/responses/std_responses.py b/chatsky/script/responses/std_responses.py similarity index 90% rename from dff/script/responses/std_responses.py rename to chatsky/script/responses/std_responses.py index d8c509369..3f5abf39d 100644 --- a/dff/script/responses/std_responses.py +++ b/chatsky/script/responses/std_responses.py @@ -8,11 +8,12 @@ This module provides only one predefined response function that can be used to quickly respond to the user and keep the conversation flowing. """ + import random from typing import List -from dff.pipeline import Pipeline -from dff.script import Context, Message +from chatsky.pipeline import Pipeline +from chatsky.script import Context, Message def choice(*responses): diff --git a/chatsky/slots/__init__.py b/chatsky/slots/__init__.py new file mode 100644 index 000000000..c0a22623c --- /dev/null +++ b/chatsky/slots/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# flake8: noqa: F401 + +from chatsky.slots.slots import GroupSlot, ValueSlot, RegexpSlot, FunctionSlot +from chatsky.slots.conditions import slots_extracted +from chatsky.slots.processing import extract, extract_all, unset, unset_all, fill_template +from chatsky.slots.response import filled_template diff --git a/chatsky/slots/conditions.py b/chatsky/slots/conditions.py new file mode 100644 index 000000000..d2e3f9d33 --- /dev/null +++ b/chatsky/slots/conditions.py @@ -0,0 +1,32 @@ +""" +Conditions +--------------------------- +Provides slot-related conditions. +""" + +from __future__ import annotations +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from chatsky.script import Context + from chatsky.slots.slots import SlotName + from chatsky.pipeline import Pipeline + + +def slots_extracted(*slots: SlotName, mode: Literal["any", "all"] = "all"): + """ + Conditions that checks if slots are extracted. + + :param slots: Names for slots that need to be checked. + :param mode: Whether to check if all slots are extracted or any slot is extracted. + """ + + def check_slot_state(ctx: Context, pipeline: Pipeline) -> bool: + manager = ctx.framework_data.slot_manager + if mode == "all": + return all(manager.is_slot_extracted(slot) for slot in slots) + elif mode == "any": + return any(manager.is_slot_extracted(slot) for slot in slots) + raise ValueError(f"{mode!r} not in ['any', 'all'].") + + return check_slot_state diff --git a/chatsky/slots/processing.py b/chatsky/slots/processing.py new file mode 100644 index 000000000..df3df43f9 --- /dev/null +++ b/chatsky/slots/processing.py @@ -0,0 +1,98 @@ +""" +Processing +--------------------------- +This module provides wrappers for :py:class:`~chatsky.slots.slots.SlotManager`'s API. 
+""" + +from __future__ import annotations + +import logging +from typing import Awaitable, Callable, TYPE_CHECKING + +if TYPE_CHECKING: + from chatsky.slots.slots import SlotName + from chatsky.script import Context + from chatsky.pipeline import Pipeline + +logger = logging.getLogger(__name__) + + +def extract(*slots: SlotName) -> Callable[[Context, Pipeline], Awaitable[None]]: + """ + Extract slots listed slots. + This will override all slots even if they are already extracted. + + :param slots: List of slot names to extract. + """ + + async def inner(ctx: Context, pipeline: Pipeline) -> None: + manager = ctx.framework_data.slot_manager + for slot in slots: # todo: maybe gather + await manager.extract_slot(slot, ctx, pipeline) + + return inner + + +def extract_all(): + """ + Extract all slots defined in the pipeline. + """ + + async def inner(ctx: Context, pipeline: Pipeline): + manager = ctx.framework_data.slot_manager + await manager.extract_all(ctx, pipeline) + + return inner + + +def unset(*slots: SlotName) -> Callable[[Context, Pipeline], None]: + """ + Mark specified slots as not extracted and clear extracted values. + + :param slots: List of slot names to extract. + """ + + def unset_inner(ctx: Context, pipeline: Pipeline) -> None: + manager = ctx.framework_data.slot_manager + for slot in slots: + manager.unset_slot(slot) + + return unset_inner + + +def unset_all(): + """ + Mark all slots as not extracted and clear all extracted values. + """ + + def inner(ctx: Context, pipeline: Pipeline): + manager = ctx.framework_data.slot_manager + manager.unset_all_slots() + + return inner + + +def fill_template() -> Callable[[Context, Pipeline], None]: + """ + Fill the response template in the current node. + + Response message of the current node should be a format-string: e.g. "Your username is {profile.username}". + """ + + def inner(ctx: Context, pipeline: Pipeline) -> None: + manager = ctx.framework_data.slot_manager + # get current node response + response = ctx.current_node.response + + if response is None: + return + + if callable(response): + response = response(ctx, pipeline) + + new_text = manager.fill_template(response.text) + + response.text = new_text + ctx.current_node.response = response + + return inner diff --git a/chatsky/slots/response.py b/chatsky/slots/response.py new file mode 100644 index 000000000..473960704 --- /dev/null +++ b/chatsky/slots/response.py @@ -0,0 +1,34 @@ +""" +Response +--------------------------- +Slot-related Chatsky responses. +""" + +from __future__ import annotations +from typing import Callable, TYPE_CHECKING + +if TYPE_CHECKING: + from chatsky.script import Context, Message + from chatsky.pipeline import Pipeline + + +def filled_template(template: Message) -> Callable[[Context, Pipeline], Message]: + """ + Fill template with slot values. + The `text` attribute of the template message should be a format-string: + e.g. "Your username is {profile.username}". + + For the example above, if ``profile.username`` slot has value "admin", + it would return a copy of the message with the following text: + "Your username is admin". + + :param template: Template message with a format-string text. 
+ """ + + def fill_inner(ctx: Context, pipeline: Pipeline) -> Message: + message = template.model_copy() + new_text = ctx.framework_data.slot_manager.fill_template(template.text) + message.text = new_text + return message + + return fill_inner diff --git a/chatsky/slots/slots.py b/chatsky/slots/slots.py new file mode 100644 index 000000000..29dc44b9a --- /dev/null +++ b/chatsky/slots/slots.py @@ -0,0 +1,418 @@ +""" +Slots +----- +This module defines base classes for slots and some concrete implementations of them. +""" + +from __future__ import annotations + +import asyncio +import re +from abc import ABC, abstractmethod +from typing import Callable, Any, Awaitable, TYPE_CHECKING, Union +from typing_extensions import TypeAlias +import logging +from functools import reduce + +from pydantic import BaseModel, model_validator, Field + +from chatsky.utils.devel.async_helpers import wrap_sync_function_in_async +from chatsky.utils.devel.json_serialization import PickleEncodedValue + +if TYPE_CHECKING: + from chatsky.script import Context, Message + from chatsky.pipeline.pipeline.pipeline import Pipeline + + +logger = logging.getLogger(__name__) + + +SlotName: TypeAlias = str +""" +A string to identify slots. + +Top-level slots are identified by their key in a :py:class:`~.GroupSlot`. + +E.g. + +.. code:: python + + GroupSlot( + user=RegexpSlot(), + password=FunctionSlot, + ) + +Has two slots with names "user" and "password". + +For nested group slots use dots to separate names: + +.. code:: python + + GroupSlot( + user=GroupSlot( + name=FunctionSlot, + password=FunctionSlot, + ) + ) + +Has two slots with names "user.name" and "user.password". +""" + + +def recursive_getattr(obj, slot_name: SlotName): + def two_arg_getattr(__o, name): + # pydantic handles exception when accessing a non-existing extra-field on its own + # return None by default to avoid that + return getattr(__o, name, None) + + return reduce(two_arg_getattr, [obj, *slot_name.split(".")]) + + +def recursive_setattr(obj, slot_name: SlotName, value): + parent_slot, _, slot = slot_name.rpartition(".") + + if parent_slot: + setattr(recursive_getattr(obj, parent_slot), slot, value) + else: + setattr(obj, slot, value) + + +class SlotNotExtracted(Exception): + """This exception can be returned or raised by slot extractor if slot extraction is unsuccessful.""" + + pass + + +class ExtractedSlot(BaseModel, ABC): + """ + Represents value of an extracted slot. + + Instances of this class are managed by framework and + are stored in :py:attr:`~chatsky.script.core.context.FrameworkData.slot_manager`. + They can be accessed via the ``ctx.framework_data.slot_manager.get_extracted_slot`` method. 
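+
+    E.g. a hedged lookup (the slot name is illustrative):
+
+    .. code:: python
+
+        slot = ctx.framework_data.slot_manager.get_extracted_slot("person.name")
+        if slot.__slot_extracted__:
+            text = f"Hello, {slot}!"  # __str__ yields the extracted value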
+    """
+
+    @property
+    @abstractmethod
+    def __slot_extracted__(self) -> bool:
+        """Whether the slot is extracted."""
+        raise NotImplementedError
+
+    def __unset__(self):
+        """Mark slot as not extracted and clear extracted data (except for default value)."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def __str__(self):
+        """String representation is used to fill templates."""
+        raise NotImplementedError
+
+
+class ExtractedValueSlot(ExtractedSlot):
+    """Value extracted from :py:class:`~.ValueSlot`."""
+
+    is_slot_extracted: bool
+    extracted_value: PickleEncodedValue
+    default_value: PickleEncodedValue = None
+
+    @property
+    def __slot_extracted__(self) -> bool:
+        return self.is_slot_extracted
+
+    def __unset__(self):
+        self.is_slot_extracted = False
+        self.extracted_value = SlotNotExtracted("Slot manually unset.")
+
+    @property
+    def value(self):
+        """Extracted value or the default value if the slot is not extracted."""
+        return self.extracted_value if self.is_slot_extracted else self.default_value
+
+    def __str__(self):
+        return str(self.value)
+
+
+class ExtractedGroupSlot(ExtractedSlot, extra="allow"):
+    __pydantic_extra__: dict[str, Union["ExtractedValueSlot", "ExtractedGroupSlot"]]
+
+    @property
+    def __slot_extracted__(self) -> bool:
+        return all([slot.__slot_extracted__ for slot in self.__pydantic_extra__.values()])
+
+    def __unset__(self):
+        for child in self.__pydantic_extra__.values():
+            child.__unset__()
+
+    def __str__(self):
+        return str({key: str(value) for key, value in self.__pydantic_extra__.items()})
+
+    def update(self, old: "ExtractedGroupSlot"):
+        """
+        Rebase this extracted group slot on top of another one.
+        This is required to merge slot storage in-context
+        with a potentially different slot configuration passed to pipeline.
+
+        :param old: An instance of :py:class:`~.ExtractedGroupSlot` stored in-context.
+            Extracted values will be transferred to this object.
+        """
+        for slot in old.__pydantic_extra__:
+            if slot in self.__pydantic_extra__:
+                new_slot = self.__pydantic_extra__[slot]
+                old_slot = old.__pydantic_extra__[slot]
+                if isinstance(new_slot, ExtractedGroupSlot) and isinstance(old_slot, ExtractedGroupSlot):
+                    new_slot.update(old_slot)
+                if isinstance(new_slot, ExtractedValueSlot) and isinstance(old_slot, ExtractedValueSlot):
+                    self.__pydantic_extra__[slot] = old_slot
+
+
+class BaseSlot(BaseModel, frozen=True):
+    """
+    BaseSlot is a base class for all slots.
+    """
+
+    @abstractmethod
+    async def get_value(self, ctx: Context, pipeline: Pipeline) -> ExtractedSlot:
+        """
+        Extract slot value from :py:class:`~.Context` and return an instance of :py:class:`~.ExtractedSlot`.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def init_value(self) -> ExtractedSlot:
+        """
+        Provide an initial value to fill slot storage with.
+        """
+        raise NotImplementedError
+
+
+class ValueSlot(BaseSlot, frozen=True):
+    """
+    Value slot is a base class for all slots that are designed to extract concrete values.
+    Subclass it, if you want to declare your own slot type.
+    """
+
+    default_value: Any = None
+
+    @abstractmethod
+    async def extract_value(self, ctx: Context, pipeline: Pipeline) -> Union[Any, SlotNotExtracted]:
+        """
+        Return value extracted from context.
+
+        Return :py:exc:`~.SlotNotExtracted` to mark extraction as unsuccessful.
+
+        Raising exceptions is also allowed and will result in an unsuccessful extraction as well.
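+
+        A hedged sketch of a custom subclass (the ``EmailSlot`` name and its
+        token-based matching are illustrative, not part of the framework):
+
+        .. code:: python
+
+            class EmailSlot(ValueSlot, frozen=True):
+                async def extract_value(self, ctx, pipeline):
+                    request = ctx.last_request.text or ""
+                    for token in request.split():
+                        if "@" in token:
+                            return token
+                    return SlotNotExtracted("No email-like token found.")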
+        """
+        raise NotImplementedError
+
+    async def get_value(self, ctx: Context, pipeline: Pipeline) -> ExtractedValueSlot:
+        """Wrapper for :py:meth:`~.ValueSlot.extract_value` to handle exceptions."""
+        extracted_value = SlotNotExtracted("Caught an exit exception.")
+        is_slot_extracted = False
+
+        try:
+            extracted_value = await self.extract_value(ctx, pipeline)
+            is_slot_extracted = not isinstance(extracted_value, SlotNotExtracted)
+        except Exception as error:
+            logger.exception(f"Exception occurred during {self.__class__.__name__!r} extraction.", exc_info=error)
+            extracted_value = error
+        finally:
+            return ExtractedValueSlot.model_construct(
+                is_slot_extracted=is_slot_extracted,
+                extracted_value=extracted_value,
+                default_value=self.default_value,
+            )
+
+    def init_value(self) -> ExtractedValueSlot:
+        return ExtractedValueSlot.model_construct(
+            is_slot_extracted=False,
+            extracted_value=SlotNotExtracted("Initial slot extraction."),
+            default_value=self.default_value,
+        )
+
+
+class GroupSlot(BaseSlot, extra="allow", frozen=True):
+    """
+    A group of slots: every extra field of this model is a child slot,
+    either a :py:class:`~.ValueSlot` or a nested :py:class:`~.GroupSlot`.
+    """
+
+    __pydantic_extra__: dict[str, Union["ValueSlot", "GroupSlot"]]
+
+    def __init__(self, **kwargs):  # suppress unexpected argument warnings
+        super().__init__(**kwargs)
+
+    @model_validator(mode="after")
+    def __check_extra_field_names__(self):
+        """
+        Extra field names cannot be dunder names or contain dots.
+        """
+        for field in self.__pydantic_extra__.keys():
+            if "." in field:
+                raise ValueError(f"Extra field name cannot contain dots: {field!r}")
+            if field.startswith("__") and field.endswith("__"):
+                raise ValueError(f"Extra field names cannot be dunder: {field!r}")
+        return self
+
+    async def get_value(self, ctx: Context, pipeline: Pipeline) -> ExtractedGroupSlot:
+        child_values = await asyncio.gather(
+            *(child.get_value(ctx, pipeline) for child in self.__pydantic_extra__.values())
+        )
+        return ExtractedGroupSlot(
+            **{child_name: child_value for child_value, child_name in zip(child_values, self.__pydantic_extra__.keys())}
+        )
+
+    def init_value(self) -> ExtractedGroupSlot:
+        return ExtractedGroupSlot(
+            **{child_name: child.init_value() for child_name, child in self.__pydantic_extra__.items()}
+        )
+
+
+class RegexpSlot(ValueSlot, frozen=True):
+    """
+    RegexpSlot is a slot type that extracts its value using a regular expression.
+    You can pass a compiled or a non-compiled pattern to the `regexp` argument.
+    If you want to extract a particular group, but not the full match,
+    change the `match_group_idx` parameter.
+    """
+
+    regexp: str
+    match_group_idx: int = 0
+    "Index of the group to match."
+
+    async def extract_value(self, ctx: Context, _: Pipeline) -> Union[str, SlotNotExtracted]:
+        request_text = ctx.last_request.text
+        search = re.search(self.regexp, request_text)
+        return (
+            search.group(self.match_group_idx)
+            if search
+            else SlotNotExtracted(f"Failed to match pattern {self.regexp!r} in {request_text!r}.")
+        )
+
+
+class FunctionSlot(ValueSlot, frozen=True):
+    """
+    A simpler version of :py:class:`~.ValueSlot`.
+
+    Uses a user-defined `func` to extract slot value from the :py:attr:`~.Context.last_request` Message.
+    """
+
+    func: Callable[[Message], Union[Awaitable[Union[Any, SlotNotExtracted]], Any, SlotNotExtracted]]
+
+    async def extract_value(self, ctx: Context, _: Pipeline) -> Union[Any, SlotNotExtracted]:
+        return await wrap_sync_function_in_async(self.func, ctx.last_request)
+
+
+class SlotManager(BaseModel):
+    """
+    Provides API for managing slots.
+ + An instance of this class can be accessed via ``ctx.framework_data.slot_manager``. + """ + + slot_storage: ExtractedGroupSlot = Field(default_factory=ExtractedGroupSlot) + """Slot storage. Stored inside ctx.framework_data.""" + root_slot: GroupSlot = Field(default_factory=GroupSlot, exclude=True) + """Slot configuration passed during pipeline initialization.""" + + def set_root_slot(self, root_slot: GroupSlot): + """ + Set root_slot configuration from pipeline. + Update extracted slots with the new configuration: + + New slots are added with their :py:meth:`~.BaseSlot.init_value`. + Old extracted slot values are preserved only if their configuration did not change. + That is if they are still present in the config and if their fundamental type did not change + (i.e. `GroupSlot` did not turn into a `ValueSlot` or vice versa). + + This method is called by pipeline and is not supposed to be used otherwise. + """ + self.root_slot = root_slot + new_slot_storage = root_slot.init_value() + new_slot_storage.update(self.slot_storage) + self.slot_storage = new_slot_storage + + def get_slot(self, slot_name: SlotName) -> BaseSlot: + """ + Get slot configuration from the slot name. + + :raises KeyError: If the slot with the specified name does not exist. + """ + try: + slot = recursive_getattr(self.root_slot, slot_name) + if isinstance(slot, BaseSlot): + return slot + except (AttributeError, KeyError): + pass + raise KeyError(f"Could not find slot {slot_name!r}.") + + async def extract_slot(self, slot_name: SlotName, ctx: Context, pipeline: Pipeline) -> None: + """ + Extract slot `slot_name` and store extracted value in `slot_storage`. + + :raises KeyError: If the slot with the specified name does not exist. + """ + slot = self.get_slot(slot_name) + value = await slot.get_value(ctx, pipeline) + + recursive_setattr(self.slot_storage, slot_name, value) + + async def extract_all(self, ctx: Context, pipeline: Pipeline): + """ + Extract all slots from slot configuration `root_slot` and set `slot_storage` to the extracted value. + """ + self.slot_storage = await self.root_slot.get_value(ctx, pipeline) + + def get_extracted_slot(self, slot_name: SlotName) -> ExtractedSlot: + """ + Retrieve extracted value from `slot_storage`. + + :raises KeyError: If the slot with the specified name does not exist. + """ + try: + slot = recursive_getattr(self.slot_storage, slot_name) + if isinstance(slot, ExtractedSlot): + return slot + except (AttributeError, KeyError): + pass + raise KeyError(f"Could not find slot {slot_name!r}.") + + def is_slot_extracted(self, slot_name: str) -> bool: + """ + Return if the specified slot is extracted. + + :raises KeyError: If the slot with the specified name does not exist. + """ + return self.get_extracted_slot(slot_name).__slot_extracted__ + + def all_slots_extracted(self) -> bool: + """ + Return if all slots are extracted. + """ + return self.slot_storage.__slot_extracted__ + + def unset_slot(self, slot_name: SlotName) -> None: + """ + Mark specified slot as not extracted and clear extracted value. + + :raises KeyError: If the slot with the specified name does not exist. + """ + self.get_extracted_slot(slot_name).__unset__() + + def unset_all_slots(self) -> None: + """ + Mark all slots as not extracted and clear all extracted values. + """ + self.slot_storage.__unset__() + + def fill_template(self, template: str) -> str: + """ + Fill `template` string with extracted slot values and return a formatted string. + + `template` should be a format-string: + + E.g. 
"Your username is {profile.username}". + + For the example above, if ``profile.username`` slot has value "admin", + it would return the following text: + "Your username is admin". + """ + return template.format(**dict(self.slot_storage.__pydantic_extra__.items())) diff --git a/dff/stats/__init__.py b/chatsky/stats/__init__.py similarity index 83% rename from dff/stats/__init__.py rename to chatsky/stats/__init__.py index a93d7dbf8..0ef77234e 100644 --- a/dff/stats/__init__.py +++ b/chatsky/stats/__init__.py @@ -4,5 +4,5 @@ from opentelemetry.sdk.trace.export import ConsoleSpanExporter from opentelemetry.sdk._logs.export import InMemoryLogExporter, ConsoleLogExporter from opentelemetry.sdk.metrics.export import InMemoryMetricReader, ConsoleMetricExporter -from .utils import get_wrapper_field, set_logger_destination, set_tracer_destination +from .utils import get_extra_handler_name, set_logger_destination, set_tracer_destination from .instrumentor import OtelInstrumentor, OTLPMetricExporter, OTLPLogExporter, OTLPSpanExporter diff --git a/dff/stats/__main__.py b/chatsky/stats/__main__.py similarity index 98% rename from dff/stats/__main__.py rename to chatsky/stats/__main__.py index 92ca42e87..e22220119 100644 --- a/dff/stats/__main__.py +++ b/chatsky/stats/__main__.py @@ -13,7 +13,7 @@ # Create and import a configuration archive. # The import overrides existing dashboard configurations. - dff.stats config.yaml \\ + chatsky.stats config.yaml \\ -U superset_user \\ -P superset_password \\ -dP database_password \\ @@ -25,6 +25,7 @@ --outfile=config_artifact.zip """ + import sys import argparse from typing import Optional @@ -46,7 +47,7 @@ def main(parsed_args: Optional[argparse.Namespace] = None): The function accepts a yaml file; also, all of the options can also be overridden via the command line. Setting passwords interactively is supported. - dff.stats config.yaml \\ + chatsky.stats config.yaml \\ -U superset_user \\ -P superset_password \\ -dP database_password \\ diff --git a/dff/stats/cli.py b/chatsky/stats/cli.py similarity index 93% rename from dff/stats/cli.py rename to chatsky/stats/cli.py index 2b881b00d..ede565cb6 100644 --- a/dff/stats/cli.py +++ b/chatsky/stats/cli.py @@ -4,6 +4,7 @@ This modules defines commands that can be called via the command line interface. """ + from uuid import uuid4 import tempfile import shutil @@ -19,26 +20,26 @@ from omegaconf import OmegaConf from .utils import get_superset_session, drop_superset_assets except ImportError: - raise ImportError("Some packages are not found. Run `pip install dff[stats]`") + raise ImportError("Some packages are not found. Run `pip install chatsky[stats]`") logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -DFF_DIR = Path(__file__).absolute().parent.parent +CHATSKY_DIR = Path(__file__).absolute().parent.parent """ -Root directory of the local `dff` installation. +Root directory of the local `chatsky` installation. :meta hide-value: """ -DASHBOARD_DIR = str(DFF_DIR / "config" / "superset_dashboard") +DASHBOARD_DIR = str(CHATSKY_DIR / "config" / "superset_dashboard") """ Local path to superset dashboard files to import. :meta hide-value: """ -DASHBOARD_SLUG = "dff-stats" +DASHBOARD_SLUG = "chatsky-stats" """ -This variable stores a slug used for building the http address of the DFF dashboard. +This variable stores a slug used for building the http address of the Chatsky dashboard. 
""" DEFAULT_SUPERSET_URL = parse.urlunsplit(("http", "localhost:8088", "/", "", "")) """ @@ -58,7 +59,7 @@ :meta hide-value: """ -DFF_NODE_STATS_STATEMENT = """ +CHATSKY_NODE_STATS_STATEMENT = """ WITH main AS ( SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id, toUInt64OrNull({table}.LogAttributes['request_id']) as request_id, @@ -84,7 +85,7 @@ node_label FROM main """ -DFF_STATS_STATEMENT = """ +CHATSKY_STATS_STATEMENT = """ WITH main AS ( SELECT DISTINCT {table}.LogAttributes['context_id'] as context_id, toUInt64OrNull({table}.LogAttributes['request_id']) as request_id, @@ -110,7 +111,7 @@ node_label FROM main """ -DFF_FINAL_NODES_STATEMENT = """ +CHATSKY_FINAL_NODES_STATEMENT = """ WITH main AS ( SELECT LogAttributes['context_id'] AS context_id, max(toUInt64OrNull(LogAttributes['request_id'])) AS max_history @@ -132,9 +133,9 @@ """ SQL_STATEMENT_MAPPING = { - "dff_stats.yaml": DFF_STATS_STATEMENT, - "dff_node_stats.yaml": DFF_NODE_STATS_STATEMENT, - "dff_final_nodes.yaml": DFF_FINAL_NODES_STATEMENT, + "chatsky_stats.yaml": CHATSKY_STATS_STATEMENT, + "chatsky_node_stats.yaml": CHATSKY_NODE_STATS_STATEMENT, + "chatsky_final_nodes.yaml": CHATSKY_FINAL_NODES_STATEMENT, } """ Select statements for dashboard configuration with names and types represented as placeholders. @@ -170,7 +171,7 @@ def import_dashboard(parsed_args: Optional[argparse.Namespace] = None, zip_file: import_dashboard_url, headers=headers, data={ - "passwords": '{"databases/dff_database.yaml":"' + db_password + '"}', + "passwords": '{"databases/chatsky_database.yaml":"' + db_password + '"}', "overwrite": "true", }, files=[("formData", (zip_filename, f, "application/zip"))], @@ -235,7 +236,7 @@ def make_zip_config(parsed_args: argparse.Namespace) -> Path: shutil.copytree(DASHBOARD_DIR, nested_temp_dir) database_dir = Path(os.path.join(nested_temp_dir, "databases")) - dataset_dir = Path(os.path.join(nested_temp_dir, "datasets/dff_database")) + dataset_dir = Path(os.path.join(nested_temp_dir, "datasets/chatsky_database")) logger.info("Overriding the initial configuration.") # overwrite sqlalchemy uri diff --git a/dff/stats/default_extractors.py b/chatsky/stats/default_extractors.py similarity index 75% rename from dff/stats/default_extractors.py rename to chatsky/stats/default_extractors.py index f90a3e3ad..e390148f5 100644 --- a/dff/stats/default_extractors.py +++ b/chatsky/stats/default_extractors.py @@ -10,11 +10,12 @@ Detailed examples can be found in the `tutorials` section. """ + from datetime import datetime -from dff.script import Context -from dff.pipeline import ExtraHandlerRuntimeInfo, Pipeline -from .utils import get_wrapper_field +from chatsky.script import Context +from chatsky.pipeline import ExtraHandlerRuntimeInfo, Pipeline +from .utils import get_extra_handler_name async def get_current_label(ctx: Context, pipeline: Pipeline, info: ExtraHandlerRuntimeInfo): @@ -35,27 +36,22 @@ async def get_current_label(ctx: Context, pipeline: Pipeline, info: ExtraHandler async def get_timing_before(ctx: Context, _, info: ExtraHandlerRuntimeInfo): """ - Extract the pipeline component's start time. + Store the pipeline component's start time inside the context. This function is required for running the dashboard with the default configuration. - - The function leverages the `framework_states` field of the context to store results. - As a result, the function output is cleared on every turn and does not get persisted - to the context storage. 
""" start_time = datetime.now() - ctx.framework_states[get_wrapper_field(info, "time")] = start_time + ctx.framework_data.stats[get_extra_handler_name(info, "time")] = start_time async def get_timing_after(ctx: Context, _, info: ExtraHandlerRuntimeInfo): # noqa: F811 """ - Extract the pipeline component's finish time. + Extract the pipeline component's execution time. + Requires :py:func:`~.get_timing_before` to be called previously in order to calculate the time. This function is required for running the dashboard with the default configuration. - - The function leverages the `framework_states` field of the context to store results. - As a result, the function output is cleared on every turn and does not get persisted - to the context storage. """ - start_time = ctx.framework_states[get_wrapper_field(info, "time")] + start_time = ctx.framework_data.stats.pop(get_extra_handler_name(info, "time"), None) + if start_time is None: + return None data = {"execution_time": str(datetime.now() - start_time)} return data diff --git a/dff/stats/instrumentor.py b/chatsky/stats/instrumentor.py similarity index 92% rename from dff/stats/instrumentor.py rename to chatsky/stats/instrumentor.py index 41f7c2ca5..a395c7b4c 100644 --- a/dff/stats/instrumentor.py +++ b/chatsky/stats/instrumentor.py @@ -3,11 +3,12 @@ ------------- This modules contains the :py:class:`~OtelInstrumentor` class that implements Opentelemetry's `BaseInstrumentor` interface and allows for automated -instrumentation of Dialog Flow Framework applications, +instrumentation of Chatsky applications, e.g. for automated logging and log export. For detailed reference, see `~OtelInstrumentor` class. """ + import asyncio from typing import Collection, Optional @@ -25,23 +26,23 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter -from dff.script.core.context import get_last_index -from dff.stats.utils import ( +from chatsky.script.core.context import get_last_index +from chatsky.stats.utils import ( resource, - get_wrapper_field, + get_extra_handler_name, set_logger_destination, set_meter_destination, set_tracer_destination, ) -from dff.stats import default_extractors +from chatsky.stats import default_extractors -INSTRUMENTS = ["dff"] +INSTRUMENTS = ["chatsky"] class OtelInstrumentor(BaseInstrumentor): """ - Utility class for instrumenting DFF-related functions + Utility class for instrumenting Chatsky-related functions that implements the :py:class:`~BaseInstrumentor` interface. :py:meth:`~instrument` and :py:meth:`~uninstrument` methods are available to apply and revert the instrumentation effects, @@ -49,9 +50,9 @@ class OtelInstrumentor(BaseInstrumentor): .. code-block:: - dff_instrumentor = OtelInstrumentor() - dff_instrumentor.instrument() - dff_instrumentor.uninstrument() + chatsky_instrumentor = OtelInstrumentor() + chatsky_instrumentor.instrument() + chatsky_instrumentor.uninstrument() Opentelemetry provider instances can be optionally passed to the class constructor. Otherwise, the global logger, tracer and meter providers are leveraged. @@ -61,7 +62,7 @@ class OtelInstrumentor(BaseInstrumentor): .. code-block:: - @dff_instrumentor + @chatsky_instrumentor async def function(context, pipeline, runtime_info): ... 
@@ -144,7 +145,7 @@ def _configure_providers(self, logger_provider, tracer_provider, meter_provider) @decorator async def __call__(self, wrapped, _, args, kwargs): """ - Regular functions that match the :py:class:`~dff.pipeline.types.ExtraHandlerFunction` + Regular functions that match the :py:class:`~chatsky.pipeline.types.ExtraHandlerFunction` signature can be decorated with the class instance to log the returned value. This method implements the logging procedure. The returned value is assumed to be `dict` or `NoneType`. @@ -157,7 +158,7 @@ async def __call__(self, wrapped, _, args, kwargs): :param kwargs: Keyword arguments of the decorated function. """ ctx, _, info = args - pipeline_component = get_wrapper_field(info) + pipeline_component = get_extra_handler_name(info) attributes = { "context_id": str(ctx.id), "request_id": get_last_index(ctx.requests), diff --git a/dff/stats/utils.py b/chatsky/stats/utils.py similarity index 96% rename from dff/stats/utils.py rename to chatsky/stats/utils.py index c4b3c472a..51ac9ad4d 100644 --- a/dff/stats/utils.py +++ b/chatsky/stats/utils.py @@ -11,6 +11,7 @@ set_tracer_destination(OTLPSpanExporter("grpc://localhost:4317", insecure=True)) """ + import json import getpass from urllib import parse @@ -32,9 +33,9 @@ from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter, LogExporter from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter, MetricExporter -from dff.pipeline import ExtraHandlerRuntimeInfo +from chatsky.pipeline import ExtraHandlerRuntimeInfo -SERVICE_NAME = "dialog_flow_framework" +SERVICE_NAME = "chatsky" resource = Resource.create({"service.name": SERVICE_NAME}) """ @@ -42,11 +43,11 @@ """ tracer_provider = TracerProvider(resource=resource) """ -Global tracer provider bound to the DFF resource. +Global tracer provider bound to the Chatsky resource. """ logger_provider = LoggerProvider(resource=resource) """ -Global logger provider bound to the DFF resource. +Global logger provider bound to the Chatsky resource. """ set_logger_provider(logger_provider) set_tracer_provider(tracer_provider) @@ -88,7 +89,7 @@ def set_tracer_destination(exporter: Optional[SpanExporter] = None): get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter)) -def get_wrapper_field(info: ExtraHandlerRuntimeInfo, postfix: str = "") -> str: +def get_extra_handler_name(info: ExtraHandlerRuntimeInfo, postfix: str = "") -> str: """ This function can be used to obtain a key, under which the wrapper data will be stored in the context. @@ -96,6 +97,7 @@ def get_wrapper_field(info: ExtraHandlerRuntimeInfo, postfix: str = "") -> str: :param info: Handler runtime info obtained from the pipeline. :param postfix: Field-specific postfix that will be appended to the field name. 
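 
     E.g. for a component with path ``"pipeline.actor"`` and postfix ``"time"``
     the resulting key would be ``"pipeline-actor-time"``: dots are replaced
     with dashes, as the function body below shows.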
""" + path = info.component.path.replace(".", "-") return f"{path}" + (f"-{postfix}" if postfix else "") diff --git a/dff/utils/__init__.py b/chatsky/utils/__init__.py similarity index 100% rename from dff/utils/__init__.py rename to chatsky/utils/__init__.py diff --git a/chatsky/utils/db_benchmark/__init__.py b/chatsky/utils/db_benchmark/__init__.py new file mode 100644 index 000000000..9b464f464 --- /dev/null +++ b/chatsky/utils/db_benchmark/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +from chatsky.utils.db_benchmark.benchmark import ( + time_context_read_write, + DBFactory, + BenchmarkConfig, + BenchmarkCase, + save_results_to_file, + benchmark_all, +) +from chatsky.utils.db_benchmark.report import report +from chatsky.utils.db_benchmark.basic_config import BasicBenchmarkConfig, basic_configurations diff --git a/dff/utils/db_benchmark/basic_config.py b/chatsky/utils/db_benchmark/basic_config.py similarity index 98% rename from dff/utils/db_benchmark/basic_config.py rename to chatsky/utils/db_benchmark/basic_config.py index 2e84f6b80..11e744dd0 100644 --- a/dff/utils/db_benchmark/basic_config.py +++ b/chatsky/utils/db_benchmark/basic_config.py @@ -7,6 +7,7 @@ as well as a set of configurations that covers different dialogs a user might have and some edge-cases (:py:data:`~.basic_configurations`). """ + from typing import Tuple, Optional import string import random @@ -14,8 +15,8 @@ from humanize import naturalsize from pympler import asizeof -from dff.script import Message, Context -from dff.utils.db_benchmark.benchmark import BenchmarkConfig +from chatsky.script import Message, Context +from chatsky.utils.db_benchmark.benchmark import BenchmarkConfig def get_dict(dimensions: Tuple[int, ...]): diff --git a/dff/utils/db_benchmark/benchmark.py b/chatsky/utils/db_benchmark/benchmark.py similarity index 97% rename from dff/utils/db_benchmark/benchmark.py rename to chatsky/utils/db_benchmark/benchmark.py index 12e784fb2..f1132d283 100644 --- a/dff/utils/db_benchmark/benchmark.py +++ b/chatsky/utils/db_benchmark/benchmark.py @@ -12,12 +12,13 @@ Wrappers use :py:class:`~.BenchmarkConfig` interface to configure benchmarks. A simple configuration class as well as a configuration set are provided by -:py:mod:`dff.utils.db_benchmark.basic_config`. +:py:mod:`chatsky.utils.db_benchmark.basic_config`. To view files generated by :py:func:`~.save_results_to_file` use either -:py:func:`~dff.utils.db_benchmark.report.report` or +:py:func:`~chatsky.utils.db_benchmark.report.report` or `our streamlit app <../_misc/benchmark_streamlit.py>`_. """ + from uuid import uuid4 from pathlib import Path from time import perf_counter @@ -31,8 +32,8 @@ from pydantic import BaseModel, Field from tqdm.auto import tqdm -from dff.context_storages import DBContextStorage -from dff.script import Context +from chatsky.context_storages import DBContextStorage +from chatsky.script import Context def time_context_read_write( @@ -59,7 +60,7 @@ def time_context_read_write( (to emulate context updating during dialog). The function should return `None` to stop updating contexts. For an example of such function, see implementation of - :py:meth:`dff.utils.db_benchmark.basic_config.BasicBenchmarkConfig.context_updater`. + :py:meth:`chatsky.utils.db_benchmark.basic_config.BasicBenchmarkConfig.context_updater`. To avoid keeping many contexts in memory, this function will be called repeatedly at least `context_num` times. 
@@ -133,7 +134,7 @@ class DBFactory(BaseModel):
     uri: str
     """URI of the context storage."""
-    factory_module: str = "dff.context_storages"
+    factory_module: str = "chatsky.context_storages"
     """A module containing `factory`."""
     factory: str = "context_storage_factory"
     """Name of the context storage factory. (function that creates context storages from URIs)"""
@@ -154,7 +155,7 @@ class BenchmarkConfig(BaseModel, abc.ABC, frozen=True):
     Also defines a method (`info`) for displaying information about this configuration.
 
     A simple way to configure benchmarks is provided by
-    :py:class:`~.dff.utils.db_benchmark.basic_config.BasicBenchmarkConfig`.
+    :py:class:`~.chatsky.utils.db_benchmark.basic_config.BasicBenchmarkConfig`.
 
     Inherit from this class only if `BasicBenchmarkConfig` is not enough for your benchmarking needs.
     """
@@ -343,7 +344,7 @@ def save_results_to_file(
     Results are saved in JSON format with this schema:
     `utils/db_benchmark/benchmark_schema.json <../_misc/benchmark_schema.json>`_.
 
-    Files created by this function cen be viewed either by using :py:func:`~dff.utils.db_benchmark.report.report` or
+    Files created by this function can be viewed either by using :py:func:`~chatsky.utils.db_benchmark.report.report` or
     the streamlit app located in the utils directory:
     `utils/db_benchmark/benchmark_streamlit.py <../_misc/benchmark_streamlit.py>`_.
diff --git a/dff/utils/db_benchmark/report.py b/chatsky/utils/db_benchmark/report.py
similarity index 85%
rename from dff/utils/db_benchmark/report.py
rename to chatsky/utils/db_benchmark/report.py
index e65778966..1bccf777a 100644
--- a/dff/utils/db_benchmark/report.py
+++ b/chatsky/utils/db_benchmark/report.py
@@ -3,6 +3,7 @@
 --------
 This module contains a function to print benchmark results to console.
 """
+
 from pathlib import Path
 from typing import Union, Set, Literal
 import json
@@ -20,7 +21,7 @@ def report(
 
     :param file: File with benchmark results generated by
-        :py:func:`~dff.utils.db_benchmark.benchmark.save_results_to_file`.
+        :py:func:`~chatsky.utils.db_benchmark.benchmark.save_results_to_file`.
     :param display: A set of objects to display in results.
         Values allowed inside the set:
@@ -44,9 +45,11 @@ def report(
         "config": "\n".join(f"{k}: {v}" for k, v in benchmark["benchmark_config"].items()),
         "metrics": "".join(
             [
-                f"{metric.title() + ': ' + str(benchmark['average_results']['pretty_' + metric]):20}"
-                if benchmark["success"]
-                else benchmark["result"]
+                (
+                    f"{metric.title() + ': ' + str(benchmark['average_results']['pretty_' + metric]):20}"
+                    if benchmark["success"]
+                    else benchmark["result"]
+                )
                 for metric in ("write", "read", "update", "read+update")
             ]
         ),
diff --git a/chatsky/utils/devel/__init__.py b/chatsky/utils/devel/__init__.py
new file mode 100644
index 000000000..affbce004
--- /dev/null
+++ b/chatsky/utils/devel/__init__.py
@@ -0,0 +1,14 @@
+"""
+Devel Utils
+-----------
+These utils contain useful classes/functions that are often used in various
+parts of the framework.
+"""
+
+from .json_serialization import (
+    JSONSerializableDict,
+    PickleEncodedValue,
+    JSONSerializableExtras,
+)
+from .extra_field_helpers import grab_extra_fields
+from .async_helpers import wrap_sync_function_in_async
diff --git a/chatsky/utils/devel/async_helpers.py b/chatsky/utils/devel/async_helpers.py
new file mode 100644
index 000000000..13cbc640b
--- /dev/null
+++ b/chatsky/utils/devel/async_helpers.py
@@ -0,0 +1,24 @@
+"""
+Async Helpers
+-------------
+Tools to help with async.
+""" + +import asyncio +from typing import Callable, Any + + +async def wrap_sync_function_in_async(func: Callable, *args, **kwargs) -> Any: + """ + Utility function, that wraps both functions and coroutines in coroutines. + Invokes `func` if it is just a callable and awaits, if this is a coroutine. + + :param func: Callable to wrap. + :param \\*args: Function args. + :param \\**kwargs: Function kwargs. + :return: What function returns. + """ + if asyncio.iscoroutinefunction(func): + return await func(*args, **kwargs) + else: + return func(*args, **kwargs) diff --git a/chatsky/utils/devel/extra_field_helpers.py b/chatsky/utils/devel/extra_field_helpers.py new file mode 100644 index 000000000..13f457d27 --- /dev/null +++ b/chatsky/utils/devel/extra_field_helpers.py @@ -0,0 +1,22 @@ +""" +Extra field helpers +------------------- +Helpers for managing pydantic extra fields. +""" + +from typing import List + +from pydantic import BaseModel + + +def grab_extra_fields(attachment: BaseModel, extra_fields: List[str]): + """ + Convenience method for passing attachment extras as named arguments to API functions. + This might be useful for making sure no typos appear in code. + Accepts a list of extra names and makes a dictionary of extras mathing these names. + + :param attachment: attachment whose extras will be used. + :param extra_fields: list of extras that will be used. + """ + + return {extra_field: attachment.__pydantic_extra__.get(extra_field, None) for extra_field in extra_fields} diff --git a/chatsky/utils/devel/json_serialization.py b/chatsky/utils/devel/json_serialization.py new file mode 100644 index 000000000..f198dc47c --- /dev/null +++ b/chatsky/utils/devel/json_serialization.py @@ -0,0 +1,193 @@ +""" +Serialization +------------- +Tools that provide JSON serialization via Pickle for unserializable objects. + +- :py:data:`~.PickleEncodedValue`: + A field annotated with this will be pickled/unpickled during JSON-serialization/validation. +- :py:data:`~.JSONSerializableDict`: + A dictionary field annotated with this will make all its items smart-serializable: + If an item is serializable -- nothing would change. + Otherwise -- it will be serialized via pickle. +- :py:class:`~.JSONSerializableExtras`: + A pydantic base class that makes its extra fields a `JSONSerializableDict`. +""" + +from base64 import decodebytes, encodebytes +from copy import deepcopy +from pickle import dumps, loads +from typing import Any, Dict, List, Union +from typing_extensions import Annotated, TypeAlias +from pydantic import ( + JsonValue, + PlainSerializer, + PlainValidator, + RootModel, + BaseModel, + model_validator, + model_serializer, +) +from pydantic_core import PydanticSerializationError + +_JSON_EXTRA_FIELDS_KEYS = "__pickled_extra_fields__" +""" +This key is used in :py:data:`~.JSONSerializableDict` to remember pickled items. +""" + +Serializable: TypeAlias = Dict[str, Union[JsonValue, List[Any], Dict[str, Any], Any]] +"""Type annotation for objects supported by :py:func:`~.json_pickle_serializer`.""" + + +class _WrapperModel(RootModel): + """ + Wrapper model for testing whether an object is serializable to JSON. + """ + + root: Any + + +def pickle_serializer(value: Any) -> str: + """ + Serializer function that serializes any pickle-serializable value into JSON-serializable. + Serializes value with pickle and encodes bytes as base64 string. + + :param value: Pickle-serializable object. + :return: String-encoded object. 
+ """ + + return encodebytes(dumps(value)).decode() + + +def pickle_validator(value: str) -> Any: + """ + Validator function that validates base64 string encoded bytes as a pickle-serializable value. + Decodes base64 string and validates value with pickle. + + :param value: String-encoded string. + :return: Pickle-serializable object. + """ + + return loads(decodebytes(value.encode())) + + +def json_pickle_serializer(model: Serializable) -> Serializable: + """ + Serializer function that serializes a dictionary or Pydantic object to JSON. + For every object field, it checks whether the field is JSON serializable, + and if it's not, serializes it using pickle. + It also keeps track of pickle-serializable field names in a special list. + + :param model: Pydantic model object or a dictionary. + :original_serializer: Original serializer function for model. + :return: model with all the fields serialized to JSON. + """ + + extra_fields = list() + model_copy = deepcopy(model) + + for field_name, field_value in model_copy.items(): + try: + if isinstance(field_value, bytes): + raise PydanticSerializationError("") + else: + model_copy[field_name] = _WrapperModel(root=field_value).model_dump(mode="json") + except PydanticSerializationError: + model_copy[field_name] = pickle_serializer(field_value) + extra_fields += [field_name] + + if len(extra_fields) > 0: + model_copy[_JSON_EXTRA_FIELDS_KEYS] = extra_fields + return model_copy + + +def json_pickle_validator(model: Serializable) -> Serializable: + """ + Validator function that validates a JSON dictionary to a python dictionary. + For every object field, it checks if the field is pickle-serialized, + and if it is, validates it using pickle. + + :param model: Pydantic model object or a dictionary. + :return: model with all the fields serialized to JSON. + """ + + model_copy = deepcopy(model) + + if _JSON_EXTRA_FIELDS_KEYS in model.keys(): + for extra_key in model[_JSON_EXTRA_FIELDS_KEYS]: + extra_value = model[extra_key] + model_copy[extra_key] = pickle_validator(extra_value) + del model_copy[_JSON_EXTRA_FIELDS_KEYS] + + return model_copy + + +PickleSerializer = PlainSerializer(pickle_serializer, when_used="json") +"""Pydantic wrapper of :py:func:`~.pickle_serializer`.""" +PickleValidator = PlainValidator(pickle_validator) +"""Pydantic wrapper of :py:func:`~.pickle_validator`.""" +PickleEncodedValue = Annotated[Any, PickleSerializer, PickleValidator] +""" +Annotation for field that makes it JSON serializable via pickle: + +This field is always a normal object when inside its class but is a string encoding of the object +outside of the class -- either after serialization or before initialization. +As such this field cannot be used during initialization and the only way to use it is to bypass validation. + +.. code:: python + + class MyClass(BaseModel): + my_field: Optional[PickleEncodedValue] = None # the field must have a default value + + my_obj = MyClass() # the field cannot be set during init + my_obj.my_field = unserializable_object # can be set manually to avoid validation + +Alternatively, ``BaseModel.model_construct`` may be used to bypass validation, +though it would bypass validation of all fields. 
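+
+.. code:: python
+
+    # continuing the example above: bypasses validation of all fields
+    my_obj = MyClass.model_construct(my_field=unserializable_object)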
+""" + +JSONPickleSerializer = PlainSerializer(json_pickle_serializer, when_used="json") +"""Pydantic wrapper of :py:func:`~.json_pickle_serializer`.""" +JSONPickleValidator = PlainValidator(json_pickle_validator) +"""Pydantic wrapper of :py:func:`~.json_pickle_validator`.""" +JSONSerializableDict = Annotated[Serializable, JSONPickleSerializer, JSONPickleValidator] +""" +Annotation for dictionary or Pydantic model that makes all its fields JSON serializable. + +This uses a reserved dictionary key :py:data:`~._JSON_EXTRA_FIELDS_KEYS` to store +fields serialized that way. +""" + + +class JSONSerializableExtras(BaseModel, extra="allow"): + """ + This model makes extra fields pickle-serializable. + Do not use :py:data:`~._JSON_EXTRA_FIELDS_KEYS` as an extra field name. + """ + + def __init__(self, **kwargs): # supress unknown arg warnings + super().__init__(**kwargs) + + @model_validator(mode="after") + def extra_validator(self): + """ + Validate model along with the `extras` field: i.e. all the fields not listed in the model. + + :return: Validated model. + """ + self.__pydantic_extra__ = json_pickle_validator(self.__pydantic_extra__) + return self + + @model_serializer(mode="wrap", when_used="json") + def extra_serializer(self, original_serializer) -> Dict[str, Any]: + """ + Serialize model along with the `extras` field: i.e. all the fields not listed in the model. + + :param original_serializer: Function originally used for serialization by Pydantic. + :return: Serialized model. + """ + model_copy = self.model_copy(deep=True) + for extra_name in self.model_extra.keys(): + delattr(model_copy, extra_name) + model_dict = original_serializer(model_copy) + model_dict.update(json_pickle_serializer(self.model_extra)) + return model_dict diff --git a/chatsky/utils/docker/README.md b/chatsky/utils/docker/README.md new file mode 100644 index 000000000..5b4457a54 --- /dev/null +++ b/chatsky/utils/docker/README.md @@ -0,0 +1,11 @@ +# Chatsky Docker utils + +## Description + +This directory provides Docker files, necessary for deployment +of various Chatsky utilities. + +## Contents + +* dockerfile_stats - Dockerfile for Chatsky statistics dashboard. +* entrypoint_stats.sh - Entrypoint script for Chatsky statistics dashboard. 
\ No newline at end of file diff --git a/dff/utils/docker/dockerfile_stats b/chatsky/utils/docker/dockerfile_stats similarity index 100% rename from dff/utils/docker/dockerfile_stats rename to chatsky/utils/docker/dockerfile_stats diff --git a/dff/utils/docker/entrypoint_stats.sh b/chatsky/utils/docker/entrypoint_stats.sh similarity index 100% rename from dff/utils/docker/entrypoint_stats.sh rename to chatsky/utils/docker/entrypoint_stats.sh diff --git a/dff/utils/docker/superset_config_docker.py b/chatsky/utils/docker/superset_config_docker.py similarity index 100% rename from dff/utils/docker/superset_config_docker.py rename to chatsky/utils/docker/superset_config_docker.py diff --git a/dff/utils/otel/otelcol-config-extras.yml b/chatsky/utils/otel/otelcol-config-extras.yml similarity index 100% rename from dff/utils/otel/otelcol-config-extras.yml rename to chatsky/utils/otel/otelcol-config-extras.yml diff --git a/dff/utils/otel/otelcol-config.yml b/chatsky/utils/otel/otelcol-config.yml similarity index 100% rename from dff/utils/otel/otelcol-config.yml rename to chatsky/utils/otel/otelcol-config.yml diff --git a/dff/utils/parser/__init__.py b/chatsky/utils/parser/__init__.py similarity index 100% rename from dff/utils/parser/__init__.py rename to chatsky/utils/parser/__init__.py diff --git a/dff/utils/testing/__init__.py b/chatsky/utils/testing/__init__.py similarity index 79% rename from dff/utils/testing/__init__.py rename to chatsky/utils/testing/__init__.py index 4e1de7c35..2e13da083 100644 --- a/dff/utils/testing/__init__.py +++ b/chatsky/utils/testing/__init__.py @@ -6,6 +6,6 @@ try: import pytest - pytest.register_assert_rewrite("dff.utils.testing.telegram") + pytest.register_assert_rewrite("chatsky.utils.testing.telegram") except ImportError: ... diff --git a/dff/utils/testing/cleanup_db.py b/chatsky/utils/testing/cleanup_db.py similarity index 98% rename from dff/utils/testing/cleanup_db.py rename to chatsky/utils/testing/cleanup_db.py index 85cf9364d..fdc8f4635 100644 --- a/dff/utils/testing/cleanup_db.py +++ b/chatsky/utils/testing/cleanup_db.py @@ -4,9 +4,10 @@ This module defines functions that allow to delete data in various types of databases, including JSON, MongoDB, Pickle, Redis, Shelve, SQL, and YDB databases. """ + import os -from dff.context_storages import ( +from chatsky.context_storages import ( JSONContextStorage, MongoContextStorage, PickleContextStorage, diff --git a/dff/utils/testing/common.py b/chatsky/utils/testing/common.py similarity index 85% rename from dff/utils/testing/common.py rename to chatsky/utils/testing/common.py index 607e7cda2..6f8890ff8 100644 --- a/dff/utils/testing/common.py +++ b/chatsky/utils/testing/common.py @@ -3,13 +3,14 @@ ------ This module contains several functions which are used to run demonstrations in tutorials. 
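 
 A hedged example of the typical tutorial pattern (``TOY_SCRIPT_ARGS`` and
 ``HAPPY_PATH`` come from :py:mod:`~chatsky.utils.testing.toy_script`):
 
 .. code:: python
 
     from chatsky.pipeline import Pipeline
     from chatsky.utils.testing import check_happy_path, is_interactive_mode
     from chatsky.utils.testing.toy_script import TOY_SCRIPT_ARGS, HAPPY_PATH
 
     pipeline = Pipeline.from_script(*TOY_SCRIPT_ARGS)
     check_happy_path(pipeline, HAPPY_PATH)
     if is_interactive_mode():
         pipeline.run()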
""" + from os import getenv -from typing import Callable, Tuple, Optional +from typing import Callable, Tuple, Optional, Union from uuid import uuid4 -from dff.script import Context, Message -from dff.pipeline import Pipeline -from dff.utils.testing.response_comparers import default_comparer +from chatsky.script import Context, Message +from chatsky.pipeline import Pipeline +from chatsky.utils.testing.response_comparers import default_comparer def is_interactive_mode() -> bool: # pragma: no cover @@ -31,7 +32,7 @@ def is_interactive_mode() -> bool: # pragma: no cover def check_happy_path( pipeline: Pipeline, - happy_path: Tuple[Tuple[Message, Message], ...], + happy_path: Tuple[Tuple[Union[str, Message], Union[str, Message]], ...], # This optional argument is used for additional processing of candidate responses and reference responses response_comparer: Callable[[Message, Message, Context], Optional[str]] = default_comparer, printout_enable: bool = True, @@ -50,7 +51,11 @@ def check_happy_path( """ ctx_id = uuid4() # get random ID for current context - for step_id, (request, reference_response) in enumerate(happy_path): + for step_id, (request_raw, reference_response_raw) in enumerate(happy_path): + request = Message(text=request_raw) if isinstance(request_raw, str) else request_raw + reference_response = ( + Message(text=reference_response_raw) if isinstance(reference_response_raw, str) else reference_response_raw + ) ctx = pipeline(request, ctx_id) candidate_response = ctx.last_response if printout_enable: diff --git a/dff/utils/testing/response_comparers.py b/chatsky/utils/testing/response_comparers.py similarity index 93% rename from dff/utils/testing/response_comparers.py rename to chatsky/utils/testing/response_comparers.py index a8a36c1d3..dd6c9189a 100644 --- a/dff/utils/testing/response_comparers.py +++ b/chatsky/utils/testing/response_comparers.py @@ -3,9 +3,10 @@ ----------------- This module defines function used to compare two response objects. """ + from typing import Any, Optional -from dff.script import Context, Message +from chatsky.script import Context, Message def default_comparer(candidate: Message, reference: Message, _: Context) -> Optional[Any]: diff --git a/dff/utils/testing/toy_script.py b/chatsky/utils/testing/toy_script.py similarity index 62% rename from dff/utils/testing/toy_script.py rename to chatsky/utils/testing/toy_script.py index 27e7f94be..1f0c38dd4 100644 --- a/dff/utils/testing/toy_script.py +++ b/chatsky/utils/testing/toy_script.py @@ -4,31 +4,32 @@ This module contains a simple script and a dialog which are used in tutorials. """ -from dff.script.conditions import exact_match -from dff.script import TRANSITIONS, RESPONSE, Message + +from chatsky.script.conditions import exact_match +from chatsky.script import TRANSITIONS, RESPONSE, Message TOY_SCRIPT = { "greeting_flow": { "start_node": { RESPONSE: Message(), - TRANSITIONS: {"node1": exact_match(Message("Hi"))}, + TRANSITIONS: {"node1": exact_match("Hi")}, }, "node1": { RESPONSE: Message("Hi, how are you?"), - TRANSITIONS: {"node2": exact_match(Message("i'm fine, how are you?"))}, + TRANSITIONS: {"node2": exact_match("i'm fine, how are you?")}, }, "node2": { RESPONSE: Message("Good. 
What do you want to talk about?"), - TRANSITIONS: {"node3": exact_match(Message("Let's talk about music."))}, + TRANSITIONS: {"node3": exact_match("Let's talk about music.")}, }, "node3": { RESPONSE: Message("Sorry, I can not talk about music now."), - TRANSITIONS: {"node4": exact_match(Message("Ok, goodbye."))}, + TRANSITIONS: {"node4": exact_match("Ok, goodbye.")}, }, - "node4": {RESPONSE: Message("bye"), TRANSITIONS: {"node1": exact_match(Message("Hi"))}}, + "node4": {RESPONSE: Message("bye"), TRANSITIONS: {"node1": exact_match("Hi")}}, "fallback_node": { RESPONSE: Message("Ooops"), - TRANSITIONS: {"node1": exact_match(Message("Hi"))}, + TRANSITIONS: {"node1": exact_match("Hi")}, }, } } @@ -40,7 +41,7 @@ TOY_SCRIPT_ARGS = (TOY_SCRIPT, ("greeting_flow", "start_node"), ("greeting_flow", "fallback_node")) """ -Arguments to pass to :py:meth:`~dff.pipeline.pipeline.pipeline.Pipeline.from_script` in order to +Arguments to pass to :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline.from_script` in order to use :py:data:`~.TOY_SCRIPT`: .. code-block:: @@ -51,11 +52,11 @@ """ HAPPY_PATH = ( - (Message("Hi"), Message("Hi, how are you?")), - (Message("i'm fine, how are you?"), Message("Good. What do you want to talk about?")), - (Message("Let's talk about music."), Message("Sorry, I can not talk about music now.")), - (Message("Ok, goodbye."), Message("bye")), - (Message("Hi"), Message("Hi, how are you?")), + ("Hi", "Hi, how are you?"), + ("i'm fine, how are you?", "Good. What do you want to talk about?"), + ("Let's talk about music.", "Sorry, I can not talk about music now."), + ("Ok, goodbye.", "bye"), + ("Hi", "Hi, how are you?"), ) """ An example of a simple dialog. @@ -68,10 +69,10 @@ "start": { RESPONSE: Message("Hi"), TRANSITIONS: { - ("small_talk", "ask_some_questions"): exact_match(Message("hi")), - ("animals", "have_pets"): exact_match(Message("i like animals")), - ("animals", "like_animals"): exact_match(Message("let's talk about animals")), - ("news", "what_news"): exact_match(Message("let's talk about news")), + ("small_talk", "ask_some_questions"): exact_match("hi"), + ("animals", "have_pets"): exact_match("i like animals"), + ("animals", "like_animals"): exact_match("let's talk about animals"), + ("news", "what_news"): exact_match("let's talk about news"), }, }, "fallback": {RESPONSE: Message("Oops")}, @@ -79,26 +80,26 @@ "animals": { "have_pets": { RESPONSE: Message("do you have pets?"), - TRANSITIONS: {"what_animal": exact_match(Message("yes"))}, + TRANSITIONS: {"what_animal": exact_match("yes")}, }, "like_animals": { RESPONSE: Message("do you like it?"), - TRANSITIONS: {"what_animal": exact_match(Message("yes"))}, + TRANSITIONS: {"what_animal": exact_match("yes")}, }, "what_animal": { RESPONSE: Message("what animals do you have?"), TRANSITIONS: { - "ask_about_color": exact_match(Message("bird")), - "ask_about_breed": exact_match(Message("dog")), + "ask_about_color": exact_match("bird"), + "ask_about_breed": exact_match("dog"), }, }, "ask_about_color": {RESPONSE: Message("what color is it")}, "ask_about_breed": { RESPONSE: Message("what is this breed?"), TRANSITIONS: { - "ask_about_breed": exact_match(Message("pereat")), - "tell_fact_about_breed": exact_match(Message("bulldog")), - "ask_about_training": exact_match(Message("I don't know")), + "ask_about_breed": exact_match("pereat"), + "tell_fact_about_breed": exact_match("bulldog"), + "ask_about_training": exact_match("I don't know"), }, }, "tell_fact_about_breed": { @@ -110,36 +111,36 @@ "what_news": { RESPONSE: Message("what 
kind of news do you prefer?"), TRANSITIONS: { - "ask_about_science": exact_match(Message("science")), - "ask_about_sport": exact_match(Message("sport")), + "ask_about_science": exact_match("science"), + "ask_about_sport": exact_match("sport"), }, }, "ask_about_science": { RESPONSE: Message("i got news about science, do you want to hear?"), TRANSITIONS: { - "science_news": exact_match(Message("yes")), - ("small_talk", "ask_some_questions"): exact_match(Message("let's change the topic")), + "science_news": exact_match("yes"), + ("small_talk", "ask_some_questions"): exact_match("let's change the topic"), }, }, "science_news": { RESPONSE: Message("This is science news"), TRANSITIONS: { - "what_news": exact_match(Message("ok")), - ("small_talk", "ask_some_questions"): exact_match(Message("let's change the topic")), + "what_news": exact_match("ok"), + ("small_talk", "ask_some_questions"): exact_match("let's change the topic"), }, }, "ask_about_sport": { RESPONSE: Message("i got news about sport, do you want to hear?"), TRANSITIONS: { - "sport_news": exact_match(Message("yes")), - ("small_talk", "ask_some_questions"): exact_match(Message("let's change the topic")), + "sport_news": exact_match("yes"), + ("small_talk", "ask_some_questions"): exact_match("let's change the topic"), }, }, "sport_news": { RESPONSE: Message("This is sport news"), TRANSITIONS: { - "what_news": exact_match(Message("ok")), - ("small_talk", "ask_some_questions"): exact_match(Message("let's change the topic")), + "what_news": exact_match("ok"), + ("small_talk", "ask_some_questions"): exact_match("let's change the topic"), }, }, }, @@ -147,16 +148,16 @@ "ask_some_questions": { RESPONSE: Message("how are you"), TRANSITIONS: { - "ask_talk_about": exact_match(Message("fine")), - ("animals", "like_animals"): exact_match(Message("let's talk about animals")), - ("news", "what_news"): exact_match(Message("let's talk about news")), + "ask_talk_about": exact_match("fine"), + ("animals", "like_animals"): exact_match("let's talk about animals"), + ("news", "what_news"): exact_match("let's talk about news"), }, }, "ask_talk_about": { RESPONSE: Message("what do you want to talk about"), TRANSITIONS: { - ("animals", "like_animals"): exact_match(Message("dog")), - ("news", "what_news"): exact_match(Message("let's talk about news")), + ("animals", "like_animals"): exact_match("dog"), + ("news", "what_news"): exact_match("let's talk about news"), }, }, }, diff --git a/dff/utils/turn_caching/__init__.py b/chatsky/utils/turn_caching/__init__.py similarity index 100% rename from dff/utils/turn_caching/__init__.py rename to chatsky/utils/turn_caching/__init__.py diff --git a/dff/utils/turn_caching/singleton_turn_caching.py b/chatsky/utils/turn_caching/singleton_turn_caching.py similarity index 99% rename from dff/utils/turn_caching/singleton_turn_caching.py rename to chatsky/utils/turn_caching/singleton_turn_caching.py index 450dacf5d..06ae53ff0 100644 --- a/dff/utils/turn_caching/singleton_turn_caching.py +++ b/chatsky/utils/turn_caching/singleton_turn_caching.py @@ -3,6 +3,7 @@ ---------------------- This module contains functions for caching function results on each dialog turn. 
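Note on the script and happy-path hunks above: conditions and test tuples switch from `Message` wrappers to plain strings, so `exact_match` presumably wraps its argument into a `Message` internally now. A minimal sketch of the resulting usage (import paths assumed from the surrounding codebase):

```python
from dff.script import Message, TRANSITIONS, RESPONSE  # assumed re-export locations
from dff.script.conditions import exact_match

node = {
    RESPONSE: Message("bye"),
    TRANSITIONS: {"node1": exact_match("Hi")},  # previously exact_match(Message("Hi"))
}
```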
""" + import functools from typing import Callable, List, Optional diff --git a/dff/utils/viewer/__init__.py b/chatsky/utils/viewer/__init__.py similarity index 100% rename from dff/utils/viewer/__init__.py rename to chatsky/utils/viewer/__init__.py diff --git a/compose.yml b/compose.yml index a753b946b..dd3f17c53 100644 --- a/compose.yml +++ b/compose.yml @@ -91,10 +91,22 @@ services: retries: 5 start_period: 30s + ner: + image: deeppavlov/deeppavlov:latest + profiles: + - extras + environment: + - CONFIG=ner_conll2003_bert + restart: unless-stopped + ports: + - 5000:5000 + volumes: + - ~/.deeppavlov:/root/.deeppavlov/ + - ~/.cache:/root/.cache/ dashboard: env_file: [.env_file] build: - context: ./dff/utils/docker + context: ./chatsky/utils/docker dockerfile: dockerfile_stats image: ghcr.io/deeppavlov/superset_df_dashboard:latest depends_on: @@ -147,7 +159,7 @@ services: start_period: 30s otelcol: - image: otel/opentelemetry-collector-contrib:latest + image: otel/opentelemetry-collector-contrib:0.103.1 profiles: - stats container_name: otel-col @@ -157,8 +169,8 @@ services: clickhouse: condition: service_healthy volumes: - - ./dff/utils/otel/otelcol-config.yml:/etc/otelcol-config.yml:ro - - ./dff/utils/otel/otelcol-config-extras.yml:/etc/otelcol-config-extras.yml:ro + - ./chatsky/utils/otel/otelcol-config.yml:/etc/otelcol-config.yml:ro + - ./chatsky/utils/otel/otelcol-config-extras.yml:/etc/otelcol-config-extras.yml:ro ports: - "4317:4317" # OTLP over gRPC receiver - "4318:4318" # OTLP over HTTP receiver diff --git a/dff/cnd.py b/dff/cnd.py deleted file mode 100644 index c6cbbd966..000000000 --- a/dff/cnd.py +++ /dev/null @@ -1 +0,0 @@ -from dff.script.conditions import * diff --git a/dff/lbl.py b/dff/lbl.py deleted file mode 100644 index e1ac05d88..000000000 --- a/dff/lbl.py +++ /dev/null @@ -1 +0,0 @@ -from dff.script.labels import * diff --git a/dff/messengers/common/__init__.py b/dff/messengers/common/__init__.py deleted file mode 100644 index d9c66d921..000000000 --- a/dff/messengers/common/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# -*- coding: utf-8 -*- - -from .interface import MessengerInterface, PollingMessengerInterface, CallbackMessengerInterface, CLIMessengerInterface -from .types import PollingInterfaceLoopFunction diff --git a/dff/messengers/telegram/__init__.py b/dff/messengers/telegram/__init__.py deleted file mode 100644 index cb7e38305..000000000 --- a/dff/messengers/telegram/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -try: - import telebot -except ImportError: - raise ImportError("telebot is not installed. Run `pip install dff[telegram]`") - -from .messenger import TelegramMessenger -from .interface import PollingTelegramInterface, CallbackTelegramInterface -from .message import TelegramUI, TelegramMessage, RemoveKeyboard, ParseMode -from .messenger import ( - UpdateType, - telegram_condition, -) diff --git a/dff/messengers/telegram/interface.py b/dff/messengers/telegram/interface.py deleted file mode 100644 index ba482f01b..000000000 --- a/dff/messengers/telegram/interface.py +++ /dev/null @@ -1,222 +0,0 @@ -""" -Interface ------------- -This module implements various interfaces for :py:class:`~dff.messengers.telegram.messenger.TelegramMessenger` -that can be used to interact with the Telegram API. 
-""" -import asyncio -from typing import Any, Optional, List, Tuple, Callable - -from telebot import types, apihelper - -from dff.messengers.common import MessengerInterface, CallbackMessengerInterface -from dff.pipeline.types import PipelineRunnerFunction -from .messenger import TelegramMessenger -from .message import TelegramMessage - -try: - from flask import Flask, request, abort - - flask_imported = True -except ImportError: - flask_imported = False - Flask = Any - request, abort = None, None - - -apihelper.ENABLE_MIDDLEWARE = True - - -def extract_telegram_request_and_id( - update: types.Update, messenger: Optional[TelegramMessenger] = None -) -> Tuple[TelegramMessage, int]: # pragma: no cover - """ - Utility function that extracts parameters from a telegram update. - Changes the messenger state, setting the last update id. - - Returned message has the following fields: - - - | `update_id` -- this field stores `update.update_id`, - - | `update` -- this field stores the first non-empty field of `update`, - - | `update_type` -- this field stores the name of the first non-empty field of `update`, - - | `text` -- this field stores `update.message.text`, - - | `callback_query` -- this field stores `update.callback_query.data`. - - Also return context id which is `chat`, `from_user` or `user` of the update. - - :param update: Update to process. - :param messenger: - Messenger instance. If passed updates `last_update_id`. - Defaults to None. - """ - if messenger is not None: - if update.update_id > messenger.last_update_id: - messenger.last_update_id = update.update_id - - message = TelegramMessage(update_id=update.update_id) - ctx_id = None - - for update_field, update_value in vars(update).items(): - if update_field != "update_id" and update_value is not None: - if message.update is not None: - raise RuntimeError(f"Two update fields. First: {message.update_type}; second: {update_field}") - message.update_type = update_field - message.update = update_value - if isinstance(update_value, types.Message): - message.text = update_value.text - - if isinstance(update_value, types.CallbackQuery): - data = update_value.data - if data is not None: - message.callback_query = data - - dict_update = vars(update_value) - # if 'chat' is not available, fall back to 'from_user', then to 'user' - user = dict_update.get("chat", dict_update.get("from_user", dict_update.get("user"))) - ctx_id = getattr(user, "id", None) - if message.update is None: - raise RuntimeError(f"No update fields found: {update}") - - return message, ctx_id - - -class PollingTelegramInterface(MessengerInterface): # pragma: no cover - """ - Telegram interface that retrieves updates by polling. - Multi-threaded polling is currently not supported. - - :param token: Bot token - :param messenger: - :py:class:`~dff.messengers.telegram.messenger.TelegramMessenger` instance. - If not `None` will be used instead of creating messenger from token. - Token value does not matter in that case. - Defaults to None. - :param interval: - Polling interval. See `link `__. - Defaults to 2. - :param allowed_updates: - Processed updates. See `link `__. - Defaults to None. - :param timeout: - General timeout. See `link `__. - Defaults to 20. - :param long_polling_timeout: - Polling timeout. See `link `__. - Defaults to 20. 
- """ - - def __init__( - self, - token: str, - interval: int = 2, - allowed_updates: Optional[List[str]] = None, - timeout: int = 20, - long_polling_timeout: int = 20, - messenger: Optional[TelegramMessenger] = None, - ): - self.messenger = ( - messenger if messenger is not None else TelegramMessenger(token, suppress_middleware_excepions=True) - ) - self.allowed_updates = allowed_updates - self.interval = interval - self.timeout = timeout - self.long_polling_timeout = long_polling_timeout - - async def connect(self, callback: PipelineRunnerFunction, loop: Optional[Callable] = None, *args, **kwargs): - def dff_middleware(bot_instance, update): - message, ctx_id = extract_telegram_request_and_id(update, self.messenger) - - ctx = asyncio.run(callback(message, ctx_id)) - - bot_instance.send_response(ctx_id, ctx.last_response) - - self.messenger.middleware_handler()(dff_middleware) - - self.messenger.infinity_polling( - timeout=self.timeout, long_polling_timeout=self.long_polling_timeout, interval=self.interval - ) - - -class CallbackTelegramInterface(CallbackMessengerInterface): # pragma: no cover - """ - Asynchronous Telegram interface that retrieves updates via webhook. - Any Flask server can be passed to set up a webhook on a separate endpoint. - - :param token: Bot token - :param messenger: - :py:class:`~dff.messengers.telegram.messenger.TelegramMessenger` instance. - If not `None` will be used instead of creating messenger from token. - Token value does not matter in that case. - Defaults to None. - :param app: - Flask instance. - Defaults to `Flask(__name__)`. - :param endpoint: - Webhook endpoint. Should be prefixed with "/". - Defaults to "/telegram-webhook". - :param host: - Host IP. - Defaults to "localhost". - :param port: - Port of the app. - Defaults to 8443. - :param debug: - Run the Flask app in debug mode. - :param load_dotenv: - Whether or not the .env file in the project folder - should be used to set environment variables. - :param full_uri: - Full public IP of your webhook that is accessible by https. - Defaults to `"https://{host}:{port}{endpoint}"`. - :param wsgi_options: - Keyword arguments to forward to `Flask.run` method. - Use these to set `ssl_context` and other WSGI options. - """ - - def __init__( - self, - token: str, - app: Optional[Flask] = None, - host: str = "localhost", - port: int = 8443, - debug: Optional[bool] = None, - load_dotenv: bool = True, - endpoint: str = "/telegram-webhook", - full_uri: Optional[str] = None, - messenger: Optional[TelegramMessenger] = None, - **wsgi_options, - ): - if not flask_imported: - raise ModuleNotFoundError("Flask is not installed. 
Install it with `pip install flask`.") - - self.messenger = messenger if messenger is not None else TelegramMessenger(token) - self.app = app if app else Flask(__name__) - self.host = host - self.port = port - self.debug = debug - self.load_dotenv = load_dotenv - self.wsgi_options = wsgi_options - self.endpoint = endpoint - self.full_uri = full_uri if full_uri is not None else "".join([f"https://{host}:{port}", endpoint]) - - async def endpoint(): - if not request.headers.get("content-type") == "application/json": - abort(403) - - json_string = request.get_data().decode("utf-8") - update = types.Update.de_json(json_string) - resp = await self.on_request_async(*extract_telegram_request_and_id(update, self.messenger)) - self.messenger.send_response(resp.id, resp.last_response) - return "" - - self.app.route(self.endpoint, methods=["POST"])(endpoint) - - async def connect(self, callback: PipelineRunnerFunction): - await super().connect(callback) - - self.messenger.remove_webhook() - self.messenger.set_webhook(self.full_uri) - - self.app.run( - host=self.host, port=self.port, load_dotenv=self.load_dotenv, debug=self.debug, **self.wsgi_options - ) diff --git a/dff/messengers/telegram/message.py b/dff/messengers/telegram/message.py deleted file mode 100644 index bc47c4f21..000000000 --- a/dff/messengers/telegram/message.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Telegram Message ----------------- -This module implements inherited classes :py:mod:`dff.script.core.message` modified for usage with Telegram. -""" -from typing import Optional, Union -from enum import Enum - -from telebot.types import ( - ReplyKeyboardRemove, - ReplyKeyboardMarkup, - InlineKeyboardMarkup, - Message as tlMessage, - InlineQuery, - ChosenInlineResult, - CallbackQuery as tlCallbackQuery, - ShippingQuery, - PreCheckoutQuery, - Poll, - PollAnswer, - ChatMemberUpdated, - ChatJoinRequest, -) - -from dff.script.core.message import Message, Location, Keyboard, DataModel -from pydantic import model_validator - - -class TelegramUI(Keyboard): - is_inline: bool = True - """ - Whether to use `inline keyboard `__ or - a `keyboard `__. - """ - row_width: int = 3 - """Limits the maximum number of buttons in a row.""" - - @model_validator(mode="after") - def validate_buttons(self, _): - if not self.is_inline: - for button in self.buttons: - if button.payload is not None or button.source is not None: - raise AssertionError(f"`payload` and `source` are only used for inline keyboards: {button}") - return self - - -class _ClickButton(DataModel): - """This class is only used in telegram tests (to click buttons as a client).""" - - button_index: int - - -class RemoveKeyboard(DataModel): - """Pass an instance of this class to :py:attr:`~.TelegramMessage.ui` to remove current keyboard.""" - - ... - - -class ParseMode(Enum): - """ - Parse mode of the message. - More info: https://core.telegram.org/bots/api#formatting-options. 
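The classes from the deleted `message.py` above composed keyboards like this; a sketch with assumed import paths (note the validator above only permits `payload`/`source` on buttons of inline keyboards):

```python
from dff.messengers.telegram import TelegramMessage, TelegramUI
from dff.script.core.message import Button

msg = TelegramMessage(
    text="What do you want to talk about?",
    ui=TelegramUI(
        is_inline=True,  # payload is only valid on inline keyboards
        row_width=2,
        buttons=[
            Button(text="animals", payload="animals"),
            Button(text="news", payload="news"),
        ],
    ),
)
```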
- """ - - HTML = "HTML" - MARKDOWN = "MarkdownV2" - - -class TelegramMessage(Message): - ui: Optional[ - Union[TelegramUI, RemoveKeyboard, ReplyKeyboardRemove, ReplyKeyboardMarkup, InlineKeyboardMarkup] - ] = None - location: Optional[Location] = None - callback_query: Optional[Union[str, _ClickButton]] = None - update: Optional[ - Union[ - tlMessage, - InlineQuery, - ChosenInlineResult, - tlCallbackQuery, - ShippingQuery, - PreCheckoutQuery, - Poll, - PollAnswer, - ChatMemberUpdated, - ChatJoinRequest, - ] - ] = None - """This field stores an update representing this message.""" - update_id: Optional[int] = None - update_type: Optional[str] = None - """Name of the field that stores an update representing this message.""" - parse_mode: Optional[ParseMode] = None - """Parse mode of the message.""" - - def __eq__(self, other): - if isinstance(other, Message): - for field in self.model_fields: - if field not in ("parse_mode", "update_id", "update", "update_type"): - if field not in other.model_fields: - return False - if self.__getattribute__(field) != other.__getattribute__(field): - return False - return True - return NotImplemented diff --git a/dff/messengers/telegram/messenger.py b/dff/messengers/telegram/messenger.py deleted file mode 100644 index 07919c3d0..000000000 --- a/dff/messengers/telegram/messenger.py +++ /dev/null @@ -1,247 +0,0 @@ -""" -Messenger ------------------ -The Messenger module provides the :py:class:`~dff.messengers.telegram.messenger.TelegramMessenger` class. -The former inherits from the :py:class:`~TeleBot` class from the `pytelegrambotapi` library. -Using it, you can put Telegram update handlers inside your script and condition your transitions accordingly. - -""" -from pathlib import Path -from typing import Union, List, Optional, Callable -from enum import Enum - -from telebot import types, TeleBot - -from dff.script import Context -from dff.pipeline import Pipeline - -from .utils import batch_open_io -from .message import TelegramMessage, TelegramUI, RemoveKeyboard - -from dff.script import Message -from dff.script.core.message import Audio, Video, Image, Document - - -class TelegramMessenger(TeleBot): # pragma: no cover - """ - This class inherits from `Telebot` and implements framework-specific functionality - like sending generic responses. - - :param token: A Telegram API bot token. - :param kwargs: Arbitrary parameters that match the signature of the `Telebot` class. - For reference see: `link `_ . - - """ - - def __init__( - self, - token: str, - **kwargs, - ): - super().__init__(token, threaded=False, **kwargs) - - def send_response(self, chat_id: Union[str, int], response: Union[str, dict, Message]) -> None: - """ - Cast `response` to :py:class:`~dff.messengers.telegram.types.TelegramMessage` and send it. - Message fields are sent in separate messages in the following order: - - 1. Attachments - 2. Location - 3. Text with keyboard - - :param chat_id: Telegram chat ID. - :param response: Response data. String, dictionary or :py:class:`~dff.script.responses.generics.Response`. - will be cast to :py:class:`~dff.messengers.telegram.types.TelegramMessage`. 
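Per the `send_response` docstring below, a response is cast to `TelegramMessage` and delivered as up to three Telegram messages, in order: attachments, location, then text with keyboard. A usage sketch with a placeholder token and chat id:

```python
from dff.messengers.telegram import TelegramMessenger

messenger = TelegramMessenger("BOT_TOKEN")    # placeholder token
messenger.send_response(123456789, "Hello!")  # plain str is cast to TelegramMessage
```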
- """ - if isinstance(response, TelegramMessage): - ready_response = response - elif isinstance(response, str): - ready_response = TelegramMessage(text=response) - elif isinstance(response, Message): - ready_response = TelegramMessage.model_validate(response.model_dump()) - elif isinstance(response, dict): - ready_response = TelegramMessage.model_validate(response) - else: - raise TypeError( - "Type of the response argument should be one of the following:" - " `str`, `dict`, `Message`, or `TelegramMessage`." - ) - parse_mode = ready_response.parse_mode.value if ready_response.parse_mode is not None else None - if ready_response.attachments is not None: - if len(ready_response.attachments.files) == 1: - attachment = ready_response.attachments.files[0] - if isinstance(attachment, Audio): - method = self.send_audio - elif isinstance(attachment, Document): - method = self.send_document - elif isinstance(attachment, Video): - method = self.send_video - elif isinstance(attachment, Image): - method = self.send_photo - else: - raise TypeError(type(attachment)) - params = {"caption": attachment.title, "parse_mode": parse_mode} - if isinstance(attachment.source, Path): - with open(attachment.source, "rb") as file: - method(chat_id, file, **params) - else: - method(chat_id, str(attachment.source or attachment.id), **params) - else: - - def cast(file): - if isinstance(file, Image): - cast_to_media_type = types.InputMediaPhoto - elif isinstance(file, Audio): - cast_to_media_type = types.InputMediaAudio - elif isinstance(file, Document): - cast_to_media_type = types.InputMediaDocument - elif isinstance(file, Video): - cast_to_media_type = types.InputMediaVideo - else: - raise TypeError(type(file)) - return cast_to_media_type(media=str(file.source or file.id), caption=file.title) - - files = map(cast, ready_response.attachments.files) - with batch_open_io(files) as media: - self.send_media_group(chat_id=chat_id, media=media) - - if ready_response.location: - self.send_location( - chat_id=chat_id, - latitude=ready_response.location.latitude, - longitude=ready_response.location.longitude, - ) - - if ready_response.ui is not None: - if isinstance(ready_response.ui, RemoveKeyboard): - keyboard = types.ReplyKeyboardRemove() - elif isinstance(ready_response.ui, TelegramUI): - if ready_response.ui.is_inline: - keyboard = types.InlineKeyboardMarkup(row_width=ready_response.ui.row_width) - buttons = [ - types.InlineKeyboardButton( - text=item.text, - url=item.source, - callback_data=item.payload, - ) - for item in ready_response.ui.buttons - ] - else: - keyboard = types.ReplyKeyboardMarkup(row_width=ready_response.ui.row_width) - buttons = [ - types.KeyboardButton( - text=item.text, - ) - for item in ready_response.ui.buttons - ] - keyboard.add(*buttons, row_width=ready_response.ui.row_width) - else: - keyboard = ready_response.ui - else: - keyboard = None - - if ready_response.text is not None: - self.send_message( - chat_id=chat_id, - text=ready_response.text, - reply_markup=keyboard, - parse_mode=parse_mode, - ) - elif keyboard is not None: - self.send_message( - chat_id=chat_id, - text="", - reply_markup=keyboard, - parse_mode=parse_mode, - ) - - -_default_messenger = TeleBot("") - - -class UpdateType(Enum): - """ - Represents a type of the telegram update - (which field contains an update in :py:class:`telebot.types.Update`). - See `link `__. 
- """ - - ALL = "ALL" - MESSAGE = "message" - EDITED_MESSAGE = "edited_message" - CHANNEL_POST = "channel_post" - EDITED_CHANNEL_POST = "edited_channel_post" - INLINE_QUERY = "inline_query" - CHOSEN_INLINE_RESULT = "chosen_inline_result" - CALLBACK_QUERY = "callback_query" - SHIPPING_QUERY = "shipping_query" - PRE_CHECKOUT_QUERY = "pre_checkout_query" - POLL = "poll" - POLL_ANSWER = "poll_answer" - MY_CHAT_MEMBER = "my_chat_member" - CHAT_MEMBER = "chat_member" - CHAT_JOIN_REQUEST = "chat_join_request" - - -def telegram_condition( - messenger: TeleBot = _default_messenger, - update_type: UpdateType = UpdateType.MESSAGE, - commands: Optional[List[str]] = None, - regexp: Optional[str] = None, - func: Optional[Callable] = None, - content_types: Optional[List[str]] = None, - chat_types: Optional[List[str]] = None, - **kwargs, -): - """ - A condition triggered by updates that match the given parameters. - - :param messenger: - Messenger to test filters on. Used only for :py:attr:`Telebot.custom_filters`. - Defaults to :py:data:`._default_messenger`. - :param update_type: - If set to any `UpdateType` other than `UpdateType.ALL` - it will check that an update is of the same type. - Defaults to `UpdateType.Message`. - :param commands: - Telegram command trigger. - See `link `__. - :param regexp: - Regex trigger. - See `link `__. - :param func: - Callable trigger. - See `link `__. - :param content_types: - Content type trigger. - See `link `__. - :param chat_types: - Chat type trigger. - See `link `__. - """ - - update_handler = messenger._build_handler_dict( - None, - False, - commands=commands, - regexp=regexp, - func=func, - content_types=content_types, - chat_types=chat_types, - **kwargs, - ) - - def condition(ctx: Context, _: Pipeline, *__, **___): # pragma: no cover - last_request = ctx.last_request - if last_request is None: - return False - update = getattr(last_request, "update", None) - request_update_type = getattr(last_request, "update_type", None) - if update is None: - return False - if update_type != UpdateType.ALL and request_update_type != update_type.value: - return False - test_result = messenger._test_message_handler(update_handler, update) - return test_result - - return condition diff --git a/dff/messengers/telegram/utils.py b/dff/messengers/telegram/utils.py deleted file mode 100644 index f21dc0016..000000000 --- a/dff/messengers/telegram/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -Utils ------- -This module contains utilities for connecting to Telegram. -""" -from typing import Union, Iterable -from contextlib import contextmanager -from pathlib import Path -from io import IOBase - -from telebot import types - - -def open_io(item: types.InputMedia): - """ - Returns `InputMedia` with an opened file descriptor instead of path. - - :param item: InputMedia object. - """ - if isinstance(item.media, Path): - item.media = item.media.open(mode="rb") - return item - - -def close_io(item: types.InputMedia): - """ - Closes an IO in an `InputMedia` object to perform the cleanup. - - :param item: InputMedia object. - """ - if isinstance(item.media, IOBase): - item.media.close() - - -@contextmanager -def batch_open_io(item: Union[types.InputMedia, Iterable[types.InputMedia]]): - """ - Context manager that controls the state of file descriptors inside `InputMedia`. - Can be used both for single objects and collections. - - :param item: InputMedia objects that contain file descriptors. 
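`telegram_condition` above plugs pyTelegramBotAPI's handler filters into script transitions. A sketch, assuming `TRANSITIONS`/`RESPONSE` are importable from `dff.script`:

```python
from dff.script import TRANSITIONS, RESPONSE, Message
from dff.messengers.telegram import telegram_condition

script = {
    "flow": {
        "node": {
            RESPONSE: Message("How can I help?"),
            # fires on incoming /start or /help commands
            TRANSITIONS: {("flow", "node"): telegram_condition(commands=["start", "help"])},
        },
    },
}
```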
- """ - if isinstance(item, Iterable): - resources = list(map(open_io, item)) - else: - resources = open_io(item) - try: - yield resources - finally: - if isinstance(resources, Iterable): - for resource in resources: - close_io(resource) - else: - close_io(resources) diff --git a/dff/msg.py b/dff/msg.py deleted file mode 100644 index ee953a136..000000000 --- a/dff/msg.py +++ /dev/null @@ -1,14 +0,0 @@ -from dff.script.core.message import ( - Location, - Attachment, - Audio, - Video, - Image, - Document, - Attachments, - Link, - Button, - Keyboard, - Message, - MultiMessage -) diff --git a/dff/rsp.py b/dff/rsp.py deleted file mode 100644 index 04137084b..000000000 --- a/dff/rsp.py +++ /dev/null @@ -1 +0,0 @@ -from dff.script.responses import * diff --git a/dff/script/core/message.py b/dff/script/core/message.py deleted file mode 100644 index 1d3f859d2..000000000 --- a/dff/script/core/message.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Message -------- -The :py:class:`.Message` class is a universal data model for representing a message that should be supported by -DFF. It only contains types and properties that are compatible with most messaging services. -""" -from typing import Any, Optional, List, Union -from enum import Enum, auto -from pathlib import Path -from urllib.request import urlopen - -from pydantic import field_validator, Field, FilePath, HttpUrl, BaseModel, model_validator - - -class Session(Enum): - """ - An enumeration that defines two possible states of a session. - """ - - ACTIVE = auto() - FINISHED = auto() - - -class DataModel(BaseModel, extra="allow", arbitrary_types_allowed=True): - """ - This class is a Pydantic BaseModel that serves as a base class for all DFF models. - """ - - ... - - -class Command(DataModel): - """ - This class is a subclass of DataModel and represents - a command that can be executed in response to a user input. - """ - - ... - - -class Location(DataModel): - """ - This class is a data model that represents a geographical - location on the Earth's surface. - It has two attributes, longitude and latitude, both of which are float values. - If the absolute difference between the latitude and longitude values of the two - locations is less than 0.00004, they are considered equal. - """ - - longitude: float - latitude: float - - def __eq__(self, other): - if isinstance(other, Location): - return abs(self.latitude - other.latitude) + abs(self.longitude - other.longitude) < 0.00004 - return NotImplemented - - -class Attachment(DataModel): - """ - This class represents an attachment that can be either - a file or a URL, along with an optional ID and title. 
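A usage sketch for the (now removed) `batch_open_io` above: it opens `Path`-based media before the API call and closes the descriptors afterwards. Here `messenger` stands in for a `TelegramMessenger` instance and the file paths are placeholders:

```python
from pathlib import Path
from telebot import types
from dff.messengers.telegram.utils import batch_open_io

album = [
    types.InputMediaPhoto(media=Path("cat.png"), caption="a cat"),
    types.InputMediaPhoto(media=Path("dog.png"), caption="a dog"),
]
with batch_open_io(album) as media:
    messenger.send_media_group(chat_id=123456789, media=media)
```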
- """ - - source: Optional[Union[HttpUrl, FilePath]] = None - id: Optional[str] = None # id field is made separate to simplify type validation - title: Optional[str] = None - - def get_bytes(self) -> Optional[bytes]: - if self.source is None: - return None - if isinstance(self.source, Path): - with open(self.source, "rb") as file: - return file.read() - else: - with urlopen(self.source.unicode_string()) as file: - return file.read() - - def __eq__(self, other): - if isinstance(other, Attachment): - if self.title != other.title: - return False - if self.id != other.id: - return False - return self.get_bytes() == other.get_bytes() - return NotImplemented - - @model_validator(mode="before") - @classmethod - def validate_source_or_id(cls, values: dict): - if not isinstance(values, dict): - raise AssertionError(f"Invalid constructor parameters: {str(values)}") - if bool(values.get("source")) == bool(values.get("id")): - raise AssertionError("Attachment type requires exactly one parameter, `source` or `id`, to be set.") - return values - - @field_validator("source", mode="before") - @classmethod - def validate_source(cls, value): - if isinstance(value, Path): - return Path(value) - return value - - -class Audio(Attachment): - """Represents an audio file attachment.""" - - pass - - -class Video(Attachment): - """Represents a video file attachment.""" - - pass - - -class Image(Attachment): - """Represents an image file attachment.""" - - pass - - -class Document(Attachment): - """Represents a document file attachment.""" - - pass - - -class Attachments(DataModel): - """This class is a data model that represents a list of attachments.""" - - files: List[Attachment] = Field(default_factory=list) - - def __eq__(self, other): - if isinstance(other, Attachments): - return self.files == other.files - return NotImplemented - - -class Link(DataModel): - """This class is a DataModel representing a hyperlink.""" - - source: HttpUrl - title: Optional[str] = None - - @property - def html(self): - return f'{self.title if self.title else self.source}' - - -class Button(DataModel): - """ - This class allows for the creation of a button object - with a source URL, a text description, and a payload. - """ - - source: Optional[HttpUrl] = None - text: str - payload: Optional[Any] = None - - def __eq__(self, other): - if isinstance(other, Button): - if self.source != other.source: - return False - if self.text != other.text: - return False - first_payload = bytes(self.payload, encoding="utf-8") if isinstance(self.payload, str) else self.payload - second_payload = bytes(other.payload, encoding="utf-8") if isinstance(other.payload, str) else other.payload - return first_payload == second_payload - return NotImplemented - - -class Keyboard(DataModel): - """ - This class is a DataModel that represents a keyboard object - that can be used for a chatbot or messaging application. - """ - - buttons: List[Button] = Field(default_factory=list, min_length=1) - - def __eq__(self, other): - if isinstance(other, Keyboard): - return self.buttons == other.buttons - return NotImplemented - - -class Message(DataModel): - """ - Class representing a message and contains several - class level variables to store message information. 
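The `validate_source_or_id` validator above enforces exactly one of `source` or `id`; for instance:

```python
from dff.script.core.message import Image

Image(source="https://example.com/cat.png")  # ok: URL (or a local file path)
Image(id="remote-file-id")                   # ok: service-side identifier (made-up value)
Image(source="https://example.com/cat.png", id="remote-file-id")  # raises a validation error
```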
- """ - - text: Optional[str] = None - commands: Optional[List[Command]] = None - attachments: Optional[List[DataModel]] = None - annotations: Optional[dict] = None - misc: Optional[dict] = None - # commands and state options are required for integration with services - # that use an intermediate backend server, like Yandex's Alice - # state: Optional[Session] = Session.ACTIVE - # ui: Optional[Union[Keyboard, DataModel]] = None - - def __init__( - self, - text: Optional[str] = None, - commands: Optional[List[Command]] = None, - attachments: Optional[List[DataModel]] = None, - annotations: Optional[dict] = None, - misc: Optional[dict] = None, - ): - super().__init__( - text=text, commands=commands, attachments=attachments, annotations=annotations, misc=misc - ) - - def __eq__(self, other): - if isinstance(other, Message): - for field in self.model_fields: - if field not in other.model_fields: - return False - if self.__getattribute__(field) != other.__getattribute__(field): - return False - return True - return NotImplemented - - def __repr__(self) -> str: - return " ".join([f"{key}='{value}'" for key, value in self.model_dump(exclude_none=True).items()]) - - -class MultiMessage(Message): - """This class represents a message that contains multiple sub-messages.""" - - messages: Optional[List[Message]] = None diff --git a/dff/script/core/script.py b/dff/script/core/script.py deleted file mode 100644 index ccf326058..000000000 --- a/dff/script/core/script.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Script ------- -The Script module provides a set of `pydantic` models for representing the dialog graph. -These models are used to define the conversation flow, and to determine the appropriate response based on -the user's input and the current state of the conversation. -""" -# %% -from __future__ import annotations -import logging -from typing import Callable, Optional, Any, Dict, Union, TYPE_CHECKING - -from pydantic import BaseModel, field_validator, validate_call, Field - -from .types import LabelType, NodeLabelType, ConditionType, NodeLabel3Type -from .message import Message -from .keywords import Keywords -from .normalization import normalize_condition, normalize_label - -if TYPE_CHECKING: - from dff.script.core.context import Context - from dff.pipeline.pipeline.pipeline import Pipeline - -logger = logging.getLogger(__name__) - - -class Node(BaseModel, extra="forbid", validate_assignment=True): - """ - The class for the `Node` object. - """ - - transitions: Dict[NodeLabelType, ConditionType] = Field(default_factory=dict, alias="TRANSITIONS") - response: Union[Message, Callable[[Context, Pipeline], Message]] = Field(default_factory=Message, alias="RESPONSE") - pre_transitions_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_TRANSITIONS_PROCESSING") - pre_response_processing: Dict[Any, Callable] = Field(default_factory=dict, alias="PRE_RESPONSE_PROCESSING") - misc: dict = Field(default_factory=dict, alias="MISC") - - @field_validator("transitions", mode="before") - @classmethod - @validate_call - def normalize_transitions( - cls, transitions: Dict[NodeLabelType, ConditionType] - ) -> Dict[Union[Callable, NodeLabel3Type], Callable]: - """ - The function which is used to normalize transitions and returns normalized dict. - - :param transitions: Transitions to normalize. - :return: Transitions with normalized label and condition. 
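`Message.__eq__` above compares every model field, so any differing field breaks equality, and the custom `__repr__` is built from the non-None fields only:

```python
from dff.script.core.message import Message

assert Message(text="Hi") == Message(text="Hi")
assert Message(text="Hi") != Message(text="Hi", misc={"seen": True})
repr(Message(text="Hi"))  # "text='Hi'"
```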
- """ - transitions = { - normalize_label(label): normalize_condition(condition) for label, condition in transitions.items() - } - return transitions - - -class Script(BaseModel, extra="forbid"): - """ - The class for the `Script` object. - """ - - script: Dict[LabelType, Dict[LabelType, Node]] - - @field_validator("script", mode="before") - @classmethod - @validate_call - def normalize_script(cls, script: Dict[LabelType, Any]) -> Dict[LabelType, Dict[LabelType, Dict[str, Any]]]: - """ - This function normalizes :py:class:`.Script`: it returns dict where the GLOBAL node is moved - into the flow with the GLOBAL name. The function returns the structure - - `{GLOBAL: {...NODE...}, ...}` -> `{GLOBAL: {GLOBAL: {...NODE...}}, ...}`. - - :param script: :py:class:`.Script` that describes the dialog scenario. - :return: Normalized :py:class:`.Script`. - """ - if isinstance(script, dict): - if Keywords.GLOBAL in script and all( - [isinstance(item, Keywords) for item in script[Keywords.GLOBAL].keys()] - ): - script[Keywords.GLOBAL] = {Keywords.GLOBAL: script[Keywords.GLOBAL]} - return script - - def __getitem__(self, key): - return self.script[key] - - def get(self, key, value=None): - return self.script.get(key, value) - - def keys(self): - return self.script.keys() - - def items(self): - return self.script.items() - - def values(self): - return self.script.values() - - def __iter__(self): - return self.script.__iter__() diff --git a/dff/utils/db_benchmark/__init__.py b/dff/utils/db_benchmark/__init__.py deleted file mode 100644 index 6d02f7a8d..000000000 --- a/dff/utils/db_benchmark/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -from dff.utils.db_benchmark.benchmark import ( - time_context_read_write, - DBFactory, - BenchmarkConfig, - BenchmarkCase, - save_results_to_file, - benchmark_all, -) -from dff.utils.db_benchmark.report import report -from dff.utils.db_benchmark.basic_config import BasicBenchmarkConfig, basic_configurations diff --git a/dff/utils/docker/README.md b/dff/utils/docker/README.md deleted file mode 100644 index 6caf4490f..000000000 --- a/dff/utils/docker/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# DFF Docker utils - -## Description - -This directory provides Docker files, necessary for deployment -of various DFF utilities. - -## Contents - -* dockerfile_stats - Dockerfile for DFF statistics dashboard. -* entrypoint_stats.sh - Entrypoint script for DFF statistics dashboard. \ No newline at end of file diff --git a/dff/utils/testing/telegram.py b/dff/utils/testing/telegram.py deleted file mode 100644 index d06ba3bb6..000000000 --- a/dff/utils/testing/telegram.py +++ /dev/null @@ -1,278 +0,0 @@ -""" -Telegram testing utils ----------------------- -This module defines functions used to test Telegram interface. 
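`normalize_script` above relocates a top-level GLOBAL node into a dedicated GLOBAL flow. Schematically, using the `Keywords` enum and a condition from this patch:

```python
from dff.script.core.keywords import Keywords
from dff.script.conditions import exact_match  # assumed import path

raw = {
    Keywords.GLOBAL: {Keywords.TRANSITIONS: {("flow", "node"): exact_match("Hi")}},
    "flow": {"node": {}},
}
# After Script(script=raw) validation the structure becomes:
# {Keywords.GLOBAL: {Keywords.GLOBAL: {Keywords.TRANSITIONS: {...}}}, "flow": {...}}
```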
-""" -from typing import List, Optional, cast, Tuple -from contextlib import asynccontextmanager, nullcontext -import logging -import asyncio -from tempfile import TemporaryDirectory -from pathlib import Path -from copy import deepcopy - -from telethon.tl.types import ReplyKeyboardHide -from telethon import TelegramClient -from telethon.types import User -from telethon.custom import Message as TlMessage -from telebot import types - -from dff.pipeline.pipeline.pipeline import Pipeline -from dff.script.core.message import Message, Attachments, Attachment, Button, Location -from dff.messengers.telegram.interface import PollingTelegramInterface -from dff.messengers.telegram.message import TelegramMessage, TelegramUI, RemoveKeyboard, _ClickButton - - -def replace_click_button(happy_path): - """ - Replace all _ClickButton instances in `happy_path`. - This allows using :py:func:`~dff.utils.testing.common.check_happy_path` instead of - :py:meth:~dff.utils.testing.telegram.TelegramTesting.check_happy_path`. - - :return: A `happy_path` with all `_ClickButton` replaced with payload values of the buttons. - """ - result = deepcopy(happy_path) - for index in range(len(happy_path)): - user_request = happy_path[index][0] - if not isinstance(user_request, TelegramMessage): - continue - if isinstance(user_request.callback_query, _ClickButton): - callback_query = None - for _, bot_response in reversed(happy_path[:index]): - if isinstance(bot_response, TelegramMessage) and bot_response.ui is not None and callback_query is None: - callback_query = bot_response.ui.buttons[user_request.callback_query.button_index].payload - if callback_query is None: - raise RuntimeError("Bot response with buttons not found.") - result[index][0].callback_query = callback_query - return result - - -async def get_bot_user(client: TelegramClient, username: str): - async with client: - return await client.get_entity(username) - - -class TelegramTesting: # pragma: no cover - """ - Defines functions for testing. - - :param pipeline: - Pipeline with the telegram messenger interface. - Required for :py:meth:`~dff.utils.testing.telegram.TelegramTesting.send_and_check` and - :py:meth:`~dff.utils.testing.telegram.TelegramTesting.check_happy_path` with `run_bot=True` - :param api_credentials: - Telegram API id and hash. - Obtainable via https://core.telegram.org/api/obtaining_api_id. - :param session_file: - A `telethon` session file. - Obtainable by connecting to :py:class:`telethon.TelegramClient` and entering phone number and code. - :param client: - An alternative to passing `api_credentials` and `session_file`. - :param bot_username: - Either a link to the bot user or its handle. Used to determine whom to talk with as a client. - :param bot: - An alternative to passing `bot_username`. - Result of calling :py:func:`~dff.utils.testing.telegram.get_bot_user` with `bot_username` as parameter. - """ - - def __init__( - self, - pipeline: Pipeline, - api_credentials: Optional[Tuple[int, str]] = None, - session_file: Optional[str] = None, - client: Optional[TelegramClient] = None, - bot_username: Optional[str] = None, - bot: Optional[User] = None, - ): - if client is None: - if api_credentials is None or session_file is None: - raise RuntimeError("Pass either `client` or `api_credentials` and `session_file`.") - client = TelegramClient(session_file, *api_credentials) - self.client = client - """Telegram client (not bot). 
Needed to verify bot replies.""" - self.pipeline = pipeline - if bot is None: - if bot_username is None: - raise RuntimeError("Pass either `bot_username` or `bot`.") - bot = asyncio.run(get_bot_user(self.client, bot_username)) - self.bot = bot - """Bot user (to know whom to send messages to from client).""" - - async def send_message(self, message: TelegramMessage, last_bot_messages: List[TlMessage]): - """ - Send a message from client to bot. - If the message contains `callback_query`, only press the button, ignore other fields. - - :param message: Message to send. - :param last_bot_messages: - The last bot response. Accepts a list because messages with multiple fields are split in telegram. - Can only contain one keyboard in the list. - Used to determine which button to press when message contains - :py:class:`~dff.messengers.telegram.message._ClickButton`. - """ - if message.callback_query is not None: - query = message.callback_query - if not isinstance(query, _ClickButton): - raise RuntimeError(f"Use `_ClickButton` during tests: {query}") - for bot_message in last_bot_messages: - if bot_message.buttons is not None: - await bot_message.click(i=query.button_index) - return None - if message.attachments is None or len(message.attachments.files) == 0: - return await self.client.send_message(self.bot, message.text) - else: - if len(message.attachments.files) == 1: - attachment = message.attachments.files[0] - files = attachment.source - else: - files = [file.source for file in message.attachments.files] - return await self.client.send_file(self.bot, files, caption=message.text) - - @staticmethod - async def parse_responses(responses: List[TlMessage], file_download_destination) -> Message: - """ - Convert a list of bot responses into a single message. - This function accepts a list because messages with multiple attachments are split. - - :param responses: A list of bot responses that are considered to be a single message. - :param file_download_destination: A directory to download sent media to. 
- """ - msg = TelegramMessage() - for response in responses: - if response.text and response.file is None: - if msg.text: - raise RuntimeError(f"Several messages with text:\n{msg.text}\n{response.text}") - msg.text = response.text or msg.text - if response.file is not None: - file = Path(file_download_destination) / (str(response.file.media.id) + response.file.ext) - await response.download_media(file=file) - if msg.attachments is None: - msg.attachments = Attachments() - msg.attachments.files.append( - Attachment(source=file, id=None, title=response.file.title or response.text or None) - ) - if response.buttons is not None: - buttons = [] - for row in response.buttons: - for button in row: - buttons.append( - Button( - source=button.url, - text=button.text, - payload=button.data, - ) - ) - if msg.ui is not None: - raise RuntimeError(f"Several messages with ui:\n{msg.ui}\n{TelegramUI(buttons=buttons)}") - msg.ui = TelegramUI(buttons=buttons) - if isinstance(response.reply_markup, ReplyKeyboardHide): - if msg.ui is not None: - raise RuntimeError(f"Several messages with ui:\n{msg.ui}\n{types.ReplyKeyboardRemove()}") - msg.ui = RemoveKeyboard() - if response.geo is not None: - location = Location(latitude=response.geo.lat, longitude=response.geo.long) - if msg.location is not None: - raise RuntimeError(f"Several messages with location:\n{msg.location}\n{location}") - msg.location = location - return msg - - @asynccontextmanager - async def run_bot_loop(self): - """A context manager that returns a function to run one polling loop of a messenger interface.""" - self.pipeline.messenger_interface.timeout = 2 - self.pipeline.messenger_interface.long_polling_timeout = 2 - await self.forget_previous_updates() - - yield lambda: self.pipeline.messenger_interface._polling_loop(self.pipeline._run_pipeline) - - self.pipeline.messenger_interface.forget_processed_updates() - - async def send_and_check(self, message: Message, file_download_destination=None): - """ - Send a message from a bot, receive it as client, verify it. - - :param message: Message to send and check. - :param file_download_destination: - Temporary directory (used to download sent files). - Defaults to :py:class:`tempfile.TemporaryDirectory`. 
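A hypothetical end-to-end check with the (now removed) testing helper above; the pipeline, API credentials, session file, and bot handle are placeholders (see the constructor docstring earlier in this file):

```python
import asyncio
from dff.script import Message
from dff.utils.testing.telegram import TelegramTesting

testing = TelegramTesting(
    pipeline=pipeline,                    # a pipeline with PollingTelegramInterface
    api_credentials=(12345, "api_hash"),  # placeholder id/hash
    session_file="user.session",
    bot_username="@my_test_bot",
)
asyncio.run(testing.send_and_check(Message("Hi")))
```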
- """ - await self.forget_previous_updates() - - async with self.client: - messenger_interface = cast(PollingTelegramInterface, self.pipeline.messenger_interface) - - messages = await self.client.get_messages(self.bot, limit=1) - if len(messages) == 0: - last_message_id = 0 - else: - last_message_id = messages[0].id - - messenger_interface.messenger.send_response((await self.client.get_me(input_peer=True)).user_id, message) - - await asyncio.sleep(3) - bot_messages = [ - x async for x in self.client.iter_messages(self.bot, min_id=last_message_id, from_user=self.bot) - ] # iter_messages is used instead of get_messages because get_messages requires bot min_id and max_id - - if file_download_destination is None: - fd_context = TemporaryDirectory() - else: - fd_context = nullcontext(file_download_destination) - - with fd_context as file_download_destination: - result = await self.parse_responses(bot_messages, file_download_destination) - - assert result == message - - async def forget_previous_updates(self): - messenger_interface = cast(PollingTelegramInterface, self.pipeline.messenger_interface) - messenger = messenger_interface.messenger - updates = messenger.get_updates(offset=messenger.last_update_id + 1, timeout=1, long_polling_timeout=1) - max_update_id = max([*map(lambda x: x.update_id, updates), -1]) - messenger.get_updates(offset=max_update_id + 1, timeout=1, long_polling_timeout=1) - - async def check_happy_path(self, happy_path, file_download_destination=None, run_bot: bool = True): - """ - Play out a dialogue with the bot. Check that the dialogue is correct. - - :param happy_path: Expected dialogue - :param file_download_destination: Temporary directory (used to download sent files) - :param run_bot: Whether a bot inside pipeline should be running (disable this to test non-async bots) - :return: - """ - if run_bot: - bot = self.run_bot_loop() - else: - - async def null(): - ... 
- - bot = nullcontext(null) - - if file_download_destination is None: - fd_context = TemporaryDirectory() - else: - fd_context = nullcontext(file_download_destination) - - async with self.client, bot as boot_loop: - with fd_context as file_download_destination: - bot_messages = [] - last_message = None - for request, response in happy_path: - logging.info(f"Sending request {request}") - user_message = await self.send_message(TelegramMessage.model_validate(request), bot_messages) - if user_message is not None: - last_message = user_message - logging.info("Request sent") - await boot_loop() - await asyncio.sleep(2) - logging.info("Extracting responses") - bot_messages = [ - x async for x in self.client.iter_messages(self.bot, min_id=last_message.id, from_user=self.bot) - ] - # iter_messages is used instead of get_messages because get_messages requires bot min_id and max_id - if len(bot_messages) > 0: - last_message = bot_messages[0] - logging.info("Got responses") - result = await self.parse_responses(bot_messages, file_download_destination) - assert result == TelegramMessage.model_validate(response) diff --git a/docs/source/_templates/example-links.html b/docs/source/_templates/example-links.html index 6e03f6b22..5ba1edc8f 100644 --- a/docs/source/_templates/example-links.html +++ b/docs/source/_templates/example-links.html @@ -1,8 +1,8 @@ {% if meta is defined and meta is not none and 'tutorial_name' in meta %} {% set repo_path = "/".join(meta['tutorial_name'].split("/")[-1].split(".")) %} - {% set github_link = 'https://github.com/deeppavlov/dialog_flow_framework/blob/master/' ~ repo_path ~ '.py' %} + {% set github_link = 'https://github.com/deeppavlov/chatsky/blob/master/' ~ repo_path ~ '.py' %} {% set notebook_link = '../' ~ meta['tutorial_name'] ~ '.ipynb' %} - {% set colab_link = 'https://colab.research.google.com/github/deeppavlov/dialog_flow_framework/blob/gh-pages/' ~ meta['tutorial_name'] ~ '.ipynb' %} + {% set colab_link = 'https://colab.research.google.com/github/deeppavlov/chatsky/blob/gh-pages/' ~ meta['tutorial_name'] ~ '.ipynb' %}