From 67cbec264f150102e7056568ac5dd320207b4ba7 Mon Sep 17 00:00:00 2001 From: Mackay-Fisher Date: Fri, 29 Nov 2024 16:16:07 -0600 Subject: [PATCH 1/4] [py-tx] brought up ContentTypeConfig [py-tx] updated for CLI pass [py-tx] updated import path --- .../threatexchange/cli/storage/interfaces.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 python-threatexchange/threatexchange/cli/storage/interfaces.py diff --git a/python-threatexchange/threatexchange/cli/storage/interfaces.py b/python-threatexchange/threatexchange/cli/storage/interfaces.py new file mode 100644 index 000000000..672b6e708 --- /dev/null +++ b/python-threatexchange/threatexchange/cli/storage/interfaces.py @@ -0,0 +1,25 @@ +import abc +from dataclasses import dataclass +import typing as t +from content_type.content_base import ContentType + + +@dataclass +class ContentTypeConfig: + """ + Holder for ContentType configuration. + """ + + # Content types that are not enabled should not be used in hashing/matching + enabled: bool + content_type: t.Type[ContentType] + + +class IContentTypeConfigStore(metaclass=abc.ABCMeta): + """Interface for accessing ContentType configuration""" + + @abc.abstractmethod + def get_content_type_configs(self) -> t.Mapping[str, ContentTypeConfig]: + """ + Return all installed content types. 
+ """ From 3673f96cea5bc57fd2470ea21cdc32faf30c41e3 Mon Sep 17 00:00:00 2001 From: Mackay-Fisher Date: Fri, 29 Nov 2024 16:28:30 -0600 Subject: [PATCH 2/4] [py-tx] made change to reflect IsignalType not IContent Type --- .../threatexchange/cli/storage/interfaces.py | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/python-threatexchange/threatexchange/cli/storage/interfaces.py b/python-threatexchange/threatexchange/cli/storage/interfaces.py index 672b6e708..62d310ec0 100644 --- a/python-threatexchange/threatexchange/cli/storage/interfaces.py +++ b/python-threatexchange/threatexchange/cli/storage/interfaces.py @@ -1,25 +1,68 @@ import abc from dataclasses import dataclass import typing as t -from content_type.content_base import ContentType - +from threatexchange.content_type.content_base import ContentType +from threatexchange.signal_type.signal_base import SignalType @dataclass -class ContentTypeConfig: +class SignalTypeConfig: """ - Holder for ContentType configuration. + Holder for SignalType configuration """ - # Content types that are not enabled should not be used in hashing/matching - enabled: bool - content_type: t.Type[ContentType] + # Signal types that are not enabled should not be used in hashing/matching + enabled_ratio: float + signal_type: t.Type[SignalType] + + @property + def enabled(self) -> bool: + # TODO do a coin flip here, but also refactor this to do seeding + return self.enabled_ratio >= 0.0 -class IContentTypeConfigStore(metaclass=abc.ABCMeta): - """Interface for accessing ContentType configuration""" +class ISignalTypeConfigStore(metaclass=abc.ABCMeta): + """Interface for accessing SignalType configuration""" + + @abc.abstractmethod + def get_signal_type_configs(self) -> t.Mapping[str, SignalTypeConfig]: + """Return all installed signal types.""" @abc.abstractmethod - def get_content_type_configs(self) -> t.Mapping[str, ContentTypeConfig]: - """ - Return all installed content types. 
- """ + def _create_or_update_signal_type_override( + self, signal_type: str, enabled_ratio: float + ) -> None: + """Create or update database entry for a signal type, setting a new value.""" + + @t.final + def create_or_update_signal_type_override( + self, signal_type: str, enabled_ratio: float + ) -> None: + """Update enabled ratio of an installed signal type.""" + installed_signal_types = self.get_signal_type_configs() + if signal_type not in installed_signal_types: + raise ValueError(f"Unknown signal type {signal_type}") + if not (0.0 <= enabled_ratio <= 1.0): + raise ValueError( + f"Invalid enabled ratio {enabled_ratio}. Must be in the range 0.0-1.0 inclusive." + ) + self._create_or_update_signal_type_override(signal_type, enabled_ratio) + + @t.final + def get_enabled_signal_types(self) -> t.Mapping[str, t.Type[SignalType]]: + """Helper shortcut for getting only enabled SignalTypes""" + return { + k: v.signal_type + for k, v in self.get_signal_type_configs().items() + if v.enabled + } + + @t.final + def get_enabled_signal_types_for_content_type( + self, content_type: t.Type[ContentType] + ) -> t.Mapping[str, t.Type[SignalType]]: + """Helper shortcut for getting enabled types for a piece of content""" + return { + k: v.signal_type + for k, v in self.get_signal_type_configs().items() + if v.enabled and content_type in v.signal_type.get_content_types() + } \ No newline at end of file From 9c81cb0c17dd037a96f018f80f7c6a1e929bdd22 Mon Sep 17 00:00:00 2001 From: Mackay-Fisher Date: Fri, 29 Nov 2024 16:32:33 -0600 Subject: [PATCH 3/4] [py-tx] formatting --- python-threatexchange/threatexchange/cli/storage/interfaces.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python-threatexchange/threatexchange/cli/storage/interfaces.py b/python-threatexchange/threatexchange/cli/storage/interfaces.py index 62d310ec0..8061d7fb4 100644 --- a/python-threatexchange/threatexchange/cli/storage/interfaces.py +++ 
b/python-threatexchange/threatexchange/cli/storage/interfaces.py @@ -4,6 +4,7 @@ from threatexchange.content_type.content_base import ContentType from threatexchange.signal_type.signal_base import SignalType + @dataclass class SignalTypeConfig: """ @@ -65,4 +66,4 @@ def get_enabled_signal_types_for_content_type( k: v.signal_type for k, v in self.get_signal_type_configs().items() if v.enabled and content_type in v.signal_type.get_content_types() - } \ No newline at end of file + } From 597ff938164a63e68ab43e0d20636d5f998637cd Mon Sep 17 00:00:00 2001 From: David Callies Date: Mon, 2 Dec 2024 10:16:05 -0500 Subject: [PATCH 4/4] add more comments --- .../threatexchange/cli/storage/interfaces.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/python-threatexchange/threatexchange/cli/storage/interfaces.py b/python-threatexchange/threatexchange/cli/storage/interfaces.py index 8061d7fb4..87dd519d4 100644 --- a/python-threatexchange/threatexchange/cli/storage/interfaces.py +++ b/python-threatexchange/threatexchange/cli/storage/interfaces.py @@ -1,3 +1,32 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +""" +Common interface for persisting pytx configuration and concepts. + +Most of the individual components of pytx are fine to use piecemeal, and +the full interface covers the most complex and complete usage. A use case +with one collection of hashes using one algorithm might be better off +hardcoding those things rather than fully implementing the interface. + +# Migration Notes +There's an earlier attempt at these interfaces used for CLI at +<@Mackay-Fisher add the right pointer to the CLI storage>. +During the development of Hasher-Matcher-Actioner 2.0 +(github.com/facebook/ThreatExchange/tree/main/hasher-matcher-actioner/) +we realized that the original attempt at this wouldn't meet the needs +of that code and wrote a new interface.
+ +As of 12/2024, we are now migrating that interface from HMA into pytx +proper as part of a migration to dbm +(github.com/facebook/ThreatExchange/issues/1687). The general approach is: +1. Copy the interface unchanged from HMA to pytx +2. Release a new version of pytx +3. Delete the copy in HMA and update all references to the pytx version + +In parallel, we intend to slowly migrate the CLI storage components to +follow the same interface. +""" + import abc from dataclasses import dataclass import typing as t