Skip to content

Commit

Permalink
Prototype partial(?) replacements for relation-tree objects.
Browse files Browse the repository at this point in the history
The current daf_relation tree in our Query objects is too contaminated
with implementation details to serve us well in the RemoteButler
client, and we could only partially mitigate that by defining a new
daf_relation Engine.  The fundamental issue is that daf_relation
objects expect to know exactly which columns they have at any given
time, and that's at odds with user expectations that columns
"magically" appear whenever they're requested (e.g. referenced by a
`where` string), and that this joins in the tables that provide them
as-needed.

The two new files here *heavily* duplicate stuff in daf_relation, and
in addition to being more vague about what the columns are, they're
simpler for two key reasons:

- They're just data, with no logic for maintaining invariants,
  constructing trees, serialization, or anything else.  This will have
  to change as we actually start to use them.

- They fully enumerate the kinds of expressions and operations we care
  about in the butler query system rather than trying to define
  abstract versions of those upstream of daf_butler that could be
  specialized in daf_butler.  I had not appreciated how much of a
  simplification this could be when writing daf_relation as a separate
  package, and if it holds up it may suggest that the right way to
  resolve the duplication is to rip a lot of stuff out of daf_relation.
  serialization.
  • Loading branch information
TallJimbo committed Nov 30, 2023
1 parent e2e7c40 commit 00946bf
Show file tree
Hide file tree
Showing 3 changed files with 520 additions and 0 deletions.
26 changes: 26 additions & 0 deletions python/lsst/daf/butler/queries/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
212 changes: 212 additions & 0 deletions python/lsst/daf/butler/queries/abstract_expressions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

# Public names of this module: only the three union type aliases are listed;
# the individual node dataclasses defined below are not part of `__all__`.
__all__ = (
"AbstractExpression",
"AbstractOrderExpression",
"AbstractPredicate",
)


import dataclasses
from typing import Literal, TypeAlias, Union, TYPE_CHECKING

import astropy.time

from lsst.sphgeom import Region

from ..dimensions import DataCoordinate
from .._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag, DatasetColumnTag
from .._timespan import Timespan

if TYPE_CHECKING:
from .abstract_relations import AbstractRelation


# Python value types accepted as the payload of a `ColumnLiteral`.
LiteralValue: TypeAlias = Union[int, bytes, str, float, astropy.time.Time, Timespan, Region]


@dataclasses.dataclass(frozen=True)
class ColumnLiteral:
    """Leaf expression node wrapping a constant Python value.

    This is a plain immutable record: the value is stored exactly as given
    and the class itself performs no validation or conversion.
    """

    # The constant; `LiteralValue` enumerates the intended Python types.
    value: LiteralValue


@dataclasses.dataclass(frozen=True)
class ColumnReference:
    """Leaf expression node naming a column that an abstract relation can
    provide.
    """

    # Tag identifying the referenced column (dimension key, dimension record
    # field, or dataset field).
    column: DimensionKeyColumnTag | DimensionRecordColumnTag | DatasetColumnTag


@dataclasses.dataclass(frozen=True)
class UnaryExpression:
    """Expression node applying a single-operand operator whose result is
    not a boolean.
    """

    # The expression the operator is applied to.
    operand: AbstractExpression
    # Which operation to apply.  NOTE(review): "begin_of"/"end_of" presumably
    # select the bounds of a timespan-valued operand - confirm with callers.
    operator: Literal["-", "begin_of", "end_of"]


@dataclasses.dataclass(frozen=True)
class BinaryExpression:
    """Expression node combining two column expressions with an arithmetic
    operator; the result is not a boolean.
    """

    # Left-hand operand.
    a: AbstractExpression
    # Right-hand operand.
    b: AbstractExpression
    # Arithmetic operator applied as ``a <operator> b``.
    operator: Literal["+", "-", "*", "/", "%"]


# Union of the column-expression node types defined above.  Boolean-valued
# nodes are collected separately in `AbstractPredicate`.
AbstractExpression: TypeAlias = Union[ColumnLiteral, ColumnReference, UnaryExpression, BinaryExpression]


@dataclasses.dataclass(frozen=True)
class Reversed:
    """Marker wrapper around an `AbstractExpression` that indicates sorting
    on it should be done in reverse order.
    """

    # The expression to sort by.
    operand: AbstractExpression


# A sort term: either a bare expression or one wrapped in `Reversed` to request
# reverse ordering.
AbstractOrderExpression: TypeAlias = Union[AbstractExpression, Reversed]


@dataclasses.dataclass(frozen=True)
class LogicalAnd:
    """A boolean column expression that is `True` only if all of its operands
    are `True`.
    """

    # Any number of operands.  The trailing ``...`` matters: a bare
    # ``tuple[AbstractPredicate]`` would declare a tuple of exactly one
    # element, which contradicts the N-ary meaning of this node.
    operands: tuple[AbstractPredicate, ...]


@dataclasses.dataclass(frozen=True)
class LogicalOr:
    """A boolean column expression that is `True` if any of its operands are
    `True`.
    """

    # Any number of operands.  The trailing ``...`` matters: a bare
    # ``tuple[AbstractPredicate]`` would declare a tuple of exactly one
    # element, which contradicts the N-ary meaning of this node.
    operands: tuple[AbstractPredicate, ...]


@dataclasses.dataclass(frozen=True)
class LogicalNot:
    """Boolean expression node that negates another predicate."""

    # The predicate whose truth value is inverted.
    operand: AbstractPredicate


@dataclasses.dataclass(frozen=True)
class IsNull:
    """Boolean expression node that checks whether an expression evaluates
    to NULL.
    """

    # The expression being tested for NULL.
    operand: AbstractExpression


@dataclasses.dataclass(frozen=True)
class Comparison:
    """Boolean column expression produced by comparing two non-boolean
    expressions.
    """

    # Left-hand side of the comparison.
    a: AbstractExpression
    # Right-hand side of the comparison.
    b: AbstractExpression
    # Comparison applied as ``a <operator> b``.
    operator: Literal["=", "!=", "<", ">", ">=", "<=", "overlaps"]


@dataclasses.dataclass(frozen=True)
class InContainer:
    """Boolean expression node testing membership of one expression in an
    explicitly listed sequence of other expressions.
    """

    # The expression whose membership is tested.
    member: AbstractExpression
    # The candidate expressions to compare `member` against.
    container: tuple[AbstractExpression, ...]


@dataclasses.dataclass(frozen=True)
class InRange:
    """Boolean expression node testing whether an expression falls inside an
    integer range.
    """

    # The expression whose membership is tested.
    member: AbstractExpression
    # Python `range` object describing the accepted integers.
    range: range


@dataclasses.dataclass(frozen=True)
class InRelation:
    """Boolean expression node testing whether an expression is included in
    a single-column projection of a relation.

    The primary intended use is with dataset ID columns, though it may be
    useful for other columns too.
    """

    # The expression whose membership is tested.
    member: AbstractExpression
    # The column of `relation` that forms the single-column projection.
    column: DimensionKeyColumnTag | DimensionRecordColumnTag | DatasetColumnTag
    # The relation that is projected.
    relation: AbstractRelation


@dataclasses.dataclass(frozen=True)
class StringPredicate:
    """Marker wrapper for a boolean column expression that was obtained by
    parsing a string expression.

    The original text is kept alongside the parsed tree because it is useful
    for error reporting.
    """

    # The original, unparsed expression string.
    where: str
    # The predicate tree produced by parsing `where`.
    tree: AbstractPredicate


@dataclasses.dataclass(frozen=True)
class DataCoordinateConstraint:
    """Boolean expression node that treats the key-value pairs of a data ID
    as equality constraints combined with a logical AND.
    """

    # The data ID supplying the key-value pairs.
    data_coordinate: DataCoordinate


# Union of the boolean (predicate) node types defined in this module.  It is
# built at runtime with `Union[...]`, so it has to come after every class it
# names.
AbstractPredicate: TypeAlias = Union[
LogicalAnd,
LogicalOr,
LogicalNot,
IsNull,
Comparison,
InContainer,
InRange,
InRelation,
StringPredicate,
DataCoordinateConstraint,
]
Loading

0 comments on commit 00946bf

Please sign in to comment.