Add QA plots for source injection to analysis_tools

lsst · Jun 26, 2024 · 2d6848d · 2d6848d
1 parent ea42295
commit 2d6848d
Show file tree

Hide file tree

Showing 10 changed files with 644 additions and 3 deletions.
diff --git a/pipelines/injectedCoaddQualityCore.yaml b/pipelines/injectedCoaddQualityCore.yaml
@@ -0,0 +1,16 @@
+# Pipeline definition with a single task, injectedObjectAnalysis, whose
+# analysis tools are assigned under `config` below.
+description: |
+  Tier1 plots and metrics to assess injected coadd quality
+tasks:
+  injectedObjectAnalysis:
+    class: lsst.analysis.tools.tasks.injectedObjectAnalysis.InjectedObjectAnalysisTask
+    config:
+      # Each `atools.<name>` entry assigns an analysis tool class by bare name.
+      atools.completenessHist: CompletenessPurityTool
+      atools.astromDiffRAScatterPlot: TargetInjectedCatDeltaRAScatterPlot
+      atools.astromDiffDecScatterPlot: TargetInjectedCatDeltaDecScatterPlot
+      atools.astromDiffMetrics: TargetInjectedCatDeltaMetrics
+      atools.astromDiffMetrics.applyContext: CoaddContext
+      atools.targetInjectedCatDeltaPsfScatterPlot: TargetInjectedCatDeltaPsfScatterPlot
+      bands: ["g", "r", "i", "z", "y"]
+      # NOTE(review): presumably these wildcard imports supply the bare tool
+      # and context class names used in the values above - confirm.
+      python: |
+        from lsst.analysis.tools.atools import *
+        from lsst.analysis.tools.contexts import *
diff --git a/python/lsst/analysis/tools/actions/plot/__init__.py b/python/lsst/analysis/tools/actions/plot/__init__.py
@@ -1,6 +1,7 @@
 from .barPlots import *
 from .calculateRange import *
 from .colorColorFitPlot import *
+from .completenessPlot import *
 from .diaSkyPlot import *
 from .focalPlanePlot import *
 from .gridPlot import *

diff --git a/python/lsst/analysis/tools/actions/plot/completenessPlot.py b/python/lsst/analysis/tools/actions/plot/completenessPlot.py
@@ -0,0 +1,181 @@
+# This file is part of analysis_tools.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+from typing import Mapping
+
+import matplotlib.pyplot as plt
+import numpy as np
+from lsst.pex.config import Field, ListField
+from matplotlib.figure import Figure
+
+from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, ScalarType, Vector
+from .plotUtils import addPlotInfo
+
+__all__ = ("CompletenessHist",)
+
+
+class CompletenessHist(PlotAction):
+    """Makes a scatter plot of the data with a marginal
+    histogram for each axis.
+    """
+
+    magKey = Field[str](doc="Name of the magnitude column.", default="mag")
+    matchDistanceKey = Field[str](doc="Name of the match distance column.", default="matchDistance")
+    xAxisLabel = Field[str](doc="Label for the x axis.", default="Input Magnitude (mag)")
+    inputLabel = Field[str](doc="Label for the input source histogram.", default="Synthetic Inputs")
+    outputLabel = Field[str](doc="Label for the recovered source histogram.", default="Synthetic Recovered")
+    numBins = Field[int](doc="Number of bins to use for the histograms.", default=100)
+    completenessPercentiles = ListField[float](doc="Record the magnitudes at these percentiles", default=[16.0, 50.0, 84.0])
+
+    def getInputSchema(self) -> KeyedDataSchema:
+        base: list[tuple[str, type[Vector] | ScalarType]] = []
+        base.append((self.magKey, Vector))
+        base.append((self.matchDistanceKey, Vector))
+        return base
+
+    def __call__(self, data: KeyedData, **kwargs) -> Mapping[str, Figure] | Figure:
+        self._validateInput(data, **kwargs)
+        return self.makePlot(data, **kwargs)
+
+    def _validateInput(self, data: KeyedData, **kwargs) -> None:
+        """NOTE currently can only check that something is not a Scalar, not
+        check that the data is consistent with Vector
+        """
+        needed = self.getFormattedInputSchema(**kwargs)
+        if remainder := {key.format(**kwargs) for key, _ in needed} - {
+            key.format(**kwargs) for key in data.keys()
+        }:
+            raise ValueError(f"Task needs keys {remainder} but they were not found in input")
+        for name, typ in needed:
+            isScalar = issubclass((colType := type(data[name.format(**kwargs)])), Scalar)
+            if isScalar and typ != Scalar:
+                raise ValueError(f"Data keyed by {name} has type {colType} but action requires type {typ}")
+
+    def makePlot(self, data, plotInfo, **kwargs):
+        """Makes a plot showing the fraction of injected sources recovered by
+        input magnitude.
+
+        Parameters
+        ----------
+        data : `KeyedData`
+            All the data
+        plotInfo : `dict`
+            A dictionary of information about the data being plotted with keys:
+            ``camera``
+                The camera used to take the data (`lsst.afw.cameraGeom.Camera`)
+            ``"cameraName"``
+                The name of camera used to take the data (`str`).
+            ``"filter"``
+                The filter used for this data (`str`).
+            ``"ccdKey"``
+                The ccd/dectector key associated with this camera (`str`).
+            ``"visit"``
+                The visit of the data; only included if the data is from a
+                single epoch dataset (`str`).
+            ``"patch"``
+                The patch that the data is from; only included if the data is
+                from a coadd dataset (`str`).
+            ``"tract"``
+                The tract that the data comes from (`str`).
+            ``"photoCalibDataset"``
+                The dataset used for the calibration, e.g. "jointcal" or "fgcm"
+                (`str`).
+            ``"skyWcsDataset"``
+                The sky Wcs dataset used (`str`).
+            ``"rerun"``
+                The rerun the data is stored in (`str`).
+
+        Returns
+        ------
+        ``fig``
+            The figure to be saved (`matplotlib.figure.Figure`).
+
+        Notes
+        -----
+        Makes a histogram showing the fraction recovered in each magnitude
+        bin with the number input and recovered overplotted.
+        """
+
+        # Make plot showing the fraction recovered in magnitude bins
+        fig, axLeft = plt.subplots(dpi=300)
+        axLeft.tick_params(axis="y", labelcolor="C0")
+        axLeft.set_xlabel(self.xAxisLabel)
+        axLeft.set_ylabel("Fraction Recovered", color="C0")
+        axRight = axLeft.twinx()
+        axRight.set_ylabel("Number of Sources")
+        matched = np.isfinite(data[self.matchDistanceKey])
+        nInput, bins, _ = axRight.hist(
+            data[self.magKey],
+            range=(np.nanmin(data[self.magKey]), np.nanmax(data[self.magKey])),
+            bins=self.numBins,
+            log=True,
+            histtype="step",
+            label=self.inputLabel,
+            color="black",
+        )
+        nOutput, _, _ = axRight.hist(
+            data[self.magKey][matched],
+            range=(np.nanmin(data[self.magKey][matched]), np.nanmax(data[self.magKey][matched])),
+            bins=bins,
+            log=True,
+            histtype="step",
+            label=self.outputLabel,
+            color="grey",
+        )
+        xlims = plt.gca().get_xlim()
+        # TODO: put a box in the bottom corner for all the percentiles
+        # Find bin where the fraction recovered falls below a given percentile.
+        percentileInfo = []
+        for pct in self.completenessPercentiles:
+            pct /= 100
+            magArray = np.where((nOutput / nInput < pct))[0]
+            if len(magArray) == 0:
+                mag = np.nan
+            else:
+                mag = np.min(bins[magArray])
+                axLeft.plot([xlims[0], mag], [pct, pct], ls=":", color="grey")
+                axLeft.plot([mag, mag], [0, pct], ls=":", color="grey")
+                percentileInfo.append("Magnitude at {}% recovered: {:0.2f}".format(pct * 100, mag))
+        plt.xlim(xlims)
+        axLeft.set_ylim(0, 1.05)
+        axRight.legend(loc="lower left", ncol=2)
+        axLeft.axhline(1, color="grey", ls="--")
+        axLeft.bar(
+            bins[:-1],
+            nOutput / nInput,
+            width=np.diff(bins),
+            align="edge",
+            color="C0",
+            alpha=0.5,
+            zorder=10,
+        )
+        bboxDict = dict(boxstyle="round", facecolor="white", alpha=0.75)
+
+        spacing = 0
+        for info in percentileInfo:
+            axLeft.text(0.3, 0.2 + spacing, info, transform=fig.transFigure, bbox=bboxDict, zorder=11)
+            spacing += 0.1
+
+        # Add useful information to the plot
+        fig = plt.gcf()
+        addPlotInfo(fig, plotInfo)
+        return fig
diff --git a/python/lsst/analysis/tools/actions/scalar/scalarActions.py b/python/lsst/analysis/tools/actions/scalar/scalarActions.py
@@ -40,18 +40,20 @@
     "IqrHistAction",
     "DivideScalar",
     "RmsAction",
+    "MagPercentileAction",
 )
 
 import operator
 from math import nan
 from typing import cast
 
 import numpy as np
+from astropy import units as u
 from lsst.pex.config import ChoiceField, Field
 from lsst.pex.config.configurableActions import ConfigurableActionField
 
 from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
-from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
+from ...math import fluxToMag, isPercent, nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
 
 
 class ScalarFromVectorAction(ScalarAction):
@@ -434,3 +436,39 @@ def __call__(self, data: KeyedData, **kwargs) -> Scalar:
         if scalarB == 0:
             raise ValueError("Denominator is zero!")
         return scalarA / scalarB
+
+
+class MagPercentileAction(ScalarFromVectorAction):
+    """Calculates the magnitude at the given percentile for completeness"""
+
+    matchDistanceKey = Field[str]("Match distance Vector")
+    fluxUnits = Field[str](doc="Units for the column.", default="nanojansky")
+    percentile = Field[float](doc="The percentile to find the magnitude at.", default=50.0, check=isPercent)
+
+    def getInputSchema(self) -> KeyedDataSchema:
+        return (
+            (self.matchDistanceKey, Vector),
+            (self.vectorKey, Vector),
+        )
+
+    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
+        matched = np.isfinite(data[self.matchDistanceKey])
+        fluxValues = data[self.vectorKey.format(**kwargs)]
+        values = fluxToMag(fluxValues, flux_unit=u.Unit(self.fluxUnits))
+        nInput, bins = np.histogram(
+            values,
+            range=(np.nanmin(values), np.nanmax(values)),
+            bins=100,
+        )
+        nOutput, _ = np.histogram(
+            values[matched],
+            range=(np.nanmin(values[matched]), np.nanmax(values[matched])),
+            bins=bins,
+        )
+        # Find bin where the fraction recovered first falls below 0.5
+        belowPercentile = np.where((nOutput / nInput < self.percentile / 100))[0]
+        if len(belowPercentile) == 0:
+            mag = np.nan
+        else:
+            mag = np.min(bins[belowPercentile])
+        return mag
diff --git a/python/lsst/analysis/tools/actions/vector/vectorActions.py b/python/lsst/analysis/tools/actions/vector/vectorActions.py
@@ -35,6 +35,7 @@
     "ResidualWithPerGroupStatistic",
     "RAcosDec",
     "AngularSeparation",
+    "MagPercentileAction",
 )
 
 import logging
@@ -44,11 +45,11 @@
 import pandas as pd
 from astropy import units as u
 from astropy.coordinates import SkyCoord
-from lsst.pex.config import DictField, Field
+from lsst.pex.config import DictField, Field, ListField
 from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField
 
 from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction
-from ...math import divide, fluxToMag, log10
+from ...math import divide, fluxToMag, isPercent, log10
 from .selectors import VectorSelector
 
 _LOG = logging.getLogger(__name__)
@@ -404,3 +405,42 @@ def __call__(self, data: KeyedData, **kwargs) -> Vector:
 
         result = joinedDf["value_individual"] - joinedDf["value_group"]
         return np.array(result)
+
+
+class MagPercentileAction(VectorAction):
+    """Calculates the magnitude at the given percentile for completeness"""
+
+    matchDistanceKey = Field[str]("Match distance Vector")
+    vectorKey = Field[str](doc="Key of vector which should be loaded")
+    fluxUnits = Field[str](doc="Units for the column.", default="nanojansky")
+    percentiles = ListField[float](doc="The percentiles to find the magnitude at.", default=[16.0, 50.0, 84.0], itemCheck=isPercent)
+
+    def getInputSchema(self) -> KeyedDataSchema:
+        return (
+            (self.matchDistanceKey, Vector),
+            (self.vectorKey, Vector),
+        )
+
+    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
+        matched = np.isfinite(data[self.matchDistanceKey])
+        fluxValues = data[self.vectorKey.format(**kwargs)]
+        values = fluxToMag(fluxValues, flux_unit=u.Unit(self.fluxUnits))
+        nInput, bins = np.histogram(
+            values,
+            range=(np.nanmin(values), np.nanmax(values)),
+            bins=100,
+        )
+        nOutput, _ = np.histogram(
+            values[matched],
+            range=(np.nanmin(values[matched]), np.nanmax(values[matched])),
+            bins=bins,
+        )
+        # Find bin where the fraction recovered first falls below 0.5
+        mags = []
+        for pct in self.percentiles:
+            belowPercentile = np.where((nOutput / nInput < pct / 100))[0]
+            if len(belowPercentile) == 0:
+                mags.append(np.nan)
+            else:
+                mags.append(np.min(bins[belowPercentile]))
+        return np.array(mags)
diff --git a/python/lsst/analysis/tools/atools/__init__.py b/python/lsst/analysis/tools/atools/__init__.py
@@ -33,6 +33,7 @@
 from .skyFluxStatisticMetrics import *
 from .skyObject import *
 from .skySource import *
+from .sourceInjectionPlots import *
 from .sources import *
 from .stellarLocus import *
 from .wholeSkyPlotTool import *