Skip to content

Commit

Permalink
Add QA plots for source injection to analysis_tools
Browse files Browse the repository at this point in the history
  • Loading branch information
jtmccann committed Jun 26, 2024
1 parent ea42295 commit 2d6848d
Show file tree
Hide file tree
Showing 10 changed files with 644 additions and 3 deletions.
16 changes: 16 additions & 0 deletions pipelines/injectedCoaddQualityCore.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Pipeline producing the Tier1 QA plots and metrics for injected-source coadds.
description: |
  Tier1 plots and metrics to assess injected coadd quality
tasks:
  injectedObjectAnalysis:
    class: lsst.analysis.tools.tasks.injectedObjectAnalysis.InjectedObjectAnalysisTask
    config:
      # Each atools.<name> entry assigns the analysis tool run under that name.
      atools.completenessHist: CompletenessPurityTool
      atools.astromDiffRAScatterPlot: TargetInjectedCatDeltaRAScatterPlot
      atools.astromDiffDecScatterPlot: TargetInjectedCatDeltaDecScatterPlot
      atools.astromDiffMetrics: TargetInjectedCatDeltaMetrics
      atools.astromDiffMetrics.applyContext: CoaddContext
      atools.targetInjectedCatDeltaPsfScatterPlot: TargetInjectedCatDeltaPsfScatterPlot
      bands: ["g", "r", "i", "z", "y"]
      # Imports that bring the tool and context class names used above into scope.
      python: |
        from lsst.analysis.tools.atools import *
        from lsst.analysis.tools.contexts import *
1 change: 1 addition & 0 deletions python/lsst/analysis/tools/actions/plot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .barPlots import *
from .calculateRange import *
from .colorColorFitPlot import *
from .completenessPlot import *
from .diaSkyPlot import *
from .focalPlanePlot import *
from .gridPlot import *
Expand Down
181 changes: 181 additions & 0 deletions python/lsst/analysis/tools/actions/plot/completenessPlot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# This file is part of analysis_tools.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


from typing import Mapping

import matplotlib.pyplot as plt
import numpy as np
from lsst.pex.config import Field, ListField
from matplotlib.figure import Figure

from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, ScalarType, Vector
from .plotUtils import addPlotInfo

__all__ = ("CompletenessHist",)


class CompletenessHist(PlotAction):
    """Plot the fraction of injected sources recovered versus input magnitude.

    The recovered fraction per magnitude bin is drawn as a bar chart on the
    left axis; step histograms of the number of input and recovered sources
    are overplotted on a logarithmic right axis. A source is counted as
    recovered when its match distance is finite.
    """

    magKey = Field[str](doc="Name of the magnitude column.", default="mag")
    matchDistanceKey = Field[str](doc="Name of the match distance column.", default="matchDistance")
    xAxisLabel = Field[str](doc="Label for the x axis.", default="Input Magnitude (mag)")
    inputLabel = Field[str](doc="Label for the input source histogram.", default="Synthetic Inputs")
    outputLabel = Field[str](doc="Label for the recovered source histogram.", default="Synthetic Recovered")
    numBins = Field[int](doc="Number of bins to use for the histograms.", default=100)
    completenessPercentiles = ListField[float](
        doc="Record the magnitudes at these percentiles",
        default=[16.0, 50.0, 84.0],
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (key, type) pairs this action reads from the data."""
        base: list[tuple[str, type[Vector] | ScalarType]] = []
        base.append((self.magKey, Vector))
        base.append((self.matchDistanceKey, Vector))
        return base

    def __call__(self, data: KeyedData, **kwargs) -> Mapping[str, Figure] | Figure:
        """Validate the input data and produce the completeness plot."""
        self._validateInput(data, **kwargs)
        return self.makePlot(data, **kwargs)

    def _validateInput(self, data: KeyedData, **kwargs) -> None:
        """Check that all required keys are present and not scalars.

        NOTE currently can only check that something is not a Scalar, not
        check that the data is consistent with Vector.

        Raises
        ------
        ValueError
            Raised if a required key is missing from ``data`` or if a column
            is a scalar where a non-scalar type is required.
        """
        needed = self.getFormattedInputSchema(**kwargs)
        if remainder := {key.format(**kwargs) for key, _ in needed} - {
            key.format(**kwargs) for key in data.keys()
        }:
            raise ValueError(f"Task needs keys {remainder} but they were not found in input")
        for name, typ in needed:
            isScalar = issubclass((colType := type(data[name.format(**kwargs)])), Scalar)
            if isScalar and typ != Scalar:
                raise ValueError(f"Data keyed by {name} has type {colType} but action requires type {typ}")

    def makePlot(self, data, plotInfo, **kwargs):
        """Make a plot showing the fraction of injected sources recovered by
        input magnitude.

        Parameters
        ----------
        data : `KeyedData`
            All the data; must contain the columns named by ``magKey`` and
            ``matchDistanceKey``.
        plotInfo : `dict`
            A dictionary of information about the data being plotted with keys:

            ``camera``
                The camera used to take the data (`lsst.afw.cameraGeom.Camera`)
            ``"cameraName"``
                The name of camera used to take the data (`str`).
            ``"filter"``
                The filter used for this data (`str`).
            ``"ccdKey"``
                The ccd/detector key associated with this camera (`str`).
            ``"visit"``
                The visit of the data; only included if the data is from a
                single epoch dataset (`str`).
            ``"patch"``
                The patch that the data is from; only included if the data is
                from a coadd dataset (`str`).
            ``"tract"``
                The tract that the data comes from (`str`).
            ``"photoCalibDataset"``
                The dataset used for the calibration, e.g. "jointcal" or "fgcm"
                (`str`).
            ``"skyWcsDataset"``
                The sky Wcs dataset used (`str`).
            ``"rerun"``
                The rerun the data is stored in (`str`).

        Returns
        -------
        fig : `matplotlib.figure.Figure`
            The figure to be saved.

        Notes
        -----
        Makes a histogram showing the fraction recovered in each magnitude
        bin with the number input and recovered overplotted. For each entry
        of ``completenessPercentiles`` the left edge of the lowest-magnitude
        bin whose recovered fraction is below that percentage is marked and
        reported in a text box (NaN if no bin falls below it).
        """
        inputMags = data[self.magKey]
        matched = np.isfinite(data[self.matchDistanceKey])

        # Make plot showing the fraction recovered in magnitude bins
        fig, axLeft = plt.subplots(dpi=300)
        axLeft.tick_params(axis="y", labelcolor="C0")
        axLeft.set_xlabel(self.xAxisLabel)
        axLeft.set_ylabel("Fraction Recovered", color="C0")
        axRight = axLeft.twinx()
        axRight.set_ylabel("Number of Sources")

        nInput, bins, _ = axRight.hist(
            inputMags,
            range=(np.nanmin(inputMags), np.nanmax(inputMags)),
            bins=self.numBins,
            log=True,
            histtype="step",
            label=self.inputLabel,
            color="black",
        )
        # The bin edges are given explicitly, so ``range`` would have no
        # effect; omitting it also avoids calling nanmin/nanmax on an empty
        # array when no source was matched.
        nOutput, _, _ = axRight.hist(
            inputMags[matched],
            bins=bins,
            log=True,
            histtype="step",
            label=self.outputLabel,
            color="grey",
        )

        # Bins with no input sources give 0/0 = NaN; that is intended (they
        # never compare as "below" a threshold), so silence the warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            fracRecovered = nOutput / nInput

        # The twin axes share their x axis, so either one reports the limits.
        xlims = axLeft.get_xlim()

        # TODO: put a box in the bottom corner for all the percentiles
        # Find bin where the fraction recovered falls below a given percentile.
        percentileInfo = []
        for percentile in self.completenessPercentiles:
            threshold = percentile / 100
            below = np.flatnonzero(fracRecovered < threshold)
            # ``bins`` is increasing, so the first index is the lowest edge.
            mag = bins[below[0]] if below.size else np.nan
            axLeft.plot([xlims[0], mag], [threshold, threshold], ls=":", color="grey")
            axLeft.plot([mag, mag], [0, threshold], ls=":", color="grey")
            # Label with the configured value to avoid float round-off noise.
            percentileInfo.append(f"Magnitude at {percentile}% recovered: {mag:0.2f}")

        # Restore the limits, which the guide lines above may have altered.
        axLeft.set_xlim(xlims)
        axLeft.set_ylim(0, 1.05)
        axRight.legend(loc="lower left", ncol=2)
        axLeft.axhline(1, color="grey", ls="--")
        axLeft.bar(
            bins[:-1],
            fracRecovered,
            width=np.diff(bins),
            align="edge",
            color="C0",
            alpha=0.5,
            zorder=10,
        )

        bboxDict = dict(boxstyle="round", facecolor="white", alpha=0.75)
        for index, info in enumerate(percentileInfo):
            axLeft.text(
                0.3,
                0.2 + 0.1 * index,
                info,
                transform=fig.transFigure,
                bbox=bboxDict,
                zorder=11,
            )

        # Add useful information to the plot
        addPlotInfo(fig, plotInfo)
        return fig
40 changes: 39 additions & 1 deletion python/lsst/analysis/tools/actions/scalar/scalarActions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,20 @@
"IqrHistAction",
"DivideScalar",
"RmsAction",
"MagPercentileAction",
)

import operator
from math import nan
from typing import cast

import numpy as np
from astropy import units as u
from lsst.pex.config import ChoiceField, Field
from lsst.pex.config.configurableActions import ConfigurableActionField

from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
from ...math import fluxToMag, isPercent, nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd


class ScalarFromVectorAction(ScalarAction):
Expand Down Expand Up @@ -434,3 +436,39 @@ def __call__(self, data: KeyedData, **kwargs) -> Scalar:
if scalarB == 0:
raise ValueError("Denominator is zero!")
return scalarA / scalarB


class MagPercentileAction(ScalarFromVectorAction):
    """Calculate the magnitude at which completeness drops below a given
    percentile.

    Fluxes read from ``vectorKey`` are converted with ``fluxToMag`` and
    histogrammed in 100 bins spanning the finite range of the result. A
    source counts as recovered when its match distance is finite. The value
    returned is the left edge of the lowest-magnitude bin whose recovered
    fraction is below ``percentile`` percent.
    """

    matchDistanceKey = Field[str]("Match distance Vector")
    fluxUnits = Field[str](doc="Units for the column.", default="nanojansky")
    percentile = Field[float](doc="The percentile to find the magnitude at.", default=50.0, check=isPercent)

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (key, type) pairs this action reads from the data."""
        return (
            (self.matchDistanceKey, Vector),
            (self.vectorKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Compute the completeness magnitude.

        Parameters
        ----------
        data : `KeyedData`
            Must contain the match distance and flux columns.
        **kwargs
            Used to format both column keys.

        Returns
        -------
        mag : `Scalar`
            Left edge of the first bin whose recovered fraction is below the
            configured percentile; NaN if there is no such bin or if no
            magnitude is finite.
        """
        # Format both keys the same way so templated column names work.
        matched = np.isfinite(data[self.matchDistanceKey.format(**kwargs)])
        fluxValues = data[self.vectorKey.format(**kwargs)]
        values = fluxToMag(fluxValues, flux_unit=u.Unit(self.fluxUnits))

        # Without any finite magnitude no histogram range can be defined.
        if not np.any(np.isfinite(values)):
            return np.nan

        nInput, bins = np.histogram(
            values,
            range=(np.nanmin(values), np.nanmax(values)),
            bins=100,
        )
        # Reuse the input bin edges. No ``range`` is passed: the edges already
        # fix the binning, and computing one would fail with zero matches.
        nOutput, _ = np.histogram(values[matched], bins=bins)

        # Bins with no input sources give 0/0 = NaN, which never compares as
        # below the threshold; silence the associated warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            fracRecovered = nOutput / nInput

        # Find the first bin where the fraction recovered falls below the
        # requested percentile.
        belowPercentile = np.flatnonzero(fracRecovered < self.percentile / 100)
        if belowPercentile.size == 0:
            return np.nan
        # ``bins`` is increasing, so the first index is the lowest edge.
        return bins[belowPercentile[0]]
44 changes: 42 additions & 2 deletions python/lsst/analysis/tools/actions/vector/vectorActions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"ResidualWithPerGroupStatistic",
"RAcosDec",
"AngularSeparation",
"MagPercentileAction",
)

import logging
Expand All @@ -44,11 +45,11 @@
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord
from lsst.pex.config import DictField, Field
from lsst.pex.config import DictField, Field, ListField
from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField

from ...interfaces import KeyedData, KeyedDataSchema, Vector, VectorAction
from ...math import divide, fluxToMag, log10
from ...math import divide, fluxToMag, isPercent, log10
from .selectors import VectorSelector

_LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -404,3 +405,42 @@ def __call__(self, data: KeyedData, **kwargs) -> Vector:

result = joinedDf["value_individual"] - joinedDf["value_group"]
return np.array(result)


class MagPercentileAction(VectorAction):
    """Calculate the magnitudes at which completeness drops below the given
    percentiles.

    Fluxes read from ``vectorKey`` are converted with ``fluxToMag`` and
    histogrammed in 100 bins spanning the finite range of the result. A
    source counts as recovered when its match distance is finite. For each
    entry of ``percentiles`` the result holds the left edge of the
    lowest-magnitude bin whose recovered fraction is below that percentage.

    NOTE(review): scalarActions exports an action with this same name;
    confirm that wildcard imports of both modules cannot shadow one another.
    """

    matchDistanceKey = Field[str]("Match distance Vector")
    vectorKey = Field[str](doc="Key of vector which should be loaded")
    fluxUnits = Field[str](doc="Units for the column.", default="nanojansky")
    percentiles = ListField[float](
        doc="The percentiles to find the magnitude at.",
        default=[16.0, 50.0, 84.0],
        itemCheck=isPercent,
    )

    def getInputSchema(self) -> KeyedDataSchema:
        """Return the (key, type) pairs this action reads from the data."""
        return (
            (self.matchDistanceKey, Vector),
            (self.vectorKey, Vector),
        )

    def __call__(self, data: KeyedData, **kwargs) -> Vector:
        """Compute the completeness magnitudes.

        Parameters
        ----------
        data : `KeyedData`
            Must contain the match distance and flux columns.
        **kwargs
            Used to format both column keys.

        Returns
        -------
        mags : `Vector`
            One magnitude per configured percentile, in the same order; an
            entry is NaN if no bin falls below that percentile, and all
            entries are NaN if no magnitude is finite.
        """
        # Format both keys the same way so templated column names work.
        matched = np.isfinite(data[self.matchDistanceKey.format(**kwargs)])
        fluxValues = data[self.vectorKey.format(**kwargs)]
        values = fluxToMag(fluxValues, flux_unit=u.Unit(self.fluxUnits))

        # Without any finite magnitude no histogram range can be defined.
        if not np.any(np.isfinite(values)):
            return np.full(len(self.percentiles), np.nan)

        nInput, bins = np.histogram(
            values,
            range=(np.nanmin(values), np.nanmax(values)),
            bins=100,
        )
        # Reuse the input bin edges. No ``range`` is passed: the edges already
        # fix the binning, and computing one would fail with zero matches.
        nOutput, _ = np.histogram(values[matched], bins=bins)

        # Bins with no input sources give 0/0 = NaN, which never compares as
        # below a threshold; silence the associated warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            fracRecovered = nOutput / nInput

        # For each percentile, find the first bin where the fraction
        # recovered falls below it.
        mags = []
        for pct in self.percentiles:
            belowPercentile = np.flatnonzero(fracRecovered < pct / 100)
            # ``bins`` is increasing, so the first index is the lowest edge.
            mags.append(bins[belowPercentile[0]] if belowPercentile.size else np.nan)
        return np.array(mags)
1 change: 1 addition & 0 deletions python/lsst/analysis/tools/atools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .skyFluxStatisticMetrics import *
from .skyObject import *
from .skySource import *
from .sourceInjectionPlots import *
from .sources import *
from .stellarLocus import *
from .wholeSkyPlotTool import *
Loading

0 comments on commit 2d6848d

Please sign in to comment.