Skip to content

Commit

Permalink
txbb weights bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Mar 5, 2024
1 parent 25f5175 commit ef415fa
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 38 deletions.
17 changes: 9 additions & 8 deletions src/HHbbVV/postprocessing/PostProcessRes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n",
"year = \"2017\"\n",
"\n",
"date = \"24Mar5\"\n",
"date = \"24Mar5_update_lp\"\n",
"plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n",
"templates_dir = Path(f\"templates/{date}/\")\n",
"\n",
Expand Down Expand Up @@ -350,25 +350,26 @@
"metadata": {},
"outputs": [],
"source": [
"h = postprocessing.get_templates(\n",
"ht = postprocessing.get_templates(\n",
" events_dict,\n",
" bb_masks,\n",
" year,\n",
" nonres_sig_keys + res_sig_keys,\n",
" [\"HHbbVV\"],\n",
" # nonres_sig_keys + res_sig_keys,\n",
" # res_sig_keys,\n",
" selection_regions,\n",
" # res_shape_vars[:1],\n",
" nonres_shape_vars,\n",
" systematics,\n",
" templates_dir,\n",
" # bg_keys=[\"QCD\", \"TT\", \"V+Jets\", \"Diboson\", \"Hbb\"],\n",
" plot_dir=f\"{plot_dir}/templates/\",\n",
" plot_dir=plot_dir / \"templates\",\n",
" prev_cutflow=cutflow,\n",
" sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 1e4} | {key: 1e2 for key in res_sig_keys},\n",
" # sig_splits=sig_splits[:2],\n",
" weight_shifts={},\n",
" jshift=\"\",\n",
" lpsfs=False,\n",
" lpsfs=True,\n",
" plot_shifts=False,\n",
" pass_ylim=500,\n",
" fail_ylim=40000,\n",
Expand Down Expand Up @@ -398,12 +399,12 @@
" # res_shape_vars,\n",
" systematics,\n",
" templates_dir,\n",
" plot_dir=f\"{plot_dir}/templates\",\n",
" plot_dir=plot_dir / \"templates\",\n",
" prev_cutflow=cutflow,\n",
" sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 1e4} | {key: 1e2 for key in res_sig_keys},\n",
" sig_scale_dict={\"HHbbVV\": 1e3, \"VBFHHbbVV\": 2e4} | {key: 1e2 for key in res_sig_keys},\n",
" weight_shifts=postprocessing.weight_shifts,\n",
" jshift=jshift,\n",
" lpsfs=False,\n",
" lpsfs=True,\n",
" pass_ylim=500,\n",
" fail_ylim=40000,\n",
" # blind_pass=True,\n",
Expand Down
13 changes: 8 additions & 5 deletions src/HHbbVV/postprocessing/corrections.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _load_txbb_sfs(year: str):
"""Create 2D lookup tables in [Txbb, pT] for Txbb SFs from given year"""

# https://coli.web.cern.ch/coli/.cms/btv/boohft-calib/20221201_bb_ULNanoV9_PNetXbbVsQCD_ak8_ext_2016APV/4_fit/
with (package_path / f"/corrections/txbb_sfs/txbb_sf_ul_{year}.json").open() as f:
with (package_path / f"corrections/txbb_sfs/txbb_sf_ul_{year}.json").open() as f:
txbb_sf = json.load(f)

wps = ["LP", "MP", "HP"]
Expand Down Expand Up @@ -81,9 +81,12 @@ def apply_txbb_sfs(
events[f"{weight_key}_txbb_{var}"] = events[weight_key]

if len(events[weight_key]):
events[weight_key] = events[weight_key] * txbb_sf_lookups[year]["nom"](bb_txbb, bb_pt)
else:
events[weight_key] = events[weight_key]
txbb_nom = txbb_sf_lookups[year]["nom"](bb_txbb, bb_pt)
for wkey in utils.get_all_weights(events):
if len(events[wkey].shape) > 1:
events[wkey] *= txbb_nom[:, np.newaxis]
else:
events[wkey] *= txbb_nom


trig_effs = {}
Expand Down Expand Up @@ -334,7 +337,7 @@ def get_lpsf(
# pt extrapolation uncertainty is the std of all pt param variations
uncs["sj_pt_unc"] = (
np.std(
np.sum(weight[:, np.newaxis] * events[f"{jet}_lp_pt_extrap_vars"].to_numpy(), axis=0)
np.sum(weight[:, np.newaxis] * events[f"{jet}_lp_sf_pt_extrap_vars"].to_numpy(), axis=0)
)
/ tot_post
)
Expand Down
41 changes: 24 additions & 17 deletions src/HHbbVV/postprocessing/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def ratioHistPlot(
divide_bin_width: bool = False,
plot_significance: bool = False,
significance_dir: str = "right",
plot_ratio: bool = True,
axrax: tuple = None,
):
"""
Expand Down Expand Up @@ -320,10 +321,12 @@ def ratioHistPlot(
gridspec_kw={"height_ratios": [3, 1, 1], "hspace": 0},
sharex=True,
)
else:
elif plot_ratio:
fig, (ax, rax) = plt.subplots(
2, 1, figsize=(12, 14), gridspec_kw={"height_ratios": [3, 1], "hspace": 0}, sharex=True
)
else:
fig, ax = plt.subplots(1, 1, figsize=(12, 11))

plt.rcParams.update({"font.size": 24})

Expand Down Expand Up @@ -385,6 +388,7 @@ def ratioHistPlot(
alpha=0.2,
hatch="//",
linewidth=0,
label="Total Background Uncertainty",
)

# plot data
Expand Down Expand Up @@ -412,24 +416,27 @@ def ratioHistPlot(
ax.set_ylim(y_lowlim)

# plot ratio below
if plot_data:
bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys])
yerr = ratio_uncertainty(pre_divide_hists[data_key, :].values(), bg_tot.values(), "poisson")
if plot_ratio:
if plot_data:
bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys])
yerr = ratio_uncertainty(
pre_divide_hists[data_key, :].values(), bg_tot.values(), "poisson"
)

hep.histplot(
pre_divide_hists[data_key, :] / (bg_tot.values() + 1e-5),
yerr=yerr,
ax=rax,
histtype="errorbar",
color="black",
capsize=4,
)
else:
rax.set_xlabel(hists.axes[1].label)
hep.histplot(
pre_divide_hists[data_key, :] / (bg_tot.values() + 1e-5),
yerr=yerr,
ax=rax,
histtype="errorbar",
color="black",
capsize=4,
)
else:
rax.set_xlabel(hists.axes[1].label)

rax.set_ylabel("Data/MC")
rax.set_ylim(ratio_ylims)
rax.grid()
rax.set_ylabel("Data/MC")
rax.set_ylim(ratio_ylims)
rax.grid()

if plot_significance:
bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]).values()
Expand Down
24 changes: 16 additions & 8 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,7 @@ def get_lpsf_all_years(

bb_masks = bb_VV_assignment(events_dict)

if "finalWeight_noTrigEffs" not in events_dict[sig_key]:
if "weight_noTrigEffs" not in events_dict[sig_key]:
apply_weights(events_dict, year)
derive_variables(events_dict)

Expand Down Expand Up @@ -1636,9 +1636,13 @@ def get_templates(

# make selection, taking JEC/JMC variations into account
sel, cf = utils.make_selection(
region.cuts, events_dict, bb_masks, prev_cutflow=prev_cutflow, jshift=jshift
region.cuts,
events_dict,
bb_masks,
prev_cutflow=prev_cutflow,
jshift=jshift,
weight_key=weight_key,
)
# print(cf)

if template_dir != "":
cf.to_csv(f"{template_dir}/cutflows/{year}/{rname}_cutflow{jlabel}.csv")
Expand All @@ -1658,9 +1662,9 @@ def get_templates(
sig_bb_mask = bb_masks[sig_key][sel[sig_key]]

if pass_region:
# scale signal by LP SF
# scale all signal weights by LP SF
if lpsfs:
for wkey in [weight_key, f"{weight_key}_noTrigEffs"]:
for wkey in utils.get_all_weights(sig_events[sig_key]):
sig_events[sig_key][wkey] *= systematics[sig_key]["lp_sf"]

corrections.apply_txbb_sfs(sig_events[sig_key], sig_bb_mask, year, weight_key)
Expand All @@ -1672,7 +1676,7 @@ def get_templates(
hist_samples = list(events_dict.keys())

if not do_jshift:
# set up weight-based variations
# add all weight-based variations to histogram axis
for shift in ["down", "up"]:
if pass_region:
for sig_key in sig_keys:
Expand Down Expand Up @@ -1730,7 +1734,7 @@ def get_templates(
nom_vals = h[sample, :].values()
abs_unc = np.linalg.norm(
(whists.values() - nom_vals), axis=0
) / np.sqrt(103)
) # / np.sqrt(103)
# cap at 100% uncertainty
rel_unc = np.clip(abs_unc / nom_vals, 0, 1)
shape_up = nom_vals * (1 + rel_unc)
Expand Down Expand Up @@ -1802,7 +1806,6 @@ def get_templates(
plot_params = {
"hists": h.project(0, i + 1),
"sig_keys": p_sig_keys,
"bg_keys": p_bg_keys,
"sig_scale_dict": (
{key: sig_scale_dict[key] for key in p_sig_keys}
if pass_region
Expand All @@ -1822,6 +1825,7 @@ def get_templates(

plotting.ratioHistPlot(
**plot_params,
bg_keys=bg_keys,
title=title,
name=f"{plot_name}{jlabel}.pdf",
)
Expand All @@ -1834,6 +1838,7 @@ def get_templates(
for wshift, wsyst in weight_shifts.items():
plotting.ratioHistPlot(
**plot_params,
bg_keys=bg_keys,
syst=(wshift, wsyst.samples),
title=f"{region.label} Region {wsyst.label} Unc.",
name=f"{plot_name}_{wshift}.pdf",
Expand All @@ -1842,15 +1847,18 @@ def get_templates(
for skey, shift in [("Down", "down"), ("Up", "up")]:
plotting.ratioHistPlot(
**plot_params,
bg_keys=p_bg_keys, # don't plot QCD
syst=(wshift, wsyst.samples),
variation=shift,
title=f"{region.label} Region {wsyst.label} Unc. {skey} Shapes",
name=f"{plot_name}_{wshift}_{shift}.pdf",
plot_ratio=False,
)

if pass_region:
plotting.ratioHistPlot(
**plot_params,
bg_keys=bg_keys,
sig_err="txbb",
title=rf"{region.label} Region $T_{{Xbb}}$ Shapes",
name=f"{plot_name}_txbb.pdf",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
,Preselection,QCD SF,bbFatJetParticleNetMD_Txbb >= 0.97,VVFatJetParTMD_THWWvsT >= 0.8
QCD,979287.6349132347,979287.634913235,210430.09414706996,2269.113453099677
TT,116428.4156578406,116428.4156578406,24255.315491153477,391.3575052230973
ST,10133.749871746222,10133.749871746222,2048.2546699879467,44.98804071000517
W+Jets,14863.589391946809,14863.589391946809,2704.0098010873708,84.0506933145966
Z+Jets,21810.249247296022,21810.249247296022,9830.13553043351,141.05219142274115
Diboson,944.5571650558329,944.5571650558329,412.484656415579,8.185628548079201
ggFHbb,226.95685088670757,226.95685088670757,157.27752402118352,1.6976052291726098
VBFHbb,66.97275111602431,66.97275111602431,47.449174893882,0.5332721672841487
ZHbb,63.96246914780236,63.96246914780236,45.379215471990825,0.844136317284847
WHbb,109.68223866342207,109.68223866342207,73.99520784233742,2.311863067588508
ggZHbb,9.637138831976987,9.637138831976987,5.872520883724087,0.08202381657508848
ttHbb,318.24018721235785,318.24018721235785,121.05255814898767,1.8803545549795848
HWW,153.35211702134148,153.35211702134148,31.224751489102488,2.6994308124911046
Data,1144417.0,1144417.0,228352.0,4042.0
HHbbVV,1.676130328580837,1.676130328580837,1.1511317721054473,0.40900950627726057
VBFHHbbVV,0.03380955089741052,0.03380955089741052,0.022778253615771286,0.007133390360331839
X[900]->H(bb)Y[80](VV),11.19980821914222,11.19980821914222,7.789032088198821,2.978145363696696
X[1200]->H(bb)Y[190](VV),20.852666558466492,20.852666558466492,14.663466510313835,5.163467006134891
X[2000]->H(bb)Y[125](VV),28.53386346388635,28.53386346388635,20.820717608067813,8.851038292379796
X[3000]->H(bb)Y[250](VV),31.012733160854424,31.012733160854424,23.554959281771087,9.016848443837965
X[4000]->H(bb)Y[150](VV),31.35180296475921,31.35180296475921,24.33204003610424,11.035967295901486
5 changes: 5 additions & 0 deletions src/HHbbVV/postprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,11 @@ def make_selection(
return selection, cutflow


def get_all_weights(events: pd.DataFrame):
"""Get all weight columns in the events dataframe"""
return np.unique([key for (key, ind) in events.columns if "weight" in key or "Weight" in key])


def getSigSidebandBGYields(
mass_key: str,
sig_key: str,
Expand Down

0 comments on commit ef415fa

Please sign in to comment.