-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
387 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,376 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pickle, json, gzip\n", | ||
"import numpy as np\n", | ||
"import hist\n", | ||
"from hist import Hist\n", | ||
"\n", | ||
"from typing import Optional, List, Dict\n", | ||
"from copy import copy\n", | ||
"\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import mplhep as hep\n", | ||
"from matplotlib import colors\n", | ||
"\n", | ||
"from tqdm import tqdm\n", | ||
"\n", | ||
"from pathlib import Path\n", | ||
"import os\n", | ||
"\n", | ||
"from HHbbVV.hh_vars import years, bg_keys\n", | ||
"from HHbbVV.postprocessing import datacardHelpers\n", | ||
"from postprocessing import nonres_shape_vars as shape_vars\n", | ||
"import plotting\n", | ||
"\n", | ||
"plt.rcParams.update({\"font.size\": 16})\n", | ||
"plt.style.use(hep.style.CMS)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%load_ext autoreload\n", | ||
"%autoreload 2" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n", | ||
"plot_dir.mkdir(parents=True, exist_ok=True)\n", | ||
"\n", | ||
"templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Load and process templates" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"templates_dict: dict[str, dict[str, Hist]] = {}\n", | ||
"\n", | ||
"for year in years:\n", | ||
" with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n", | ||
" templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n", | ||
"\n", | ||
"templates = datacardHelpers.sum_templates(templates_dict, years)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"vbf_keys = [\n", | ||
" \"VBFHHbbVV\",\n", | ||
" \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n", | ||
" \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n", | ||
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n", | ||
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n", | ||
" \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n", | ||
"]\n", | ||
"\n", | ||
"vbf_hists = {}\n", | ||
"\n", | ||
"for key, h in templates.items():\n", | ||
" vbf_hists[key] = []\n", | ||
" for vbf_key in vbf_keys:\n", | ||
" vbf_hists[key].append(h[vbf_key, ...])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Interpolation coefficients" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sympy\n", | ||
"\n", | ||
"csamples = [\n", | ||
" # CV, C2V, kl\n", | ||
" (1.0, 1.0, 1.0),\n", | ||
" (1.0, 1.0, 0.0),\n", | ||
" (1.0, 1.0, 2.0),\n", | ||
" (1.0, 0.0, 1.0),\n", | ||
" (1.0, 2.0, 1.0),\n", | ||
" # (0.5, 1.0, 1.0),\n", | ||
" (1.5, 1.0, 1.0),\n", | ||
"]\n", | ||
"\n", | ||
"M = sympy.Matrix(\n", | ||
" [\n", | ||
" [\n", | ||
" CV**2 * kl**2,\n", | ||
" CV**4,\n", | ||
" C2V**2,\n", | ||
" CV**3 * kl,\n", | ||
" CV * C2V * kl,\n", | ||
" CV**2 * C2V,\n", | ||
" ]\n", | ||
" for i, (CV, C2V, kl) in enumerate(csamples)\n", | ||
" ]\n", | ||
")\n", | ||
"\n", | ||
"# the vector of couplings\n", | ||
"CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n", | ||
"c = sympy.Matrix(\n", | ||
" [\n", | ||
" [CV**2 * kl**2],\n", | ||
" [CV**4],\n", | ||
" [C2V**2],\n", | ||
" [CV**3 * kl],\n", | ||
" [CV * C2V * kl],\n", | ||
" [CV**2 * C2V],\n", | ||
" ]\n", | ||
")\n", | ||
"\n", | ||
"# the vector of symbolic sample cross sections\n", | ||
"s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n", | ||
"\n", | ||
"# actual computation, i.e., matrix inversion and multiplications with vectors\n", | ||
"M_inv = M.pinv()\n", | ||
"coeffs = c.transpose() * M_inv\n", | ||
"sigma = coeffs * s" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_hist_interp(cv, c2v, Kl, hists):\n", | ||
" sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n", | ||
" counts = []\n", | ||
" errs = []\n", | ||
" for i in range(len(hists[0].values())):\n", | ||
" count = np.array(\n", | ||
" sigma_val.subs(\n", | ||
" {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n", | ||
" )\n", | ||
" )[0][0]\n", | ||
" err = np.array(\n", | ||
" sigma_val.subs(\n", | ||
" {\n", | ||
" sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n", | ||
" np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n", | ||
" )\n", | ||
" for j in range(len(vbf_keys))\n", | ||
" }\n", | ||
" )\n", | ||
" )[0][0]\n", | ||
"\n", | ||
" if count < 1e-12:\n", | ||
" count = 0\n", | ||
"\n", | ||
" counts.append(count)\n", | ||
" errs.append(err)\n", | ||
"\n", | ||
" return np.array(counts), np.array(errs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Add interpolated signals to templates" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"interp_points = np.arange(-1.0, 3.1, 0.1)\n", | ||
"samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n", | ||
"\n", | ||
"interp_hists = {}\n", | ||
"\n", | ||
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", | ||
" print(region)\n", | ||
" h = Hist(\n", | ||
" hist.axis.StrCategory(samples, name=\"Sample\"),\n", | ||
" *templates[\"passvbf\"].axes[1:],\n", | ||
" storage=hist.storage.Weight(),\n", | ||
" )\n", | ||
" for i, c in tqdm(enumerate(interp_points)):\n", | ||
" c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n", | ||
" h.values()[i, :] = c_h\n", | ||
" h.variances()[i, :] = (c_err * c_h) ** 2\n", | ||
"\n", | ||
" interp_hists[region] = h" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ctemplates = {}\n", | ||
"\n", | ||
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", | ||
" template = templates[region]\n", | ||
" # combined sig + bg samples\n", | ||
" csamples = list(template.axes[0]) + samples\n", | ||
"\n", | ||
" # new hist with all samples\n", | ||
" ctemplate = Hist(\n", | ||
" hist.axis.StrCategory(csamples, name=\"Sample\"),\n", | ||
" *template.axes[1:],\n", | ||
" storage=\"weight\",\n", | ||
" )\n", | ||
"\n", | ||
" # add background hists\n", | ||
" for sample in template.axes[0]:\n", | ||
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", | ||
" ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n", | ||
"\n", | ||
" # add signal hists\n", | ||
" for sample in samples:\n", | ||
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", | ||
" ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n", | ||
" flow=True\n", | ||
" )\n", | ||
"\n", | ||
" ctemplates[region] = ctemplate" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Plot" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from hist.intervals import poisson_interval\n", | ||
"\n", | ||
"(\n", | ||
" poisson_interval(ctemplates[region][\"Data\", ...].values())\n", | ||
" - ctemplates[region][\"Data\", ...].values()\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ctemplates[\"passvbf\"][\"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\", ...].values()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"selection_regions = {\n", | ||
" \"passvbf\": \"VBF\",\n", | ||
" \"passggf\": \"ggF\",\n", | ||
" # \"fail\": \"Fail\",\n", | ||
"}\n", | ||
"\n", | ||
"ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n", | ||
"\n", | ||
"sig_scale_dict = {\n", | ||
" # \"HHbbVV\": 100,\n", | ||
" # \"VBFHHbbVV\": 2000,\n", | ||
" \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n", | ||
" \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n", | ||
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n", | ||
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n", | ||
"}\n", | ||
"\n", | ||
"for region, region_label in selection_regions.items():\n", | ||
" pass_region = region.startswith(\"pass\")\n", | ||
" for i, shape_var in enumerate(shape_vars):\n", | ||
" plot_params = {\n", | ||
" \"hists\": ctemplates[region],\n", | ||
" \"sig_keys\": list(sig_scale_dict.keys()),\n", | ||
" \"bg_keys\": [],\n", | ||
" \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n", | ||
" \"show\": True,\n", | ||
" \"year\": \"all\",\n", | ||
" \"ylim\": ylims[region],\n", | ||
" \"title\": f\"Pre-fit {region_label} Region\",\n", | ||
" \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n", | ||
" \"ncol\": 2, # if region == \"passvbf\" else 1,\n", | ||
" \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n", | ||
" \"cmslabel\": \"Preliminary\",\n", | ||
" \"plot_data\": False,\n", | ||
" \"log\": True,\n", | ||
" }\n", | ||
"\n", | ||
" plotting.ratioHistPlot(**plot_params, data_err=True)\n", | ||
"\n", | ||
"# break\n", | ||
"# break" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "python310", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.11" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.