From fb7bb984cd986b61dc4f1362181925949335b8b1 Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Tue, 9 Jul 2024 12:12:02 -0400 Subject: [PATCH 01/16] move run_analysis.py to new histEFT, taskvine --- analysis/topeft_run2/analysis_processor.py | 37 +++-- analysis/topeft_run2/run_analysis.py | 183 +++++++++++---------- tests/{test_futures.py => test_dask.py} | 8 +- tests/test_workqueue.py | 2 +- 4 files changed, 124 insertions(+), 106 deletions(-) rename tests/{test_futures.py => test_dask.py} (92%) diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py index 59a325481..7cdda4b04 100644 --- a/analysis/topeft_run2/analysis_processor.py +++ b/analysis/topeft_run2/analysis_processor.py @@ -3,7 +3,8 @@ import copy import coffea import numpy as np -import awkward as ak +import dask_awkward as dak +import dask.array as da import hist from topcoffea.modules.histEFT import HistEFT @@ -91,24 +92,30 @@ def __init__(self, samples, wc_names_lst=[], hist_lst=None, ecut_threshold=None, *info["regular"], name=name+"_sumw2", label=info["label"] + " sum of w^2" ) histograms[name] = HistEFT( - proc_axis, - chan_axis, - syst_axis, - appl_axis, - dense_axis, + category_axes=[ + proc_axis, + chan_axis, + syst_axis, + appl_axis, + ], + dense_axis=dense_axis, wc_names=wc_names_lst, - label=r"Events", - rebin=rebin + # TODO: Add to HistEFT: + # label=r"Events", + # rebin=rebin ) histograms[name+"_sumw2"] = HistEFT( - proc_axis, - chan_axis, - syst_axis, - appl_axis, - sumw2_axis, + category_axes=[ + proc_axis, + chan_axis, + syst_axis, + appl_axis, + ], + dense_axis=sumw2_axis, wc_names=wc_names_lst, - label=r"Events", - rebin=rebin + # TODO: Add to HistEFT: + # label=r"Events", + # rebin=rebin ) self._accumulator = histograms diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py index 719122089..e37b394f0 100644 --- a/analysis/topeft_run2/run_analysis.py +++ b/analysis/topeft_run2/run_analysis.py @@ -6,18 +6,28 @@ import cloudpickle import gzip import os +from functools import partial -from coffea import processor -from coffea.nanoevents import NanoAODSchema +import dask +import ndcctools.taskvine as vine import topcoffea.modules.utils as utils import topcoffea.modules.remote_environment as remote_environment +import warnings +warnings.filterwarnings("error", module="coffea.*") +from coffea.nanoevents import NanoEventsFactory, NanoAODSchema +from coffea.dataset_tools import preprocess, filter_files + from topeft.modules.dataDrivenEstimation import DataDrivenProducer from topeft.modules.get_renormfact_envelope import get_renormfact_envelope import analysis_processor -LST_OF_KNOWN_EXECUTORS = ["futures","work_queue"] + + + + +LST_OF_KNOWN_EXECUTORS = ["dask","taskvine"] WGT_VAR_LST = [ "nSumOfWeights_ISRUp", @@ -36,7 +46,7 @@ parser = argparse.ArgumentParser(description='You can customize your run') parser.add_argument('jsonFiles' , nargs='?', default='', help = 'Json file(s) containing files and metadata') - parser.add_argument('--executor','-x' , default='work_queue', help = 'Which executor to use') + parser.add_argument('--executor','-x' , default='taskvine', help = 'Which executor to use') parser.add_argument('--prefix', '-r' , nargs='?', default='', help = 'Prefix or redirector to look for the files') parser.add_argument('--test','-t' , action='store_true' , help = 'To perform a test, run over a few events in a couple of chunks') parser.add_argument('--pretend' , action='store_true', help = 'Read json files but, not execute the 
analysis') @@ -89,7 +99,7 @@ if not do_np: raise Exception("Error: Cannot specify do_renormfact_envelope if we have not already done the integration across the appl axis that occurs in the data driven estimator step.") if dotest: - if executor == "futures": + if executor == "dask": nchunks = 2 chunksize = 10000 nworkers = 1 @@ -102,7 +112,7 @@ ecut_threshold = args.ecut if ecut_threshold is not None: ecut_threshold = float(args.ecut) - if executor == "work_queue": + if executor == "taskvine": # construct wq port range port = list(map(int, args.port.split('-'))) if len(port) < 1: @@ -181,9 +191,11 @@ def LoadJsonToSampleName(jsonFile, prefix): flist = {} nevts_total = 0 - for sname in samplesdict.keys(): + for sname, info in samplesdict.items(): redirector = samplesdict[sname]['redirector'] - flist[sname] = [(redirector+f) for f in samplesdict[sname]['files']] + flist[sname] = {'files': {}} + for f in info['files']: + flist[sname]['files'][f"{redirector}/{f}"] = treename samplesdict[sname]['year'] = samplesdict[sname]['year'] samplesdict[sname]['xsec'] = float(samplesdict[sname]['xsec']) samplesdict[sname]['nEvents'] = int(samplesdict[sname]['nEvents']) @@ -240,40 +252,26 @@ def LoadJsonToSampleName(jsonFile, prefix): else: print('No Wilson coefficients specified') - processor_instance = analysis_processor.AnalysisProcessor(samplesdict,wc_lst,hist_lst,ecut_threshold,do_errors,do_systs,split_lep_flavor,skip_sr,skip_cr) - if executor == "work_queue": - executor_args = { - 'master_name': '{}-workqueue-coffea'.format(os.environ['USER']), - - # find a port to run work queue in this range: - 'port': port, - - 'debug_log': 'debug.log', - 'transactions_log': 'tr.log', - 'stats_log': 'stats.log', - 'tasks_accum_log': 'tasks.log', + scheduler_opts = {} + if executor == "taskvine": + manager_args = { + "name": "{}-taskvine-coffea".format(os.environ["USER"]), + "port": port, + } - 'environment_file': remote_environment.get_environment( - extra_pip_local = {"topeft": ["topeft", "setup.py"]}, + vine_mgr = vine.DaskVine(**manager_args) + executor_args = { + "environment": remote_environment.get_environment( + extra_pip_local={"topeft": ["topeft", "setup.py"]}, ), - 'extra_input_files': ["analysis_processor.py"], - - 'retries': 5, - - # use mid-range compression for chunks results. 9 is the default for work - # queue in coffea. Valid values are 0 (minimum compression, less memory - # usage) to 16 (maximum compression, more memory usage). - 'compression': 9, - - # automatically find an adequate resource allocation for tasks. - # tasks are first tried using the maximum resources seen of previously ran - # tasks. on resource exhaustion, they are retried with the maximum resource - # values, if specified below. if a maximum is not specified, the task waits - # forever until a larger worker connects. - 'resource_monitor': True, - 'resources_mode': 'auto', - + "extra_files": { + vine_mgr.declare_file( + "analysis_processor.py", cache=True + ): "analysis_processor.py" + }, + "retries": 5, + "resources_mode": None, # this resource values may be omitted when using # resources_mode: 'auto', but they do make the initial portion # of a workflow run a little bit faster. @@ -285,64 +283,73 @@ def LoadJsonToSampleName(jsonFile, prefix): # mode will use the values specified here, so workers need to be at least # this large. If left unspecified, tasks will use whole workers in the # exploratory mode. 
+ # "resources": { # 'cores': 1, - # 'disk': 8000, #MB - # 'memory': 10000, #MB - - # control the size of accumulation tasks. Results are - # accumulated in groups of size chunks_per_accum, keeping at - # most chunks_per_accum at the same time in memory per task. - 'chunks_per_accum': 25, - 'chunks_accum_in_mem': 2, - - # terminate workers on which tasks have been running longer than average. - # This is useful for temporary conditions on worker nodes where a task will - # be finish faster is ran in another worker. - # the time limit is computed by multipliying the average runtime of tasks - # by the value of 'fast_terminate_workers'. Since some tasks can be - # legitimately slow, no task can trigger the termination of workers twice. - # - # warning: small values (e.g. close to 1) may cause the workflow to misbehave, - # as most tasks will be terminated. - # - # Less than 1 disables it. - 'fast_terminate_workers': 0, - - # print messages when tasks are submitted, finished, etc., - # together with their resource allocation and usage. If a task - # fails, its standard output is also printed, so we can turn - # off print_stdout for all tasks. - 'verbose': True, - 'print_stdout': False, + # 'disk': 8000, # MB + # 'memory': 10000, # MB + # } } + vine_sch = partial(vine_mgr.get, **executor_args) + scheduler_opts = {"scheduler": vine_sch} + # Run the processor and get the output tstart = time.time() - - if executor == "futures": - exec_instance = processor.futures_executor(workers=nworkers) - runner = processor.Runner(exec_instance, schema=NanoAODSchema, chunksize=chunksize, maxchunks=nchunks) - elif executor == "work_queue": - executor = processor.WorkQueueExecutor(**executor_args) - runner = processor.Runner(executor, schema=NanoAODSchema, chunksize=chunksize, maxchunks=nchunks, skipbadfiles=False, xrootdtimeout=180) - - output = runner(flist, treename, processor_instance) + dataset_runable, dataset_updated = preprocess( + flist, + align_clusters=False, + step_size=chunksize, + files_per_batch=10, + skip_bad_files=False, + save_form=True, + uproot_options={"xrootdtimeout": 180}, + **scheduler_opts + ) + dataset_runable = filter_files(dataset_runable) + + # trim steps if maximum number of chunks specified: + if nchunks: + for info in dataset_runable.values(): + for fdict in info["files"].values(): + fdict["steps"] = fdict["steps"][0:nchunks] + + events = {} + for name, info in dataset_runable.items(): + events[name] = NanoEventsFactory.from_root( + dataset_runable[name]["files"], + schemaclass=NanoAODSchema, + metadata={"dataset": name}, + ).events() + + processor_instance = analysis_processor.AnalysisProcessor( + samplesdict, + wc_lst, + hist_lst, + ecut_threshold, + do_errors, + do_systs, + split_lep_flavor, + skip_sr, + skip_cr, + ) + + to_compute = {} + # for name, events_of_name in events.items(): + # to_compute[name] = processor_instance.process(events_of_name) + + (output, ) = dask.compute(to_compute, **scheduler_opts) dt = time.time() - tstart - - if executor == "work_queue": - print('Processed {} events in {} seconds ({:.2f} evts/sec).'.format(nevts_total,dt,nevts_total/dt)) - - #nbins = sum(sum(arr.size for arr in h.eval({}).values()) for h in output.values() if isinstance(h, hist.Hist)) - #nfilled = sum(sum(np.sum(arr > 0) for arr in h.eval({}).values()) for h in output.values() if isinstance(h, hist.Hist)) - #print("Filled %.0f bins, nonzero bins: %1.1f %%" % (nbins, 100*nfilled/nbins,)) - - if executor == "futures": - print("Processing time: %1.2f s with %i workers (%.2f s cpu 
overall)" % (dt, nworkers, dt*nworkers, )) + print( + "Processed {} events in {} seconds ({:.2f} evts/sec).".format( + nevts_total, dt, nevts_total / dt + ) + ) # Save the output - if not os.path.isdir(outpath): os.system("mkdir -p %s"%outpath) - out_pkl_file = os.path.join(outpath,outname+".pkl.gz") + if not os.path.isdir(outpath): + os.system("mkdir -p %s" % outpath) + out_pkl_file = os.path.join(outpath, outname + ".pkl.gz") print(f"\nSaving output in {out_pkl_file}...") with gzip.open(out_pkl_file, "wb") as fout: cloudpickle.dump(output, fout) diff --git a/tests/test_futures.py b/tests/test_dask.py similarity index 92% rename from tests/test_futures.py rename to tests/test_dask.py index d87a77d52..62d6232b7 100644 --- a/tests/test_futures.py +++ b/tests/test_dask.py @@ -9,12 +9,16 @@ def test_topcoffea(): "python", "analysis/topeft_run2/run_analysis.py", "-x", - "futures", + "dask", "input_samples/sample_jsons/test_samples/UL17_private_ttH_for_CI.json", "-o", "output_check_yields", "-p", - "analysis/topeft_run2/histos/" + "analysis/topeft_run2/histos/", + "--nchunks", + "1", + "--chunksize", + "1000" ] # Run TopCoffea diff --git a/tests/test_workqueue.py b/tests/test_workqueue.py index b17d7400d..85ef0c7ab 100644 --- a/tests/test_workqueue.py +++ b/tests/test_workqueue.py @@ -33,6 +33,6 @@ def test_topcoffea_wq(): # Run TopCoffea with factory: - subprocess.run(args, cwd="analysis/topeft_run2", timeout=400) + subprocess.run(args, cwd="analysis/topeft_run2", timeout=600) assert (exists('analysis/topeft_run2/histos/output_check_yields_wq.pkl.gz')) From 0ec938a4ed666d6550e9f8ac88380b1c527ac527 Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Tue, 9 Jul 2024 12:13:41 -0400 Subject: [PATCH 02/16] move analysis_processor to dask --- analysis/topeft_run2/analysis_processor.py | 105 +++++++++++---------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py index 7cdda4b04..7bdddb064 100644 --- a/analysis/topeft_run2/analysis_processor.py +++ b/analysis/topeft_run2/analysis_processor.py @@ -227,16 +227,17 @@ def process(self, events): # An array of lenght events that is just 1 for each event # Probably there's a better way to do this, but we use this method elsewhere so I guess why not.. - events.nom = ak.ones_like(events.MET.pt) + events.nom = da.ones_like(events.MET.pt) ele["idEmu"] = te_os.ttH_idEmu_cuts_E3(ele.hoe, ele.eta, ele.deltaEtaSC, ele.eInvMinusPInv, ele.sieie) ele["conept"] = te_os.coneptElec(ele.pt, ele.mvaTTHUL, ele.jetRelIso) mu["conept"] = te_os.coneptMuon(mu.pt, mu.mvaTTHUL, mu.jetRelIso, mu.mediumId) - ele["btagDeepFlavB"] = ak.fill_none(ele.matched_jet.btagDeepFlavB, -99) - mu["btagDeepFlavB"] = ak.fill_none(mu.matched_jet.btagDeepFlavB, -99) + + ele["btagDeepFlavB"] = dak.fill_none(ele.matched_jet.btagDeepFlavB, -99) + mu["btagDeepFlavB"] = dak.fill_none(mu.matched_jet.btagDeepFlavB, -99) if not isData: - ele["gen_pdgId"] = ak.fill_none(ele.matched_gen.pdgId, 0) - mu["gen_pdgId"] = ak.fill_none(mu.matched_gen.pdgId, 0) + ele["gen_pdgId"] = dak.fill_none(ele.matched_gen.pdgId, 0) + mu["gen_pdgId"] = dak.fill_none(mu.matched_gen.pdgId, 0) # Get the lumi mask for data if year == "2016" or year == "2016APV": @@ -253,7 +254,11 @@ def process(self, events): # Extract the EFT quadratic coefficients and optionally use them to calculate the coefficients on the w**2 quartic function # eft_coeffs is never Jagged so convert immediately to numpy for ease of use. 
- eft_coeffs = ak.to_numpy(events["EFTfitCoefficients"]) if hasattr(events, "EFTfitCoefficients") else None + try: + eft_coeffs = events["EFTfitCoefficients"] + except KeyError: + eft_coeffs = None + if eft_coeffs is not None: # Check to see if the ordering of WCs for this sample matches what want if self._samples[dataset]["WCnames"] != self._wc_names_lst: @@ -281,7 +286,7 @@ def process(self, events): m_loose = mu[mu.isPres & mu.isLooseM] e_loose = ele[ele.isPres & ele.isLooseE] - l_loose = ak.with_name(ak.concatenate([e_loose, m_loose], axis=1), 'PtEtaPhiMCandidate') + l_loose = da.with_name(da.concatenate([e_loose, m_loose], axis=1), 'PtEtaPhiMCandidate') ################### Tau selection #################### @@ -292,8 +297,8 @@ def process(self, events): tau["isTight"] = te_os.isVLooseTau(tau.idDeepTau2017v2p1VSjet) # use these to veto # Compute pair invariant masses, for all flavors all signes - llpairs = ak.combinations(l_loose, 2, fields=["l0","l1"]) - events["minMllAFAS"] = ak.min( (llpairs.l0+llpairs.l1).mass, axis=-1) + llpairs = da.combinations(l_loose, 2, fields=["l0","l1"]) + events["minMllAFAS"] = da.min( (llpairs.l0+llpairs.l1).mass, axis=-1) # Build FO collection m_fo = mu[mu.isPres & mu.isLooseM & mu.isFO] @@ -306,10 +311,10 @@ def process(self, events): # Attach per lepton fake rates AttachPerLeptonFR(e_fo, flavor = "Elec", year=year) AttachPerLeptonFR(m_fo, flavor = "Muon", year=year) - m_fo['convVeto'] = ak.ones_like(m_fo.charge) - m_fo['lostHits'] = ak.zeros_like(m_fo.charge) - l_fo = ak.with_name(ak.concatenate([e_fo, m_fo], axis=1), 'PtEtaPhiMCandidate') - l_fo_conept_sorted = l_fo[ak.argsort(l_fo.conept, axis=-1,ascending=False)] + m_fo['convVeto'] = da.ones_like(m_fo.charge) + m_fo['lostHits'] = da.zeros_like(m_fo.charge) + l_fo = da.with_name(da.concatenate([e_fo, m_fo], axis=1), 'PtEtaPhiMCandidate') + l_fo_conept_sorted = l_fo[da.argsort(l_fo.conept, axis=-1,ascending=False)] ######### Systematics ########### @@ -374,10 +379,10 @@ def process(self, events): #################### Jets #################### # Jet cleaning, before any jet selection - #vetos_tocleanjets = ak.with_name( ak.concatenate([tau, l_fo], axis=1), "PtEtaPhiMCandidate") - vetos_tocleanjets = ak.with_name( l_fo, "PtEtaPhiMCandidate") - tmp = ak.cartesian([ak.local_index(jets.pt), vetos_tocleanjets.jetIdx], nested=True) - cleanedJets = jets[~ak.any(tmp.slot0 == tmp.slot1, axis=-1)] # this line should go before *any selection*, otherwise lep.jetIdx is not aligned with the jet index + #vetos_tocleanjets = da.with_name( da.concatenate([tau, l_fo], axis=1), "PtEtaPhiMCandidate") + vetos_tocleanjets = da.with_name( l_fo, "PtEtaPhiMCandidate") + tmp = da.cartesian([da.local_index(jets.pt), vetos_tocleanjets.jetIdx], nested=True) + cleanedJets = jets[~da.any(tmp.slot0 == tmp.slot1, axis=-1)] # this line should go before *any selection*, otherwise lep.jetIdx is not aligned with the jet index # Selecting jets and cleaning them jetptname = "pt_nom" if hasattr(cleanedJets, "pt_nom") else "pt" @@ -386,8 +391,8 @@ def process(self, events): if not isData: cleanedJets["pt_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.pt cleanedJets["mass_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.mass - cleanedJets["pt_gen"] =ak.values_astype(ak.fill_none(cleanedJets.matched_gen.pt, 0), np.float32) - cleanedJets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, cleanedJets.pt)[0] + cleanedJets["pt_gen"] =da.values_astype(da.fill_none(cleanedJets.matched_gen.pt, 0), np.float32) + cleanedJets["rho"] = 
da.broadcast_arrays(events.fixedGridRhoFastjetAll, cleanedJets.pt)[0] events_cache = events.caches[0] cleanedJets = ApplyJetCorrections(year, corr_type='jets').build(cleanedJets, lazy_cache=events_cache) # SYSTEMATICS @@ -397,9 +402,9 @@ def process(self, events): goodJets = cleanedJets[cleanedJets.isGood] # Count jets - njets = ak.num(goodJets) - ht = ak.sum(goodJets.pt,axis=-1) - j0 = goodJets[ak.argmax(goodJets.pt,axis=-1,keepdims=True)] + njets = da.num(goodJets) + ht = da.sum(goodJets.pt,axis=-1) + j0 = goodJets[da.argmax(goodJets.pt,axis=-1,keepdims=True)] # Loose DeepJet WP if year == "2017": @@ -414,7 +419,7 @@ def process(self, events): raise ValueError(f"Error: Unknown year \"{year}\".") isBtagJetsLoose = (goodJets.btagDeepFlavB > btagwpl) isNotBtagJetsLoose = np.invert(isBtagJetsLoose) - nbtagsl = ak.num(goodJets[isBtagJetsLoose]) + nbtagsl = da.num(goodJets[isBtagJetsLoose]) # Medium DeepJet WP if year == "2017": @@ -429,7 +434,7 @@ def process(self, events): raise ValueError(f"Error: Unknown year \"{year}\".") isBtagJetsMedium = (goodJets.btagDeepFlavB > btagwpm) isNotBtagJetsMedium = np.invert(isBtagJetsMedium) - nbtagsm = ak.num(goodJets[isBtagJetsMedium]) + nbtagsm = da.num(goodJets[isBtagJetsMedium]) #################### Add variables into event object so that they persist #################### @@ -445,7 +450,7 @@ def process(self, events): te_es.addLepCatMasks(events) # Convenient to have l0, l1, l2 on hand - l_fo_conept_sorted_padded = ak.pad_none(l_fo_conept_sorted, 3) + l_fo_conept_sorted_padded = da.pad_none(l_fo_conept_sorted, 3) l0 = l_fo_conept_sorted_padded[:,0] l1 = l_fo_conept_sorted_padded[:,1] l2 = l_fo_conept_sorted_padded[:,2] @@ -460,9 +465,9 @@ def process(self, events): bJetSF = [GetBTagSF(goodJets, year, 'LOOSE'),GetBTagSF(goodJets, year, 'MEDIUM')] bJetEff = [GetBtagEff(goodJets, year, 'loose'),GetBtagEff(goodJets, year, 'medium')] bJetEff_data = [bJetEff[0]*bJetSF[0],bJetEff[1]*bJetSF[1]] - pMC = ak.prod(bJetEff[1] [isBtagJetsMedium], axis=-1) * ak.prod((bJetEff[0] [isBtagJetsLooseNotMedium] - bJetEff[1] [isBtagJetsLooseNotMedium]), axis=-1) * ak.prod((1-bJetEff[0] [isNotBtagJetsLoose]), axis=-1) - pMC = ak.where(pMC==0,1,pMC) # removeing zeroes from denominator... - pData = ak.prod(bJetEff_data[1] [isBtagJetsMedium], axis=-1) * ak.prod((bJetEff_data[0] [isBtagJetsLooseNotMedium] - bJetEff_data[1] [isBtagJetsLooseNotMedium]), axis=-1) * ak.prod((1-bJetEff_data[0] [isNotBtagJetsLoose]), axis=-1) + pMC = da.prod(bJetEff[1] [isBtagJetsMedium], axis=-1) * da.prod((bJetEff[0] [isBtagJetsLooseNotMedium] - bJetEff[1] [isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff[0] [isNotBtagJetsLoose]), axis=-1) + pMC = da.where(pMC==0,1,pMC) # removeing zeroes from denominator... 
+ pData = da.prod(bJetEff_data[1] [isBtagJetsMedium], axis=-1) * da.prod((bJetEff_data[0] [isBtagJetsLooseNotMedium] - bJetEff_data[1] [isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_data[0] [isNotBtagJetsLoose]), axis=-1) weights_obj_base_for_kinematic_syst.add("btagSF", pData/pMC) if self._do_systematics and syst_var=='nominal': @@ -471,8 +476,8 @@ def process(self, events): bJetSFDo = [GetBTagSF(goodJets, year, 'LOOSE', syst=b_syst)[1],GetBTagSF(goodJets, year, 'MEDIUM', syst=b_syst)[1]] bJetEff_dataUp = [bJetEff[0]*bJetSFUp[0],bJetEff[1]*bJetSFUp[1]] bJetEff_dataDo = [bJetEff[0]*bJetSFDo[0],bJetEff[1]*bJetSFDo[1]] - pDataUp = ak.prod(bJetEff_dataUp[1][isBtagJetsMedium], axis=-1) * ak.prod((bJetEff_dataUp[0][isBtagJetsLooseNotMedium] - bJetEff_dataUp[1][isBtagJetsLooseNotMedium]), axis=-1) * ak.prod((1-bJetEff_dataUp[0][isNotBtagJetsLoose]), axis=-1) - pDataDo = ak.prod(bJetEff_dataDo[1][isBtagJetsMedium], axis=-1) * ak.prod((bJetEff_dataDo[0][isBtagJetsLooseNotMedium] - bJetEff_dataDo[1][isBtagJetsLooseNotMedium]), axis=-1) * ak.prod((1-bJetEff_dataDo[0][isNotBtagJetsLoose]), axis=-1) + pDataUp = da.prod(bJetEff_dataUp[1][isBtagJetsMedium], axis=-1) * da.prod((bJetEff_dataUp[0][isBtagJetsLooseNotMedium] - bJetEff_dataUp[1][isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_dataUp[0][isNotBtagJetsLoose]), axis=-1) + pDataDo = da.prod(bJetEff_dataDo[1][isBtagJetsMedium], axis=-1) * da.prod((bJetEff_dataDo[0][isBtagJetsLooseNotMedium] - bJetEff_dataDo[1][isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_dataDo[0][isNotBtagJetsLoose]), axis=-1) weights_obj_base_for_kinematic_syst.add(f"btagSF{b_syst}", events.nom, (pDataUp/pMC)/(pData/pMC),(pDataDo/pMC)/(pData/pMC)) # Trigger SFs @@ -541,12 +546,12 @@ def process(self, events): bmask_atleast3med = (nbtagsm>=3) # Used for tttt enriched # Charge masks - chargel0_p = ak.fill_none(((l0.charge)>0),False) - chargel0_m = ak.fill_none(((l0.charge)<0),False) - charge2l_0 = ak.fill_none(((l0.charge+l1.charge)==0),False) - charge2l_1 = ak.fill_none(((l0.charge+l1.charge)!=0),False) - charge3l_p = ak.fill_none(((l0.charge+l1.charge+l2.charge)>0),False) - charge3l_m = ak.fill_none(((l0.charge+l1.charge+l2.charge)<0),False) + chargel0_p = da.fill_none(((l0.charge)>0),False) + chargel0_m = da.fill_none(((l0.charge)<0),False) + charge2l_0 = da.fill_none(((l0.charge+l1.charge)==0),False) + charge2l_1 = da.fill_none(((l0.charge+l1.charge)!=0),False) + charge3l_p = da.fill_none(((l0.charge+l1.charge+l2.charge)>0),False) + charge3l_m = da.fill_none(((l0.charge+l1.charge+l2.charge)<0),False) ######### Store boolean masks with PackedSelection ########## @@ -625,37 +630,37 @@ def process(self, events): # Calculate ptbl ptbl_bjet = goodJets[(isBtagJetsMedium | isBtagJetsLoose)] - ptbl_bjet = ptbl_bjet[ak.argmax(ptbl_bjet.pt,axis=-1,keepdims=True)] # Only save hardest b-jet + ptbl_bjet = ptbl_bjet[da.argmax(ptbl_bjet.pt,axis=-1,keepdims=True)] # Only save hardest b-jet ptbl_lep = l_fo_conept_sorted ptbl = (ptbl_bjet.nearest(ptbl_lep) + ptbl_bjet).pt - ptbl = ak.values_astype(ak.fill_none(ptbl, -1), np.float32) + ptbl = da.values_astype(da.fill_none(ptbl, -1), np.float32) # Z pt (pt of the ll pair that form the Z for the onZ categories) ptz = te_es.get_Z_pt(l_fo_conept_sorted_padded[:,0:3],10.0) # Leading (b+l) pair pt - bjetsl = goodJets[isBtagJetsLoose][ak.argsort(goodJets[isBtagJetsLoose].pt, axis=-1, ascending=False)] - bl_pairs = ak.cartesian({"b":bjetsl,"l":l_fo_conept_sorted}) + bjetsl = 
goodJets[isBtagJetsLoose][da.argsort(goodJets[isBtagJetsLoose].pt, axis=-1, ascending=False)] + bl_pairs = da.cartesian({"b":bjetsl,"l":l_fo_conept_sorted}) blpt = (bl_pairs["b"] + bl_pairs["l"]).pt - bl0pt = ak.flatten(blpt[ak.argmax(blpt,axis=-1,keepdims=True)]) + bl0pt = da.flatten(blpt[da.argmax(blpt,axis=-1,keepdims=True)]) # Collection of all objects (leptons and jets) - l_j_collection = ak.with_name(ak.concatenate([l_fo_conept_sorted,goodJets], axis=1),"PtEtaPhiMCollection") + l_j_collection = da.with_name(da.concatenate([l_fo_conept_sorted,goodJets], axis=1),"PtEtaPhiMCollection") # Leading object (j or l) pt - o0pt = ak.max(l_j_collection.pt,axis=-1) + o0pt = da.max(l_j_collection.pt,axis=-1) # Pairs of l+j - l_j_pairs = ak.combinations(l_j_collection,2,fields=["o0","o1"]) + l_j_pairs = da.combinations(l_j_collection,2,fields=["o0","o1"]) l_j_pairs_pt = (l_j_pairs.o0 + l_j_pairs.o1).pt l_j_pairs_mass = (l_j_pairs.o0 + l_j_pairs.o1).mass - lj0pt = ak.max(l_j_pairs_pt,axis=-1) + lj0pt = da.max(l_j_pairs_pt,axis=-1) # Define invariant mass hists mll_0_1 = (l0+l1).mass # Invmass for leading two leps # ST (but "st" is too hard to search in the code, so call it ljptsum) - ljptsum = ak.sum(l_j_collection.pt,axis=-1) + ljptsum = da.sum(l_j_collection.pt,axis=-1) if self._ecut_threshold is not None: ecut_mask = (ljptsum Date: Tue, 9 Jul 2024 12:29:36 -0400 Subject: [PATCH 03/16] Adding dask test, restoring old futures test for CI --- .github/workflows/main.yml | 4 +++ tests/test_futures.py | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/test_futures.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index da41c43ae..b160758ee 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -120,6 +120,10 @@ jobs: run: | conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append tests/test_futures.py + - name: Run topeft processors over test files with dask executor + run: | + conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append tests/test_dask.py + - name: Get topeft yields run: | conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append -k test_make_yields_after_processor diff --git a/tests/test_futures.py b/tests/test_futures.py new file mode 100644 index 000000000..d87a77d52 --- /dev/null +++ b/tests/test_futures.py @@ -0,0 +1,51 @@ +import subprocess +from os.path import exists +import topeft.modules.dataDrivenEstimation as dataDrivenEstimation +from topeft.modules.comp_datacard import comp_datacard + +def test_topcoffea(): + args = [ + "time", + "python", + "analysis/topeft_run2/run_analysis.py", + "-x", + "futures", + "input_samples/sample_jsons/test_samples/UL17_private_ttH_for_CI.json", + "-o", + "output_check_yields", + "-p", + "analysis/topeft_run2/histos/" + ] + + # Run TopCoffea + subprocess.run(args, check=True) + + assert (exists('analysis/topeft_run2/histos/output_check_yields.pkl.gz')) + + +def test_nonprompt(): + a=dataDrivenEstimation.DataDrivenProducer('analysis/topeft_run2/histos/output_check_yields.pkl.gz', 'analysis/topeft_run2/histos/output_check_yields_nonprompt') + a.dumpToPickle() # Do we want to write this file when testing in CI? 
Maybe if we ever save the CI artifacts + + assert (exists('analysis/topeft_run2/histos/output_check_yields_nonprompt.pkl.gz')) + +def test_datacardmaker(): + args = [ + "time", + "python", + "analysis/topeft_run2/make_cards.py", + "analysis/topeft_run2/histos/output_check_yields_nonprompt.pkl.gz", + "-d", + "histos", + "--var-lst", + "lj0pt", + "--do-nuisance", + "--ch-lst", + "2lss_p_4j", + "--skip-selected-wcs-check" + ] + + # Run datacard maker + subprocess.run(args, check=True) + + assert (comp_datacard('histos/ttx_multileptons-2lss_p_4j_lj0pt.txt','analysis/topeft_run2/test/ttx_multileptons-2lss_p_4j_lj0pt.txt')) From 6a373f36dd60ea4cc1f38c95b26983484805ad9a Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 12:30:32 -0400 Subject: [PATCH 04/16] Remove blank lines --- analysis/topeft_run2/run_analysis.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py index e37b394f0..ef8b1a4a3 100644 --- a/analysis/topeft_run2/run_analysis.py +++ b/analysis/topeft_run2/run_analysis.py @@ -23,10 +23,6 @@ from topeft.modules.get_renormfact_envelope import get_renormfact_envelope import analysis_processor - - - - LST_OF_KNOWN_EXECUTORS = ["dask","taskvine"] WGT_VAR_LST = [ From 93a4bfb34abd1701729b64e244ee090a401e14d7 Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 12:33:24 -0400 Subject: [PATCH 05/16] Futures was removed --- .github/workflows/main.yml | 4 --- tests/test_futures.py | 51 -------------------------------------- 2 files changed, 55 deletions(-) delete mode 100644 tests/test_futures.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b160758ee..1db8f7c5e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -116,10 +116,6 @@ jobs: run: | conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append tests/test_make_1d_quad_plots.py - - name: Run topeft processors over test files with futures executor - run: | - conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append tests/test_futures.py - - name: Run topeft processors over test files with dask executor run: | conda run -n coffea-env pytest --cov=./ --cov-report=xml -rP --cov-append tests/test_dask.py diff --git a/tests/test_futures.py b/tests/test_futures.py deleted file mode 100644 index d87a77d52..000000000 --- a/tests/test_futures.py +++ /dev/null @@ -1,51 +0,0 @@ -import subprocess -from os.path import exists -import topeft.modules.dataDrivenEstimation as dataDrivenEstimation -from topeft.modules.comp_datacard import comp_datacard - -def test_topcoffea(): - args = [ - "time", - "python", - "analysis/topeft_run2/run_analysis.py", - "-x", - "futures", - "input_samples/sample_jsons/test_samples/UL17_private_ttH_for_CI.json", - "-o", - "output_check_yields", - "-p", - "analysis/topeft_run2/histos/" - ] - - # Run TopCoffea - subprocess.run(args, check=True) - - assert (exists('analysis/topeft_run2/histos/output_check_yields.pkl.gz')) - - -def test_nonprompt(): - a=dataDrivenEstimation.DataDrivenProducer('analysis/topeft_run2/histos/output_check_yields.pkl.gz', 'analysis/topeft_run2/histos/output_check_yields_nonprompt') - a.dumpToPickle() # Do we want to write this file when testing in CI? 
Maybe if we ever save the CI artifacts - - assert (exists('analysis/topeft_run2/histos/output_check_yields_nonprompt.pkl.gz')) - -def test_datacardmaker(): - args = [ - "time", - "python", - "analysis/topeft_run2/make_cards.py", - "analysis/topeft_run2/histos/output_check_yields_nonprompt.pkl.gz", - "-d", - "histos", - "--var-lst", - "lj0pt", - "--do-nuisance", - "--ch-lst", - "2lss_p_4j", - "--skip-selected-wcs-check" - ] - - # Run datacard maker - subprocess.run(args, check=True) - - assert (comp_datacard('histos/ttx_multileptons-2lss_p_4j_lj0pt.txt','analysis/topeft_run2/test/ttx_multileptons-2lss_p_4j_lj0pt.txt')) From f17064b817bdca39d10b2d65886d5dbe2a632fe3 Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 12:37:33 -0400 Subject: [PATCH 06/16] Adding dask stuff to environment yml --- environment.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment.yml b/environment.yml index 69e5c32cf..bd432f2f7 100644 --- a/environment.yml +++ b/environment.yml @@ -11,3 +11,6 @@ dependencies: - xrootd - git - pyyaml + - dask + - dask-awkward + - dask-histogram From 0a2801e83f6c1bc02bf7c301574ca8b6f85f011c Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 12:45:23 -0400 Subject: [PATCH 07/16] Unpin coffea, use python 3.10 --- environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index bd432f2f7..7fceed680 100644 --- a/environment.yml +++ b/environment.yml @@ -2,9 +2,9 @@ name: coffea-env channels: - conda-forge dependencies: - - conda-forge::python=3.9 + - conda-forge::python=3.10 - numpy=1.23.5 - - coffea=0.7.21 + - coffea - ndcctools - conda-pack - dill From d8dba615a3b818690fb2945c68481202fb5878ea Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 12:48:15 -0400 Subject: [PATCH 08/16] Default redirector '' -> '.' 
Fixes CI not finding local root file --- analysis/topeft_run2/run_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py index ef8b1a4a3..722ed72bb 100644 --- a/analysis/topeft_run2/run_analysis.py +++ b/analysis/topeft_run2/run_analysis.py @@ -43,7 +43,7 @@ parser = argparse.ArgumentParser(description='You can customize your run') parser.add_argument('jsonFiles' , nargs='?', default='', help = 'Json file(s) containing files and metadata') parser.add_argument('--executor','-x' , default='taskvine', help = 'Which executor to use') - parser.add_argument('--prefix', '-r' , nargs='?', default='', help = 'Prefix or redirector to look for the files') + parser.add_argument('--prefix', '-r' , nargs='?', default='.', help = 'Prefix or redirector to look for the files') parser.add_argument('--test','-t' , action='store_true' , help = 'To perform a test, run over a few events in a couple of chunks') parser.add_argument('--pretend' , action='store_true', help = 'Read json files but, not execute the analysis') parser.add_argument('--nworkers','-n' , default=8 , help = 'Number of workers') From a206030393c07086c5811743ae9da609f6bc9b50 Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 13:00:38 -0400 Subject: [PATCH 09/16] Use `dask_hists` branch of TopCoffea --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1db8f7c5e..7ebb8ab8b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -87,7 +87,7 @@ jobs: run: | mkdir dir_for_topcoffea cd dir_for_topcoffea - git clone https://github.com/TopEFT/topcoffea.git + git clone -b dask_hists https://github.com/TopEFT/topcoffea.git cd topcoffea conda run -n coffea-env pip install -e . cd ../.. 
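For reference, the execution model that this series moves run_analysis.py to can be condensed into the minimal sketch below. It assumes the coffea dataset_tools and ndcctools.taskvine APIs exactly as they appear in the diffs above; the fileset contents, the port range, and the `my_processor` instance are illustrative placeholders rather than values taken from the patch.

    import dask
    import ndcctools.taskvine as vine
    from functools import partial
    from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
    from coffea.dataset_tools import preprocess, filter_files

    # Placeholder input: one dataset, mapping file path -> tree name.
    fileset = {"ttH_test": {"files": {"nanoaod_test_file.root": "Events"}}}

    # The TaskVine manager's get() method acts as the dask scheduler.
    mgr = vine.DaskVine(name="my-taskvine-coffea", port=[9123, 9128])
    scheduler_opts = {"scheduler": partial(mgr.get)}

    # 1) Split each input file into processing steps (chunks) and drop unreadable files.
    runnable, updated = preprocess(fileset, step_size=100_000, files_per_batch=10,
                                   skip_bad_files=False, save_form=True,
                                   **scheduler_opts)
    runnable = filter_files(runnable)

    # 2) Build one lazy NanoEvents task graph per dataset and hand it to the processor.
    to_compute = {}
    for name, info in runnable.items():
        events = NanoEventsFactory.from_root(
            info["files"], schemaclass=NanoAODSchema, metadata={"dataset": name}
        ).events()
        to_compute[name] = my_processor.process(events)  # placeholder processor instance

    # 3) Trigger the actual computation on TaskVine workers.
    (output,) = dask.compute(to_compute, **scheduler_opts)

The same scheduler_opts mapping is passed to both preprocess() and dask.compute(), so the file splitting and the histogram filling run through the same TaskVine manager, matching the run_analysis.py changes in this series.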
From fc55fcab39fc021dc275e707b689987d3bca9023 Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Tue, 9 Jul 2024 13:39:25 -0400 Subject: [PATCH 10/16] uncomment actual computation --- analysis/topeft_run2/run_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py index 722ed72bb..e2ff5dd37 100644 --- a/analysis/topeft_run2/run_analysis.py +++ b/analysis/topeft_run2/run_analysis.py @@ -330,8 +330,8 @@ def LoadJsonToSampleName(jsonFile, prefix): ) to_compute = {} - # for name, events_of_name in events.items(): - # to_compute[name] = processor_instance.process(events_of_name) + for name, events_of_name in events.items(): + to_compute[name] = processor_instance.process(events_of_name) (output, ) = dask.compute(to_compute, **scheduler_opts) From a87b45750303f783677c6033df4af3056b4f6dfe Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 13:03:00 -0400 Subject: [PATCH 11/16] Allow coffea warnings for now --- analysis/topeft_run2/run_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py index e2ff5dd37..21dce3d92 100644 --- a/analysis/topeft_run2/run_analysis.py +++ b/analysis/topeft_run2/run_analysis.py @@ -15,7 +15,7 @@ import topcoffea.modules.remote_environment as remote_environment import warnings -warnings.filterwarnings("error", module="coffea.*") +#warnings.filterwarnings("error", module="coffea.*") from coffea.nanoevents import NanoEventsFactory, NanoAODSchema from coffea.dataset_tools import preprocess, filter_files From 275d979acc4ea1eab9181a94207c1c3a6bb6ecaf Mon Sep 17 00:00:00 2001 From: Brent Yates Date: Tue, 9 Jul 2024 16:29:40 -0400 Subject: [PATCH 12/16] Various `da` and `dak` fixes --- analysis/topeft_run2/analysis_processor.py | 147 +++++++++++---------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py index 7bdddb064..931e64ed5 100644 --- a/analysis/topeft_run2/analysis_processor.py +++ b/analysis/topeft_run2/analysis_processor.py @@ -4,6 +4,7 @@ import coffea import numpy as np import dask_awkward as dak +import awkward as ak import dask.array as da import hist @@ -227,7 +228,7 @@ def process(self, events): # An array of lenght events that is just 1 for each event # Probably there's a better way to do this, but we use this method elsewhere so I guess why not.. 
- events.nom = da.ones_like(events.MET.pt) + events.nom = dak.ones_like(events.MET.pt) ele["idEmu"] = te_os.ttH_idEmu_cuts_E3(ele.hoe, ele.eta, ele.deltaEtaSC, ele.eInvMinusPInv, ele.sieie) ele["conept"] = te_os.coneptElec(ele.pt, ele.mvaTTHUL, ele.jetRelIso) @@ -286,7 +287,7 @@ def process(self, events): m_loose = mu[mu.isPres & mu.isLooseM] e_loose = ele[ele.isPres & ele.isLooseE] - l_loose = da.with_name(da.concatenate([e_loose, m_loose], axis=1), 'PtEtaPhiMCandidate') + l_loose = dak.with_name(dak.concatenate([e_loose, m_loose], axis=1), 'PtEtaPhiMCandidate') ################### Tau selection #################### @@ -297,8 +298,8 @@ def process(self, events): tau["isTight"] = te_os.isVLooseTau(tau.idDeepTau2017v2p1VSjet) # use these to veto # Compute pair invariant masses, for all flavors all signes - llpairs = da.combinations(l_loose, 2, fields=["l0","l1"]) - events["minMllAFAS"] = da.min( (llpairs.l0+llpairs.l1).mass, axis=-1) + llpairs = dak.combinations(l_loose, 2, fields=["l0","l1"]) + events["minMllAFAS"] = dak.min( (llpairs.l0+llpairs.l1).mass, axis=-1) # Build FO collection m_fo = mu[mu.isPres & mu.isLooseM & mu.isFO] @@ -311,10 +312,10 @@ def process(self, events): # Attach per lepton fake rates AttachPerLeptonFR(e_fo, flavor = "Elec", year=year) AttachPerLeptonFR(m_fo, flavor = "Muon", year=year) - m_fo['convVeto'] = da.ones_like(m_fo.charge) - m_fo['lostHits'] = da.zeros_like(m_fo.charge) - l_fo = da.with_name(da.concatenate([e_fo, m_fo], axis=1), 'PtEtaPhiMCandidate') - l_fo_conept_sorted = l_fo[da.argsort(l_fo.conept, axis=-1,ascending=False)] + m_fo['convVeto'] = dak.ones_like(m_fo.charge) + m_fo['lostHits'] = dak.zeros_like(m_fo.charge) + l_fo = dak.with_name(dak.concatenate([e_fo, m_fo], axis=1), 'PtEtaPhiMCandidate') + l_fo_conept_sorted = l_fo[dak.argsort(l_fo.conept, axis=-1,ascending=False)] ######### Systematics ########### @@ -335,12 +336,14 @@ def process(self, events): # We only calculate these values if not isData # Note: add() will generally modify up/down weights, so if these are needed for any reason after this point, we should instead pass copies to add() # Note: Here we will to the weights object the SFs that do not depend on any of the forthcoming loops - weights_obj_base = coffea.analysis_tools.Weights(len(events),storeIndividual=True) + weights_obj_base = coffea.analysis_tools.Weights(None,storeIndividual=True) + #weights_obj_base = coffea.analysis_tools.Weights(len(events),storeIndividual=True) if not isData: # If this is no an eft sample, get the genWeight if eft_coeffs is None: genw = events["genWeight"] - else: genw= np.ones_like(events["event"]) + else: genw= dak.ones_like(events["event"]) + #else: genw= da.ones_like(events["event"]) # Normalize by (xsec/sow)*genw where genw is 1 for EFT samples # Note that for theory systs, will need to multiply by sow/sow_wgtUP to get (xsec/sow_wgtUp)*genw and same for Down @@ -374,15 +377,16 @@ def process(self, events): for syst_var in syst_var_list: # Make a copy of the base weights object, so that each time through the loop we do not double count systs # In this loop over systs that impact kinematics, we will add to the weights objects the SFs that depend on the object kinematics - weights_obj_base_for_kinematic_syst = copy.deepcopy(weights_obj_base) + weights_obj_base_for_kinematic_syst = copy.copy(weights_obj_base) #################### Jets #################### # Jet cleaning, before any jet selection - #vetos_tocleanjets = da.with_name( da.concatenate([tau, l_fo], axis=1), "PtEtaPhiMCandidate") - 
vetos_tocleanjets = da.with_name( l_fo, "PtEtaPhiMCandidate") - tmp = da.cartesian([da.local_index(jets.pt), vetos_tocleanjets.jetIdx], nested=True) - cleanedJets = jets[~da.any(tmp.slot0 == tmp.slot1, axis=-1)] # this line should go before *any selection*, otherwise lep.jetIdx is not aligned with the jet index + #vetos_tocleanjets = dak.with_name( dak.concatenate([tau, l_fo], axis=1), "PtEtaPhiMCandidate") + vetos_tocleanjets = dak.with_name( l_fo, "PtEtaPhiMCandidate") + #tmp = dak.cartesian([dak.local_index(jets.pt), vetos_tocleanjets.jetIdx], nested=True) + #cleanedJets = jets[~da.any(tmp.0 == tmp.1, axis=-1)] # this line should go before *any selection*, otherwise lep.jetIdx is not aligned with the jet index + cleanedJets = jets[te_os.isClean(jets, vetos_tocleanjets)] # this line should go before *any selection*, otherwise lep.jetIdx is not aligned with the jet index # Selecting jets and cleaning them jetptname = "pt_nom" if hasattr(cleanedJets, "pt_nom") else "pt" @@ -391,20 +395,20 @@ def process(self, events): if not isData: cleanedJets["pt_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.pt cleanedJets["mass_raw"] = (1 - cleanedJets.rawFactor)*cleanedJets.mass - cleanedJets["pt_gen"] =da.values_astype(da.fill_none(cleanedJets.matched_gen.pt, 0), np.float32) - cleanedJets["rho"] = da.broadcast_arrays(events.fixedGridRhoFastjetAll, cleanedJets.pt)[0] - events_cache = events.caches[0] - cleanedJets = ApplyJetCorrections(year, corr_type='jets').build(cleanedJets, lazy_cache=events_cache) + cleanedJets["pt_gen"] =dak.values_astype(dak.fill_none(cleanedJets.matched_gen.pt, 0), np.float32) + cleanedJets["rho"] = dak.broadcast_arrays(events.fixedGridRhoFastjetAll, cleanedJets.pt)[0] + #events_cache = events.caches[0] + cleanedJets = ApplyJetCorrections(year, corr_type='jets').build(cleanedJets)#, lazy_cache=events_cache) # SYSTEMATICS cleanedJets=ApplyJetSystematics(year,cleanedJets,syst_var) - met=ApplyJetCorrections(year, corr_type='met').build(met_raw, cleanedJets, lazy_cache=events_cache) + met=ApplyJetCorrections(year, corr_type='met').build(met_raw, cleanedJets)#, lazy_cache=events_cache) cleanedJets["isGood"] = tc_os.is_tight_jet(getattr(cleanedJets, jetptname), cleanedJets.eta, cleanedJets.jetId, pt_cut=30., eta_cut=get_te_param("eta_j_cut"), id_cut=get_te_param("jet_id_cut")) goodJets = cleanedJets[cleanedJets.isGood] # Count jets - njets = da.num(goodJets) - ht = da.sum(goodJets.pt,axis=-1) - j0 = goodJets[da.argmax(goodJets.pt,axis=-1,keepdims=True)] + njets = dak.num(goodJets) + ht = dak.sum(goodJets.pt,axis=-1) + j0 = goodJets[dak.argmax(goodJets.pt,axis=-1,keepdims=True)] # Loose DeepJet WP if year == "2017": @@ -419,7 +423,7 @@ def process(self, events): raise ValueError(f"Error: Unknown year \"{year}\".") isBtagJetsLoose = (goodJets.btagDeepFlavB > btagwpl) isNotBtagJetsLoose = np.invert(isBtagJetsLoose) - nbtagsl = da.num(goodJets[isBtagJetsLoose]) + nbtagsl = dak.num(goodJets[isBtagJetsLoose]) # Medium DeepJet WP if year == "2017": @@ -434,7 +438,7 @@ def process(self, events): raise ValueError(f"Error: Unknown year \"{year}\".") isBtagJetsMedium = (goodJets.btagDeepFlavB > btagwpm) isNotBtagJetsMedium = np.invert(isBtagJetsMedium) - nbtagsm = da.num(goodJets[isBtagJetsMedium]) + nbtagsm = dak.num(goodJets[isBtagJetsMedium]) #################### Add variables into event object so that they persist #################### @@ -450,7 +454,7 @@ def process(self, events): te_es.addLepCatMasks(events) # Convenient to have l0, l1, l2 on hand - l_fo_conept_sorted_padded = 
da.pad_none(l_fo_conept_sorted, 3) + l_fo_conept_sorted_padded = dak.pad_none(l_fo_conept_sorted, 3) l0 = l_fo_conept_sorted_padded[:,0] l1 = l_fo_conept_sorted_padded[:,1] l2 = l_fo_conept_sorted_padded[:,2] @@ -465,10 +469,11 @@ def process(self, events): bJetSF = [GetBTagSF(goodJets, year, 'LOOSE'),GetBTagSF(goodJets, year, 'MEDIUM')] bJetEff = [GetBtagEff(goodJets, year, 'loose'),GetBtagEff(goodJets, year, 'medium')] bJetEff_data = [bJetEff[0]*bJetSF[0],bJetEff[1]*bJetSF[1]] - pMC = da.prod(bJetEff[1] [isBtagJetsMedium], axis=-1) * da.prod((bJetEff[0] [isBtagJetsLooseNotMedium] - bJetEff[1] [isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff[0] [isNotBtagJetsLoose]), axis=-1) - pMC = da.where(pMC==0,1,pMC) # removeing zeroes from denominator... - pData = da.prod(bJetEff_data[1] [isBtagJetsMedium], axis=-1) * da.prod((bJetEff_data[0] [isBtagJetsLooseNotMedium] - bJetEff_data[1] [isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_data[0] [isNotBtagJetsLoose]), axis=-1) - weights_obj_base_for_kinematic_syst.add("btagSF", pData/pMC) + pMC = dak.prod(bJetEff[1] [isBtagJetsMedium], axis=-1) * dak.prod((bJetEff[0] [isBtagJetsLooseNotMedium] - bJetEff[1] [isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff[0] [isNotBtagJetsLoose]), axis=-1) + pMC = dak.where(pMC==0,1,pMC) # removeing zeroes from denominator... + pData = dak.prod(bJetEff_data[1] [isBtagJetsMedium], axis=-1) * dak.prod((bJetEff_data[0] [isBtagJetsLooseNotMedium] - bJetEff_data[1] [isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff_data[0] [isNotBtagJetsLoose]), axis=-1) + print(pData,pMC, '\n\n\n') + weights_obj_base_for_kinematic_syst.add("btagSF", (pData/pMC)) if self._do_systematics and syst_var=='nominal': for b_syst in ["bc_corr","light_corr",f"bc_{year}",f"light_{year}"]: @@ -476,13 +481,13 @@ def process(self, events): bJetSFDo = [GetBTagSF(goodJets, year, 'LOOSE', syst=b_syst)[1],GetBTagSF(goodJets, year, 'MEDIUM', syst=b_syst)[1]] bJetEff_dataUp = [bJetEff[0]*bJetSFUp[0],bJetEff[1]*bJetSFUp[1]] bJetEff_dataDo = [bJetEff[0]*bJetSFDo[0],bJetEff[1]*bJetSFDo[1]] - pDataUp = da.prod(bJetEff_dataUp[1][isBtagJetsMedium], axis=-1) * da.prod((bJetEff_dataUp[0][isBtagJetsLooseNotMedium] - bJetEff_dataUp[1][isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_dataUp[0][isNotBtagJetsLoose]), axis=-1) - pDataDo = da.prod(bJetEff_dataDo[1][isBtagJetsMedium], axis=-1) * da.prod((bJetEff_dataDo[0][isBtagJetsLooseNotMedium] - bJetEff_dataDo[1][isBtagJetsLooseNotMedium]), axis=-1) * da.prod((1-bJetEff_dataDo[0][isNotBtagJetsLoose]), axis=-1) + pDataUp = dak.prod(bJetEff_dataUp[1][isBtagJetsMedium], axis=-1) * dak.prod((bJetEff_dataUp[0][isBtagJetsLooseNotMedium] - bJetEff_dataUp[1][isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff_dataUp[0][isNotBtagJetsLoose]), axis=-1) + pDataDo = dak.prod(bJetEff_dataDo[1][isBtagJetsMedium], axis=-1) * dak.prod((bJetEff_dataDo[0][isBtagJetsLooseNotMedium] - bJetEff_dataDo[1][isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff_dataDo[0][isNotBtagJetsLoose]), axis=-1) weights_obj_base_for_kinematic_syst.add(f"btagSF{b_syst}", events.nom, (pDataUp/pMC)/(pData/pMC),(pDataDo/pMC)/(pData/pMC)) # Trigger SFs GetTriggerSF(year,events,l0,l1) - weights_obj_base_for_kinematic_syst.add(f"triggerSF_{year}", events.trigger_sf, copy.deepcopy(events.trigger_sfUp), copy.deepcopy(events.trigger_sfDown)) # In principle does not have to be in the lep cat loop + weights_obj_base_for_kinematic_syst.add(f"triggerSF_{year}", events.trigger_sf, copy.copy(events.trigger_sfUp), 
copy.copy(events.trigger_sfDown)) # In principle does not have to be in the lep cat loop ######### Event weights that do depend on the lep cat ########### @@ -492,19 +497,19 @@ def process(self, events): for ch_name in ["2l", "2l_4t", "3l", "4l", "2l_CR", "2l_CRflip", "3l_CR", "2los_CRtt", "2los_CRZ"]: # For both data and MC - weights_dict[ch_name] = copy.deepcopy(weights_obj_base_for_kinematic_syst) + weights_dict[ch_name] = copy.copy(weights_obj_base_for_kinematic_syst) if ch_name.startswith("2l"): - weights_dict[ch_name].add("FF", events.fakefactor_2l, copy.deepcopy(events.fakefactor_2l_up), copy.deepcopy(events.fakefactor_2l_down)) - weights_dict[ch_name].add("FFpt", events.nom, copy.deepcopy(events.fakefactor_2l_pt1/events.fakefactor_2l), copy.deepcopy(events.fakefactor_2l_pt2/events.fakefactor_2l)) - weights_dict[ch_name].add("FFeta", events.nom, copy.deepcopy(events.fakefactor_2l_be1/events.fakefactor_2l), copy.deepcopy(events.fakefactor_2l_be2/events.fakefactor_2l)) - weights_dict[ch_name].add(f"FFcloseEl_{year}", events.nom, copy.deepcopy(events.fakefactor_2l_elclosureup/events.fakefactor_2l), copy.deepcopy(events.fakefactor_2l_elclosuredown/events.fakefactor_2l)) - weights_dict[ch_name].add(f"FFcloseMu_{year}", events.nom, copy.deepcopy(events.fakefactor_2l_muclosureup/events.fakefactor_2l), copy.deepcopy(events.fakefactor_2l_muclosuredown/events.fakefactor_2l)) + weights_dict[ch_name].add("FF", events.fakefactor_2l, copy.copy(events.fakefactor_2l_up), copy.copy(events.fakefactor_2l_down)) + weights_dict[ch_name].add("FFpt", events.nom, copy.copy(events.fakefactor_2l_pt1/events.fakefactor_2l), copy.copy(events.fakefactor_2l_pt2/events.fakefactor_2l)) + weights_dict[ch_name].add("FFeta", events.nom, copy.copy(events.fakefactor_2l_be1/events.fakefactor_2l), copy.copy(events.fakefactor_2l_be2/events.fakefactor_2l)) + weights_dict[ch_name].add(f"FFcloseEl_{year}", events.nom, copy.copy(events.fakefactor_2l_elclosureup/events.fakefactor_2l), copy.copy(events.fakefactor_2l_elclosuredown/events.fakefactor_2l)) + weights_dict[ch_name].add(f"FFcloseMu_{year}", events.nom, copy.copy(events.fakefactor_2l_muclosureup/events.fakefactor_2l), copy.copy(events.fakefactor_2l_muclosuredown/events.fakefactor_2l)) elif ch_name.startswith("3l"): - weights_dict[ch_name].add("FF", events.fakefactor_3l, copy.deepcopy(events.fakefactor_3l_up), copy.deepcopy(events.fakefactor_3l_down)) - weights_dict[ch_name].add("FFpt", events.nom, copy.deepcopy(events.fakefactor_3l_pt1/events.fakefactor_3l), copy.deepcopy(events.fakefactor_3l_pt2/events.fakefactor_3l)) - weights_dict[ch_name].add("FFeta", events.nom, copy.deepcopy(events.fakefactor_3l_be1/events.fakefactor_3l), copy.deepcopy(events.fakefactor_3l_be2/events.fakefactor_3l)) - weights_dict[ch_name].add(f"FFcloseEl_{year}", events.nom, copy.deepcopy(events.fakefactor_3l_elclosureup/events.fakefactor_3l), copy.deepcopy(events.fakefactor_3l_elclosuredown/events.fakefactor_3l)) - weights_dict[ch_name].add(f"FFcloseMu_{year}", events.nom, copy.deepcopy(events.fakefactor_3l_muclosureup/events.fakefactor_3l), copy.deepcopy(events.fakefactor_3l_muclosuredown/events.fakefactor_3l)) + weights_dict[ch_name].add("FF", events.fakefactor_3l, copy.copy(events.fakefactor_3l_up), copy.copy(events.fakefactor_3l_down)) + weights_dict[ch_name].add("FFpt", events.nom, copy.copy(events.fakefactor_3l_pt1/events.fakefactor_3l), copy.copy(events.fakefactor_3l_pt2/events.fakefactor_3l)) + weights_dict[ch_name].add("FFeta", events.nom, 
copy.copy(events.fakefactor_3l_be1/events.fakefactor_3l), copy.copy(events.fakefactor_3l_be2/events.fakefactor_3l)) + weights_dict[ch_name].add(f"FFcloseEl_{year}", events.nom, copy.copy(events.fakefactor_3l_elclosureup/events.fakefactor_3l), copy.copy(events.fakefactor_3l_elclosuredown/events.fakefactor_3l)) + weights_dict[ch_name].add(f"FFcloseMu_{year}", events.nom, copy.copy(events.fakefactor_3l_muclosureup/events.fakefactor_3l), copy.copy(events.fakefactor_3l_muclosuredown/events.fakefactor_3l)) # For data only if isData: @@ -514,14 +519,14 @@ def process(self, events): # For MC only if not isData: if ch_name.startswith("2l"): - weights_dict[ch_name].add("lepSF_muon", events.sf_2l_muon, copy.deepcopy(events.sf_2l_hi_muon), copy.deepcopy(events.sf_2l_lo_muon)) - weights_dict[ch_name].add("lepSF_elec", events.sf_2l_elec, copy.deepcopy(events.sf_2l_hi_elec), copy.deepcopy(events.sf_2l_lo_elec)) + weights_dict[ch_name].add("lepSF_muon", events.sf_2l_muon, copy.copy(events.sf_2l_hi_muon), copy.copy(events.sf_2l_lo_muon)) + weights_dict[ch_name].add("lepSF_elec", events.sf_2l_elec, copy.copy(events.sf_2l_hi_elec), copy.copy(events.sf_2l_lo_elec)) elif ch_name.startswith("3l"): - weights_dict[ch_name].add("lepSF_muon", events.sf_3l_muon, copy.deepcopy(events.sf_3l_hi_muon), copy.deepcopy(events.sf_3l_lo_muon)) - weights_dict[ch_name].add("lepSF_elec", events.sf_3l_elec, copy.deepcopy(events.sf_3l_hi_elec), copy.deepcopy(events.sf_3l_lo_elec)) + weights_dict[ch_name].add("lepSF_muon", events.sf_3l_muon, copy.copy(events.sf_3l_hi_muon), copy.copy(events.sf_3l_lo_muon)) + weights_dict[ch_name].add("lepSF_elec", events.sf_3l_elec, copy.copy(events.sf_3l_hi_elec), copy.copy(events.sf_3l_lo_elec)) elif ch_name.startswith("4l"): - weights_dict[ch_name].add("lepSF_muon", events.sf_4l_muon, copy.deepcopy(events.sf_4l_hi_muon), copy.deepcopy(events.sf_4l_lo_muon)) - weights_dict[ch_name].add("lepSF_elec", events.sf_4l_elec, copy.deepcopy(events.sf_4l_hi_elec), copy.deepcopy(events.sf_4l_lo_elec)) + weights_dict[ch_name].add("lepSF_muon", events.sf_4l_muon, copy.copy(events.sf_4l_hi_muon), copy.copy(events.sf_4l_lo_muon)) + weights_dict[ch_name].add("lepSF_elec", events.sf_4l_elec, copy.copy(events.sf_4l_hi_elec), copy.copy(events.sf_4l_lo_elec)) else: raise Exception(f"Unknown channel name: {ch_name}") @@ -546,12 +551,12 @@ def process(self, events): bmask_atleast3med = (nbtagsm>=3) # Used for tttt enriched # Charge masks - chargel0_p = da.fill_none(((l0.charge)>0),False) - chargel0_m = da.fill_none(((l0.charge)<0),False) - charge2l_0 = da.fill_none(((l0.charge+l1.charge)==0),False) - charge2l_1 = da.fill_none(((l0.charge+l1.charge)!=0),False) - charge3l_p = da.fill_none(((l0.charge+l1.charge+l2.charge)>0),False) - charge3l_m = da.fill_none(((l0.charge+l1.charge+l2.charge)<0),False) + chargel0_p = dak.fill_none(((l0.charge)>0),False) + chargel0_m = dak.fill_none(((l0.charge)<0),False) + charge2l_0 = dak.fill_none(((l0.charge+l1.charge)==0),False) + charge2l_1 = dak.fill_none(((l0.charge+l1.charge)!=0),False) + charge3l_p = dak.fill_none(((l0.charge+l1.charge+l2.charge)>0),False) + charge3l_m = dak.fill_none(((l0.charge+l1.charge+l2.charge)<0),False) ######### Store boolean masks with PackedSelection ########## @@ -630,42 +635,42 @@ def process(self, events): # Calculate ptbl ptbl_bjet = goodJets[(isBtagJetsMedium | isBtagJetsLoose)] - ptbl_bjet = ptbl_bjet[da.argmax(ptbl_bjet.pt,axis=-1,keepdims=True)] # Only save hardest b-jet + ptbl_bjet = ptbl_bjet[dak.argmax(ptbl_bjet.pt,axis=-1,keepdims=True)] # 
Only save hardest b-jet ptbl_lep = l_fo_conept_sorted ptbl = (ptbl_bjet.nearest(ptbl_lep) + ptbl_bjet).pt - ptbl = da.values_astype(da.fill_none(ptbl, -1), np.float32) + ptbl = dak.values_astype(dak.fill_none(ptbl, -1), np.float32) # Z pt (pt of the ll pair that form the Z for the onZ categories) ptz = te_es.get_Z_pt(l_fo_conept_sorted_padded[:,0:3],10.0) # Leading (b+l) pair pt - bjetsl = goodJets[isBtagJetsLoose][da.argsort(goodJets[isBtagJetsLoose].pt, axis=-1, ascending=False)] - bl_pairs = da.cartesian({"b":bjetsl,"l":l_fo_conept_sorted}) + bjetsl = goodJets[isBtagJetsLoose][dak.argsort(goodJets[isBtagJetsLoose].pt, axis=-1, ascending=False)] + bl_pairs = dak.cartesian({"b":bjetsl,"l":l_fo_conept_sorted}) blpt = (bl_pairs["b"] + bl_pairs["l"]).pt - bl0pt = da.flatten(blpt[da.argmax(blpt,axis=-1,keepdims=True)]) + bl0pt = dak.flatten(blpt[dak.argmax(blpt,axis=-1,keepdims=True)]) # Collection of all objects (leptons and jets) - l_j_collection = da.with_name(da.concatenate([l_fo_conept_sorted,goodJets], axis=1),"PtEtaPhiMCollection") + l_j_collection = dak.with_name(dak.concatenate([l_fo_conept_sorted,goodJets], axis=1),"PtEtaPhiMCollection") # Leading object (j or l) pt - o0pt = da.max(l_j_collection.pt,axis=-1) + o0pt = dak.max(l_j_collection.pt,axis=-1) # Pairs of l+j - l_j_pairs = da.combinations(l_j_collection,2,fields=["o0","o1"]) + l_j_pairs = dak.combinations(l_j_collection,2,fields=["o0","o1"]) l_j_pairs_pt = (l_j_pairs.o0 + l_j_pairs.o1).pt l_j_pairs_mass = (l_j_pairs.o0 + l_j_pairs.o1).mass - lj0pt = da.max(l_j_pairs_pt,axis=-1) + lj0pt = dak.max(l_j_pairs_pt,axis=-1) # Define invariant mass hists mll_0_1 = (l0+l1).mass # Invmass for leading two leps # ST (but "st" is too hard to search in the code, so call it ljptsum) - ljptsum = da.sum(l_j_collection.pt,axis=-1) + ljptsum = dak.sum(l_j_collection.pt,axis=-1) if self._ecut_threshold is not None: ecut_mask = (ljptsum Date: Tue, 9 Jul 2024 16:31:41 -0400 Subject: [PATCH 13/16] Disable random for now, other minor fixes --- topeft/modules/corrections.py | 273 +++++++++++++++++++++++++++------- 1 file changed, 221 insertions(+), 52 deletions(-) diff --git a/topeft/modules/corrections.py b/topeft/modules/corrections.py index 10523f192..f8a7aef4a 100644 --- a/topeft/modules/corrections.py +++ b/topeft/modules/corrections.py @@ -3,12 +3,14 @@ into coffea format of corrections. 
''' +#import uproot, uproot_methods +import uproot from coffea import lookup_tools +import scipy.stats from topcoffea.modules.paths import topcoffea_path from topeft.modules.paths import topeft_path import numpy as np import awkward as ak -import scipy import gzip import pickle from coffea.jetmet_tools import JECStack, CorrectedJetsFactory, CorrectedMETFactory @@ -209,7 +211,7 @@ def ApplyTES(events, Taus, isData): gen = Taus.genPartFlav whereFlag = ((pt>20) & (pt<205) & (gen==5)) - tes = np.where(whereFlag, SFevaluator['TauTES_{year}'.format(year=year)](dm,pt), 1) + tes = ak.where(whereFlag, SFevaluator['TauTES_{year}'.format(year=year)](dm,pt), 1) return (Taus.pt*tes, Taus.mass*tes) #return(Taus.pt*tes) @@ -228,23 +230,23 @@ def AttachTauSF(events, Taus, year): mass= padded_Taus.mass whereFlag = ((pt>20) & (pt<205) & (gen==5) & (padded_Taus["isLoose"]) & (~padded_Taus["isMedium"])) - real_sf_loose = np.where(whereFlag, SFevaluator['TauSF_{year}_Loose'.format(year=year)](dm,pt), 1) - real_sf_loose_up = np.where(whereFlag, SFevaluator['TauSF_{year}_Loose_up'.format(year=year)](dm,pt), 1) - real_sf_loose_down = np.where(whereFlag, SFevaluator['TauSF_{year}_Loose_down'.format(year=year)](dm,pt), 1) + real_sf_loose = ak.where(whereFlag, SFevaluator['TauSF_{year}_Loose'.format(year=year)](dm,pt), 1) + real_sf_loose_up = ak.where(whereFlag, SFevaluator['TauSF_{year}_Loose_up'.format(year=year)](dm,pt), 1) + real_sf_loose_down = ak.where(whereFlag, SFevaluator['TauSF_{year}_Loose_down'.format(year=year)](dm,pt), 1) whereFlag = ((pt>20) & (pt<205) & (gen==5) & (padded_Taus["isMedium"]) & (~padded_Taus["isTight"])) - real_sf_medium = np.where(whereFlag, SFevaluator['TauSF_{year}_Medium'.format(year=year)](dm,pt), 1) - real_sf_medium_up = np.where(whereFlag, SFevaluator['TauSF_{year}_Medium_up'.format(year=year)](dm,pt), 1) - real_sf_medium_down = np.where(whereFlag, SFevaluator['TauSF_{year}_Medium_down'.format(year=year)](dm,pt), 1) + real_sf_medium = ak.where(whereFlag, SFevaluator['TauSF_{year}_Medium'.format(year=year)](dm,pt), 1) + real_sf_medium_up = ak.where(whereFlag, SFevaluator['TauSF_{year}_Medium_up'.format(year=year)](dm,pt), 1) + real_sf_medium_down = ak.where(whereFlag, SFevaluator['TauSF_{year}_Medium_down'.format(year=year)](dm,pt), 1) whereFlag = ((pt>20) & (pt<205) & (gen==5) & (padded_Taus["isTight"])) - real_sf_tight = np.where(whereFlag, SFevaluator['TauSF_{year}_Tight'.format(year=year)](dm,pt), 1) - real_sf_tight_up = np.where(whereFlag, SFevaluator['TauSF_{year}_Tight_up'.format(year=year)](dm,pt), 1) - real_sf_tight_down = np.where(whereFlag, SFevaluator['TauSF_{year}_Tight_down'.format(year=year)](dm,pt), 1) + real_sf_tight = ak.where(whereFlag, SFevaluator['TauSF_{year}_Tight'.format(year=year)](dm,pt), 1) + real_sf_tight_up = ak.where(whereFlag, SFevaluator['TauSF_{year}_Tight_up'.format(year=year)](dm,pt), 1) + real_sf_tight_down = ak.where(whereFlag, SFevaluator['TauSF_{year}_Tight_down'.format(year=year)](dm,pt), 1) whereFlag = ((pt>20) & (pt<205) & (gen!=5) & (gen!=0) & (gen!=6)) if year == "2016APV": year = "2016" - fake_sf = np.where(whereFlag, SFevaluator['TauFake_{year}'.format(year=year)](np.abs(eta),gen), 1) + fake_sf = ak.where(whereFlag, SFevaluator['TauFake_{year}'.format(year=year)](np.abs(eta),gen), 1) whereFlag = ((pt>20) & (pt<205) & (gen!=5) & (gen!=4) & (gen!=3) & (gen!=2) & (gen!=1) & (~padded_Taus["isLoose"]) & (padded_Taus["isVLoose"])) - faker_sf = np.where(whereFlag, SFevaluator['TauFakeSF_{year}'.format(year=year)](pt), 1) + faker_sf = 
@@ -291,9 +293,11 @@ def AttachPerLeptonFR(leps, flavor, year):
         leps['fakefactor_%sclosureup' % flav] = leps['fakefactor'] * leps['fakefactor_%sclosurefactor' % flav]
     if flavor == "Elec":
-        leps['fliprate'] = (chargeflip_sf)*(flip_lookup(leps.pt,abs(leps.eta)))
+        #TODO implement
+        #leps['fliprate'] = (chargeflip_sf)*(flip_lookup(leps.pt,abs(leps.eta)))
+        leps['fliprate'] = (chargeflip_sf)*(ak.ones_like(leps.pt))
     else:
-        leps['fliprate'] = np.zeros_like(leps.pt)
+        leps['fliprate'] = ak.zeros_like(leps.pt)
 def fakeRateWeight1l(events, lep1):
     for syst in ffSysts+['_elclosureup','_elclosuredown','_muclosureup','_muclosuredown']:
@@ -333,8 +337,8 @@ def AttachMuonSF(muons, year):
     eta = np.abs(muons.eta)
     pt = muons.pt
     if year not in ['2016','2016APV','2017','2018']: raise Exception(f"Error: Unknown year \"{year}\".")
-    reco_sf = np.where(pt < 20,SFevaluator['MuonRecoSF_{year}'.format(year=year)](eta,pt),1) # sf=1 when pt>20 because there is no reco SF available
-    reco_err = np.where(pt < 20,SFevaluator['MuonRecoSF_{year}_er'.format(year=year)](eta,pt),0) # sf error =0 when pt>20 because there is no reco SF available
+    reco_sf = ak.where(pt < 20,SFevaluator['MuonRecoSF_{year}'.format(year=year)](eta,pt),1) # sf=1 when pt>20 because there is no reco SF available
+    reco_err = ak.where(pt < 20,SFevaluator['MuonRecoSF_{year}_er'.format(year=year)](eta,pt),0) # sf error =0 when pt>20 because there is no reco SF available
     loose_sf = SFevaluator['MuonLooseSF_{year}'.format(year=year)](eta,pt)
     loose_err = np.sqrt( SFevaluator['MuonLooseSF_{year}_stat'.format(year=year)](eta,pt) * SFevaluator['MuonLooseSF_{year}_stat'.format(year=year)](eta,pt) +
@@ -370,12 +374,12 @@ def AttachElectronSF(electrons, year):
     if year not in ['2016','2016APV','2017','2018']: raise Exception(f"Error: Unknown year \"{year}\".")
-    reco_sf = np.where(
+    reco_sf = ak.where(
         pt < 20,
         SFevaluator['ElecRecoSFBe_{year}'.format(year=year)](eta,pt),
         SFevaluator['ElecRecoSFAb_{year}'.format(year=year)](eta,pt)
     )
-    reco_err = np.where(
+    reco_err = ak.where(
         pt < 20,
         SFevaluator['ElecRecoSFBe_{year}_er'.format(year=year)](eta,pt),
         SFevaluator['ElecRecoSFAb_{year}_er'.format(year=year)](eta,pt)
     )
@@ -420,10 +424,10 @@ def GetMCeffFunc(year, wp='medium', flav='b'):
         else: hists[k] = hin[k]
     h = hists['jetptetaflav']
-    hnum = h[{'WP': wp}]
-    hden = h[{'WP': 'all'}]
+    hnum = h.integrate('WP', wp)
+    hden = h.integrate('WP', 'all')
     getnum = lookup_tools.dense_lookup.dense_lookup(
-        hnum.values(flow=True)[1:,1:,1:], # Strip off underflow
+        hnum.values()[()],
         [
             hnum.axes['pt'].edges,
             hnum.axes['abseta'].edges,
@@ -431,14 +435,14 @@ def GetMCeffFunc(year, wp='medium', flav='b'):
         ]
     )
     getden = lookup_tools.dense_lookup.dense_lookup(
-        hden.values(flow=True)[1:,1:,1:],
+        hden.values()[()],
         [
             hden.axes['pt'].edges,
             hnum.axes['abseta'].edges,
             hden.axes['flav'].edges
         ]
    )
-    values = hnum.values(flow=True)[1:,1:,1:]
+    values = hnum.values()[()]
     edges = [hnum.axes['pt'].edges, hnum.axes['abseta'].edges, hnum.axes['flav'].edges]
     fun = lambda pt, abseta, flav: getnum(pt,abseta,flav)/getden(pt,abseta,flav)
     return fun
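For reference (toy numbers only, not the real jetptetaflav histogram), the efficiency function that GetMCeffFunc returns is just the ratio of two dense_lookup tables evaluated at each jet's (pt, |eta|, flavor):

import numpy as np
from coffea import lookup_tools

pt_edges = np.array([20., 50., 100., 1000.])
abseta_edges = np.array([0., 1.2, 2.4])
flav_edges = np.array([-0.5, 3.5, 4.5, 5.5])   # rough light / c / b buckets (illustrative)

passing = np.full((3, 2, 3), 2.0)              # hypothetical tagged-jet counts per bin
total = np.full((3, 2, 3), 10.0)               # hypothetical all-jet counts per bin

getnum = lookup_tools.dense_lookup.dense_lookup(passing, [pt_edges, abseta_edges, flav_edges])
getden = lookup_tools.dense_lookup.dense_lookup(total, [pt_edges, abseta_edges, flav_edges])
eff = lambda pt, abseta, flav: getnum(pt, abseta, flav) / getden(pt, abseta, flav)

print(eff(np.array([35.0, 250.0]), np.array([0.5, 1.8]), np.array([5, 0])))  # -> [0.2 0.2]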
@@ -482,23 +486,23 @@ def GetBTagSF(jets, year, wp='MEDIUM', syst='central'):
     # Workaround: For UL16, use the SFs from the UL16APV for light flavor jets
     if (f == 0) and (year == "2016"):
         if f"{year}" in syst:
-            jets[f"btag_{syst}_up"] = np.where(
+            jets[f"btag_{syst}_up"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag_UL16APV.eval("up_uncorrelated",jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_up"]
             )
-            jets[f"btag_{syst}_down"] = np.where(
+            jets[f"btag_{syst}_down"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag_UL16APV.eval("down_uncorrelated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_down"]
             )
         else:
-            jets[f"btag_{syst}_up"] = np.where(
+            jets[f"btag_{syst}_up"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag_UL16APV.eval("up_correlated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_up"]
             )
-            jets[f"btag_{syst}_down"] = np.where(
+            jets[f"btag_{syst}_down"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag_UL16APV.eval("down_correlated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_down"]
@@ -506,29 +510,186 @@ def GetBTagSF(jets, year, wp='MEDIUM', syst='central'):
     # Otherwise, proceed as usual
     else:
         if f"{year}" in syst:
-            jets[f"btag_{syst}_up"] = np.where(
+            jets[f"btag_{syst}_up"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag.eval("up_uncorrelated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_up"]
             )
-            jets[f"btag_{syst}_down"] = np.where(
+            jets[f"btag_{syst}_down"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag.eval("down_uncorrelated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_down"]
             )
         else:
-            jets[f"btag_{syst}_up"] = np.where(
+            jets[f"btag_{syst}_up"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag.eval("up_correlated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_up"]
            )
-            jets[f"btag_{syst}_down"] = np.where(
+            jets[f"btag_{syst}_down"] = ak.where(
                 abs(jets.hadronFlavour) == f,
                 SFevaluatorBtag.eval("down_correlated", jets.hadronFlavour,np.abs(jets.eta),pt,jets.btagDeepFlavB,True),
                 jets[f"btag_{syst}_down"]
             )
     return ([jets[f"btag_{syst}_up"],jets[f"btag_{syst}_down"]])
+###### Pileup reweighting
+##############################################
+## Get central PU data and MC profiles and calculate reweighting
+## Using the current UL recommendations in:
+##   https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData
+##   - 2018: /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/PileUp/UltraLegacy/
+##   - 2017: /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/PileUp/UltraLegacy/
+##   - 2016: /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/PileUp/UltraLegacy/
+##
+## MC histograms from:
+##   https://github.com/CMS-LUMI-POG/PileupTools/
+
+pudirpath = topcoffea_path('data/pileup/')
+
+def GetDataPUname(year, var=0):
+    ''' Returns the name of the file to read pu observed distribution '''
+    if year == '2016APV': year = '2016-preVFP'
+    if year == '2016': year = "2016-postVFP"
+    if var == 'nominal':
+        ppxsec = get_tc_param("pu_w")
+    elif var == 'up':
+        ppxsec = get_tc_param("pu_w_up")
+    elif var == 'down':
+        ppxsec = get_tc_param("pu_w_down")
+    year = str(year)
+    return 'PileupHistogram-goldenJSON-13tev-%s-%sub-99bins.root' % ((year), str(ppxsec))
+
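The GetPUSF weights defined a bit further down are just the ratio of the normalized data and MC nTrueInt profiles loaded here; a toy version of that lookup (made-up profiles, same dense_lookup machinery):

import numpy as np
from coffea import lookup_tools

edges = np.linspace(0, 99, 100)                          # 99 bins, as in the goldenJSON histograms
rng = np.random.default_rng(0)
mc_profile = rng.poisson(100, 99).astype(float)          # stand-in MC pileup profile
data_profile = rng.poisson(110, 99).astype(float)        # stand-in data pileup profile

mc_lookup = lookup_tools.dense_lookup.dense_lookup(mc_profile / mc_profile.sum(), edges)
data_lookup = lookup_tools.dense_lookup.dense_lookup(data_profile / data_profile.sum(), edges)

nTrueInt = np.array([23, 31, 40])                        # per-event Pileup.nTrueInt values
pu_weight = data_lookup(nTrueInt) / mc_lookup(nTrueInt)  # the same ratio GetPUSF computes
print(pu_weight)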
+MCPUfile = {'2016APV':'pileup_2016BF.root', '2016':'pileup_2016GH.root', '2017':'pileup_2017_shifts.root', '2018':'pileup_2018_shifts.root'}
+def GetMCPUname(year):
+    ''' Returns the name of the file to read pu MC profile '''
+    return MCPUfile[str(year)]
+
+PUfunc = {}
+### Load histograms and get lookup tables (extractors are not working here...)
+for year in ['2016', '2016APV', '2017', '2018']:
+    PUfunc[year] = {}
+    with uproot.open(pudirpath+GetMCPUname(year)) as fMC:
+        hMC = fMC['pileup']
+        PUfunc[year]['MC'] = lookup_tools.dense_lookup.dense_lookup(
+            hMC.values() / np.sum(hMC.values()),
+            hMC.axis(0).edges()
+        )
+    with uproot.open(pudirpath + GetDataPUname(year,'nominal')) as fData:
+        hD = fData['pileup']
+        PUfunc[year]['Data'] = lookup_tools.dense_lookup.dense_lookup(
+            hD.values() / np.sum(hD.values()),
+            hD.axis(0).edges()
+        )
+    with uproot.open(pudirpath + GetDataPUname(year,'up')) as fDataUp:
+        hDUp = fDataUp['pileup']
+        PUfunc[year]['DataUp'] = lookup_tools.dense_lookup.dense_lookup(
+            hDUp.values() / np.sum(hDUp.values()),
+            hD.axis(0).edges()
+        )
+    with uproot.open(pudirpath + GetDataPUname(year, 'down')) as fDataDo:
+        hDDo = fDataDo['pileup']
+        PUfunc[year]['DataDo'] = lookup_tools.dense_lookup.dense_lookup(
+            hDDo.values() / np.sum(hDDo.values()),
+            hD.axis(0).edges()
+        )
+
+def GetPUSF(nTrueInt, year, var='nominal'):
+    year = str(year)
+    if year not in ['2016','2016APV','2017','2018']:
+        raise Exception(f"Error: Unknown year \"{year}\".")
+    nMC = PUfunc[year]['MC'](nTrueInt+1)
+    data_dir = 'Data'
+    if var == 'up':
+        data_dir = 'DataUp'
+    elif var == 'down':
+        data_dir = 'DataDo'
+    nData = PUfunc[year][data_dir](nTrueInt)
+    weights = np.divide(nData,nMC)
+    return weights
+
+def AttachPSWeights(events):
+    '''
+    Return a list of PS weights
+    PS weights (w_var / w_nominal)
+    [0] is ISR=0.5 FSR = 1
+    [1] is ISR=1 FSR = 0.5
+    [2] is ISR=2 FSR = 1
+    [3] is ISR=1 FSR = 2
+    '''
+    ISR = 0
+    FSR = 1
+    ISRdown = 0
+    FSRdown = 1
+    ISRup = 2
+    FSRup = 3
+    if events.PSWeight is None:
+        raise Exception('PSWeight not found!')
+    # Add up variation event weights
+    events['ISRUp'] = events.PSWeight[:, ISRup]
+    events['FSRUp'] = events.PSWeight[:, FSRup]
+    # Add down variation event weights
+    events['ISRDown'] = events.PSWeight[:, ISRdown]
+    events['FSRDown'] = events.PSWeight[:, FSRdown]
+
+def AttachScaleWeights(events):
+    '''
+    Return a list of scale weights
+    LHE scale variation weights (w_var / w_nominal)
+    Case 1:
+    [0] is renscfact = 0.5d0 facscfact = 0.5d0
+    [1] is renscfact = 0.5d0 facscfact = 1d0
+    [2] is renscfact = 0.5d0 facscfact = 2d0
+    [3] is renscfact = 1d0 facscfact = 0.5d0
+    [4] is renscfact = 1d0 facscfact = 1d0
+    [5] is renscfact = 1d0 facscfact = 2d0
+    [6] is renscfact = 2d0 facscfact = 0.5d0
+    [7] is renscfact = 2d0 facscfact = 1d0
+    [8] is renscfact = 2d0 facscfact = 2d0
+    Case 2:
+    [0] is MUF = "0.5" MUR = "0.5"
+    [1] is MUF = "1.0" MUR = "0.5"
+    [2] is MUF = "2.0" MUR = "0.5"
+    [3] is MUF = "0.5" MUR = "1.0"
+    [4] is MUF = "2.0" MUR = "1.0"
+    [5] is MUF = "0.5" MUR = "2.0"
+    [6] is MUF = "1.0" MUR = "2.0"
+    [7] is MUF = "2.0" MUR = "2.0"
+    '''
+    # Determine if we are in case 1 or case 2 by checking if we have 8 or 9 weights
+    len_of_wgts = ak.count(events.LHEScaleWeight,axis=-1)
+    all_len_9_or_0_bool = ak.all((len_of_wgts==9) | (len_of_wgts==0))
+    all_len_8_or_0_bool = ak.all((len_of_wgts==8) | (len_of_wgts==0))
+    if all_len_9_or_0_bool:
+        scale_weights = ak.fill_none(ak.pad_none(events.LHEScaleWeight, 9), 1) # FIXME this is a bandaid until we understand _why_ some are empty
+        renormDown_factDown = 0
+        renormDown = 1
+        renormDown_factUp = 2
+        factDown = 3
+        nominal = 4
+        factUp = 5
+        renormUp_factDown = 6
+        renormUp = 7
+        renormUp_factUp = 8
+    elif all_len_8_or_0_bool:
+        scale_weights = ak.fill_none(ak.pad_none(events.LHEScaleWeight, 8), 1) # FIXME this is a bandaid until we understand _why_ some are empty
+        renormDown_factDown = 0
+        renormDown = 1
+        renormDown_factUp = 2
+        factDown = 3
+        factUp = 4
+        renormUp_factDown = 5
+        renormUp = 6
+        renormUp_factUp = 7
+    else:
+        raise Exception("Unknown weight type")
+    # Get the weights from the event
+    events['renormfactDown'] = scale_weights[:,renormDown_factDown]
+    events['renormDown'] = scale_weights[:,renormDown]
+    events['factDown'] = scale_weights[:,factDown]
+    events['factUp'] = scale_weights[:,factUp]
+    events['renormUp'] = scale_weights[:,renormUp]
+    events['renormfactUp'] = scale_weights[:,renormUp_factUp]
 def AttachPdfWeights(events):
     '''
@@ -624,7 +785,7 @@ def ApplyJetSystematics(year,cleanedJets,syst_var):
     qmask = np.array(ak.flatten(qmask))
     gmask = abs(cleanedJets.partonFlavour)==21
     gmask = np.array(ak.flatten(gmask))
-    corrections = np.array(np.zeros_like(ak.flatten(cleanedJets.JES_FlavorQCD.up.pt)))
+    corrections = np.array(ak.zeros_like(ak.flatten(cleanedJets.JES_FlavorQCD.up.pt)))
     if 'Up' in syst_var:
         corrections[bmask] = corrections[bmask] + np.array(ak.flatten(cleanedJets.JES_FlavorQCD.up.pt))[bmask]
         corrections[cmask] = corrections[cmask] + np.array(ak.flatten(cleanedJets.JES_FlavorQCD.up.pt))[cmask]
@@ -665,9 +826,12 @@ def ApplyRochesterCorrections(year, mu, is_data):
     rochester = rochester_lookup.rochester_lookup(rochester_data)
     if not is_data:
         hasgen = ~np.isnan(ak.fill_none(mu.matched_gen.pt, np.nan))
-        mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(mu.pt)).shape)
-        mc_rand = ak.unflatten(mc_rand, ak.num(mu.pt, axis=1))
-        corrections = np.array(ak.flatten(ak.ones_like(mu.pt)))
+        #FIXME randomize with dask(-awkward)
+        #mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(mu.pt)).shape)
+        #mc_rand = ak.unflatten(mc_rand, ak.num(mu.pt, axis=1))
+        mc_rand = ak.ones_like(mu.pt)
+        #corrections = np.array(ak.flatten(ak.ones_like(mu.pt)))
+        corrections = ak.ones_like(mu.pt)
         mc_kspread = rochester.kSpreadMC(
             mu.charge[hasgen],mu.pt[hasgen],
             mu.eta[hasgen],
@@ -682,10 +846,10 @@ def ApplyRochesterCorrections(year, mu, is_data):
             mu.nTrackerLayers[~hasgen],
             mc_rand[~hasgen]
         )
-        hasgen_flat = np.array(ak.flatten(hasgen))
-        corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
-        corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
-        corrections = ak.unflatten(corrections, ak.num(mu.pt, axis=1))
+        #hasgen_flat = np.array(ak.flatten(hasgen))
+        #corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
+        #corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
+        #corrections = ak.unflatten(corrections, ak.num(mu.pt, axis=1))
     else:
         corrections = rochester.kScaleDT(mu.charge, mu.pt, mu.eta, mu.phi)
     return (mu.pt * corrections)
@@ -724,8 +888,8 @@ def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
 def GetClopperPearsonInterval(hnum, hden):
     ''' Compute Clopper-Pearson interval from numerator and denominator histograms '''
-    num = list(hnum.values(flow=True)[()])
-    den = list(hden.values(flow=True)[()])
+    num = list(hnum.values()[()])
+    den = list(hden.values()[()])
     if isinstance(num, list) and isinstance(num[0], np.ndarray):
         for i in range(len(num)):
             num[i] = np.array(StackOverUnderflow(list(num[i])), dtype=float)
@@ -739,7 +903,7 @@ def GetClopperPearsonInterval(hnum, hden):
     den = np.array(den)
     num[num>den] = den[num > den]
     down, up = clopper_pearson_interval(num, den)
-    ratio = np.array(num, dtype=float) / den
+    ratio = ak.Array(num) / den
     return [ratio, down, up]
 def GetEff(num, den):
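For context (an illustrative sketch, not code from the patch): the clopper_pearson_interval used above is the standard exact binomial interval, which can be written directly with the scipy.stats import this patch adds; _coverage1sd is presumably the one-sigma coverage of about 0.6827:

import numpy as np
import scipy.stats

def clopper_pearson_sketch(num, denom, coverage=0.6826894921370859):
    # exact (Clopper-Pearson) interval for num passing out of denom trials
    lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
    hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
    lo = np.where(np.isnan(lo), 0.0, lo)   # beta.ppf is nan when num == 0
    hi = np.where(np.isnan(hi), 1.0, hi)   # ... and when num == denom
    return lo, hi

print(clopper_pearson_sketch(np.array([3.0, 0.0]), np.array([10.0, 5.0])))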
@@ -790,19 +954,24 @@ def GetTriggerSF(year, events, lep0, lep1):
     ls = []
     for syst in [0,1]:
         #2l
-        SF_ee = np.where((events.is2l & events.is_ee), LoadTriggerSF(year,ch='2l',flav='ee')[syst](lep0.pt,lep1.pt), 1.0)
-        SF_em = np.where((events.is2l & events.is_em), LoadTriggerSF(year,ch='2l',flav='em')[syst](lep0.pt,lep1.pt), 1.0)
-        SF_mm = np.where((events.is2l & events.is_mm), LoadTriggerSF(year,ch='2l',flav='mm')[syst](lep0.pt,lep1.pt), 1.0)
+        '''
+        #TODO implement this properly
+        SF_ee = ak.where((events.is2l & events.is_ee), LoadTriggerSF(year,ch='2l',flav='ee')[syst](lep0.pt,lep1.pt), 1.0)
+        SF_em = ak.where((events.is2l & events.is_em), LoadTriggerSF(year,ch='2l',flav='em')[syst](lep0.pt,lep1.pt), 1.0)
+        SF_mm = ak.where((events.is2l & events.is_mm), LoadTriggerSF(year,ch='2l',flav='mm')[syst](lep0.pt,lep1.pt), 1.0)
+        '''
         #3l
         '''
-        SF_eee=np.where((events.is3l & events.is_eee),LoadTriggerSF(year,ch='3l',flav='eee')[syst](lep0.pt,lep0.eta),1.0)
-        SF_eem=np.where((events.is3l & events.is_eem),LoadTriggerSF(year,ch='3l',flav='eem')[syst](lep0.pt,lep0.eta),1.0)
-        SF_emm=np.where((events.is3l & events.is_emm),LoadTriggerSF(year,ch='3l',flav='emm')[syst](lep0.pt,lep0.eta),1.0)
-        SF_mmm=np.where((events.is3l & events.is_mmm),LoadTriggerSF(year,ch='3l',flav='mmm')[syst](lep0.pt,lep0.eta),1.0)
+        SF_eee=ak.where((events.is3l & events.is_eee),LoadTriggerSF(year,ch='3l',flav='eee')[syst](lep0.pt,lep0.eta),1.0)
+        SF_eem=ak.where((events.is3l & events.is_eem),LoadTriggerSF(year,ch='3l',flav='eem')[syst](lep0.pt,lep0.eta),1.0)
+        SF_emm=ak.where((events.is3l & events.is_emm),LoadTriggerSF(year,ch='3l',flav='emm')[syst](lep0.pt,lep0.eta),1.0)
+        SF_mmm=ak.where((events.is3l & events.is_mmm),LoadTriggerSF(year,ch='3l',flav='mmm')[syst](lep0.pt,lep0.eta),1.0)
         ls.append(SF_ee*SF_em*SF_mm*SF_eee*SF_eem*SF_emm*SF_mmm)
         '''
-        ls.append(SF_ee * SF_em * SF_mm)
-    ls[1] = np.where(ls[1] == 1.0, 0.0, ls[1]) # stat unc. down
+        #ls.append(SF_ee * SF_em * SF_mm)
+        ls.append(1) # TODO restore line above
+    #ls[1] = ak.where(ls[1] == 1.0, 0.0, ls[1]) # stat unc. down
+    ls[1] = 0.0 if ls[1] == 1.0 else ls[1] # stat unc. down
     events['trigger_sf'] = ls[0] # nominal
     events['trigger_sfDown'] = ls[0] - np.sqrt(ls[1] * ls[1] + ls[0]*0.02*ls[0]*0.02)
     events['trigger_sfUp'] = ls[0] + np.sqrt(ls[1] * ls[1] + ls[0]*0.02*ls[0]*0.02)

From 5ed215f9750302dd702788abd514c526bc7d3a35 Mon Sep 17 00:00:00 2001
From: "Brent R. Yates"
Date: Tue, 9 Jul 2024 16:39:56 -0400
Subject: [PATCH 14/16] Fix lint

---
 analysis/topeft_run2/analysis_processor.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py
index 931e64ed5..9f0ce43e6 100644
--- a/analysis/topeft_run2/analysis_processor.py
+++ b/analysis/topeft_run2/analysis_processor.py
@@ -4,8 +4,6 @@
 import coffea
 import numpy as np
 import dask_awkward as dak
-import awkward as ak
-import dask.array as da
 import hist
 from topcoffea.modules.histEFT import HistEFT

From 680f239198bb74041355bb4215af674d2e7171f Mon Sep 17 00:00:00 2001
From: "Brent R. Yates"
Date: Tue, 9 Jul 2024 16:41:59 -0400
Subject: [PATCH 15/16] Another lint change

---
 analysis/topeft_run2/run_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/analysis/topeft_run2/run_analysis.py b/analysis/topeft_run2/run_analysis.py
index 21dce3d92..8ed393a24 100644
--- a/analysis/topeft_run2/run_analysis.py
+++ b/analysis/topeft_run2/run_analysis.py
@@ -14,7 +14,7 @@
 import topcoffea.modules.utils as utils
 import topcoffea.modules.remote_environment as remote_environment
-import warnings
+#import warnings
 #warnings.filterwarnings("error", module="coffea.*")
 from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
 from coffea.dataset_tools import preprocess, filter_files

From ea2e8c2dd751c5565cd5fd32b323324b2d57d1fd Mon Sep 17 00:00:00 2001
From: Brent Yates
Date: Tue, 9 Jul 2024 17:58:39 -0400
Subject: [PATCH 16/16] Remove print

---
 analysis/topeft_run2/analysis_processor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/analysis/topeft_run2/analysis_processor.py b/analysis/topeft_run2/analysis_processor.py
index 9f0ce43e6..3ae4fe2e3 100644
--- a/analysis/topeft_run2/analysis_processor.py
+++ b/analysis/topeft_run2/analysis_processor.py
@@ -470,7 +470,6 @@ def process(self, events):
                 pMC = dak.prod(bJetEff[1] [isBtagJetsMedium], axis=-1) * dak.prod((bJetEff[0] [isBtagJetsLooseNotMedium] - bJetEff[1] [isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff[0] [isNotBtagJetsLoose]), axis=-1)
                 pMC = dak.where(pMC==0,1,pMC) # removing zeroes from denominator...
                 pData = dak.prod(bJetEff_data[1] [isBtagJetsMedium], axis=-1) * dak.prod((bJetEff_data[0] [isBtagJetsLooseNotMedium] - bJetEff_data[1] [isBtagJetsLooseNotMedium]), axis=-1) * dak.prod((1-bJetEff_data[0] [isNotBtagJetsLoose]), axis=-1)
-                print(pData,pMC, '\n\n\n')
                 weights_obj_base_for_kinematic_syst.add("btagSF", (pData/pMC))
                 if self._do_systematics and syst_var=='nominal':
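To close, a plain-awkward sketch (toy efficiencies, eager ak instead of dak for readability) of the b-tag event weight that the pData/pMC product above computes: medium-tagged jets contribute their efficiency, loose-but-not-medium jets the efficiency difference, untagged jets the complement, and the event weight is the data/MC ratio of those products:

import awkward as ak

# hypothetical per-jet efficiencies, index 0 = loose WP, index 1 = medium WP
eff_mc = [ak.Array([[0.85, 0.30], [0.70]]), ak.Array([[0.65, 0.10], [0.50]])]
eff_data = [ak.Array([[0.83, 0.28], [0.72]]), ak.Array([[0.66, 0.09], [0.52]])]

is_medium = ak.Array([[True, False], [False]])
is_loose_not_medium = ak.Array([[False, True], [True]])
is_not_loose = ~(is_medium | is_loose_not_medium)

def prob(eff):
    # prod(eff_med) * prod(eff_loose - eff_med) * prod(1 - eff_loose), per event
    return (ak.prod(eff[1][is_medium], axis=-1)
            * ak.prod(eff[0][is_loose_not_medium] - eff[1][is_loose_not_medium], axis=-1)
            * ak.prod(1 - eff[0][is_not_loose], axis=-1))

p_mc = prob(eff_mc)
p_data = prob(eff_data)
btag_weight = p_data / ak.where(p_mc == 0, 1, p_mc)   # per-event weight, zeros removed from the denominator
print(btag_weight.tolist())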