Skip to content

Commit 18d26b4

Browse files
authored
Merge pull request cms-sw#36841 from jpata/mlpf_v2_CMSSW_12_1_0_pre3_updated
Updated MLPF producer with ONNX
2 parents d1ea868 + 539f744 commit 18d26b4

File tree

11 files changed

+365
-148
lines changed

11 files changed

+365
-148
lines changed

Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ def setup_(self, step, stepName, stepDict, k, properties):
470470
if 'Reco' in step:
471471
stepDict[stepName][k] = merge([self.step3, stepDict[step][k]])
472472
def condition(self, fragment, stepList, key, hasHarvest):
473-
return fragment=="TTbar_14TeV" and '2021' in key
473+
return (fragment=="TTbar_14TeV" or fragment=="QCD_FlatPt_15_3000HS_14") and '2021PU' in key
474474

475475
upgradeWFs['mlpf'] = UpgradeWorkflow_mlpf(
476476
steps = [

RecoParticleFlow/Configuration/python/RecoParticleFlow_EventContent_cff.py

-5
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,3 @@
8181
'keep recoPFRecHits_particleFlowRecHitHGC__*',
8282
'keep *_simPFProducer_*_*'])
8383

84-
from Configuration.ProcessModifiers.mlpf_cff import mlpf
85-
from RecoParticleFlow.PFProducer.mlpf_EventContent_cff import MLPF_RECO
86-
87-
mlpf.toModify(RecoParticleFlowRECO,
88-
outputCommands = RecoParticleFlowRECO.outputCommands + MLPF_RECO.outputCommands)

RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,11 @@
102102
e.toModify(pfNoPileUp, enable = False)
103103
e.toModify(pfPileUp, enable = False)
104104

105-
#
106-
# for MLPF
105+
106+
# for MLPF, replace standard PFAlgo with the ONNX-based MLPF producer
107107
from Configuration.ProcessModifiers.mlpf_cff import mlpf
108108
from RecoParticleFlow.PFProducer.mlpfProducer_cfi import mlpfProducer
109-
110-
_mlpfTask = cms.Task(mlpfProducer, particleFlowRecoTask.copy())
111-
112-
mlpf.toReplaceWith(particleFlowRecoTask, _mlpfTask)
109+
mlpf.toReplaceWith(particleFlowTmp, mlpfProducer)
113110

114111
#
115112
# switch from pfTICL to simPF

RecoParticleFlow/PFProducer/BuildFile.xml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
<use name="boost"/>
1717
<use name="clhep"/>
1818
<use name="rootmath"/>
19+
<use name="onnxruntime"/>
1920
<export>
2021
<lib name="1"/>
2122
</export>

RecoParticleFlow/PFProducer/interface/MLPFModel.h

+34-19
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,40 @@
77

88
namespace reco::mlpf {
99
//The model takes the following number of features for each input PFElement
10-
static constexpr unsigned int NUM_ELEMENT_FEATURES = 15;
10+
static constexpr unsigned int NUM_ELEMENT_FEATURES = 25;
11+
static constexpr unsigned int NUM_OUTPUT_FEATURES = 14;
1112

1213
//these are defined at model creation time and set the random LSH codebook size
13-
static constexpr int NUM_MAX_ELEMENTS_BATCH = 20000;
14-
static constexpr int LSH_BIN_SIZE = 100;
14+
static constexpr int LSH_BIN_SIZE = 64;
15+
static constexpr int NUM_MAX_ELEMENTS_BATCH = 200 * LSH_BIN_SIZE;
1516

16-
//In CPU mode, we only want to evaluate each event separately
17+
//In CPU mode, we want to evaluate each event separately
1718
static constexpr int BATCH_SIZE = 1;
1819

19-
//The model has 12 outputs for each particle:
20-
// out[0-7]: particle classification logits
21-
// out[8]: regressed eta
22-
// out[9]: regressed phi
23-
// out[10]: regressed energy
24-
// out[11]: regressed charge logit
25-
static constexpr unsigned int NUM_OUTPUTS = 12;
26-
static constexpr unsigned int NUM_CLASS = 7;
27-
static constexpr unsigned int IDX_ETA = 8;
28-
static constexpr unsigned int IDX_PHI = 9;
29-
static constexpr unsigned int IDX_ENERGY = 10;
30-
static constexpr unsigned int IDX_CHARGE = 11;
20+
//The model has 14 outputs for each particle:
21+
// out[0-7]: particle classification logits for each pdgId
22+
// out[8]: regressed charge
23+
// out[9]: regressed pt
24+
// out[10]: regressed eta
25+
// out[11]: regressed sin phi
26+
// out[12]: regressed cos phi
27+
// out[13]: regressed energy
28+
static constexpr unsigned int IDX_CLASS = 7;
29+
30+
static constexpr unsigned int IDX_CHARGE = 8;
31+
32+
static constexpr unsigned int IDX_PT = 9;
33+
static constexpr unsigned int IDX_ETA = 10;
34+
static constexpr unsigned int IDX_SIN_PHI = 11;
35+
static constexpr unsigned int IDX_COS_PHI = 12;
36+
static constexpr unsigned int IDX_ENERGY = 13;
37+
38+
//for consistency with the baseline PFAlgo
39+
static constexpr float PI_MASS = 0.13957;
3140

3241
//index [0, N_pdgids) -> PDGID
3342
//this maps each ascending class index predicted by the model to the absolute value of the corresponding PDGID
34-
static const std::vector<int> pdgid_encoding = {0, 1, 2, 11, 13, 22, 130, 211};
43+
static const std::vector<int> pdgid_encoding = {0, 211, 130, 1, 2, 22, 11, 13};
3544

3645
//PFElement::type -> index [0, N_types)
3746
//this maps the type of the PFElement to an ascending index that is used by the model to distinguish between different elements
@@ -55,7 +64,13 @@ namespace reco::mlpf {
5564

5665
int argMax(std::vector<float> const& vec);
5766

58-
reco::PFCandidate makeCandidate(int pred_pid, int pred_charge, float pred_e, float pred_eta, float pred_phi);
67+
reco::PFCandidate makeCandidate(int pred_pid,
68+
int pred_charge,
69+
float pred_pt,
70+
float pred_eta,
71+
float pred_sin_phi,
72+
float pred_cos_phi,
73+
float pred_e);
5974

6075
const std::vector<const reco::PFBlockElement*> getPFElements(const reco::PFBlockCollection& blocks);
6176

@@ -64,4 +79,4 @@ namespace reco::mlpf {
6479
size_t ielem_originator);
6580
}; // namespace reco::mlpf
6681

67-
#endif
82+
#endif

RecoParticleFlow/PFProducer/plugins/BuildFile.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,6 @@
8080
<use name="RecoParticleFlow/PFProducer"/>
8181
<use name="RecoParticleFlow/PFTracking"/>
8282
<use name="RecoEcal/EgammaCoreTools"/>
83-
<use name="PhysicsTools/TensorFlow"/>
83+
<use name="PhysicsTools/ONNXRuntime"/>
8484
<flags EDM_PLUGIN="1"/>
8585
</library>

RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc

+107-67
Original file line numberDiff line numberDiff line change
@@ -4,135 +4,175 @@
44
#include "FWCore/Framework/interface/MakerMacros.h"
55

66
#include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h"
7-
#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
7+
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
88
#include "RecoParticleFlow/PFProducer/interface/MLPFModel.h"
99

10-
struct MLPFCache {
11-
const tensorflow::GraphDef* graph_def;
12-
};
10+
#include "DataFormats/ParticleFlowReco/interface/PFBlockElementTrack.h"
11+
12+
using namespace cms::Ort;
13+
14+
//use this to switch on detailed print statements in MLPF
15+
//#define MLPF_DEBUG
1316

14-
class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<MLPFCache> > {
17+
class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
1518
public:
16-
explicit MLPFProducer(const edm::ParameterSet&, const MLPFCache*);
19+
explicit MLPFProducer(const edm::ParameterSet&, const ONNXRuntime*);
20+
1721
void produce(edm::Event& event, const edm::EventSetup& setup) override;
1822
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
1923

2024
// static methods for handling the global cache
21-
static std::unique_ptr<MLPFCache> initializeGlobalCache(const edm::ParameterSet&);
22-
static void globalEndJob(MLPFCache*);
25+
static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
26+
static void globalEndJob(const ONNXRuntime*);
2327

2428
private:
2529
const edm::EDPutTokenT<reco::PFCandidateCollection> pfCandidatesPutToken_;
2630
const edm::EDGetTokenT<reco::PFBlockCollection> inputTagBlocks_;
27-
const std::string model_path_;
28-
tensorflow::Session* session_;
2931
};
3032

31-
MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const MLPFCache* cache)
33+
MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const ONNXRuntime* cache)
3234
: pfCandidatesPutToken_{produces<reco::PFCandidateCollection>()},
33-
inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))),
34-
model_path_(cfg.getParameter<std::string>("model_path")) {
35-
session_ = tensorflow::createSession(cache->graph_def);
36-
}
35+
inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))) {}
3736

3837
void MLPFProducer::produce(edm::Event& event, const edm::EventSetup& setup) {
3938
using namespace reco::mlpf;
4039

4140
const auto& blocks = event.get(inputTagBlocks_);
4241
const auto& all_elements = getPFElements(blocks);
4342

44-
const long long int num_elements_total = all_elements.size();
43+
std::vector<const reco::PFBlockElement*> selected_elements;
44+
unsigned int num_elements_total = 0;
45+
for (const auto* pelem : all_elements) {
46+
if (pelem->type() == reco::PFBlockElement::PS1 || pelem->type() == reco::PFBlockElement::PS2) {
47+
continue;
48+
}
49+
num_elements_total += 1;
50+
selected_elements.push_back(pelem);
51+
}
52+
const auto tensor_size = LSH_BIN_SIZE * std::max(2u, (num_elements_total / LSH_BIN_SIZE + 1));
4553

54+
#ifdef MLPF_DEBUG
55+
assert(num_elements_total < NUM_MAX_ELEMENTS_BATCH);
4656
//tensor size must be a multiple of the bin size and larger than the number of elements
47-
const auto tensor_size = LSH_BIN_SIZE * (num_elements_total / LSH_BIN_SIZE + 1);
4857
assert(tensor_size <= NUM_MAX_ELEMENTS_BATCH);
58+
assert(tensor_size % LSH_BIN_SIZE == 0);
59+
#endif
4960

50-
//Create the input tensor
51-
tensorflow::TensorShape shape({BATCH_SIZE, tensor_size, NUM_ELEMENT_FEATURES});
52-
tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
53-
input.flat<float>().setZero();
61+
#ifdef MLPF_DEBUG
62+
std::cout << "tensor_size=" << tensor_size << std::endl;
63+
#endif
5464

55-
//Fill the input tensor
65+
//Fill the input tensor (batch, elems, features) = (1, tensor_size, NUM_ELEMENT_FEATURES)
66+
std::vector<std::vector<float>> inputs(1, std::vector<float>(NUM_ELEMENT_FEATURES * tensor_size, 0.0));
5667
unsigned int ielem = 0;
57-
for (const auto* pelem : all_elements) {
68+
for (const auto* pelem : selected_elements) {
69+
if (ielem > tensor_size) {
70+
continue;
71+
}
72+
5873
const auto& elem = *pelem;
5974

6075
//prepare the input array from the PFElement
6176
const auto& props = getElementProperties(elem);
6277

6378
//copy features to the input array
6479
for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
65-
input.tensor<float, 3>()(0, ielem, iprop) = normalize(props[iprop]);
80+
inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] = normalize(props[iprop]);
6681
}
6782
ielem += 1;
6883
}
6984

70-
//TF model input and output tensor names
71-
const tensorflow::NamedTensorList input_list = {{"x:0", input}};
72-
const std::vector<std::string> output_names = {"Identity:0"};
73-
74-
//Prepare the output tensor
75-
std::vector<tensorflow::Tensor> outputs;
76-
7785
//run the GNN inference on the input tensor and retrieve the output tensor.
78-
//Note that the GNN enables information transfer between the input PFElements,
79-
//such that the output ML-PFCandidates are in general combinations of the input PFElements, in the form of
80-
//y_out = Adj.x_in, where x_in is input matrix (num_elem, NUM_ELEMENT_FEATURES), y_out is the output matrix (num_elem, NUM_OUTPUT_FEATURES)
81-
//and Adj is an adjacency matrix between the elements that is constructed on the fly during model inference.
82-
tensorflow::run(session_, input_list, output_names, &outputs);
83-
84-
//process the output tensor to ML-PFCandidates.
85-
//The output can contain up to num_elem particles, with predicted PDGID=0 corresponding to no particles predicted.
86-
const auto out_arr = outputs[0].tensor<float, 3>();
86+
const auto& outputs = globalCache()->run({"x:0"}, inputs, {{1, tensor_size, NUM_ELEMENT_FEATURES}});
87+
const auto& output = outputs[0];
88+
#ifdef MLPF_DEBUG
89+
assert(output.size() == tensor_size * NUM_OUTPUT_FEATURES);
90+
#endif
8791

8892
std::vector<reco::PFCandidate> pOutputCandidateCollection;
89-
for (unsigned int ielem = 0; ielem < all_elements.size(); ielem++) {
90-
//get the coefficients in the output corresponding to the class probabilities (raw logits)
91-
std::vector<float> pred_id_logits;
92-
for (unsigned int idx_id = 0; idx_id <= NUM_CLASS; idx_id++) {
93-
pred_id_logits.push_back(out_arr(0, ielem, idx_id));
93+
for (size_t ielem = 0; ielem < num_elements_total; ielem++) {
94+
std::vector<float> pred_id_probas(IDX_CLASS + 1, 0.0);
95+
const reco::PFBlockElement* elem = selected_elements[ielem];
96+
97+
for (unsigned int idx_id = 0; idx_id <= IDX_CLASS; idx_id++) {
98+
auto pred_proba = output[ielem * NUM_OUTPUT_FEATURES + idx_id];
99+
#ifdef MLPF_DEBUG
100+
assert(!std::isnan(pred_proba));
101+
#endif
102+
pred_id_probas[idx_id] = pred_proba;
94103
}
95104

105+
auto imax = argMax(pred_id_probas);
106+
96107
//get the most probable class PDGID
97-
int pred_pid = pdgid_encoding[argMax(pred_id_logits)];
108+
int pred_pid = pdgid_encoding[imax];
98109

99-
//get the predicted momentum components
100-
float pred_eta = out_arr(0, ielem, IDX_ETA);
101-
float pred_phi = out_arr(0, ielem, IDX_PHI);
102-
float pred_charge = out_arr(0, ielem, IDX_CHARGE);
103-
float pred_e = out_arr(0, ielem, IDX_ENERGY);
110+
#ifdef MLPF_DEBUG
111+
std::cout << "ielem=" << ielem << " inputs:";
112+
for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
113+
std::cout << iprop << "=" << inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] << " ";
114+
}
115+
std::cout << std::endl;
116+
std::cout << "ielem=" << ielem << " pred: pid=" << pred_pid << std::endl;
117+
#endif
104118

105119
//a particle was predicted for this PFElement, otherwise it was a spectator
106120
if (pred_pid != 0) {
107-
auto cand = makeCandidate(pred_pid, pred_charge, pred_e, pred_eta, pred_phi);
108-
setCandidateRefs(cand, all_elements, ielem);
121+
//muons and charged hadrons should only come from tracks, otherwise we won't have track references to pass downstream
122+
if (((pred_pid == 13) || (pred_pid == 211)) && elem->type() != reco::PFBlockElement::TRACK) {
123+
pred_pid = 130;
124+
}
125+
126+
if (elem->type() == reco::PFBlockElement::TRACK) {
127+
const auto* eltTrack = dynamic_cast<const reco::PFBlockElementTrack*>(elem);
128+
129+
//a track with no muon ref should not produce a muon candidate, instead we interpret it as a charged hadron
130+
if (pred_pid == 13 && eltTrack->muonRef().isNull()) {
131+
pred_pid = 211;
132+
}
133+
134+
//tracks linked to displaced vertices still need their references debugged downstream, so for the moment we treat them as neutrals (pdgId 130)
135+
if ((pred_pid == 211) && (eltTrack->isLinkedToDisplacedVertex())) {
136+
pred_pid = 130;
137+
}
138+
}
139+
140+
//get the predicted momentum components
141+
float pred_pt = output[ielem * NUM_OUTPUT_FEATURES + IDX_PT];
142+
float pred_eta = output[ielem * NUM_OUTPUT_FEATURES + IDX_ETA];
143+
float pred_sin_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_SIN_PHI];
144+
float pred_cos_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_COS_PHI];
145+
float pred_e = output[ielem * NUM_OUTPUT_FEATURES + IDX_ENERGY];
146+
float pred_charge = output[ielem * NUM_OUTPUT_FEATURES + IDX_CHARGE];
147+
148+
auto cand = makeCandidate(pred_pid, pred_charge, pred_pt, pred_eta, pred_sin_phi, pred_cos_phi, pred_e);
149+
setCandidateRefs(cand, selected_elements, ielem);
109150
pOutputCandidateCollection.push_back(cand);
151+
152+
#ifdef MLPF_DEBUG
153+
std::cout << "ielem=" << ielem << " cand: pid=" << cand.pdgId() << " E=" << cand.energy() << " pt=" << cand.pt()
154+
<< " eta=" << cand.eta() << " phi=" << cand.phi() << " charge=" << cand.charge() << std::endl;
155+
#endif
110156
}
111157
} //loop over PFElements
112158

113159
event.emplace(pfCandidatesPutToken_, pOutputCandidateCollection);
114160
}
115161

116-
std::unique_ptr<MLPFCache> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
117-
// this method is supposed to create, initialize and return a MLPFCache instance
118-
std::unique_ptr<MLPFCache> cache = std::make_unique<MLPFCache>();
119-
120-
//load the frozen TF graph of the GNN model
121-
std::string path = params.getParameter<std::string>("model_path");
122-
auto fullPath = edm::FileInPath(path).fullPath();
123-
LogDebug("MLPFProducer") << "Initializing MLPF model from " << fullPath;
124-
125-
cache->graph_def = tensorflow::loadGraphDef(fullPath);
126-
127-
return cache;
162+
std::unique_ptr<ONNXRuntime> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
163+
return std::make_unique<ONNXRuntime>(params.getParameter<edm::FileInPath>("model_path").fullPath());
128164
}
129165

130-
void MLPFProducer::globalEndJob(MLPFCache* cache) { delete cache->graph_def; }
166+
void MLPFProducer::globalEndJob(const ONNXRuntime* cache) {}
131167

132168
void MLPFProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
133169
edm::ParameterSetDescription desc;
134170
desc.add<edm::InputTag>("src", edm::InputTag("particleFlowBlock"));
135-
desc.add<std::string>("model_path", "RecoParticleFlow/PFProducer/data/mlpf/mlpf_2020_11_04.pb");
171+
desc.add<edm::FileInPath>(
172+
"model_path",
173+
edm::FileInPath(
174+
"RecoParticleFlow/PFProducer/data/mlpf/"
175+
"mlpf_2021_11_16__no_einsum__all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010.onnx"));
136176
descriptions.addWithDefaultLabel(desc);
137177
}
138178

RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py

-8
This file was deleted.

0 commit comments

Comments
 (0)