|
25 | 25 | #include "boost/bind.hpp"
|
26 | 26 | #include "boost/ref.hpp"
|
27 | 27 |
|
| 28 | +#include "boost/graph/graph_traits.hpp" |
| 29 | +#include "boost/graph/adjacency_list.hpp" |
| 30 | +#include "boost/graph/depth_first_search.hpp" |
| 31 | +#include "boost/graph/visitors.hpp" |
| 32 | + |
| 33 | + |
28 | 34 | #include <algorithm>
|
29 | 35 | #include <cassert>
|
30 | 36 | #include <cstdlib>
|
|
33 | 39 | #include <list>
|
34 | 40 | #include <map>
|
35 | 41 | #include <exception>
|
| 42 | +#include <sstream> |
36 | 43 |
|
37 | 44 | namespace edm {
|
38 | 45 | namespace {
|
@@ -888,6 +895,8 @@ namespace edm {
|
888 | 895 | }
|
889 | 896 |
|
890 | 897 | void Schedule::beginJob(ProductRegistry const& iRegistry) {
|
| 898 | + checkForCorrectness(); |
| 899 | + |
891 | 900 | globalSchedule_->beginJob(iRegistry);
|
892 | 901 | }
|
893 | 902 |
|
@@ -1026,4 +1035,266 @@ namespace edm {
|
1026 | 1035 | s->clearCounters();
|
1027 | 1036 | }
|
1028 | 1037 | }
|
| 1038 | + |
| 1039 | + //==================================== |
| 1040 | + // Schedule::checkForCorrectness algorithm |
| 1041 | + // |
| 1042 | + // The code creates a 'dependency' graph between all |
| 1043 | + // modules. A module depends on another module if |
| 1044 | + // 1) it 'consumes' data produced by that module |
| 1045 | + // 2) it appears directly after the module within a Path |
| 1046 | + // |
| 1047 | + // If there is a cycle in the 'dependency' graph then |
| 1048 | + // the schedule may be unrunnable. The schedule is still |
| 1049 | + // runnable if all cycles have at least two edges which |
| 1050 | + // connect modules only by Path dependencies (i.e. not |
| 1051 | + // linked by a data dependency). |
| 1052 | + // |
| 1053 | + // Example 1: |
| 1054 | + // C consumes data from B |
| 1055 | + // Path 1: A + B + C |
| 1056 | + // Path 2: B + C + A |
| 1057 | + // |
| 1058 | + // Cycle: A after C [p2], C consumes B, B after A [p1] |
| 1059 | + // Since this cycle has 2 path only edges it is OK since |
| 1060 | + // A and (B+C) are independent so their run order doesn't matter |
| 1061 | + // |
| 1062 | + // Example 2: |
| 1063 | + // B consumes A |
| 1064 | + // C consumes B |
| 1065 | + // Path: C + A |
| 1066 | + // |
| 1067 | + // Cycle: A after C [p], C consumes B, B consumes A |
| 1068 | + // Since this cycle has 1 path only edge it is unrunnable. |
| 1069 | + // |
| 1070 | + // Example 3: |
| 1071 | + // A consumes B |
| 1072 | + // B consumes C |
| 1073 | + // C consumes A |
| 1074 | + // (no Path since unscheduled execution) |
| 1075 | + // |
| 1076 | + // Cycle: A consumes B, B consumes C, C consumes A |
| 1077 | + // Since this cycle has 0 path only edges it is unrunnable. |
| 1078 | + //==================================== |
| 1079 | + |
| 1080 | + namespace { |
| 1081 | + typedef std::pair<unsigned int, unsigned int> SimpleEdge; |
| 1082 | + typedef std::map<SimpleEdge, std::vector<unsigned int>> EdgeToPathMap; |
| 1083 | + |
| 1084 | + typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS> Graph; |
| 1085 | + |
| 1086 | + typedef boost::graph_traits<Graph>::edge_descriptor Edge; |
| 1087 | + struct cycle_detector : public boost::dfs_visitor<> { |
| 1088 | + |
| 1089 | + cycle_detector(EdgeToPathMap const& iEdgeToPathMap, |
| 1090 | + std::vector<std::string> const& iPathNames, |
| 1091 | + std::map<std::string,unsigned int> const& iModuleNamesToIndex): |
| 1092 | + m_edgeToPathMap(iEdgeToPathMap), |
| 1093 | + m_pathNames(iPathNames), |
| 1094 | + m_namesToIndex(iModuleNamesToIndex){} |
| 1095 | + |
| 1096 | + void tree_edge(Edge iEdge, Graph const&) { |
| 1097 | + m_stack.push_back(iEdge); |
| 1098 | + } |
| 1099 | + |
| 1100 | + void finish_edge(Edge iEdge, Graph const& iGraph) { |
| 1101 | + if(not m_stack.empty()) { |
| 1102 | + if (iEdge == m_stack.back()) { |
| 1103 | + m_stack.pop_back(); |
| 1104 | + } |
| 1105 | + } |
| 1106 | + } |
| 1107 | + |
| 1108 | + //Called if a cycle happens |
| 1109 | + void back_edge(Edge iEdge, Graph const& iGraph) { |
| 1110 | + //NOTE: If the path containing the cycle contains two or more |
| 1111 | + // path only edges then there is no problem |
| 1112 | + |
| 1113 | + typedef typename boost::property_map<Graph, boost::vertex_index_t>::type IndexMap; |
| 1114 | + IndexMap const& index = get(boost::vertex_index, iGraph); |
| 1115 | + |
| 1116 | + unsigned int vertex = index[target(iEdge,iGraph)]; |
| 1117 | + |
| 1118 | + //Find last edge which starts with this vertex |
| 1119 | + std::list<Edge>::iterator itFirst = m_stack.begin(); |
| 1120 | + { |
| 1121 | + bool seenVertex = false; |
| 1122 | + while(itFirst != m_stack.end()) { |
| 1123 | + if(not seenVertex) { |
| 1124 | + if(index[source(*itFirst,iGraph)] == vertex) { |
| 1125 | + seenVertex = true; |
| 1126 | + } |
| 1127 | + } else |
| 1128 | + if (index[source(*itFirst,iGraph)] != vertex) { |
| 1129 | + break; |
| 1130 | + } |
| 1131 | + ++itFirst; |
| 1132 | + } |
| 1133 | + if(itFirst != m_stack.begin()) { |
| 1134 | + --itFirst; |
| 1135 | + } |
| 1136 | + } |
| 1137 | + //This edge has not been added to the stack yet |
| 1138 | + // making a copy allows us to add it in but not worry |
| 1139 | + // about removing it at the end of the routine |
| 1140 | + std::vector<Edge> tempStack; |
| 1141 | + tempStack.reserve(m_stack.size()+1); |
| 1142 | + tempStack.insert(tempStack.end(),itFirst,m_stack.end()); |
| 1143 | + tempStack.emplace_back(iEdge); |
| 1144 | + |
| 1145 | + unsigned int nPathDependencyOnly =0; |
| 1146 | + for(auto const& edge: tempStack) { |
| 1147 | + unsigned int in =index[source(edge,iGraph)]; |
| 1148 | + unsigned int out =index[target(edge,iGraph)]; |
| 1149 | + |
| 1150 | + auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out)); |
| 1151 | + bool pathDependencyOnly = true; |
| 1152 | + for(auto dependency : iFound->second) { |
| 1153 | + if (dependency == std::numeric_limits<unsigned int>::max()) { |
| 1154 | + pathDependencyOnly = false; |
| 1155 | + break; |
| 1156 | + } |
| 1157 | + } |
| 1158 | + if (pathDependencyOnly) { |
| 1159 | + ++nPathDependencyOnly; |
| 1160 | + } |
| 1161 | + } |
| 1162 | + if(nPathDependencyOnly < 2) { |
| 1163 | + reportError(tempStack,index,iGraph); |
| 1164 | + } |
| 1165 | + } |
| 1166 | + private: |
| 1167 | + std::string const& pathName(unsigned int iIndex) const { |
| 1168 | + return m_pathNames[iIndex]; |
| 1169 | + } |
| 1170 | + |
| 1171 | + std::string const& moduleName(unsigned int iIndex) const { |
| 1172 | + for(auto const& item : m_namesToIndex) { |
| 1173 | + if(item.second == iIndex) { |
| 1174 | + return item.first; |
| 1175 | + } |
| 1176 | + } |
| 1177 | + assert(false); |
| 1178 | + } |
| 1179 | + |
| 1180 | + void |
| 1181 | + reportError(std::vector<Edge>const& iEdges, |
| 1182 | + boost::property_map<Graph, boost::vertex_index_t>::type const& iIndex, |
| 1183 | + Graph const& iGraph) const { |
| 1184 | + std::stringstream oStream; |
| 1185 | + oStream <<"Module run order problem found: \n"; |
| 1186 | + bool first_edge = true; |
| 1187 | + for(auto const& edge: iEdges) { |
| 1188 | + unsigned int in =iIndex[source(edge,iGraph)]; |
| 1189 | + unsigned int out =iIndex[target(edge,iGraph)]; |
| 1190 | + |
| 1191 | + if(first_edge) { |
| 1192 | + first_edge = false; |
| 1193 | + } else { |
| 1194 | + oStream<<", "; |
| 1195 | + } |
| 1196 | + oStream <<moduleName(in); |
| 1197 | + |
| 1198 | + auto iFound = m_edgeToPathMap.find(SimpleEdge(in,out)); |
| 1199 | + bool pathDependencyOnly = true; |
| 1200 | + for(auto dependency : iFound->second) { |
| 1201 | + if (dependency == std::numeric_limits<unsigned int>::max()) { |
| 1202 | + pathDependencyOnly = false; |
| 1203 | + break; |
| 1204 | + } |
| 1205 | + } |
| 1206 | + if (pathDependencyOnly) { |
| 1207 | + oStream <<" after "<<moduleName(out)<<" [path "<<pathName(iFound->second[0])<<"]"; |
| 1208 | + } else { |
| 1209 | + oStream <<" consumes "<<moduleName(out); |
| 1210 | + } |
| 1211 | + } |
| 1212 | + oStream<<"\n Running in the threaded framework would lead to indeterminate results." |
| 1213 | + "\n Please change order of modules in mentioned Path(s) to avoid inconsistent module ordering."; |
| 1214 | + |
| 1215 | + LogError("UnrunnableSchedule")<<oStream.str(); |
| 1216 | + } |
| 1217 | + |
| 1218 | + EdgeToPathMap const& m_edgeToPathMap; |
| 1219 | + std::vector<std::string> const& m_pathNames; |
| 1220 | + std::map<std::string,unsigned int> m_namesToIndex; |
| 1221 | + |
| 1222 | + std::list<Edge> m_stack; |
| 1223 | + }; |
| 1224 | + } |
| 1225 | + |
| 1226 | + void |
| 1227 | + Schedule::checkForCorrectness() const |
| 1228 | + { |
| 1229 | + //Need to lookup names to ids quickly |
| 1230 | + std::map<std::string,unsigned int> moduleNamesToIndex; |
| 1231 | + for(auto worker: allWorkers()) { |
| 1232 | + moduleNamesToIndex.insert( std::make_pair(worker->description().moduleLabel(), |
| 1233 | + worker->description().id())); |
| 1234 | + } |
| 1235 | + |
| 1236 | + //If a module to module dependency comes from a path, remember which path |
| 1237 | + EdgeToPathMap edgeToPathMap; |
| 1238 | + |
| 1239 | + //determine the path dependencies |
| 1240 | + std::vector<std::string> pathNames; |
| 1241 | + { |
| 1242 | + streamSchedules_[0]->availablePaths(pathNames); |
| 1243 | + |
| 1244 | + std::vector<std::string> moduleNames; |
| 1245 | + std::vector<std::string> reducedModuleNames; |
| 1246 | + unsigned int pathIndex=0; |
| 1247 | + for(auto const& path: pathNames) { |
| 1248 | + moduleNames.clear(); |
| 1249 | + reducedModuleNames.clear(); |
| 1250 | + std::set<std::string> alreadySeenNames; |
| 1251 | + |
| 1252 | + streamSchedules_[0]->modulesInPath(path,moduleNames); |
| 1253 | + std::string lastModuleName; |
| 1254 | + unsigned int lastModuleIndex; |
| 1255 | + for(auto const& name: moduleNames) { |
| 1256 | + auto found = alreadySeenNames.insert(name); |
| 1257 | + if(found.second) { |
| 1258 | + //first time for this path |
| 1259 | + unsigned int moduleIndex = moduleNamesToIndex[name]; |
| 1260 | + if(not lastModuleName.empty() ) { |
| 1261 | + edgeToPathMap[std::make_pair(moduleIndex,lastModuleIndex)].push_back(pathIndex); |
| 1262 | + } |
| 1263 | + lastModuleName = name; |
| 1264 | + lastModuleIndex = moduleIndex; |
| 1265 | + } |
| 1266 | + } |
| 1267 | + ++pathIndex; |
| 1268 | + } |
| 1269 | + } |
| 1270 | + { |
| 1271 | + std::vector<const char*> dependentModules; |
| 1272 | + //determine the data dependencies |
| 1273 | + for(auto const& worker: allWorkers()) { |
| 1274 | + dependentModules.clear(); |
| 1275 | + //NOTE: what about aliases? |
| 1276 | + worker->modulesDependentUpon(dependentModules); |
| 1277 | + auto found = moduleNamesToIndex.find(worker->description().moduleLabel()); |
| 1278 | + if (found == moduleNamesToIndex.end()) { |
| 1279 | + //The module was from a previous process |
| 1280 | + continue; |
| 1281 | + } |
| 1282 | + unsigned int moduleIndex = found->second; |
| 1283 | + for(auto name: dependentModules) { |
| 1284 | + edgeToPathMap[std::make_pair(moduleIndex, moduleNamesToIndex[name])].push_back(std::numeric_limits<unsigned int>::max()); |
| 1285 | + } |
| 1286 | + } |
| 1287 | + } |
| 1288 | + //Now use boost graph library to find cycles in the dependencies |
| 1289 | + std::vector<SimpleEdge> outList; |
| 1290 | + outList.reserve(edgeToPathMap.size()); |
| 1291 | + for(auto const& edgeInfo: edgeToPathMap) { |
| 1292 | + outList.push_back(edgeInfo.first); |
| 1293 | + } |
| 1294 | + |
| 1295 | + Graph g(outList.begin(),outList.end(), moduleNamesToIndex.size()); |
| 1296 | + |
| 1297 | + cycle_detector detector(edgeToPathMap,pathNames,moduleNamesToIndex); |
| 1298 | + boost::depth_first_search(g,boost::visitor(detector)); |
| 1299 | + } |
1029 | 1300 | }
|
0 commit comments