From 98b8c4e805faada8834dd61e6e51833cf7310b3d Mon Sep 17 00:00:00 2001 From: Giorgi Lomia Date: Tue, 12 Oct 2021 20:57:51 +0000 Subject: [PATCH] Starting to move tools into RDGInspection.h --- libtsuba/include/tsuba/RDGInspection.h | 265 +++++++++++++++++++++++++ libtsuba/src/RDGInspection.cpp | 31 +++ tools/graph-remap/graph-remap.cpp | 43 +--- tools/graph-stats/graph-stats.cpp | 261 +++--------------------- 4 files changed, 331 insertions(+), 269 deletions(-) create mode 100644 libtsuba/include/tsuba/RDGInspection.h create mode 100644 libtsuba/src/RDGInspection.cpp diff --git a/libtsuba/include/tsuba/RDGInspection.h b/libtsuba/include/tsuba/RDGInspection.h new file mode 100644 index 0000000000..081e6566f5 --- /dev/null +++ b/libtsuba/include/tsuba/RDGInspection.h @@ -0,0 +1,265 @@ +#ifndef KATANA_LIBTSUBA_RDGINSPECTION_H_ +#define KATANA_LIBTSUBA_RDGINSPECTION_H_ + +#include +#include +#include + +#include "katana/BufferedGraph.h" +#include "katana/FileGraph.h" +#include "katana/Galois.h" +#include "katana/LCGraph.h" +#include "katana/OfflineGraph.h" +#include "llvm/Support/CommandLine.h" + +namespace tsuba { + +namespace cll = llvm::cl; + +enum StatMode { + degreehist, + degrees, + maxDegreeNode, + dsthist, + indegreehist, + sortedlogoffsethist, + sparsityPattern, + summary +}; + +typedef katana::OfflineGraph Graph; +typedef Graph::GraphNode GNode; + +using Writer = katana::FileGraphWriter; + +void +doSummary(Graph& graph) { + std::cout << "NumNodes: " << graph.size() << "\n"; + std::cout << "NumEdges: " << graph.sizeEdges() << "\n"; + std::cout << "SizeofEdge: " << graph.edgeSize() << "\n"; +} + +void +doDegrees(Graph& graph) { + for (auto n : graph) { + std::cout << graph.edges(n).size() << "\n"; + } +} + +void +findMaxDegreeNode(Graph& graph) { + uint64_t nodeID = 0; + size_t MaxDegree = 0; + uint64_t MaxDegreeNode = 0; + for (auto n : graph) { + size_t degree = graph.edges(n).size(); + if (MaxDegree < degree) { + MaxDegree = degree; + MaxDegreeNode = nodeID; + } + ++nodeID; + } + std::cout << "MaxDegreeNode : " << MaxDegreeNode + << " , MaxDegree : " << MaxDegree << "\n"; +} + +void +printHistogram( + const std::string& name, std::map& hists, + const uint64_t& number_of_bins) { + auto max = hists.rbegin()->first; + if (number_of_bins <= 0) { + std::cout << name << "Bin,Start,End,Count\n"; + for (unsigned x = 0; x <= max; ++x) { + std::cout << x << ',' << x << ',' << x + 1 << ','; + if (hists.count(x)) { + std::cout << hists[x] << '\n'; + } else { + std::cout << "0\n"; + } + } + } else { + std::vector bins(number_of_bins); + auto bwidth = (max + 1) / number_of_bins; + if ((max + 1) % number_of_bins) { + ++bwidth; + } + // std::cerr << "* " << max << " " << number_of_bins << " " << bwidth << "\n"; + for (auto p : hists) { + bins.at(p.first / bwidth) += p.second; + } + std::cout << name << "Bin,Start,End,Count\n"; + for (unsigned x = 0; x < bins.size(); ++x) { + std::cout << x << ',' << x * bwidth << ',' << (x * bwidth + bwidth) << ',' + << bins[x] << '\n'; + } + } +} + +void +doSparsityPattern( + Graph& graph, const int64_t& columns, + std::function printFn) { + unsigned blockSize = (graph.size() + columns - 1) / columns; + + for (int i = 0; i < columns; ++i) { + std::vector row(columns); + auto p = katana::block_range(graph.begin(), graph.end(), i, columns); + for (auto ii = p.first, ei = p.second; ii != ei; ++ii) { + for (auto jj : graph.edges(*ii)) { + row[graph.getEdgeDst(jj) / blockSize] = true; + } + } + for (int x = 0; x < columns; ++x) { + printFn(x, i, row[x]); + } + } +} + +void +doDegreeHistogram(Graph& graph, const uint64_t& numBins) { + std::map hist; + for (auto ii : graph) { + ++hist[graph.edges(ii).size()]; + } + printHistogram("Degree", hist, numBins); +} + +void +doInDegreeHistogram(Graph& graph, const uint64_t& numBins) { + std::vector inv(graph.size()); + std::map hist; + for (auto ii : graph) { + for (auto jj : graph.edges(ii)) { + ++inv[graph.getEdgeDst(jj)]; + } + } + for (uint64_t n : inv) { + ++hist[n]; + } + printHistogram("InDegree", hist, numBins); +} + +struct EdgeComp { + typedef katana::EdgeSortValue Edge; + + bool operator()(const Edge& a, const Edge& b) const { return a.dst < b.dst; } +}; + +int +getLogIndex(ptrdiff_t x) { + int logvalue = 0; + int sign = x < 0 ? -1 : 1; + + if (x < 0) { + x = -x; + } + + while ((x >>= 1) != 0) { + ++logvalue; + } + return sign * logvalue; +} + +void +doSortedLogOffsetHistogram([[maybe_unused]] Graph& graph) { + // Graph copy; + // { + // // Original FileGraph is immutable because it is backed by a file + // copy = graph; + // } + + // std::vector > hists; + // hists.emplace_back(); + // auto hist = &hists.back(); + // int curHist = 0; + // auto p = katana::block_range( + // boost::counting_iterator(0), + // boost::counting_iterator(graph.sizeEdges()), + // curHist, + // numHist); + // for (auto ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { + // copy.sortEdges(*ii, EdgeComp()); + + // GNode last = 0; + // bool first = true; + // for (auto jj = copy.edge_begin(*ii), ej = copy.edge_end(*ii); jj != ej; + // ++jj) { + // GNode dst = copy.getEdgeDst(jj); + // ptrdiff_t diff = dst - (ptrdiff_t) last; + + // if (!first) { + // int index = getLogIndex(diff); + // ++(*hist)[index]; + // } + // first = false; + // last = dst; + // if (++p.first == p.second) { + // hists.emplace_back(); + // hist = &hists.back(); + // curHist += 1; + // p = katana::block_range( + // boost::counting_iterator(0), + // boost::counting_iterator(graph.sizeEdges()), + // curHist, + // numHist); + // } + // } + // } + + // printHistogram("LogOffset", hists); +} + +void +doDestinationHistogram(Graph& graph, const uint64_t& numBins) { + std::map hist; + for (auto ii : graph) { + for (auto jj : graph.edges(ii)) { + ++hist[graph.getEdgeDst(jj)]; + } + } + printHistogram("DestinationBin", hist, numBins); +} + +/** + * Create node map from file + */ +std::map +createNodeMap(const std::string& mappingFilename) { + katana::gInfo("Creating node map"); + // read new mapping + std::ifstream mapFile(mappingFilename); + mapFile.seekg(0, std::ios_base::end); + + int64_t endOfFile = mapFile.tellg(); + if (!mapFile) { + KATANA_DIE("failed to read file"); + } + + mapFile.seekg(0, std::ios_base::beg); + if (!mapFile) { + KATANA_DIE("failed to read file"); + } + + // remap node listed on line n in the mapping to node n + std::map remapper; + uint64_t counter = 0; + while (((int64_t)mapFile.tellg() + 1) != endOfFile) { + uint64_t nodeID; + mapFile >> nodeID; + if (!mapFile) { + KATANA_DIE("failed to read file"); + } + remapper[nodeID] = counter++; + } + + KATANA_LOG_ASSERT(remapper.size() == counter); + katana::gInfo("Remapping ", counter, " nodes"); + + katana::gInfo("Node map created"); + + return remapper; +} + +} // namespace tsuba +#endif diff --git a/libtsuba/src/RDGInspection.cpp b/libtsuba/src/RDGInspection.cpp new file mode 100644 index 0000000000..e31ad0d033 --- /dev/null +++ b/libtsuba/src/RDGInspection.cpp @@ -0,0 +1,31 @@ +/* + * This file belongs to the Galois project, a C++ library for exploiting + * parallelism. The code is being released under the terms of the 3-Clause BSD + * License (a copy is located in LICENSE.txt at the top-level directory). + * + * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. + * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS + * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF + * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF + * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH + * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances + * shall University be liable for incidental, special, indirect, direct or + * consequential damages or loss of profits, interruption of business, or + * related expenses which may arise from use of Software or Documentation, + * including but not limited to those resulting from defects in Software and/or + * Documentation, or loss or inaccuracy of data of any kind. + */ + +#include "RDGInspection.h" + +#include +#include +#include + +#include "katana/Galois.h" +#include "katana/LCGraph.h" +#include "katana/OfflineGraph.h" +#include "llvm/Support/CommandLine.h" + +namespace cll = llvm::cl; \ No newline at end of file diff --git a/tools/graph-remap/graph-remap.cpp b/tools/graph-remap/graph-remap.cpp index f05213b639..62827dc595 100644 --- a/tools/graph-remap/graph-remap.cpp +++ b/tools/graph-remap/graph-remap.cpp @@ -21,6 +21,7 @@ #include "katana/FileGraph.h" #include "katana/Galois.h" #include "llvm/Support/CommandLine.h" +#include "tsuba/RDGInspection.h" namespace cll = llvm::cl; @@ -33,52 +34,12 @@ static cll::opt outputFilename( using Writer = katana::FileGraphWriter; -/** - * Create node map from file - */ -std::map -createNodeMap() { - katana::gInfo("Creating node map"); - // read new mapping - std::ifstream mapFile(mappingFilename); - mapFile.seekg(0, std::ios_base::end); - - int64_t endOfFile = mapFile.tellg(); - if (!mapFile) { - KATANA_DIE("failed to read file"); - } - - mapFile.seekg(0, std::ios_base::beg); - if (!mapFile) { - KATANA_DIE("failed to read file"); - } - - // remap node listed on line n in the mapping to node n - std::map remapper; - uint64_t counter = 0; - while (((int64_t)mapFile.tellg() + 1) != endOfFile) { - uint64_t nodeID; - mapFile >> nodeID; - if (!mapFile) { - KATANA_DIE("failed to read file"); - } - remapper[nodeID] = counter++; - } - - KATANA_LOG_ASSERT(remapper.size() == counter); - katana::gInfo("Remapping ", counter, " nodes"); - - katana::gInfo("Node map created"); - - return remapper; -} - int main(int argc, char** argv) { katana::SharedMemSys G; llvm::cl::ParseCommandLineOptions(argc, argv); - std::map remapper = createNodeMap(); + std::map remapper = tsuba::createNodeMap(mappingFilename); katana::gInfo("Loading graph to remap"); katana::BufferedGraph graphToRemap; diff --git a/tools/graph-stats/graph-stats.cpp b/tools/graph-stats/graph-stats.cpp index 7ab12a1baa..b85b482c50 100644 --- a/tools/graph-stats/graph-stats.cpp +++ b/tools/graph-stats/graph-stats.cpp @@ -25,38 +25,28 @@ #include "katana/LCGraph.h" #include "katana/OfflineGraph.h" #include "llvm/Support/CommandLine.h" +#include "tsuba/RDGInspection.h" namespace cll = llvm::cl; -enum StatMode { - degreehist, - degrees, - maxDegreeNode, - dsthist, - indegreehist, - sortedlogoffsethist, - sparsityPattern, - summary -}; - static cll::opt inputfilename( cll::Positional, cll::desc(""), cll::Required); -static cll::list statModeList( +static cll::list statModeList( cll::desc("Available stats:"), cll::values( - clEnumVal(degreehist, "Histogram of degrees"), - clEnumVal(degrees, "Node degrees"), - clEnumVal(maxDegreeNode, "Max Degree Node"), - clEnumVal(dsthist, "Histogram of destinations"), - clEnumVal(indegreehist, "Histogram of indegrees"), + clEnumVal(tsuba::degreehist, "Histogram of degrees"), + clEnumVal(tsuba::degrees, "Node degrees"), + clEnumVal(tsuba::maxDegreeNode, "Max Degree Node"), + clEnumVal(tsuba::dsthist, "Histogram of destinations"), + clEnumVal(tsuba::indegreehist, "Histogram of indegrees"), clEnumVal( - sortedlogoffsethist, + tsuba::sortedlogoffsethist, "Histogram of neighbor offsets with sorted edges"), clEnumVal( - sparsityPattern, + tsuba::sparsityPattern, "Pattern of non-zeros when graph is " "interpreted as a sparse matrix"), - clEnumVal(summary, "Graph summary"))); + clEnumVal(tsuba::summary, "Graph summary"))); static cll::opt numBins( "numBins", cll::desc("Number of bins"), cll::init(-1)); static cll::opt columns( @@ -65,192 +55,6 @@ static cll::opt columns( typedef katana::OfflineGraph Graph; typedef Graph::GraphNode GNode; -void -doSummary(Graph& graph) { - std::cout << "NumNodes: " << graph.size() << "\n"; - std::cout << "NumEdges: " << graph.sizeEdges() << "\n"; - std::cout << "SizeofEdge: " << graph.edgeSize() << "\n"; -} - -void -doDegrees(Graph& graph) { - for (auto n : graph) { - std::cout << graph.edges(n).size() << "\n"; - } -} - -void -findMaxDegreeNode(Graph& graph) { - uint64_t nodeID = 0; - size_t MaxDegree = 0; - uint64_t MaxDegreeNode = 0; - for (auto n : graph) { - size_t degree = graph.edges(n).size(); - if (MaxDegree < degree) { - MaxDegree = degree; - MaxDegreeNode = nodeID; - } - ++nodeID; - } - std::cout << "MaxDegreeNode : " << MaxDegreeNode - << " , MaxDegree : " << MaxDegree << "\n"; -} - -void -printHistogram(const std::string& name, std::map& hists) { - auto max = hists.rbegin()->first; - if (numBins <= 0) { - std::cout << name << "Bin,Start,End,Count\n"; - for (unsigned x = 0; x <= max; ++x) { - std::cout << x << ',' << x << ',' << x + 1 << ','; - if (hists.count(x)) { - std::cout << hists[x] << '\n'; - } else { - std::cout << "0\n"; - } - } - } else { - std::vector bins(numBins); - auto bwidth = (max + 1) / numBins; - if ((max + 1) % numBins) { - ++bwidth; - } - // std::cerr << "* " << max << " " << numBins << " " << bwidth << "\n"; - for (auto p : hists) { - bins.at(p.first / bwidth) += p.second; - } - std::cout << name << "Bin,Start,End,Count\n"; - for (unsigned x = 0; x < bins.size(); ++x) { - std::cout << x << ',' << x * bwidth << ',' << (x * bwidth + bwidth) << ',' - << bins[x] << '\n'; - } - } -} - -void -doSparsityPattern( - Graph& graph, std::function printFn) { - unsigned blockSize = (graph.size() + columns - 1) / columns; - - for (int i = 0; i < columns; ++i) { - std::vector row(columns); - auto p = katana::block_range(graph.begin(), graph.end(), i, columns); - for (auto ii = p.first, ei = p.second; ii != ei; ++ii) { - for (auto jj : graph.edges(*ii)) { - row[graph.getEdgeDst(jj) / blockSize] = true; - } - } - for (int x = 0; x < columns; ++x) { - printFn(x, i, row[x]); - } - } -} - -void -doDegreeHistogram(Graph& graph) { - std::map hist; - for (auto ii : graph) { - ++hist[graph.edges(ii).size()]; - } - printHistogram("Degree", hist); -} - -void -doInDegreeHistogram(Graph& graph) { - std::vector inv(graph.size()); - std::map hist; - for (auto ii : graph) { - for (auto jj : graph.edges(ii)) { - ++inv[graph.getEdgeDst(jj)]; - } - } - for (uint64_t n : inv) { - ++hist[n]; - } - printHistogram("InDegree", hist); -} - -struct EdgeComp { - typedef katana::EdgeSortValue Edge; - - bool operator()(const Edge& a, const Edge& b) const { return a.dst < b.dst; } -}; - -int -getLogIndex(ptrdiff_t x) { - int logvalue = 0; - int sign = x < 0 ? -1 : 1; - - if (x < 0) { - x = -x; - } - - while ((x >>= 1) != 0) { - ++logvalue; - } - return sign * logvalue; -} - -void -doSortedLogOffsetHistogram([[maybe_unused]] Graph& graph) { - // Graph copy; - // { - // // Original FileGraph is immutable because it is backed by a file - // copy = graph; - // } - - // std::vector > hists; - // hists.emplace_back(); - // auto hist = &hists.back(); - // int curHist = 0; - // auto p = katana::block_range( - // boost::counting_iterator(0), - // boost::counting_iterator(graph.sizeEdges()), - // curHist, - // numHist); - // for (auto ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - // copy.sortEdges(*ii, EdgeComp()); - - // GNode last = 0; - // bool first = true; - // for (auto jj = copy.edge_begin(*ii), ej = copy.edge_end(*ii); jj != ej; - // ++jj) { - // GNode dst = copy.getEdgeDst(jj); - // ptrdiff_t diff = dst - (ptrdiff_t) last; - - // if (!first) { - // int index = getLogIndex(diff); - // ++(*hist)[index]; - // } - // first = false; - // last = dst; - // if (++p.first == p.second) { - // hists.emplace_back(); - // hist = &hists.back(); - // curHist += 1; - // p = katana::block_range( - // boost::counting_iterator(0), - // boost::counting_iterator(graph.sizeEdges()), - // curHist, - // numHist); - // } - // } - // } - - // printHistogram("LogOffset", hists); -} - -void -doDestinationHistogram(Graph& graph) { - std::map hist; - for (auto ii : graph) { - for (auto jj : graph.edges(ii)) { - ++hist[graph.getEdgeDst(jj)]; - } - } - printHistogram("DestinationBin", hist); -} - int main(int argc, char** argv) { llvm::cl::ParseCommandLineOptions(argc, argv); @@ -258,38 +62,39 @@ main(int argc, char** argv) { Graph graph(inputfilename); for (unsigned i = 0; i != statModeList.size(); ++i) { switch (statModeList[i]) { - case degreehist: - doDegreeHistogram(graph); + case tsuba::degreehist: + tsuba::doDegreeHistogram(graph, numBins); break; - case degrees: - doDegrees(graph); + case tsuba::degrees: + tsuba::doDegrees(graph); break; - case maxDegreeNode: - findMaxDegreeNode(graph); + case tsuba::maxDegreeNode: + tsuba::findMaxDegreeNode(graph); break; - case dsthist: - doDestinationHistogram(graph); + case tsuba::dsthist: + tsuba::doDestinationHistogram(graph, numBins); break; - case indegreehist: - doInDegreeHistogram(graph); + case tsuba::indegreehist: + tsuba::doInDegreeHistogram(graph, numBins); break; - case sortedlogoffsethist: - doSortedLogOffsetHistogram(graph); + case tsuba::sortedlogoffsethist: + tsuba::doSortedLogOffsetHistogram(graph); break; - case sparsityPattern: { + case tsuba::sparsityPattern: { unsigned lastrow = ~0; - doSparsityPattern(graph, [&lastrow](unsigned, unsigned y, bool val) { - if (y != lastrow) { - lastrow = y; - std::cout << '\n'; - } - std::cout << (val ? 'x' : '.'); - }); + tsuba::doSparsityPattern( + graph, columns, [&lastrow](unsigned, unsigned y, bool val) { + if (y != lastrow) { + lastrow = y; + std::cout << '\n'; + } + std::cout << (val ? 'x' : '.'); + }); std::cout << '\n'; break; } - case summary: - doSummary(graph); + case tsuba::summary: + tsuba::doSummary(graph); break; default: std::cerr << "Unknown stat requested\n";