Skip to content

Commit

Permalink
hogwild path cover of the graph
Browse files Browse the repository at this point in the history
  • Loading branch information
ekg committed Mar 21, 2021
1 parent a1526e9 commit 1a5eaea
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 27 deletions.
2 changes: 1 addition & 1 deletion deps/libhandlegraph
Submodule libhandlegraph updated 60 files
+73 −17 CMakeLists.txt
+80 −0 src/append_graph.cpp
+23 −0 src/apply_orientations.cpp
+165 −0 src/are_equivalent.cpp
+65 −0 src/copy_graph.cpp
+77 −0 src/count_walks.cpp
+481 −0 src/dagify.cpp
+23 −0 src/deletable_handle_graph.cpp
+125 −0 src/dijkstra.cpp
+285 −0 src/eades_algorithm.cpp
+31 −0 src/extend.cpp
+35 −0 src/find_shortest_paths.cpp
+65 −0 src/find_tips.cpp
+0 −226 src/handle.cpp
+99 −0 src/handle_graph.cpp
+28 −0 src/include/handlegraph/algorithms/append_graph.hpp
+27 −0 src/include/handlegraph/algorithms/apply_orientations.hpp
+27 −0 src/include/handlegraph/algorithms/are_equivalent.hpp
+33 −0 src/include/handlegraph/algorithms/copy_graph.hpp
+36 −0 src/include/handlegraph/algorithms/count_walks.hpp
+23 −0 src/include/handlegraph/algorithms/dagify.hpp
+45 −0 src/include/handlegraph/algorithms/dijkstra.hpp
+27 −0 src/include/handlegraph/algorithms/eades_algorithm.hpp
+22 −0 src/include/handlegraph/algorithms/extend.hpp
+27 −0 src/include/handlegraph/algorithms/find_shortest_paths.hpp
+31 −0 src/include/handlegraph/algorithms/find_tips.hpp
+27 −0 src/include/handlegraph/algorithms/is_acyclic.hpp
+42 −0 src/include/handlegraph/algorithms/is_single_stranded.hpp
+25 −0 src/include/handlegraph/algorithms/reverse_complement.hpp
+29 −0 src/include/handlegraph/algorithms/split_strands.hpp
+18 −0 src/include/handlegraph/algorithms/strongly_connected_components.hpp
+44 −0 src/include/handlegraph/algorithms/topological_sort.hpp
+23 −0 src/include/handlegraph/algorithms/unchop.hpp
+38 −0 src/include/handlegraph/algorithms/weakly_connected_components.hpp
+65 −0 src/include/handlegraph/buildable_snarl_decomposition.hpp
+6 −0 src/include/handlegraph/deletable_handle_graph.hpp
+10 −5 src/include/handlegraph/mutable_path_handle_graph.hpp
+3 −0 src/include/handlegraph/path_handle_graph.hpp
+84 −0 src/include/handlegraph/serializable.hpp
+4 −93 src/include/handlegraph/serializable_handle_graph.hpp
+433 −0 src/include/handlegraph/snarl_decomposition.hpp
+135 −0 src/include/handlegraph/trivially_serializable.hpp
+52 −3 src/include/handlegraph/types.hpp
+24 −0 src/include/handlegraph/util.hpp
+96 −0 src/is_acyclic.cpp
+113 −0 src/is_single_stranded.cpp
+22 −0 src/mutable_handle_graph.cpp
+82 −0 src/path_handle_graph.cpp
+20 −0 src/path_position_handle_graph.cpp
+25 −0 src/ranked_handle_graph.cpp
+36 −0 src/reverse_complement.cpp
+68 −0 src/serializable.cpp
+70 −0 src/snarl_decomposition.cpp
+69 −0 src/split_strands.cpp
+385 −0 src/strongly_connected_components.cpp
+295 −0 src/topological_sort.cpp
+143 −0 src/trivially_serializable.cpp
+53 −0 src/types.cpp
+500 −0 src/unchop.cpp
+150 −0 src/weakly_connected_components.cpp
110 changes: 110 additions & 0 deletions src/algorithms/cover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,5 +383,115 @@ namespace odgi {
}
}



void hogwild_path_cover(handlegraph::MutablePathDeletableHandleGraph &graph,
double target_depth,
//bool write_node_coverages, std::string &node_coverages,
const uint64_t& nthreads, const bool& ignore_paths, const bool& show_progress) {

// get depth -> graph depth
uint64_t node_count = graph.get_node_count();
uint64_t graph_bp = 0;
uint64_t step_count = 0;
// make a rank select dictionary over our sequence space
// to get a node randomly distributed in the total length of the graph
graph.for_each_handle(
[&](const handle_t& h) {
graph_bp += graph.get_length(h);
step_count += graph.get_step_count(h);
});
//sdsl::bit_vector graph_vec();
sdsl::bit_vector graph_bv(graph_bp);
graph_bp = 0;
graph.for_each_handle(
[&](const handle_t& h) {
graph_bv[graph_bp] = 1;
graph_bp += graph.get_length(h);
});
sdsl::bit_vector::rank_1_type graph_bv_rank;
sdsl::util::assign(graph_bv_rank, sdsl::bit_vector::rank_1_type(&graph_bv));
uint64_t target_step_count = node_count * target_depth - (ignore_paths ? 0 : step_count);
// run hogwild until our thread count is
std::unique_ptr<progress_meter::ProgressMeter> progress_meter;
if (show_progress) {
progress_meter = std::make_unique<progress_meter::ProgressMeter>(
target_step_count, "[odgi::hogwild_cover] covering the graph:");
}
// launch a thread to update the learning rate, count iterations, and decide when to stop
std::atomic<uint64_t> added_steps; added_steps.store(0);
std::atomic<uint64_t> added_paths; added_paths.store(0);
auto worker_lambda =
[&](uint64_t tid) {
// everyone tries to seed with their own random data
const std::uint64_t seed = 9399220 + tid;
XoshiroCpp::Xoshiro256Plus gen(seed); // a nice, fast PRNG

// we'll sample from all path steps
std::uniform_int_distribution<uint64_t> dis_graph_pos = std::uniform_int_distribution<uint64_t>(0, graph_bp-1);
std::uniform_int_distribution<uint64_t> flip(0, 1);
while (added_steps.load() < target_step_count) {
// create a path
std::stringstream ss;
ss << "cover_" << added_paths++;
path_handle_t path = graph.create_path_handle(ss.str());
// find a random handle
handle_t h = graph.get_handle(graph_bv_rank(dis_graph_pos(gen)), flip(gen));
uint64_t iter = 0;
handle_t lowest;
bool seen_low = false;
while (graph.get_step_count(h) > 0 && iter++ < 100) {
h = graph.get_handle(graph_bv_rank(dis_graph_pos(gen)), flip(gen));
if (!seen_low || graph.get_step_count(h) < graph.get_step_count(lowest)) {
lowest = h;
seen_low = true;
}
}
if (seen_low) {
h = lowest;
}
while (true) {
graph.append_step(path, h);
if (show_progress) progress_meter->increment(1);
++added_steps;
if (added_steps.load() >= target_step_count) {
break;
}
handle_t best_next;
uint64_t lowest_cov = std::numeric_limits<uint64_t>::max();
graph.follow_edges(
h, false,
[&](const handle_t& n) {
uint64_t next_cov = graph.get_step_count(n);
if (next_cov < lowest_cov) {
best_next = n;
lowest_cov = next_cov;
}
});
if (lowest_cov < std::numeric_limits<uint64_t>::max()) {
h = best_next;
} else {
break;
}
}
}
};

std::vector<std::thread> workers;
workers.reserve(nthreads);
for (uint64_t t = 0; t < nthreads; ++t) {
workers.emplace_back(worker_lambda, t);
}

for (uint64_t t = 0; t < nthreads; ++t) {
workers[t].join();
}

if (show_progress) {
progress_meter->finish();
}
}


}
}
16 changes: 12 additions & 4 deletions src/algorithms/cover.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
#include <unordered_map>
#include <stack>
#include <map>

#include <sdsl/bit_vectors.hpp>
#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
#include <handlegraph/util.hpp>
#include "weakly_connected_components.hpp"
#include <deps/ips4o/ips4o.hpp>
#include "XoshiroCpp.hpp"
#include "progress.hpp"

namespace odgi {
namespace algorithms {
namespace algorithms {

using namespace handlegraph;

Expand Down Expand Up @@ -57,5 +59,11 @@ namespace odgi {
size_t min_node_coverage, size_t max_number_of_paths_generable,
bool write_node_coverages, std::string &node_coverages,
const uint64_t& nthreads, const bool& ignore_paths, const bool& show_progress);
}
}

void hogwild_path_cover(handlegraph::MutablePathDeletableHandleGraph &graph,
double target_depth,
//bool write_node_coverages, std::string &node_coverages,
const uint64_t& nthreads, const bool& ignore_paths, const bool& show_progress);

}
}
49 changes: 27 additions & 22 deletions src/subcommand/cover_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace odgi {
args::HelpFlag help(parser, "help", "display this help summary", {'h', "help"});
args::ValueFlag<std::string> dg_in_file(parser, "FILE", "load the graph from this file", {'i', "idx"});
args::ValueFlag<std::string> dg_out_file(parser, "FILE","store the graph with the generated paths in this file", {'o', "out"});
args::ValueFlag<double> hogwild_depth(parser, "DEPTH", "randomly cover the graph until it has reaches the given average DEPTH",{'H', "hogwild-depth"});
args::ValueFlag<uint64_t> num_paths_per_component(parser, "N", "number of paths to generate per component",{'n', "num-paths-per-component"});
args::ValueFlag<uint64_t> node_window_size(parser, "N","size of the node window to check each time a new path is extended (it has to be greater than or equal to 2)",{'k', "node-window-size"});
args::ValueFlag<uint64_t> min_node_coverage(parser, "N","minimum node coverage to reach (it has to be greater than 0)",{'c', "min-node-coverage"});
Expand Down Expand Up @@ -91,32 +92,36 @@ namespace odgi {
}
}

uint64_t max_number_of_paths_generable = graph.get_node_count() * 5;
if (args::get(debug)){
if (_min_node_coverage) {
std::cerr << "[odgi::cover] there will be generated paths until the minimum node coverage is " << _min_node_coverage
<< ", or until the maximum number of allowed generated paths is reached ("
<< max_number_of_paths_generable << ")." << std::endl;
} else {
std::cerr << "[odgi::cover] there will be generated " << _num_paths_per_component << " paths per component."
<< std::endl;
}
}

uint64_t num_threads = args::get(nthreads) ? args::get(nthreads) : 1;

std::string node_coverages;
algorithms::path_cover(graph, _num_paths_per_component, _node_window_size, _min_node_coverage,
max_number_of_paths_generable,
write_node_coverages, node_coverages,
num_threads, args::get(ignore_paths), args::get(debug));
if (hogwild_depth) {
algorithms::hogwild_path_cover(graph, args::get(hogwild_depth), num_threads, args::get(ignore_paths), args::get(debug));
} else {
uint64_t max_number_of_paths_generable = graph.get_node_count() * 5;
if (args::get(debug)){
if (_min_node_coverage) {
std::cerr << "[odgi::cover] there will be generated paths until the minimum node coverage is " << _min_node_coverage
<< ", or until the maximum number of allowed generated paths is reached ("
<< max_number_of_paths_generable << ")." << std::endl;
} else {
std::cerr << "[odgi::cover] there will be generated " << _num_paths_per_component << " paths per component."
<< std::endl;
}
}

std::string node_coverages;
algorithms::path_cover(graph, _num_paths_per_component, _node_window_size, _min_node_coverage,
max_number_of_paths_generable,
write_node_coverages, node_coverages,
num_threads, args::get(ignore_paths), args::get(debug));

if (write_node_coverages) {
std::string covfile = args::get(write_node_coverages);
if (write_node_coverages) {
std::string covfile = args::get(write_node_coverages);

ofstream f(covfile.c_str());
f << node_coverages;
f.close();
ofstream f(covfile.c_str());
f << node_coverages;
f.close();
}
}

std::string outfile = args::get(dg_out_file);
Expand Down

0 comments on commit 1a5eaea

Please sign in to comment.