Skip to content

Commit

Permalink
crushin Ns
Browse files Browse the repository at this point in the history
  • Loading branch information
ekg committed Aug 16, 2021
1 parent 4eb47f8 commit 3386c77
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ add_library(odgi_objs OBJECT
${CMAKE_SOURCE_DIR}/src/subcommand/bin_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/matrix_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/chop_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/crush_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/groom_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/layout0_main.cpp
${CMAKE_SOURCE_DIR}/src/subcommand/layout_main.cpp
Expand Down Expand Up @@ -440,6 +441,7 @@ add_library(odgi_objs OBJECT
${CMAKE_SOURCE_DIR}/src/algorithms/untangle.cpp
${CMAKE_SOURCE_DIR}/src/algorithms/stepindex.cpp
${CMAKE_SOURCE_DIR}/src/algorithms/groom.cpp
${CMAKE_SOURCE_DIR}/src/algorithms/crush_n.cpp
${CMAKE_SOURCE_DIR}/src/unittest/edge.cpp
${CMAKE_SOURCE_DIR}/src/algorithms/tips.cpp)

Expand Down
28 changes: 28 additions & 0 deletions src/algorithms/crush_n.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "crush_n.hpp"

namespace odgi {
namespace algorithms {

/**
 * Collapse every run of consecutive 'N' characters in each node's sequence
 * down to a single 'N'.
 *
 * Runs are collapsed wherever they occur in the node (start, middle or end).
 * Only uppercase 'N' is matched; every other character is copied unchanged.
 * Nodes whose sequence contains no run of two or more Ns are not rewritten.
 *
 * @param graph graph whose node sequences are updated in place
 */
void crush_n(odgi::graph_t& graph) {
    graph.for_each_handle([&](const handle_t& handle) {
        const auto& original = graph.get_sequence(handle);
        std::string crushed;
        crushed.reserve(original.size());
        bool in_n = false; // true while the previous character was an 'N'
        for (const char c : original) {
            const bool is_n = (c == 'N');
            // keep the first N of a run, drop the rest
            if (!(is_n && in_n)) {
                crushed.push_back(c);
            }
            in_n = is_n;
        }
        // characters are only ever dropped, so an equal length means the
        // sequence is unchanged and the write can be skipped
        if (crushed.size() != original.size()) {
            graph.set_handle_sequence(handle, crushed);
        }
    }, true); // in parallel
}

}
}
20 changes: 20 additions & 0 deletions src/algorithms/crush_n.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include <handlegraph/types.hpp>
#include <handlegraph/util.hpp>
#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
#include <vector>
#include "odgi.hpp"

namespace odgi {
namespace algorithms {

using namespace handlegraph;

/**
* Replace every run of consecutive 'N' characters in each node's sequence
* with a single 'N'. Runs are collapsed wherever they occur in the node,
* not only at its start or end. Only uppercase 'N' is matched.
*
* @param graph graph whose node sequences are rewritten in place
*/
void crush_n(odgi::graph_t& graph);

}
}
102 changes: 102 additions & 0 deletions src/subcommand/crush_main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include "subcommand.hpp"
#include "odgi.hpp"
#include "args.hxx"
#include <omp.h>
#include "algorithms/crush_n.hpp"
#include "utils.hpp"

namespace odgi {

using namespace odgi::subcommand;

int main_crush(int argc, char **argv) {

// trick argumentparser to do the right thing with the subcommand
for (uint64_t i = 1; i < argc - 1; ++i) {
argv[i] = argv[i + 1];
}
const std::string prog_name = "odgi chop";
argv[0] = (char *) prog_name.c_str();
--argc;

args::ArgumentParser parser("Divide nodes into smaller pieces preserving node topology and order.");
args::Group mandatory_opts(parser, "[ MANDATORY ARGUMENTS ]");
args::ValueFlag<std::string> og_in_file(mandatory_opts, "FILE", "Load the succinct variation graph in ODGI format from this *FILE*. The file name usually ends with *.og*. It also accepts GFAv1, but the on-the-fly conversion to the ODGI format requires additional time!", {'i', "idx"});
args::ValueFlag<std::string> og_out_file(mandatory_opts, "FILE", "Write the N-crushed succinct variation graph in ODGI format to *FILE*. A file ending of *.og* is recommended.",
{'o', "out"});
args::Group threading_opts(parser, "[ Threading ]");
args::ValueFlag<uint64_t> nthreads(threading_opts, "N", "Number of threads to use for parallel operations.",
{'t', "threads"});
args::Group processing_info_opts(parser, "[ Processing Information ]");
args::Flag debug(processing_info_opts, "debug", "Print information about the process to stderr.", {'d', "debug"});
args::Flag progress(processing_info_opts, "progress", "Write the current progress to stderr.", {'P', "progress"});
args::Group program_info_opts(parser, "[ Program Information ]");
args::HelpFlag help(program_info_opts, "help", "Print a help message for odgi crush.", {'h', "help"});
try {
parser.ParseCLI(argc, argv);
} catch (args::Help) {
std::cout << parser;
return 0;
} catch (args::ParseError e) {
std::cerr << e.what() << std::endl;
std::cerr << parser;
return 1;
}
if (argc == 1) {
std::cout << parser;
return 1;
}

if (!og_in_file) {
std::cerr
<< "[odgi::crush] error: please specify an input file from where to load the graph via -i=[FILE], --idx=[FILE]."
<< std::endl;
return 1;
}

if (!og_out_file) {
std::cerr
<< "[odgi::crush] error: please specify an output file to where to store the graph via -o=[FILE], --out=[FILE]."
<< std::endl;
return 1;
}

const uint64_t num_threads = args::get(nthreads) ? args::get(nthreads) : 1;

graph_t graph;
assert(argc > 0);
{
const std::string infile = args::get(og_in_file);
if (!infile.empty()) {
if (infile == "-") {
graph.deserialize(std::cin);
} else {
utils::handle_gfa_odgi_input(infile, "crush", args::get(progress), num_threads, graph);
}
}
}

graph.set_number_of_threads(num_threads);
algorithms::crush_n(graph);

{
const std::string outfile = args::get(og_out_file);
if (!outfile.empty()) {
if (outfile == "-") {
graph.serialize(std::cout);
} else {
ofstream f(outfile.c_str());
graph.serialize(f);
f.close();
}
}
}

return 0;
}

// Registers the "crush" subcommand with main_crush as its entry point.
// NOTE(review): the meaning of the PIPELINE category and of the value 3 is
// defined by Subcommand (subcommand.hpp), which is not visible here — confirm.
static Subcommand odgi_crush("crush", "Crush runs of N.",
PIPELINE, 3, main_crush);


}

0 comments on commit 3386c77

Please sign in to comment.