diff --git a/.gitignore b/.gitignore
index 549bfa6..0e6781d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,6 @@
 /libs/winflexbison
 /src/clusty/x64/Debug/clusty.tlog
 /src/clusty/x64/Debug
-/src
+/src/clusty/
+
+/src/clusty.vcxproj.user
diff --git a/makefile b/makefile
index 02d476f..2b638bf 100644
--- a/makefile
+++ b/makefile
@@ -101,9 +101,10 @@ OBJS := \
 	$(MIMALLOC_OBJ) \
 	$(MAIN_DIR)/console.o \
 	$(MAIN_DIR)/conversion.o \
-	$(MAIN_DIR)/distances.o \
+	$(MAIN_DIR)/graph.o \
 	$(MAIN_DIR)/log.o \
 	$(MAIN_DIR)/main.o \
+	$(MAIN_DIR)/params.o \
 
 %.o: %.cpp igraph
 	$(CXX) $(CFLAGS) -c $< -o $@
diff --git a/src/cd_hit.h b/src/cd_hit.h
index 0791f20..cf7f070 100644
--- a/src/cd_hit.h
+++ b/src/cd_hit.h
@@ -17,12 +17,12 @@
 #include <vector>
 #include <unordered_map>
 
-template <class DistanceMatrix>
-class CdHit : public IClustering<DistanceMatrix> {
+template <class Distance>
+class CdHit : public IClustering<Distance> {
 public:
 
 	int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) override {
@@ -41,10 +41,10 @@ class CdHit : public IClustering<DistanceMatrix> {
 				assignments[obj] = cluster_id;
 
 				// iterate over connected object and assign those which are unassigned
-				for (const dist_t* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
-					int other = edge->u.s.hi;
+				for (const Distance* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
+					int other = edge->get_id();
 
-					if (edge->d <= threshold && assignments[other] == -1) {
+					if (edge->get_d() <= threshold && assignments[other] == -1) {
 						assignments[other] = cluster_id;
 					}	
 				}
diff --git a/src/clustering.h b/src/clustering.h
index 58e92eb..f035e4f 100644
--- a/src/clustering.h
+++ b/src/clustering.h
@@ -10,8 +10,9 @@
 
 #include <vector>
 #include <numeric>
+#include <algorithm>
 
-#include "distances.h"
+#include "sparse_matrix.h"
 
 struct node_t {
 	int first = -1;
@@ -23,12 +24,12 @@ struct node_t {
 		: first(first), second(second), distance(distance) {}
 };
 
-template <class DistanceMatrix>
+template <class Distance>
 class IClustering {
 public:
 
 	virtual int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) = 0;
@@ -37,12 +38,12 @@ class IClustering {
 
 };
 
-template <class DistanceMatrix>
-class HierarchicalClustering : public IClustering<DistanceMatrix> {
+template <class Distance>
+class HierarchicalClustering : public IClustering<Distance> {
 protected:
 	
 	void makeDendrogram(
-		const std::vector<dist_t>& lambda,
+		const std::vector<Distance>& lambda,
 		const std::vector<int>& pi,
 		std::vector<node_t>& dendrogram) 
 	{
@@ -51,7 +52,7 @@ class HierarchicalClustering : public IClustering<DistanceMatrix> {
 		std::vector<int> elements(n_objects - 1);
 		std::iota(elements.begin(), elements.end(), 0);
 
-		stable_sort(elements.begin(), elements.end(), [&lambda](int x, int y) {
+		std::stable_sort(elements.begin(), elements.end(), [&lambda](int x, int y) {
 			return lambda[x] < lambda[y];
 			});
 
@@ -64,7 +65,7 @@ class HierarchicalClustering : public IClustering<DistanceMatrix> {
 		for (int i = 0; i < n_objects - 1; ++i) {
 			int j = elements[i];
 			int next = pi[j];
-			dendrogram.emplace_back(index[j], index[next], lambda[j].d);
+			dendrogram.emplace_back(index[j], index[next], lambda[j].get_d());
 			index[next] = n_objects + i;
 		}
 	}
diff --git a/src/clusty.vcxproj b/src/clusty.vcxproj
index dcb0fdc..1ef1412 100644
--- a/src/clusty.vcxproj
+++ b/src/clusty.vcxproj
@@ -133,7 +133,7 @@
       <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <BufferSecurityCheck>false</BufferSecurityCheck>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>stdcpplatest</LanguageStandard>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
@@ -151,7 +151,7 @@
       <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <BufferSecurityCheck>false</BufferSecurityCheck>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>stdcpplatest</LanguageStandard>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -165,9 +165,10 @@
     <ClCompile Include="..\libs\mimalloc\src\static.c" />
     <ClCompile Include="console.cpp" />
     <ClCompile Include="conversion.cpp" />
-    <ClCompile Include="distances.cpp" />
+    <ClCompile Include="graph.cpp" />
     <ClCompile Include="log.cpp" />
     <ClCompile Include="main.cpp" />
+    <ClCompile Include="params.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="cd_hit.h" />
@@ -179,8 +180,13 @@
     <ClInclude Include="log.h" />
     <ClInclude Include="memory_monotonic.h" />
     <ClInclude Include="distances.h" />
+    <ClInclude Include="graph.h" />
+    <ClInclude Include="params.h" />
     <ClInclude Include="set_cover.h" />
     <ClInclude Include="single_bfs.h" />
+    <ClInclude Include="sparse_matrix.h" />
+    <ClInclude Include="graph_named.h" />
+    <ClInclude Include="graph_numbered.h" />
     <ClInclude Include="uclust.h" />
     <ClInclude Include="utils.h" />
     <ClInclude Include="version.h" />
diff --git a/src/clusty.vcxproj.filters b/src/clusty.vcxproj.filters
index 87e2276..b5b6ce2 100644
--- a/src/clusty.vcxproj.filters
+++ b/src/clusty.vcxproj.filters
@@ -13,11 +13,12 @@
     <ClCompile Include="main.cpp" />
     <ClCompile Include="console.cpp" />
     <ClCompile Include="conversion.cpp" />
-    <ClCompile Include="distances.cpp" />
+    <ClCompile Include="graph.cpp" />
     <ClCompile Include="..\libs\mimalloc\src\static.c">
       <Filter>Library Files</Filter>
     </ClCompile>
     <ClCompile Include="log.cpp" />
+    <ClCompile Include="params.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="distances.h" />
@@ -34,5 +35,10 @@
     <ClInclude Include="set_cover.h" />
     <ClInclude Include="single_bfs.h" />
     <ClInclude Include="log.h" />
+    <ClInclude Include="graph_named.h" />
+    <ClInclude Include="graph_numbered.h" />
+    <ClInclude Include="sparse_matrix.h" />
+    <ClInclude Include="graph.h" />
+    <ClInclude Include="params.h" />
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/src/console.cpp b/src/console.cpp
index 7c9131d..a7b8eab 100644
--- a/src/console.cpp
+++ b/src/console.cpp
@@ -7,109 +7,207 @@
 // *******************************************************************************************
 
 #include "console.h"
-#include "distances.h"
 #include "log.h"
 
+#include "graph_named.h"
+#include "graph_numbered.h"
+#include "sparse_matrix.h"
+
+#define VAL(str) #str
+#define TOSTRING(str) VAL(str)
+#include "version.h"
+
 #include <vector>
+#include <memory>
+#include <chrono>
 #include <fstream>
-#include <iostream>
+#include <string>
 
 using namespace std;
 
-void Console::printUsage() const {
-	LOG_NORMAL << "Usage:" << endl
-		<< "clusty [options] <distances> <assignments>" << endl << endl
-		<< "Parameters:" << endl
-		<< "  <distances> - input TSV/CSV table with pairwise distances" << endl
-		<< "  <assignments> - output TSV/CSV table with assignments" << endl << endl
-		<< "Options:" << endl
-		<< "  " + PARAM_FILE_OBJECTS + " <string> - optional TSV/CSV file with object names in the first column sorted decreasingly w.r.t. representativness" << endl
-		<< "  " + PARAM_ALGO + " <single | complete | uclust | set-cover | cd-hit | leiden> - clustering algorithm:" << endl
-		<< "    * single     - single linkage (connected component, MMseqs mode 1)" << endl
-		<< "    * complete   - complete linkage" << endl
-		<< "    * uclust     - UCLUST" << endl
-		<< "    * set-cover  - greedy set cover (MMseqs mode 0)" << endl
-		<< "    * cd-hit     - CD-HIT (greedy incremental; MMseqs mode 2)" << endl 
-		<< "    * leiden     - Leiden algorithm" << endl << endl
-
-		<< "  " + PARAM_ID_COLUMNS + " <column-name1> <column-name2> - names of columns with sequence identifiers (default: two first columns)" << endl
-		<< "  " + PARAM_DISTANCE_COLUMN + " <column-name> - name of the column with pairwise distances (or similarities; default: third column)" << endl
-		<< "  " + FLAG_SIMILARITY + " - use similarity instead of distances (has to be in [0,1] interval; default: false)" << endl
-		<< "  " + FLAG_PERCENT_SIMILARITY + " - use percent similarity (has to be in [0,100] interval; default: false)" << endl
-		<< "  " + PARAM_MIN + " <column-name> <real-threshold> - accept pairwise connections with values greater or equal given threshold in a specified column" << endl
-		<< "  " + PARAM_MAX + " <column-name> <real-threshold> - accept pairwise connections with values lower or equal given threshold in a specified column" << endl
-		<< "  " + FLAG_NUMERIC_IDS + " - use when sequences in the distances file are represented by numbers (can be mapped to string ids by the object file)" << endl
-		<< "  " + FLAG_OUT_REPRESENTATIVES + " - output a representative object for each cluster instead of a cluster numerical identifier (default: " << std::boolalpha << outputRepresentatives << ")" << endl
-		<< "  " + FLAG_OUT_CSV + " - output a CSV table instead of a default TSV (default: " << std::boolalpha << outputCSV <<")" 
-
-#ifndef NO_LEIDEN
-		<< endl << endl
-		<< "  " + PARAM_LEIDEN_RESOLUTION + " - resolution parameter for Leiden algorithm (default: " << leidenParams.resolution << ")" << endl
-		<< "  " + PARAM_LEIDEN_BETA + " - beta parameter for Leiden algorithm (default: " << leidenParams.beta << ")" << endl
-		<< "  " + PARAM_LEIDEN_RESOLUTION + " - number of interations for Leiden algorithm (default: " << leidenParams.numIterations << ")" 
-#endif
-		<< endl << endl;
-}
+// *******************************************************************************************
+bool Console::init(int argc, char** argv, Params& params) {
+	Log::getInstance(Log::LEVEL_NORMAL).enable();
 
+	LOG_NORMAL << "Clusty" << endl
+		<< "  version " << VERSION
+#ifdef GIT_COMMIT
+		<< "-" << TOSTRING(GIT_COMMIT)
+#endif
+		<< " (" << DATE << ")" << endl << endl;
 
-bool Console::parse(int argc, char** argv) {
+	if (!params.parse(argc, argv)) {
+		params.printUsage();
+		return false;
+	}
 
-	vector<string> args;
-	for (int i = 1; i < argc; ++i) {
-		args.emplace_back(argv[i]);
+	if (params.verbose) {
+		Log::getInstance(Log::LEVEL_VERBOSE).enable();
 	}
 
-	if (args.size() < 2) {
-		return false;
+	return true;
+}
+
+// *******************************************************************************************
+std::unique_ptr<Graph> Console::loadGraph(const Params& params) {
+	
+	unique_ptr<Graph> graph;
+
+	if (params.numericIds) {
+		if (needDistances(params.algo)) {
+			graph = make_unique<GraphNumbered<dist_t>>();
+		}
+		else {
+			graph = make_unique<GraphNumbered<mini_dist_t>>();
+		}
+	}
+	else {
+		if (needDistances(params.algo)) {
+			graph = make_unique<GraphNamed<dist_t>>();
+		}
+		else {
+			graph = make_unique<GraphNamed<mini_dist_t>>();
+		}
 	}
 
-	std::string tmp;
-	findOption(args, PARAM_ALGO, tmp);
-	if (tmp.length()) {
-		algo = str2algo(tmp);
+	LOG_NORMAL << "Loading pairwise distances from " << params.distancesFile << "... ";
+	auto t = std::chrono::high_resolution_clock::now();
+
+	vector<char> filebuf(128ULL << 20);  // 128MB buffer
+	ifstream ifs;
+	ifs.rdbuf()->pubsetbuf(filebuf.data(), filebuf.size());
+	ifs.open(params.distancesFile, ios_base::binary);
+
+	if (!ifs) {
+		throw std::runtime_error("Unable to open distance file");
 	}
 
-	findOption(args, PARAM_FILE_OBJECTS, objectsFile);
+	map<DistanceSpecification, distance_transformation_t> transforms{
+		{ DistanceSpecification::Distance, [](double d) { return d; } },
+		{ DistanceSpecification::Similarity,		[](double d) { return 1.0 - d; } },
+		{ DistanceSpecification::PercentSimilarity, [](double d) { return 1.0 - d * 0.01; } },
+	};
 
-	findOption(args, PARAM_ID_COLUMNS, idColumns.first, idColumns.second);
-	numericIds = findSwitch(args, FLAG_NUMERIC_IDS);
+	size_t n_total_dists = graph->load(ifs, params.idColumns, params.distanceColumn,
+		transforms[params.distanceSpecification], params.columns2filters);
+	
+	auto dt = std::chrono::high_resolution_clock::now() - t;
 
-	findOption(args, PARAM_DISTANCE_COLUMN, distanceColumn);
+	ifs.close();
+	LOG_NORMAL << endl
+		<< "  input graph: " << graph->getNumInputVertices() << " nodes, " << n_total_dists << " edges" << endl
+		<< "  filtered graph: " << graph->getNumVertices() << " nodes, " << graph->getNumEdges() << " edges" << endl
+		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
 
-	bool use_similarity = findSwitch(args, FLAG_SIMILARITY);
-	bool use_percent_similarity = findSwitch(args, FLAG_PERCENT_SIMILARITY);
+	return graph;
+}
 
-	if (use_percent_similarity) {
-		distanceSpecification = DistanceSpecification::PercentSimilarity;
-	} else if (use_similarity) {
-		distanceSpecification = DistanceSpecification::Similarity;
+// *******************************************************************************************
+void Console::loadObjects(
+	const Params& params,
+	const Graph& graph,
+	std::vector<int>& objects,
+	std::vector<std::string>& names) {
+
+	names.clear();
+	objects.resize(graph.getNumVertices());
+	std::iota(objects.begin(), objects.end(), 0);
+	ifstream ifs;
+
+	if (!params.objectsFile.empty()) {
+		LOG_NORMAL << "Loading objects from " << params.objectsFile << "... ";
+		ifs.open(params.objectsFile);
+
+		if (ifs) {
+			auto t = std::chrono::high_resolution_clock::now();
+			string token;
+			token.reserve(16 << 10); // 16k buffer
+
+			// omit header
+			getline(ifs, token);
+			auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
+
+			while (getline(ifs, token)) {
+				auto it = find_if(token.begin(), token.end(), is_sep);
+				if (it != token.begin()) {
+					names.emplace_back(token.begin(), it);
+				}
+			}
+
+			ifs.close();
+			LOG_NORMAL << endl;
+
+			graph.reorderObjects(names, objects);
+
+			auto dt = std::chrono::high_resolution_clock::now() - t;
+			LOG_NORMAL << "  total objects: " << names.size() << endl
+				<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
+		}
+		else {
+			throw std::runtime_error("Unable to open objects file");
+		}
 	}
+}
+
 
-	string column;
-	double value;
-	while (findOption(args, PARAM_MIN, column, value)) { 
-		columns2filters[column].min = std::max(value, columns2filters[column].min);
+// *******************************************************************************************
+void Console::doClustering(
+	const Params& params,
+	Graph& graph,
+	const std::vector<int>& objects,
+	std::vector<int>& assignments)
+{
+	assignments.clear();
+	
+	LOG_NORMAL << "Clustering (algorithm: " << Params::algo2str(params.algo) << ")... ";
+
+	auto t = std::chrono::high_resolution_clock::now();
+	double threshold = std::nexttoward(std::numeric_limits<double>::max(), 0.0);
+	int n_clusters = 0;
+
+	if (needDistances(params.algo)) {
+		auto clustering = createClusteringAlgo<dist_t>(params.algo);
+		IMatrix& mat = graph.getMatrix();
+		SparseMatrix<dist_t>& distances = static_cast<SparseMatrix<dist_t>&>(mat);
+		n_clusters = (*clustering)(distances, objects, threshold, assignments);
 	}
-	while (findOption(args, PARAM_MAX, column, value)) { 
-		columns2filters[column].max = std::min(value, columns2filters[column].max);
+	else {
+		auto clustering = createClusteringAlgo<mini_dist_t>(params.algo);
+		IMatrix& mat = graph.getMatrix();
+		SparseMatrix<mini_dist_t>& distances = static_cast<SparseMatrix<mini_dist_t>&>(mat);
+		n_clusters = (*clustering)(distances, objects, threshold, assignments);
 	}
 
-	outputRepresentatives = findSwitch(args, FLAG_OUT_REPRESENTATIVES);
-	outputCSV = findSwitch(args, FLAG_OUT_CSV);
+	auto dt = std::chrono::high_resolution_clock::now() - t;
+	
+	LOG_NORMAL << endl
+		<< "  objects: " << graph.getNumVertices() << ", clusters: " << n_clusters << endl
+		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
+}
 
-	// leiden parameters
-	findOption(args, PARAM_LEIDEN_RESOLUTION, leidenParams.resolution);
-	findOption(args, PARAM_LEIDEN_BETA, leidenParams.beta);
-	findOption(args, PARAM_LEIDEN_ITERATIONS, leidenParams.numIterations);
+// *******************************************************************************************
+void Console::saveAssignments(
+	const Params& params,
+	const Graph& graph,
+	const std::vector<std::string>& names,
+	const std::vector<int>& assignments) {
 
-	verbose = findSwitch(args, FLAG_VERBOSE);
+	LOG_NORMAL << "Saving clusters... ";
+	auto t = std::chrono::high_resolution_clock::now();
 
-	if (args.size() == 2) {
-		distancesFile = args[0];
-		output = args[1];
-		return true;
-	}
+	char sep = params.outputCSV ? ',' : '\t';
 
-	return false;
-}
+	ofstream ofs(params.output);
+	int n_total_clusters = 0;
+	if (params.outputRepresentatives) {
+		n_total_clusters = graph.saveRepresentatives(ofs, names, assignments, sep);
+	}
+	else {
+		n_total_clusters = graph.saveAssignments(ofs, names, assignments, sep);
+	}
 
+	auto dt = std::chrono::high_resolution_clock::now() - t;
+	LOG_NORMAL << endl
+		<< "  total clusters (including singletons): " << n_total_clusters << endl
+		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
+}
\ No newline at end of file
diff --git a/src/console.h b/src/console.h
index 747bdb8..ca7f3df 100644
--- a/src/console.h
+++ b/src/console.h
@@ -1,5 +1,3 @@
-#pragma once
-
 // *******************************************************************************************
 // This file is a part of Clusty software distributed under GNU GPL 3 license.
 // The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
@@ -7,147 +5,79 @@
 // Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
 //
 // *******************************************************************************************
-
-#include "distances.h"
+#pragma once
+#include "params.h"
+#include "graph.h"
+
+#include "linkage_heaptrix.h"
+#include "uclust.h"
+#include "set_cover.h"
+#include "single_bfs.h"
+#include "cd_hit.h"
 #include "leiden.h"
 
+#include <memory>
+#include <vector>
 #include <string>
-#include <stdexcept>
-#include <sstream>
-#include <map>
-
-enum class Algo {
-	SingleLinkage,
-	CompleteLinkage,
-	UClust,
-	SetCover,
-	CdHit,
-	Leiden
-};
-
-enum class DistanceSpecification {
-	Distance,
-	Similarity,
-	PercentSimilarity
-};
 
 class Console {
-	const std::string PARAM_ALGO{ "--algo" };
-
-	const std::string PARAM_FILE_OBJECTS{"--objects-file"};
-	
-	const std::string PARAM_ID_COLUMNS{"--id-cols"};
-	const std::string FLAG_NUMERIC_IDS{"--numeric-ids"};
-	const std::string PARAM_DISTANCE_COLUMN{"--distance-col"};
-	const std::string FLAG_SIMILARITY{"--similarity"};
-	const std::string FLAG_PERCENT_SIMILARITY{"--percent-similarity"};
-
-	const std::string PARAM_MAX{"--max"};
-	const std::string PARAM_MIN{"--min"};
-
-	const std::string FLAG_OUT_REPRESENTATIVES{"--out-representatives"};
-	const std::string FLAG_OUT_CSV{"--out-csv"};
-
-	const std::string PARAM_LEIDEN_RESOLUTION{"--leiden-resolution"};
-	const std::string PARAM_LEIDEN_BETA{"--leiden-beta"};
-	const std::string PARAM_LEIDEN_ITERATIONS{"--leiden-iterations"};
-
-	const std::string FLAG_VERBOSE{ "-v" };
+	Params params;
 
 public:		
-	static Algo str2algo(const std::string& str) 
-	{
-		if (str == "single")				{ return Algo::SingleLinkage;   }
-		else if (str == "complete")			{ return Algo::CompleteLinkage; }
-		else if (str == "uclust")           { return Algo::UClust;          }
-		else if (str == "set-cover")        { return Algo::SetCover;       }
-		else if (str == "cd-hit")			{ return Algo::CdHit;			}
-		else if (str == "leiden")			{ return Algo::Leiden; }
-		
-		else { throw std::runtime_error("Unkown clustering algorithm"); }
-	}
-
-	static std::string algo2str(Algo algo) {
-		switch (algo) {
-		case Algo::SingleLinkage: return "single";
-		case Algo::CompleteLinkage: return "complete";
-		case Algo::UClust: return "uclust";
-		case Algo::SetCover: return "set-cover";
-		case Algo::Leiden: return "leiden";
-		case Algo::CdHit: return "cd-hit";
-		default: throw std::runtime_error("Unkown clustering algorithm");
-		}
-	}
-
-
-public:
-
-	std::string objectsFile;
-	std::string distancesFile;
-	std::string output;
-
-	Algo algo{ Algo::SingleLinkage };
-
-	std::pair<std::string, std::string> idColumns;
-	bool numericIds{ false };
-
-	std::string distanceColumn;
-	DistanceSpecification distanceSpecification{ DistanceSpecification::Distance };
+	bool init(int argc, char** argv, Params& params);
 	
-	std::map<std::string, ColumnFilter> columns2filters;
-	bool outputRepresentatives{ false };
-	bool outputCSV{ false };
-
-	LeidenParams leidenParams;
-
-	bool verbose{ false };
-
-	void printUsage() const;
-	bool parse(int argc, char** argv);
-
-	bool findSwitch(std::vector<std::string>& params, const std::string& name) {
-		auto it = find(params.begin(), params.end(), name); // verbose mode
-		if (it != params.end()) {
-			params.erase(it);
-			return true;
-		}
-
-		return false;
+	std::unique_ptr<Graph> loadGraph(const Params& params);
+	
+	void loadObjects(
+		const Params& params,
+		const Graph& graph,
+		std::vector<int>& objects,
+		std::vector<std::string>& names);
+
+	void doClustering(
+		const Params& params,
+		Graph& graph,
+		const std::vector<int>& objects,
+		std::vector<int>& assignments);
+
+	void saveAssignments(
+		const Params& params,
+		const Graph& graph,
+		const std::vector<std::string>& names,
+		const std::vector<int>& assignments);
+
+protected:
+
+	bool needDistances(Algo algo) const { 
+		return (algo == Algo::CompleteLinkage || algo == Algo::Leiden || algo == Algo::UClust);
 	}
 
-	template <typename T>
-	bool findOption(std::vector<std::string>& params, const std::string& name, T& v) {
-		if (params.size() < 2) {
-			return false;
-		}
+	template <class Distance>
+	std::unique_ptr<IClustering<Distance>> createClusteringAlgo(Algo algo) {
 		
-		auto stop = std::prev(params.end());
-		auto it = find(params.begin(), stop, name); // verbose mode
-		if (it != stop) {
-			std::istringstream iss(*std::next(it));
-			if (iss >> v) {
-				params.erase(it, it + 2);
-				return true;
-			}
+		std::unique_ptr<IClustering<Distance>> clustering;
+
+		switch (algo)
+		{
+			
+		case Algo::SingleLinkage:
+			clustering = std::make_unique<SingleLinkageBFS<Distance>>(); break;
+		case Algo::CompleteLinkage:
+			clustering = std::make_unique<CompleteLinkage<Distance>>(); break;
+		case Algo::UClust:
+			clustering = std::make_unique<UClust<Distance>>(); break;
+		case Algo::SetCover:
+			clustering = std::make_unique<SetCover<Distance>>(); break;
+		case Algo::CdHit:
+			clustering = std::make_unique<CdHit<Distance>>(); break;
+		case Algo::Leiden:
+			clustering = std::make_unique<Leiden<Distance>>(params.leidenParams); break;
+			
+		default:
+			throw std::runtime_error("Unkown clustering algorithm");
 		}
-		return false;
-	}
 
-	template <typename T, typename U>
-	bool findOption(std::vector<std::string>& params, const std::string& name, T& value1, U& value2) {
-		if (params.size() < 3) {
-			return false;
-		}
-		
-		auto stop = std::prev(params.end(), 2);
-		auto it = find(params.begin(), stop, name); // verbose mode
-		if (it != stop) {
-			if (std::istringstream(*std::next(it)) >> value1 
-				&& std::istringstream(*std::next(it, 2)) >> value2) {
-				params.erase(it, it + 3);
-				return true;
-			}
-		}
-		return false;
+		return clustering;
 	}
+
 };
diff --git a/src/distances.cpp b/src/distances.cpp
deleted file mode 100644
index e0ea0c6..0000000
--- a/src/distances.cpp
+++ /dev/null
@@ -1,735 +0,0 @@
-// *******************************************************************************************
-// This file is a part of Clusty software distributed under GNU GPL 3 license.
-// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
-//
-// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
-//
-// *******************************************************************************************
-
-#include "distances.h"
-#include "log.h"
-
-#include <iostream>
-#include <limits>
-#include <unordered_map>
-
-using namespace std;
-
-const double dist_t::MAX = std::numeric_limits<double>::max();
-
-/*********************************************************************************************************************/
-void SparseMatrix::processHeader(
-	std::ifstream& ifs,
-	const std::pair<std::string, std::string>& idColumns,
-	const std::string& distanceColumn,
-	std::map<std::string, ColumnFilter>& columns2filters,
-	int idColumnsOut[2],
-	int& distanceColumnOut,
-	std::vector<ColumnFilter>& filters) {
-
-	std::string line;
-	std::getline(ifs, line);
-
-	std::replace(line.begin(), line.end(), ',', ' ');
-	std::istringstream iss(line);
-	std::vector<std::string> columns;
-	std::copy(std::istream_iterator<std::string>(iss), std::istream_iterator<std::string>(), std::back_inserter(columns));
-
-	if (columns.size() < 3) {
-		throw std::runtime_error("Error loading distances: at least three columns are required");
-	}
-
-	// default column identifiers
-	int col_ids[]{ 0, 1 };
-	int col_distance = 2; // by default use 3rd column as the one with distance 
-
-	int n_columns = (int)columns.size();
-
-	if (!idColumns.first.empty()) {
-		col_ids[0] = std::find(columns.begin(), columns.end(), idColumns.first) - columns.begin();
-		col_ids[1] = std::find(columns.begin(), columns.end(), idColumns.second) - columns.begin();
-
-		if (col_ids[0] == n_columns || col_ids[1] == n_columns) {
-			throw std::runtime_error("Error loading distances: id columns not found");
-		}
-
-		if (col_ids[0] > col_ids[1]) {
-			std::swap(col_ids[0], col_ids[1]);
-		}
-
-	}
-
-	if (!distanceColumn.empty()) {
-		col_distance = std::find(columns.begin(), columns.end(), distanceColumn) - columns.begin();
-		if (col_distance == n_columns) {
-			throw std::runtime_error("Error loading distances: " + distanceColumn + " column not found");
-		}
-	}
-
-	filters.resize(columns.size());
-
-	for (const auto& f : columns2filters) {
-		int col = std::find(columns.begin(), columns.end(), f.first) - columns.begin();
-		if (col == n_columns) {
-			throw std::runtime_error("Error loading distances: " + f.first + " column not found");
-		}
-		filters[col] = f.second;
-		filters[col].enabled = true;
-	}
-
-	idColumnsOut[0] = col_ids[0];
-	idColumnsOut[1] = col_ids[1];
-	distanceColumnOut = col_distance;
-}
-
-/*********************************************************************************************************************/
-size_t SparseMatrixNamed::load(
-	std::ifstream& ifs,
-	const std::pair<std::string, std::string>& idColumns,
-	const std::string& distanceColumn,
-	distance_transformation_t transform,
-	std::map<std::string, ColumnFilter>& columns2filters) {
-
-	size_t n_total_distances = 0;
-	const size_t N = 512ULL << 20; // 128MB buffer
-	char* buf = new char[N];
-	char* buf_end = buf + N;
-
-	// get header
-	int col_ids[]{ 0, 1 };
-	int col_distance = 2; // by default use 3rd column as the one with distance 
-	std::vector<ColumnFilter> filters;
-	processHeader(ifs, idColumns, distanceColumn, columns2filters, col_ids, col_distance, filters);
-	int n_columns = filters.size();
-
-	auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
-	auto is_newline = [](char c) {return c == '\r' || c == '\n'; };
-
-	namesBuffer = new char[1LL << 30]; // 1 GB buffer for names
-	char* raw_ptr = namesBuffer;
-
-	// assume space for 8M objects
-	distances.reserve(8LL << 20); 
-
-	bool continueReading = true;
-	char* place = buf;
-
-	while (continueReading) {
-		size_t n_wanted = buf_end - place;
-		ifs.read(place, n_wanted);
-		size_t n_read = ifs.gcount();
-		char* block_end = place + n_read;
-		int offset = 0;
-
-		// no more data
-		if (n_read < n_wanted) {
-			continueReading = false;
-		}
-		else {
-			// find last newline
-			while (!is_newline(*(block_end - 1))) {
-				--block_end;
-				++offset;
-			}
-		}
-
-		//LOG_DEBUG << "portion: " << n_read << ", offset: " << offset << ", carryOn: " << carryOn << endl;
-
-		// pass through buffer
-		char* line = buf;
-		while (true) {
-			char* line_end = std::find_if(line, block_end, is_newline);
-
-			// no more lines
-			if (line_end == block_end) {
-				break;
-			}
-
-			++n_total_distances;
-
-			char* p = line;
-			decltype(names2ids)::iterator its[2];
-			double d = std::numeric_limits<double>::max();
-
-			int k = 0;
-			bool carryOn = true;
-
-			for (int c = 0; c < n_columns; ++c) {
-				char* q = std::find_if(p, line_end, is_sep); // support both tsv and csv files
-				*q = 0;
-
-				if (k < 2 && c == col_ids[k]) {
-					char* name = p;
-					
-					// store name in hashtable
-					its[k] = names2ids.find(name);
-
-					if (its[k] == names2ids.end()) {
-						
-						char* localName = raw_ptr;
-						while (*raw_ptr++ = *name++) {}
-						auto it_and_flag = names2ids.insert({ localName, {-1, names2ids.size()} }); // -1 indicate singleton
-						its[k] = it_and_flag.first;
-					}
-					
-					++k;
-				}
-				else if (c == col_distance || filters[c].enabled) {
-					double value = Conversions::strtod(p, &p);
-					
-					if (c == col_distance) {
-						d = transform(value); 	// convert similarity to distance if neccessary
-					}
-
-					// check distance condition
-					if (value < filters[c].min || value > filters[c].max) {
-						carryOn = false;
-						break;
-					}
-				}
-				
-				p = q + 1;
-			}
-
-			// move to the next line
-			line = std::find_if(line_end, block_end, [](char c) { return c != '\r' && c != '\n' && c != 0; });
-
-			// do not consider rows not fulfilling conditions
-			if (carryOn == false) {
-				continue;
-			}
-
-			// translate seq names to numerical ids
-			for (int k = 0; k < 2; ++k) {
-				auto it = its[k];
-
-				// if name not mapped to numerical ids
-				if (it->second.first == -1) {
-					ids2names.push_back(it->first);
-					it->second.first = ids2names.size() - 1;
-				}
-			}
-
-			uint32_t i = std::min(its[0]->second.first, its[1]->second.first);
-			uint32_t j = std::max(its[0]->second.first, its[1]->second.first);
-
-			// omit diagonal elements - they are assumed to have 0 distance
-			if (i == j) {
-				continue;
-			}
-
-			if (distances.size() <= j) {
-				distances.resize(j + 1);
-			}
-
-			auto& Di = distances[i];
-			auto& Dj = distances[j];
-
-			// extend capacity by factor 1.5 with 16 as an initial state
-			if (Di.capacity() == Di.size()) {
-				Di.reserve(Di.capacity() == 0 ? 16 : size_t(Di.capacity() * 1.5)); 
-			}
-
-			if (Dj.capacity() == Dj.size()) {
-				Dj.reserve(Dj.capacity() == 0 ? 16 : size_t(Dj.capacity() * 1.5)); 
-			}
-
-			Di.emplace_back(i, j, d);
-			Dj.emplace_back(j, i, d);
-		}
-
-		// copy remaining part after consuming all the lines
-		if (continueReading && offset > 0) {
-			memcpy(buf, block_end, offset);
-			place = buf + offset;
-		}
-		else {
-			place = buf;
-		}
-	}
-
-	// if neccessary, sort distances in rows according to the second id	
-	n_elements = 0;
-
-	for (auto& row : distances) {
-		std::sort(row.begin(), row.end(), [](const dist_t& a, const dist_t& b) { return (a.u.ids == b.u.ids) ? (a.d < b.d) : (a.u.ids < b.u.ids); });
-		auto newEnd = std::unique(row.begin(), row.end(), [](const dist_t& a, const dist_t& b) { return a.u.ids == b.u.ids; });
-		
-		row.erase(newEnd, row.end());
-
-		n_elements += row.size();
-	}
-
-	delete[] buf;
-
-	return n_total_distances;
-}
-
-/*********************************************************************************************************************/
-int SparseMatrixNamed::saveAssignments(
-	std::ofstream& ofs,
-	const std::vector<std::string>& globalNames,
-	const std::vector<int>& assignments,
-	char separator) {
-
-	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
-
-	vector<string> names;
-	if (globalNames.empty()) {
-		names.resize(names2ids.size());
-		vector<pair<const char*, pair<int,int>>> entries(names2ids.size());
-		copy(names2ids.begin(), names2ids.end(), entries.begin());
-		sort(entries.begin(), entries.end(), [](const auto& a, const auto& b) { return a.second.second < b.second.second; });
-
-		transform(entries.begin(), entries.end(), names.begin(), [](const auto& entry) { return string(entry.first); });
-	}
-	else {
-		names = globalNames;
-	}
-		
-	std::vector<int> globalAssignments(names.size());
-
-	int singleton_id = n_clusters;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-
-		int id = get_id(names[i].c_str());
-		if (id == -1) {
-			// not in matrix 
-			globalAssignments[i] = singleton_id++;
-		}
-		else {
-			globalAssignments[i] = assignments[id];
-		}
-	}
-
-	ofs << "object" << separator << "cluster" << endl;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-		ofs << names[i] << separator << globalAssignments[i] << endl;
-	}
-
-	return singleton_id;
-}
-
-/*********************************************************************************************************************/
-int SparseMatrixNamed::saveRepresentatives(
-	std::ofstream& ofs,
-	const std::vector<std::string>& globalNames,
-	const std::vector<int>& assignments,
-	char separator
-) {
-	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
-	
-	std::vector<std::string> names;
-	if (globalNames.empty()) {
-		names.resize(names2ids.size());
-		vector<pair<const char*, pair<int, int>>> entries(names2ids.size());
-		copy(names2ids.begin(), names2ids.end(), entries.begin());
-		sort(entries.begin(), entries.end(), [](const auto& a, const auto& b) { return a.second.second < b.second.second; });
-
-		transform(entries.begin(), entries.end(), names.begin(), [](const auto& entry) { return string(entry.first); });
-	}
-	else {
-		names = globalNames;
-	}
-	
-	std::unordered_map<string, int> names2globalIds;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-		names2globalIds[names[i]] = i;
-	}
-
-	std::vector<int> lowestClusterMembers(n_clusters, std::numeric_limits<int>::max());
-
-	for (size_t i = 0; i < assignments.size(); ++i) {
-		// translate element ids
-		string name = get_name(i);
-		int global_id = names2globalIds[name];
-		int cluster_id = assignments[i];
-
-		if (global_id < lowestClusterMembers[cluster_id]) {
-			lowestClusterMembers[cluster_id] = global_id;
-		}
-	}
-
-	std::vector<string*> representatives(names.size());
-
-	int n_singletons = 0;
-	for (size_t i = 0; i < names.size(); ++i) {
-
-		int id = get_id(names[i].c_str());
-		if (id == -1) {
-			// not in matrix - own representative
-			representatives[i] = &names[i];
-			++n_singletons;
-			//	LOG_DEBUG << names[i] << endl;
-		}
-		else {
-			int cluster_id = assignments[id];
-			int lowest_member = lowestClusterMembers[cluster_id];
-			representatives[i] = &names[lowest_member];
-		}
-	}
-
-	ofs << "object" << separator << "cluster" << endl;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-		ofs << names[i] << separator << *representatives[i] << endl;
-	}
-
-	return n_clusters + n_singletons;
-}
-
-
-/*********************************************************************************************************************/
-size_t SparseMatrixNumbered::load(
-	std::ifstream& ifs,
-	const std::pair<std::string, std::string>& idColumns,
-	const std::string& distanceColumn,
-	distance_transformation_t transform,
-	std::map<std::string, ColumnFilter>& columns2filters) {
-
-	size_t n_total_distances = 0;
-	const size_t N = 512ULL << 20; 
-	char* buf = new char[N];
-	char* buf_end = buf + N - 1;
-		
-	// get header
-	int col_ids[]{ 0, 1 };
-	int col_distance = 2; // by default use 3rd column as the one with distance 
-	std::vector<ColumnFilter> filters;
-	processHeader(ifs, idColumns, distanceColumn, columns2filters, col_ids, col_distance, filters);
-	int n_columns = filters.size();
-
-	auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
-	auto is_newline = [](char c) {return c == '\r' || c == '\n'; };
-
-	// assume space for 8M objects
-	distances.reserve(8LL << 20);
-	global2local.reserve(8LL << 20);
-	local2global.reserve(8LL << 20);
-
-	bool continueReading = true;
-	char* place = buf;
-
-	while (continueReading) {
-		size_t n_wanted = buf_end - place;
-		ifs.read(place, n_wanted);
-		size_t n_read = ifs.gcount();
-		char* block_end = place + n_read;
-		int offset = 0;
-
-		// no more data
-		if (n_read < n_wanted) {
-			continueReading = false;
-		}
-		else {
-			// find last newline
-			while (!is_newline(*(block_end - 1))) {
-				--block_end;
-				++offset;
-			}
-		}
-
-		//LOG_DEBUG << "portion: " << n_read << ", offset: " << offset << ", carryOn: " << continueReading << endl;
-
-		// pass through buffer
-		char* line = buf;
-		while (true) {
-			char* line_end = std::find_if(line, block_end, is_newline);
-
-			// no more lines
-			if (line_end == block_end) {
-				break;
-			}
-			
-			++n_total_distances;
-			
-			char* p = line;
-			double d = std::numeric_limits<double>::max();
-			int k = 0;
-			bool carryOn = true;
-
-			int global_ids[2];
-
-			for (int c = 0; c < n_columns; ++c) {
-				char* q = std::find_if(p, line_end, is_sep); // support both tsv and csv files
-				*q = 0;
-
-				if (k < 2 && c == col_ids[k]) {
-					global_ids[k] = Conversions::strtol(p, nullptr);
-
-					if (global_ids[k] + 1 > (int)global2local.size()) {
-						global2local.resize(global_ids[k] + 1, -1);
-					}
-					
-					++k;
-				}
-				else if (c == col_distance || filters[c].enabled) {
-					double value = Conversions::strtod(p, &p);
-
-					if (c == col_distance) {
-						d = transform(value); 	// convert similarity to distance if neccessary
-					}
-
-					// check distance condition
-					if (value < filters[c].min || value > filters[c].max) {
-						carryOn = false;
-						break;
-					}
-				}
-
-				p = q + 1;
-			}
-
-			// move to the next line
-			line = std::find_if(line_end, block_end, [](char c) { return c != '\r' && c != '\n' && c != 0; });
-
-			// do not consider rows not fulfilling conditions
-			if (carryOn == false) {
-				continue;
-			}
-	
-			int pair_ids[2];
-
-
-			for (int k = 0; k < 2; ++k) {
-
-				int gid = global_ids[k];
-				int lid = global2local[gid];
-
-				if (lid == -1) {
-					lid = local2global.size();
-					local2global.push_back(gid);
-					global2local[gid] = lid;
-				}
-
-				pair_ids[k] = lid;
-			}
-
-			uint32_t i = std::min(pair_ids[0], pair_ids[1]);
-			uint32_t j = std::max(pair_ids[0], pair_ids[1]);
-
-			// omit diagonal elements - they are assumed to have 0 distance
-			if (i == j) {
-				continue;
-			}
-
-			if (distances.size() <= j) {
-				distances.resize(j + 1);
-			}
-
-			auto& Di = distances[i];
-			auto& Dj = distances[j];
-
-			// extend capacity by factor 1.5 with 16 as an initial state
-			if (Di.capacity() == Di.size()) {
-				Di.reserve(Di.capacity() == 0 ? 16 : size_t(Di.capacity() * 1.5));
-			}
-
-			if (Dj.capacity() == Dj.size()) {
-				Dj.reserve(Dj.capacity() == 0 ? 16 : size_t(Dj.capacity() * 1.5));
-			}
-
-			Di.emplace_back(i, j, d);
-			Dj.emplace_back(j, i, d);
-		}
-
-		// copy remaining part after consuming all the lines
-		if (continueReading && offset > 0) {
-			memcpy(buf, block_end, offset);
-			place = buf + offset;
-		}
-		else {
-			place = buf;
-		}
-	}
-
-	// if neccessary, sort distances in rows according to the second id
-	n_elements = 0;
-
-	for (auto& row : distances) {
-		std::sort(row.begin(), row.end(), [](const dist_t& a, const dist_t& b) { return (a.u.ids == b.u.ids) ? (a.d < b.d) : (a.u.ids < b.u.ids); });
-		auto newEnd = std::unique(row.begin(), row.end(), [](const dist_t& a, const dist_t& b) { return a.u.ids == b.u.ids; });
-
-		row.erase(newEnd, row.end());
-		n_elements += row.size();
-	}
-
-	delete[] buf;
-
-	// Print distance histogram in the verbose mode
-	if (Log::getInstance(Log::LEVEL_VERBOSE).isEnabled()) {
-
-		std::vector<double> histo_bounds{ 0 };
-		double width = 0.001;
-
-		while (histo_bounds.back() < 0.05)
-		{
-			histo_bounds.push_back(histo_bounds.back() + width);
-		}
-		histo_bounds.push_back(std::numeric_limits<double>::max());
-		std::vector<int> histo(histo_bounds.size());
-
-		for (auto& row : distances) {
-			for (const auto& e : row) {
-				for (size_t i = 0; i < histo_bounds.size(); ++i) {
-					if (e.d < histo_bounds[i]) {
-						++histo[i];
-						break;
-					}
-				}
-			}
-		}
-
-		LOG_VERBOSE << endl << "Distance histogram" << endl;
-		for (size_t i = 0; i < histo_bounds.size(); ++i) {
-			LOG_VERBOSE << "  d < " << histo_bounds[i] << ": " << histo[i] << endl;
-		}
-		LOG_VERBOSE << endl;
-	}
-
-	return n_total_distances;
-}
-
-
-/*********************************************************************************************************************/
-int SparseMatrixNumbered::saveAssignments(
-	std::ofstream& ofs,
-	const std::vector<std::string>& globalNames,
-	const std::vector<int>& assignments,
-	char separator) {
-
-	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
-	int singleton_id = n_clusters;
-
-	ofs << "object" << separator << "cluster" << endl;
-
-	if (globalNames.empty()) {
-		for (size_t local_id = 0; local_id < local2global.size(); ++local_id) {
-			ofs << local2global[local_id] << "," << assignments[local_id] << endl;
-		}
-	}
-	else {
-		std::vector<int> globalAssignments(globalNames.size());
-
-	
-		for (size_t i = 0; i < globalNames.size(); ++i) {
-
-			int local_id = get_local_id(i);
-			if (local_id == -1) {
-				// not in matrix 
-				globalAssignments[i] = singleton_id++;
-			}
-			else {
-				globalAssignments[i] = assignments[local_id];
-			}
-		}
-
-		for (size_t i = 0; i < globalNames.size(); ++i) {
-			ofs << globalNames[i] << separator << globalAssignments[i] << endl;
-		}
-	}
-
-	return singleton_id; // return total number of clusters
-}
-
-
-/*********************************************************************************************************************/
-int SparseMatrixNumbered::saveRepresentatives(
-	std::ofstream& ofs,
-	const std::vector<std::string>& globalNames,
-	const std::vector<int>& assignments,
-	char separator
-) {
-
-	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
-	
-	std::vector<std::string> names;
-	if (globalNames.empty()) {
-		names.resize(global2local.size());
-		int i = 0;
-		std::generate(names.begin(), names.end(), [&i]() { return std::to_string(i++); });
-	}
-	else {
-		names = globalNames;
-	}
-
-	std::unordered_map<string, int> names2globalIds;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-		names2globalIds[names[i]] = i;
-	}
-
-	std::vector<int> lowestClusterMembers(n_clusters, std::numeric_limits<int>::max());
-
-	for (int local_id = 0; local_id < (int)assignments.size(); ++local_id) {
-		// translate element ids
-		int global_id = local2global[local_id];
-		int cluster_id = assignments[local_id];
-
-		if (global_id < lowestClusterMembers[cluster_id]) {
-			lowestClusterMembers[cluster_id] = global_id;
-		}
-	}
-
-	std::vector<string> representatives(names.size());
-
-	int n_singletons = 0;
-	for (size_t i = 0; i < names.size(); ++i) {
-
-		int local_id = get_local_id(i);
-		if (local_id == -1) {
-			// not in matrix - own representative
-			representatives[i] = names[i];
-			++n_singletons;
-			//	LOG_DEBUG << names[i] << endl;
-		}
-		else {
-			int cluster_id = assignments[local_id];
-			int lowest_member = lowestClusterMembers[cluster_id];
-			representatives[i] = names[lowest_member];
-		}
-	}
-
-	ofs << "object" << separator << "cluster" << endl;
-
-	for (size_t i = 0; i < names.size(); ++i) {
-		ofs << names[i] << separator << representatives[i] << endl;
-	}
-
-	return n_clusters + n_singletons;
-}
-
-/*********************************************************************************************************************/
-void SparseMatrixNamed::print(std::ostream& out) {
-
-	out.precision(10);
-
-	std::vector<const char*> names(names2ids.size());
-	int i = 0;
-	for (auto q : names2ids) {
-		names[i] = q.first;
-		++i;
-	}
-
-	std::sort(names.begin(), names.end(), [](const char* a, const char* b) {
-		return strcmp(a, b) < 0;
-		});
-
-	for (auto name : names) {
-
-		int i = names2ids[name].first;
-		std::vector<dist_t>& row = distances[i];
-
-		std::sort(row.begin(), row.end(), [this](const dist_t& a, const dist_t& b) {
-			return strcmp(ids2names[a.u.s.hi], ids2names[b.u.s.hi]) < 0;
-			});
-
-		for (auto& p : row) {
-			out << ids2names[p.u.s.lo] << "," << ids2names[p.u.s.hi] << "," << std::fixed << p.d << std::endl;
-		}
-	}
-}
-
diff --git a/src/distances.h b/src/distances.h
index 84e93c6..4521281 100644
--- a/src/distances.h
+++ b/src/distances.h
@@ -7,300 +7,58 @@
 // Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
 //
 // *******************************************************************************************
+#include "conversion.h"
 
-#include <vector>
-#include <unordered_map>
 #include <limits>
 #include <algorithm>
 #include <fstream>
-#include <sstream>
-#include <cstring>
-#include <iomanip>
-#include <map>
-#include <iterator>
+#include <cstdint>
 #include <functional>
 
-#include "conversion.h"
 
+/*********************************************************************************************************************/
 using distance_transformation_t = std::function<double(double)>;
 
-struct ColumnFilter {
-	double min{ std::numeric_limits<double>::lowest() };
-	double max{ std::numeric_limits<double>::max() };
-	bool enabled{ false };
-};
-
-struct dist_t {
-	static const double MAX;
-	
-	union {
-		uint64_t ids;  ///< index (id)
-		struct {
-			uint32_t lo;   ///< numer wiersza
-			uint32_t hi;   ///< numer kolumny
-		} s;
-	} u ;
+/*********************************************************************************************************************/
+#pragma pack(push, 4)
+class dist_t {
 	
-	double d;  ///< distance   std::numeric_limits<double>::max() 
-
-	dist_t() :  d(std::numeric_limits<double>::max()) { u.ids = 0; }
-	dist_t(uint64_t ids, double d) :  d(d) { u.ids = ids; }
-	dist_t(uint64_t i, uint64_t j, double d) :  d(d) { u.s.lo = i;  u.s.hi = j;}
-
-	dist_t(const std::pair<uint64_t, double>& rhs) :  d(rhs.second) { u.ids = rhs.first;}
-
-	static uint64_t pack(uint64_t i, uint64_t j) {
-		if (i >= j) {
-			std::swap(i, j);
-		}
-		return (i << 32ULL) | j;
-	}
-
-	static void unpack(uint64_t packed_ids, uint64_t& lo, uint64_t& hi) {
-		hi = packed_ids & 0xffffffff;
-		lo = packed_ids >> 32ULL;
-	}
-
-	// this is to preserve consistency with similarity variant
-	// - increasingly by distance
-	// - decreasingly by id
-	bool operator<(const dist_t& rhs) const {
-		return (d == rhs.d)
-			? (u.ids > rhs.u.ids)
-			: (d < rhs.d);
-	}
-
-	bool operator<=(const dist_t& rhs) const {
-		return (d == rhs.d)
-			? (u.ids >= rhs.u.ids)
-			: (d <= rhs.d);
-	}
-};
-
-
+	double d;		///< distance   std::numeric_limits<double>::max() 
+	uint32_t id;	// object identifier
 
-class StringHasher {
-	std::hash<char> hasher;
 public:
-	StringHasher() {}
+	dist_t() : d(std::numeric_limits<double>::max()), id(0) {}
+	dist_t(uint32_t id, double d) : d(d), id(id) {}
 
-	size_t operator()(const char* s) const {
-		size_t hs = hasher(*s);
-		++s;
-		while (*s) {
-			hs ^= hasher(*s);
-			++s;
-		}
-		return hs;
-	}
-};
-
-class StringEqual {
-	
-public:
-	StringEqual() {}
+	double get_d() const { return d; }
+	uint32_t get_id() const { return id; }
 
-	bool operator()(const char* a, const char* b) const {
-		return (std::strcmp(a, b) == 0);
+	bool operator<(const dist_t& rhs) const {
+		return (id == rhs.id) ? (d < rhs.d) : (id < rhs.id);
 	}
 };
+#pragma pack(pop)
 
-
-class SparseMatrix
-{
-protected:
-
-	size_t n_elements{ 0 };
-
-	std::vector<std::vector<dist_t>> distances;
-
-	/** a vector of distances */
-	//std::vector<dist_t> distances;
-
-	/** Each row holds a pointer to the first distance in the row. 
-		The last points address right after the collection.
-	*/
-	//std::vector<dist_t*> rows; 
-
+/*********************************************************************************************************************/
+class mini_dist_t {
+	uint32_t id;	// object identifier
 
 public:
+	mini_dist_t() : id(0) {}
+	mini_dist_t(uint32_t id, double d) : id(id) {}
 
-	SparseMatrix() {}
-	
-	virtual ~SparseMatrix() {}
-
-	size_t num_objects() const { return distances.size(); }
-
-	size_t num_elements() const { return n_elements; }
-
-	virtual size_t num_input_objects() const = 0;
-
-	const dist_t* begin(int row_id) const { return distances[row_id].data(); }
-	const dist_t* end(int row_id) const { return distances[row_id].data() + distances[row_id].size(); }
-
-	void clear(int row_id) { std::vector<dist_t>().swap(distances[row_id]); }
-
-	size_t get_num_neighbours(int i) { return distances[i].size(); }
+	double get_d() const { return 0.0; } // mini_dist_t is always below threshold
+	uint32_t get_id() const { return id; }
 
-	dist_t get(uint64_t i, uint64_t j) const 
-	{
-		dist_t v {i, j, dist_t::MAX };
-		auto it = std::lower_bound(begin(i), end(i), v, [](const dist_t& a, const dist_t& b) { return a.u.s.hi < b.u.s.hi; });
-
-		return (it == end(i) || it->u.s.hi != j) ? v : *it;
-	}
-
-	void extract_row(uint32_t row, uint32_t count, dist_t* out) const 
-	{
-		const dist_t* cur = this->begin(row);
-		const dist_t* end = this->end(row);
-		
-		for (uint32_t i = 0; i < count; ++i) 
-		{
-			if (i == row) {
-				out[i].u.s.lo = out[i].u.s.hi = row;
-				out[i].d = 0.0; // diagonal element
-			}
-			else if (cur == end || i < cur->u.s.hi) 
-			{
-				// before current or after end
-				out[i].u.s.lo = row;
-				out[i].u.s.hi = i;
-				out[i].d = std::numeric_limits<double>::max();
-			}
-			else 
-			{
-				out[i] = *cur;
-				++cur;
-			}
-		}
+	bool operator<(const mini_dist_t& rhs) const {
+		return (id < rhs.id);
 	}
-
-	virtual size_t load(
-		std::ifstream& ifs,
-		const std::pair<std::string, std::string>& idColumns,
-		const std::string& distanceColumn,
-		distance_transformation_t transform,
-		std::map<std::string, ColumnFilter>& columns2filters) = 0;
-
-	virtual int saveAssignments(
-		std::ofstream& ofs,
-		const std::vector<std::string>& externalNames,
-		const std::vector<int>& assignments,
-		char separator) = 0;
-
-	virtual int saveRepresentatives(
-		std::ofstream& ofs,
-		const std::vector<std::string>& externalNames,
-		const std::vector<int>& assignments,
-		char separator) = 0;
-
-	virtual void print(std::ostream& out) = 0;
-
-protected:
-
-	void processHeader(
-		std::ifstream& ifs,
-		const std::pair<std::string, std::string>& idColumns,
-		const std::string& distanceColumn,
-		std::map<std::string, ColumnFilter>& columns2filters,
-		int idColumnsOut[2],
-		int& distanceColumnOut,
-		std::vector<ColumnFilter>& filters);
-	
 };
 
-
-
-class SparseMatrixNamed : public SparseMatrix {
-	
-	using ids_pair_t = std::pair<int, int>;
-	
-	std::unordered_map<const char*, ids_pair_t, StringHasher, StringEqual> names2ids;
-	
-	std::vector<const char*> ids2names;
-
-	char* namesBuffer{ nullptr };
-
+/*********************************************************************************************************************/
+class IMatrix {
 public:
-	~SparseMatrixNamed() {
-		delete[] namesBuffer;
-	}
-
-	size_t num_input_objects() const override { return names2ids.size(); }
-
-	int get_id(const char* name) const {
-		auto it = names2ids.find((char*)name);
-		if (it == names2ids.end()) {
-			return -1;
-		}
-		else {
-			return it->second.first;
-		}
-	}
-
-	const char* get_name(int id) const { return ids2names[id]; }
-
-	size_t load(
-		std::ifstream& ifs,
-		const std::pair<std::string, std::string>& idColumns,
-		const std::string& distanceColumn,
-		distance_transformation_t transform,
-		std::map<std::string, ColumnFilter>& columns2filters) override;
-
-	int saveAssignments(
-		std::ofstream& ofs,
-		const std::vector<std::string>& globalNames,
-		const std::vector<int>& assignments,
-		char separator) override;
-
-	int saveRepresentatives(
-		std::ofstream& ofs,
-		const std::vector<std::string>& externalNames,
-		const std::vector<int>& assignments,
-		char separator) override;
-
-	void print(std::ostream& out) override;
-
+	virtual ~IMatrix() {}
 };
 
 
-class SparseMatrixNumbered : public SparseMatrix {
-	
-	std::vector<int> global2local;
-	std::vector<int> local2global;
-
-public:
-
-	int get_local_id(int global_id) {
-		if ((size_t)global_id > global2local.size() - 1) {
-			return -1;
-		}
-		else {
-			return global2local[global_id];
-		}
-	}
-	
-	size_t num_input_objects() const override { return global2local.size(); }
-
-	size_t load(
-		std::ifstream& ifs,
-		const std::pair<std::string, std::string>& idColumns,
-		const std::string& distanceColumn,
-		distance_transformation_t transform,
-		std::map<std::string, ColumnFilter>& columns2filters) override;
-
-	int saveAssignments(
-		std::ofstream& ofs,
-		const std::vector<std::string>& globalNames,
-		const std::vector<int>& assignments,
-		char separator);
-
-	int saveRepresentatives(
-		std::ofstream& ofs,
-		const std::vector<std::string>& externalNames,
-		const std::vector<int>& assignments,
-		char separator);
-
-	void print(std::ostream& out) override {}
-};
diff --git a/src/graph.cpp b/src/graph.cpp
new file mode 100644
index 0000000..18e8aa3
--- /dev/null
+++ b/src/graph.cpp
@@ -0,0 +1,83 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+
+#include "graph.h"
+#include "log.h"
+
+#include <iostream>
+#include <limits>
+#include <unordered_map>
+#include <algorithm>
+#include <iterator>
+
+using namespace std;
+
+/*********************************************************************************************************************/
+void Graph::processHeader(
+	std::ifstream& ifs,
+	const std::pair<std::string, std::string>& idColumns,
+	const std::string& distanceColumn,
+	const std::map<std::string, ColumnFilter>& columns2filters,
+	int idColumnsOut[2],
+	int& distanceColumnOut,
+	std::vector<ColumnFilter>& filters) {
+
+	std::string line;
+	std::getline(ifs, line);
+
+	std::replace(line.begin(), line.end(), ',', ' ');
+	std::istringstream iss(line);
+	std::vector<std::string> columns;
+	std::copy(std::istream_iterator<std::string>(iss), std::istream_iterator<std::string>(), std::back_inserter(columns));
+
+	if (columns.size() < 3) {
+		throw std::runtime_error("Error loading distances: at least three columns are required");
+	}
+
+	// default column identifiers
+	int col_ids[]{ 0, 1 };
+	int col_distance = 2; // by default use 3rd column as the one with distance 
+
+	int n_columns = (int)columns.size();
+
+	if (!idColumns.first.empty()) {
+		col_ids[0] = std::find(columns.begin(), columns.end(), idColumns.first) - columns.begin();
+		col_ids[1] = std::find(columns.begin(), columns.end(), idColumns.second) - columns.begin();
+
+		if (col_ids[0] == n_columns || col_ids[1] == n_columns) {
+			throw std::runtime_error("Error loading distances: id columns not found");
+		}
+
+		if (col_ids[0] > col_ids[1]) {
+			std::swap(col_ids[0], col_ids[1]);
+		}
+	}
+
+	if (!distanceColumn.empty()) {
+		col_distance = std::find(columns.begin(), columns.end(), distanceColumn) - columns.begin();
+		if (col_distance == n_columns) {
+			throw std::runtime_error("Error loading distances: " + distanceColumn + " column not found");
+		}
+	}
+
+	filters.resize(columns.size());
+
+	for (const auto& f : columns2filters) {
+		int col = std::find(columns.begin(), columns.end(), f.first) - columns.begin();
+		if (col == n_columns) {
+			throw std::runtime_error("Error loading distances: " + f.first + " column not found");
+		}
+		filters[col] = f.second;
+		filters[col].enabled = true;
+	}
+
+	idColumnsOut[0] = col_ids[0];
+	idColumnsOut[1] = col_ids[1];
+	distanceColumnOut = col_distance;
+}
+
diff --git a/src/graph.h b/src/graph.h
new file mode 100644
index 0000000..6f65ee1
--- /dev/null
+++ b/src/graph.h
@@ -0,0 +1,73 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+#pragma once
+#include "distances.h"
+
+#include <vector>
+#include <limits>
+#include <fstream>
+#include <cstring>
+#include <map>
+
+// *******************************************************************************************/
+struct ColumnFilter {
+	double min{ std::numeric_limits<double>::lowest() };
+	double max{ std::numeric_limits<double>::max() };
+	bool enabled{ false };
+};
+
+// *******************************************************************************************/
+class Graph {
+
+public:
+	virtual IMatrix& getMatrix() = 0;
+	
+	virtual size_t getNumVertices() const = 0;
+
+	virtual size_t getNumInputVertices() const = 0;
+
+	virtual size_t getNumEdges() const = 0;
+		
+	virtual size_t load(
+		std::ifstream& ifs,
+		const std::pair<std::string, std::string>& idColumns,
+		const std::string& distanceColumn,
+		distance_transformation_t transform,
+		const std::map<std::string, ColumnFilter>& columns2filters) = 0;
+
+	virtual int saveAssignments(
+		std::ofstream& ofs,
+		const std::vector<std::string>& externalNames,
+		const std::vector<int>& assignments,
+		char separator) const = 0;
+
+	virtual int saveRepresentatives(
+		std::ofstream& ofs,
+		const std::vector<std::string>& externalNames,
+		const std::vector<int>& assignments,
+		char separator) const = 0;
+
+	virtual void reorderObjects(
+		const std::vector<std::string>& externalNames,
+		std::vector<int>& objects) const = 0;
+
+	virtual void print(std::ostream& out) const = 0;
+
+	virtual ~Graph() {}
+
+protected:
+
+	void processHeader(
+		std::ifstream& ifs,
+		const std::pair<std::string, std::string>& idColumns,
+		const std::string& distanceColumn,
+		const std::map<std::string, ColumnFilter>& columns2filters,
+		int idColumnsOut[2],
+		int& distanceColumnOut,
+		std::vector<ColumnFilter>& filters);
+};
\ No newline at end of file
diff --git a/src/graph_named.h b/src/graph_named.h
new file mode 100644
index 0000000..22b3c85
--- /dev/null
+++ b/src/graph_named.h
@@ -0,0 +1,456 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+#pragma once
+#include "sparse_matrix.h"
+#include "graph.h"
+
+#include <iostream>
+#include <limits>
+#include <unordered_map>
+#include <vector>
+
+
+
+// *******************************************************************************************
+class StringHasher {
+	std::hash<char> hasher;
+public:
+	StringHasher() {}
+
+	size_t operator()(const char* s) const {
+		size_t hs = hasher(*s);
+		++s;
+		while (*s) {
+			hs ^= hasher(*s);
+			++s;
+		}
+		return hs;
+	}
+};
+
+// *******************************************************************************************
+class StringEqual {
+
+public:
+	StringEqual() {}
+
+	bool operator()(const char* a, const char* b) const {
+		return (std::strcmp(a, b) == 0);
+	}
+};
+
+/*********************************************************************************************************************/
+template <class Distance>
+class GraphNamed : public Graph {
+
+	using ids_pair_t = std::pair<int, int>;
+
+	SparseMatrix<Distance> matrix;
+
+	std::unordered_map<const char*, ids_pair_t, StringHasher, StringEqual> names2ids;
+
+	std::vector<const char*> ids2names;
+
+	char* namesBuffer{ nullptr };
+
+public:
+	~GraphNamed() {
+		delete[] namesBuffer;
+	}
+
+	IMatrix& getMatrix() override { return matrix; }
+
+	size_t getNumVertices() const override { return matrix.num_objects(); }
+
+	size_t getNumInputVertices() const override { return this->names2ids.size(); }
+
+	size_t getNumEdges() const override { return matrix.num_elements(); }
+
+	void reorderObjects(
+		const std::vector<std::string>& externalNames,
+		std::vector<int>& objects) const override {
+
+		int obj_id = 0;
+		for (const std::string& name : externalNames) {
+			int local_id = get_id(name.c_str());
+			if (local_id != -1) {
+				objects[obj_id++] = local_id;
+			}
+		}
+	}
+
+	size_t load(
+		std::ifstream& ifs,
+		const std::pair<std::string, std::string>& idColumns,
+		const std::string& distanceColumn,
+		distance_transformation_t transform,
+		const std::map<std::string, ColumnFilter>& columns2filters) override;
+
+	int saveAssignments(
+		std::ofstream& ofs,
+		const std::vector<std::string>& globalNames,
+		const std::vector<int>& assignments,
+		char separator) const override;
+
+	int saveRepresentatives(
+		std::ofstream& ofs,
+		const std::vector<std::string>& externalNames,
+		const std::vector<int>& assignments,
+		char separator) const override;
+
+	void print(std::ostream& out) const override;
+
+protected:
+	
+	int get_id(const char* name) const {
+		auto it = names2ids.find((char*)name);
+		if (it == names2ids.end()) {
+			return -1;
+		}
+		else {
+			return it->second.first;
+		}
+	}
+};
+
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+size_t GraphNamed<Distance>::load(
+	std::ifstream& ifs,
+	const std::pair<std::string, std::string>& idColumns,
+	const std::string& distanceColumn,
+	distance_transformation_t transform,
+	const std::map<std::string, ColumnFilter>& columns2filters) {
+
+	size_t n_total_distances = 0;
+	const size_t N = 512ULL << 20; // 128MB buffer
+	char* buf = new char[N];
+	char* buf_end = buf + N;
+
+	// get header
+	int col_ids[]{ 0, 1 };
+	int col_distance = 2; // by default use 3rd column as the one with distance 
+	std::vector<ColumnFilter> filters;
+	processHeader(ifs, idColumns, distanceColumn, columns2filters, col_ids, col_distance, filters);
+	int n_columns = filters.size();
+
+	auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
+	auto is_newline = [](char c) {return c == '\r' || c == '\n'; };
+
+	namesBuffer = new char[1LL << 30]; // 1 GB buffer for names
+	char* raw_ptr = namesBuffer;
+
+	// assume space for 8M objects
+	matrix.distances.reserve(8LL << 20);
+
+	bool continueReading = true;
+	char* place = buf;
+
+	while (continueReading) {
+		size_t n_wanted = buf_end - place;
+		ifs.read(place, n_wanted);
+		size_t n_read = ifs.gcount();
+		char* block_end = place + n_read;
+		int offset = 0;
+
+		// no more data
+		if (n_read < n_wanted) {
+			continueReading = false;
+		}
+		else {
+			// find last newline
+			while (!is_newline(*(block_end - 1))) {
+				--block_end;
+				++offset;
+			}
+		}
+
+		//LOG_DEBUG << "portion: " << n_read << ", offset: " << offset << ", carryOn: " << carryOn << endl;
+
+		// pass through buffer
+		char* line = buf;
+		while (true) {
+			char* line_end = std::find_if(line, block_end, is_newline);
+
+			// no more lines
+			if (line_end == block_end) {
+				break;
+			}
+
+			++n_total_distances;
+
+			char* p = line;
+			decltype(names2ids.begin()) its[2];
+			double d = std::numeric_limits<double>::max();
+
+			int k = 0;
+			bool carryOn = true;
+
+			for (int c = 0; c < n_columns; ++c) {
+				char* q = std::find_if(p, line_end, is_sep); // support both tsv and csv files
+				*q = 0;
+
+				if (k < 2 && c == col_ids[k]) {
+					char* name = p;
+
+					// store name in hashtable
+					its[k] = names2ids.find(name);
+
+					if (its[k] == names2ids.end()) {
+
+						char* localName = raw_ptr;
+						while (*raw_ptr++ = *name++) {}
+						auto it_and_flag = names2ids.insert({ localName, {-1, names2ids.size()} }); // -1 indicate singleton
+						its[k] = it_and_flag.first;
+					}
+
+					++k;
+				}
+				else if (c == col_distance || filters[c].enabled) {
+					double value = Conversions::strtod(p, &p);
+
+					if (c == col_distance) {
+						d = transform(value); 	// convert similarity to distance if neccessary
+					}
+
+					// check distance condition
+					if (value < filters[c].min || value > filters[c].max) {
+						carryOn = false;
+						break;
+					}
+				}
+
+				p = q + 1;
+			}
+
+			// move to the next line
+			line = std::find_if(line_end, block_end, [](char c) { return c != '\r' && c != '\n' && c != 0; });
+
+			// do not consider rows not fulfilling conditions
+			if (carryOn == false) {
+				continue;
+			}
+
+			// translate seq names to numerical ids
+			for (int k = 0; k < 2; ++k) {
+				auto it = its[k];
+
+				// if name not mapped to numerical ids
+				if (it->second.first == -1) {
+					ids2names.push_back(it->first);
+					it->second.first = ids2names.size() - 1;
+				}
+			}
+
+			uint32_t i = std::min(its[0]->second.first, its[1]->second.first);
+			uint32_t j = std::max(its[0]->second.first, its[1]->second.first);
+
+			// omit diagonal elements - they are assumed to have 0 distance
+			if (i == j) {
+				continue;
+			}
+
+			if (matrix.distances.size() <= j) {
+				matrix.distances.resize(j + 1);
+			}
+
+			auto& Di = matrix.distances[i];
+			auto& Dj = matrix.distances[j];
+
+			// extend capacity by factor 1.5 with 16 as an initial state
+			if (Di.capacity() == Di.size()) {
+				Di.reserve(Di.capacity() == 0 ? 16 : size_t(Di.capacity() * 1.5));
+			}
+
+			if (Dj.capacity() == Dj.size()) {
+				Dj.reserve(Dj.capacity() == 0 ? 16 : size_t(Dj.capacity() * 1.5));
+			}
+
+			Di.emplace_back(j, d);
+			Dj.emplace_back(i, d);
+		}
+
+		// copy remaining part after consuming all the lines
+		if (continueReading && offset > 0) {
+			memcpy(buf, block_end, offset);
+			place = buf + offset;
+		}
+		else {
+			place = buf;
+		}
+	}
+
+	// if neccessary, sort distances in rows according to the second id	
+	matrix.n_elements = 0;
+
+	for (auto& row : matrix.distances) {
+		std::sort(row.begin(), row.end());
+		auto newEnd = std::unique(row.begin(), row.end(), [](const Distance& a, const Distance& b) { return a.get_id() == b.get_id(); });
+
+		row.erase(newEnd, row.end());
+
+		matrix.n_elements += row.size();
+	}
+
+	delete[] buf;
+
+	return n_total_distances;
+}
+
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+int GraphNamed<Distance>::saveRepresentatives(
+	std::ofstream& ofs,
+	const std::vector<std::string>& globalNames,
+	const std::vector<int>& assignments,
+	char separator) const
+{
+	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
+
+	std::vector<std::string> names;
+	if (globalNames.empty()) {
+		names.resize(names2ids.size());
+		std::vector<std::pair<const char*, std::pair<int, int>>> entries(names2ids.size());
+		std::copy(names2ids.begin(), names2ids.end(), entries.begin());
+		std::sort(entries.begin(), entries.end(), [](const auto& a, const auto& b) { return a.second.second < b.second.second; });
+
+		std::transform(entries.begin(), entries.end(), names.begin(), [](const auto& entry) { return std::string(entry.first); });
+	}
+	else {
+		names = globalNames;
+	}
+
+	std::unordered_map<std::string, int> names2globalIds;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+		names2globalIds[names[i]] = i;
+	}
+
+	std::vector<int> lowestClusterMembers(n_clusters, std::numeric_limits<int>::max());
+
+	for (size_t i = 0; i < assignments.size(); ++i) {
+		// translate element ids
+		std::string name = ids2names[i];
+		int global_id = names2globalIds[name];
+		int cluster_id = assignments[i];
+
+		if (global_id < lowestClusterMembers[cluster_id]) {
+			lowestClusterMembers[cluster_id] = global_id;
+		}
+	}
+
+	std::vector<std::string*> representatives(names.size());
+
+	int n_singletons = 0;
+	for (size_t i = 0; i < names.size(); ++i) {
+
+		int id = get_id(names[i].c_str());
+		if (id == -1) {
+			// not in matrix - own representative
+			representatives[i] = &names[i];
+			++n_singletons;
+			//	LOG_DEBUG << names[i] << endl;
+		}
+		else {
+			int cluster_id = assignments[id];
+			int lowest_member = lowestClusterMembers[cluster_id];
+			representatives[i] = &names[lowest_member];
+		}
+	}
+
+	ofs << "object" << separator << "cluster" << std::endl;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+		ofs << names[i] << separator << *representatives[i] << std::endl;
+	}
+
+	return n_clusters + n_singletons;
+}
+
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+int GraphNamed<Distance>::saveAssignments(
+	std::ofstream& ofs,
+	const std::vector<std::string>& globalNames,
+	const std::vector<int>& assignments,
+	char separator) const {
+
+	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
+
+	std::vector<std::string> names;
+	if (globalNames.empty()) {
+		names.resize(names2ids.size());
+		std::vector<std::pair<const char*, std::pair<int, int>>> entries(names2ids.size());
+		std::copy(names2ids.begin(), names2ids.end(), entries.begin());
+		std::sort(entries.begin(), entries.end(), [](const auto& a, const auto& b) { return a.second.second < b.second.second; });
+
+		std::transform(entries.begin(), entries.end(), names.begin(), [](const auto& entry) { return std::string(entry.first); });
+	}
+	else {
+		names = globalNames;
+	}
+
+	std::vector<int> globalAssignments(names.size());
+
+	int singleton_id = n_clusters;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+
+		int id = get_id(names[i].c_str());
+		if (id == -1) {
+			// not in matrix 
+			globalAssignments[i] = singleton_id++;
+		}
+		else {
+			globalAssignments[i] = assignments[id];
+		}
+	}
+
+	ofs << "object" << separator << "cluster" << std::endl;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+		ofs << names[i] << separator << globalAssignments[i] << std::endl;
+	}
+
+	return singleton_id;
+}
+
+/*********************************************************************************************************************/
+template <class Distance>
+void GraphNamed<Distance>::print(std::ostream& out) const {
+
+	out.precision(10);
+
+	std::vector<const char*> names(names2ids.size());
+	int i = 0;
+	for (auto q : names2ids) {
+		names[i] = q.first;
+		++i;
+	}
+
+	std::sort(names.begin(), names.end(), [](const char* a, const char* b) {
+		return strcmp(a, b) < 0;
+		});
+
+	for (auto name : names) {
+
+		int i = names2ids.at(name).first;
+		const std::vector<Distance>& row = matrix.distances[i];
+
+		for (auto& p : row) {
+			out << ids2names[i] << "," << ids2names[p.get_id()] << "," << std::fixed << p.get_d() << std::endl;
+		}
+	}
+}
+
diff --git a/src/graph_numbered.h b/src/graph_numbered.h
new file mode 100644
index 0000000..d3f97b2
--- /dev/null
+++ b/src/graph_numbered.h
@@ -0,0 +1,407 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+#pragma once
+#include "sparse_matrix.h"
+#include "graph.h"
+#include "log.h"
+
+#include <vector>
+#include <limits>
+#include <unordered_map>
+#include <iostream>
+#include <fstream>
+
+
+template <class Distance>
+class GraphNumbered : public Graph {
+
+	std::vector<int> global2local;
+	std::vector<int> local2global;
+
+	SparseMatrix<Distance> matrix;
+
+public:
+
+	IMatrix& getMatrix() override { return matrix; }
+
+	size_t getNumVertices() const override { return matrix.num_objects(); }
+
+	size_t getNumInputVertices() const override { return global2local.size(); }
+
+	size_t getNumEdges() const override { return matrix.num_elements(); }
+
+	void reorderObjects(
+		const std::vector<std::string>& externalNames,
+		std::vector<int>& objects) const override {
+	
+		int obj_id = 0;
+		for (size_t i = 0; i < externalNames.size(); ++i) {
+			int local_id = get_local_id(i);
+			if (local_id != -1) {
+				objects[obj_id++] = local_id;
+			}
+		}
+	}
+
+	size_t load(
+		std::ifstream& ifs,
+		const std::pair<std::string, std::string>& idColumns,
+		const std::string& distanceColumn,
+		distance_transformation_t transform,
+		const std::map<std::string, ColumnFilter>& columns2filters) override;
+
+	int saveAssignments(
+		std::ofstream& ofs,
+		const std::vector<std::string>& globalNames,
+		const std::vector<int>& assignments,
+		char separator) const override;
+
+	int saveRepresentatives(
+		std::ofstream& ofs,
+		const std::vector<std::string>& externalNames,
+		const std::vector<int>& assignments,
+		char separator) const override;
+
+	void print(std::ostream& out) const override {}
+
+protected:
+	int get_local_id(int global_id) const {
+		if ((size_t)global_id > global2local.size() - 1) {
+			return -1;
+		}
+		else {
+			return global2local[global_id];
+		}
+	}
+};
+
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+size_t GraphNumbered<Distance>::load(
+	std::ifstream& ifs,
+	const std::pair<std::string, std::string>& idColumns,
+	const std::string& distanceColumn,
+	distance_transformation_t transform,
+	const std::map<std::string, ColumnFilter>& columns2filters) {
+
+	size_t n_total_distances = 0;
+	const size_t N = 512ULL << 20;
+	char* buf = new char[N];
+	char* buf_end = buf + N - 1;
+
+	// get header
+	int col_ids[]{ 0, 1 };
+	int col_distance = 2; // by default use 3rd column as the one with distance 
+	std::vector<ColumnFilter> filters;
+	processHeader(ifs, idColumns, distanceColumn, columns2filters, col_ids, col_distance, filters);
+	int n_columns = filters.size();
+
+	auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
+	auto is_newline = [](char c) {return c == '\r' || c == '\n'; };
+
+	// assume space for 8M objects
+	matrix.distances.reserve(8LL << 20);
+	global2local.reserve(8LL << 20);
+	local2global.reserve(8LL << 20);
+
+	bool continueReading = true;
+	char* place = buf;
+
+	while (continueReading) {
+		size_t n_wanted = buf_end - place;
+		ifs.read(place, n_wanted);
+		size_t n_read = ifs.gcount();
+		char* block_end = place + n_read;
+		int offset = 0;
+
+		// no more data
+		if (n_read < n_wanted) {
+			continueReading = false;
+		}
+		else {
+			// find last newline
+			while (!is_newline(*(block_end - 1))) {
+				--block_end;
+				++offset;
+			}
+		}
+
+		//LOG_DEBUG << "portion: " << n_read << ", offset: " << offset << ", carryOn: " << continueReading << endl;
+
+		// pass through buffer
+		char* line = buf;
+		while (true) {
+			char* line_end = std::find_if(line, block_end, is_newline);
+
+			// no more lines
+			if (line_end == block_end) {
+				break;
+			}
+
+			++n_total_distances;
+
+			char* p = line;
+			double d = std::numeric_limits<double>::max();
+			int k = 0;
+			bool carryOn = true;
+
+			int global_ids[2];
+
+			for (int c = 0; c < n_columns; ++c) {
+				char* q = std::find_if(p, line_end, is_sep); // support both tsv and csv files
+				*q = 0;
+
+				if (k < 2 && c == col_ids[k]) {
+					global_ids[k] = Conversions::strtol(p, nullptr);
+
+					if (global_ids[k] + 1 > (int)global2local.size()) {
+						global2local.resize(global_ids[k] + 1, -1);
+					}
+
+					++k;
+				}
+				else if (c == col_distance || filters[c].enabled) {
+					double value = Conversions::strtod(p, &p);
+
+					if (c == col_distance) {
+						d = transform(value); 	// convert similarity to distance if neccessary
+					}
+
+					// check distance condition
+					if (value < filters[c].min || value > filters[c].max) {
+						carryOn = false;
+						break;
+					}
+				}
+
+				p = q + 1;
+			}
+
+			// move to the next line
+			line = std::find_if(line_end, block_end, [](char c) { return c != '\r' && c != '\n' && c != 0; });
+
+			// do not consider rows not fulfilling conditions
+			if (carryOn == false) {
+				continue;
+			}
+
+			int pair_ids[2];
+
+
+			for (int k = 0; k < 2; ++k) {
+
+				int gid = global_ids[k];
+				int lid = global2local[gid];
+
+				if (lid == -1) {
+					lid = local2global.size();
+					local2global.push_back(gid);
+					global2local[gid] = lid;
+				}
+
+				pair_ids[k] = lid;
+			}
+
+			uint32_t i = std::min(pair_ids[0], pair_ids[1]);
+			uint32_t j = std::max(pair_ids[0], pair_ids[1]);
+
+			// omit diagonal elements - they are assumed to have 0 distance
+			if (i == j) {
+				continue;
+			}
+
+			if (matrix.distances.size() <= j) {
+				matrix.distances.resize(j + 1);
+			}
+
+			auto& Di = matrix.distances[i];
+			auto& Dj = matrix.distances[j];
+
+			// extend capacity by factor 1.5 with 16 as an initial state
+			if (Di.capacity() == Di.size()) {
+				Di.reserve(Di.capacity() == 0 ? 16 : size_t(Di.capacity() * 1.5));
+			}
+
+			if (Dj.capacity() == Dj.size()) {
+				Dj.reserve(Dj.capacity() == 0 ? 16 : size_t(Dj.capacity() * 1.5));
+			}
+
+			Di.emplace_back(j, d);
+			Dj.emplace_back(i, d);
+		}
+
+		// copy remaining part after consuming all the lines
+		if (continueReading && offset > 0) {
+			memcpy(buf, block_end, offset);
+			place = buf + offset;
+		}
+		else {
+			place = buf;
+		}
+	}
+
+	// if neccessary, sort distances in rows according to the second id
+	matrix.n_elements = 0;
+
+	for (auto& row : matrix.distances) {
+		std::sort(row.begin(), row.end());
+		auto newEnd = std::unique(row.begin(), row.end(), [](const Distance& a, const Distance& b) { return a.get_id() == b.get_id(); });
+
+		row.erase(newEnd, row.end());
+		matrix.n_elements += row.size();
+	}
+
+	delete[] buf;
+
+	// Print distance histogram in the verbose mode
+	if (Log::getInstance(Log::LEVEL_VERBOSE).isEnabled()) {
+
+		std::vector<double> histo_bounds{ 0 };
+		double width = 0.001;
+
+		while (histo_bounds.back() < 0.05)
+		{
+			histo_bounds.push_back(histo_bounds.back() + width);
+		}
+		histo_bounds.push_back(std::numeric_limits<double>::max());
+		std::vector<int> histo(histo_bounds.size());
+
+		for (auto& row : matrix.distances) {
+			for (const auto& e : row) {
+				for (size_t i = 0; i < histo_bounds.size(); ++i) {
+					if (e.get_d() < histo_bounds[i]) {
+						++histo[i];
+						break;
+					}
+				}
+			}
+		}
+
+		LOG_VERBOSE << std::endl << "Distance histogram" << std::endl;
+		for (size_t i = 0; i < histo_bounds.size(); ++i) {
+			LOG_VERBOSE << "  d < " << histo_bounds[i] << ": " << histo[i] << std::endl;
+		}
+		LOG_VERBOSE << std::endl;
+	}
+
+	return n_total_distances;
+}
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+int GraphNumbered<Distance>::saveRepresentatives(
+	std::ofstream& ofs,
+	const std::vector<std::string>& globalNames,
+	const std::vector<int>& assignments,
+	char separator) const
+{
+
+	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
+
+	std::vector<std::string> names;
+	if (globalNames.empty()) {
+		names.resize(global2local.size());
+		int i = 0;
+		std::generate(names.begin(), names.end(), [&i]() { return std::to_string(i++); });
+	}
+	else {
+		names = globalNames;
+	}
+
+	std::unordered_map<std::string, int> names2globalIds;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+		names2globalIds[names[i]] = i;
+	}
+
+	std::vector<int> lowestClusterMembers(n_clusters, std::numeric_limits<int>::max());
+
+	for (int local_id = 0; local_id < (int)assignments.size(); ++local_id) {
+		// translate element ids
+		int global_id = local2global[local_id];
+		int cluster_id = assignments[local_id];
+
+		if (global_id < lowestClusterMembers[cluster_id]) {
+			lowestClusterMembers[cluster_id] = global_id;
+		}
+	}
+
+	std::vector<std::string> representatives(names.size());
+
+	int n_singletons = 0;
+	for (size_t i = 0; i < names.size(); ++i) {
+
+		int local_id = get_local_id(i);
+		if (local_id == -1) {
+			// not in matrix - own representative
+			representatives[i] = names[i];
+			++n_singletons;
+			//	LOG_DEBUG << names[i] << endl;
+		}
+		else {
+			int cluster_id = assignments[local_id];
+			int lowest_member = lowestClusterMembers[cluster_id];
+			representatives[i] = names[lowest_member];
+		}
+	}
+
+	ofs << "object" << separator << "cluster" << std::endl;
+
+	for (size_t i = 0; i < names.size(); ++i) {
+		ofs << names[i] << separator << representatives[i] << std::endl;
+	}
+
+	return n_clusters + n_singletons;
+}
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+int GraphNumbered<Distance>::saveAssignments(
+	std::ofstream& ofs,
+	const std::vector<std::string>& globalNames,
+	const std::vector<int>& assignments,
+	char separator) const {
+
+	int n_clusters = *std::max_element(assignments.begin(), assignments.end()) + 1;
+	int singleton_id = n_clusters;
+
+	ofs << "object" << separator << "cluster" << std::endl;
+
+	if (globalNames.empty()) {
+		for (size_t local_id = 0; local_id < local2global.size(); ++local_id) {
+			ofs << local2global[local_id] << "," << assignments[local_id] << std::endl;
+		}
+	}
+	else {
+		std::vector<int> globalAssignments(globalNames.size());
+
+
+		for (size_t i = 0; i < globalNames.size(); ++i) {
+
+			int local_id = get_local_id(i);
+			if (local_id == -1) {
+				// not in matrix 
+				globalAssignments[i] = singleton_id++;
+			}
+			else {
+				globalAssignments[i] = assignments[local_id];
+			}
+		}
+
+		for (size_t i = 0; i < globalNames.size(); ++i) {
+			ofs << globalNames[i] << separator << globalAssignments[i] << std::endl;
+		}
+	}
+
+	return singleton_id; // return total number of clusters
+}
+
diff --git a/src/leiden.h b/src/leiden.h
index a3caf90..937ed59 100644
--- a/src/leiden.h
+++ b/src/leiden.h
@@ -11,6 +11,7 @@
 #include "clustering.h"
 #include <stdexcept>
 
+
 #ifndef NO_LEIDEN
 #include <igraph.h>
 #endif
@@ -22,8 +23,9 @@ struct LeidenParams {
 	int numIterations{ 2 };
 };
 
-template <class DistanceMatrix>
-class Leiden : public IClustering<DistanceMatrix> {
+
+template <class Distance>
+class Leiden : public IClustering<Distance> {
 
 private:
 	LeidenParams params;
@@ -38,7 +40,7 @@ class Leiden : public IClustering<DistanceMatrix> {
 	}
 
 	int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) override {
@@ -51,7 +53,7 @@ class Leiden : public IClustering<DistanceMatrix> {
 	Leiden(const LeidenParams& params) : params(params) {}
 	
 	int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) override {
@@ -80,7 +82,7 @@ class Leiden : public IClustering<DistanceMatrix> {
 	}
 
 
-	void load_graph(const DistanceMatrix& matrix, igraph_t& g, igraph_vector_t& edge_weights) {
+	void load_graph(SparseMatrix<Distance>& matrix, igraph_t& g, igraph_vector_t& edge_weights) {
 
 		igraph_vector_int_t edges;
 		igraph_vector_int_init(&edges, 0);
@@ -88,13 +90,13 @@ class Leiden : public IClustering<DistanceMatrix> {
 		igraph_vector_init(&edge_weights, 0);
 
 		for (int i = 0; i < matrix.num_objects(); ++i) {
-			for (const dist_t* edge = matrix.begin(i); edge < matrix.end(i); ++edge) {
-				igraph_vector_int_push_back(&edges, edge->u.s.lo);
-				igraph_vector_int_push_back(&edges, edge->u.s.hi);
-				igraph_vector_push_back(&edge_weights, 1.0 - edge->d);
+			for (const Distance* edge = matrix.begin(i); edge < matrix.end(i); ++edge) {
+				igraph_vector_int_push_back(&edges, i);
+				igraph_vector_int_push_back(&edges, edge->get_id());
+				igraph_vector_push_back(&edge_weights, 1.0 - edge->get_d());
 			}
 
-			matrix.clear(i);
+			matrix.clear_row(i);
 		}
 
 		/*
diff --git a/src/linkage_heaptrix.h b/src/linkage_heaptrix.h
index a24f71f..e55f72d 100644
--- a/src/linkage_heaptrix.h
+++ b/src/linkage_heaptrix.h
@@ -42,12 +42,12 @@ namespace linkage_algorithm_heaptrix
       std::vector<group> groups;
    };
 
-   std::ostream & operator << (std::ostream & sos, const group & g)
+  inline std::ostream & operator << (std::ostream & sos, const group & g)
    {
       return sos << g.id << ": d(" << g.left << ", " << g.right << ") == " << g.distance;
    }
 
-   std::ostream & operator << (std::ostream & sos, const dendrogram & d)
+   inline std::ostream & operator << (std::ostream & sos, const dendrogram & d)
    {
       sos << "--------------------" << std::endl;
       for (const auto g : d.groups)
@@ -59,8 +59,8 @@ namespace linkage_algorithm_heaptrix
    }
 
 
-   template <class DistanceMatrix, typename AggregationRule>
-      class linkage : public HierarchicalClustering<DistanceMatrix> 
+   template <class Distance, typename AggregationRule>
+      class linkage : public HierarchicalClustering<Distance> 
    {
       const std::size_t MAX_SIZE_T = std::numeric_limits<std::size_t>::max(); // Index of the location in the heap. MAX means no index set.
       const double MAX_DOUBLE = std::numeric_limits<double>::max(); 
@@ -68,14 +68,15 @@ namespace linkage_algorithm_heaptrix
       const std::string MAX_LABEL {"--"};
 
       /*** An element of a matrix and a heap simultaneously.  */
+      #pragma pack(push, 8)
       struct element
       {
           double _value;              ///< stored value
-          std::size_t _row;           ///< index of a row in the matrix
-         std::size_t _column;        ///< index of a column in the matrix
-         std::size_t _index_heap;    ///< index of an element in the heap
+          int32_t _row;           ///< index of a row in the matrix
+          int32_t _column;        ///< index of a column in the matrix
+          std::size_t _index_heap;    ///< index of an element in the heap
 
-         element (const std::size_t row, const std::size_t column, const std::size_t index_heap, const double value)
+         element (const int32_t row, const int32_t column, const std::size_t index_heap, const double value)
          {
             _row = row;
             _column = column;
@@ -111,6 +112,7 @@ namespace linkage_algorithm_heaptrix
             sos << "{(" << _row << ", " << _column << ") [" << _index_heap << "] " << _value << "}";
          }
       };
+      #pragma pack(pop)
 
       class matrix_row_ht
       {
@@ -766,7 +768,7 @@ namespace linkage_algorithm_heaptrix
       }
 
       public:
-      void read_matrix (DistanceMatrix & m)
+      void read_matrix (SparseMatrix<Distance> & m)
       {
          _matrix._rows.clear();
        
@@ -774,13 +776,13 @@ namespace linkage_algorithm_heaptrix
 
          for (size_t i = 0; i < m.num_objects(); ++i) {
              
-             for (const dist_t* p = m.begin(i); p < m.end(i); ++p) {
-                 const dist_t& edge = *p;
-                 if (edge.d != MAX_DOUBLE and edge.d != INF_DOUBLE and edge.u.s.lo != edge.u.s.hi)
-                     add_value(edge.u.s.lo, edge.u.s.hi, edge.d);
+             for (const Distance* p = m.begin(i); p < m.end(i); ++p) {
+                 const Distance& edge = *p;
+                 if (edge.get_d() != MAX_DOUBLE and edge.get_d() != INF_DOUBLE and i != edge.get_id())
+                     add_value(i, edge.get_id(), edge.get_d());
              }
 
-             m.clear(i);
+             m.clear_row(i);
          }
 
          _heap.make_heap();
@@ -988,7 +990,7 @@ namespace linkage_algorithm_heaptrix
       */
 
       public:
-      dendrogram do_clustering (DistanceMatrix & m)
+      dendrogram do_clustering (SparseMatrix<Distance> & m)
       {
          ksi::clock stopwatch;
          LOG_VERBOSE << "Loading data into heap ";
@@ -1021,7 +1023,7 @@ namespace linkage_algorithm_heaptrix
 
       public:
       int run (
-            DistanceMatrix& matrix,
+            SparseMatrix<Distance>& matrix,
             const std::vector<int>& objects,
             double threshold,
             std::vector<int>& assignments
@@ -1064,18 +1066,19 @@ namespace linkage_algorithm_heaptrix
    };
    
 
-   template <class DistanceMatrix, typename AggregationRule = my_max<double>>
-      class complete_linkage : public linkage<DistanceMatrix, AggregationRule>
+   template <class Distance, typename AggregationRule = my_max<double>>
+      class complete_linkage : public linkage<Distance, AggregationRule>
    {
    };
 
-   template <class DistanceMatrix>
-      class single_linkage : public linkage<DistanceMatrix, my_min<double>> 
+   template <class Distance>
+      class single_linkage : public linkage<Distance, my_min<double>> 
    {
    };
 }
 
-std::ostream & operator << (std::ostream & sos, const std::vector<std::tuple<std::size_t, std::size_t, double>> & dist)
+
+inline std::ostream & operator << (std::ostream & sos, const std::vector<std::tuple<std::size_t, std::size_t, double>> & dist)
 {
    for (const auto& [w, k, d] : dist)
       sos << "[" << w << ", " << k << ", " << d << "] ";
@@ -1083,12 +1086,12 @@ std::ostream & operator << (std::ostream & sos, const std::vector<std::tuple<std
    return sos;
 }
 
-template <class DistanceMatrix>
-class SingleLinkage : public linkage_algorithm_heaptrix::single_linkage<DistanceMatrix> 
+template <class Distance>
+class SingleLinkage : public linkage_algorithm_heaptrix::single_linkage<Distance> 
 {
    int operator()
       (
-       const DistanceMatrix& distances,
+       SparseMatrix<Distance>& distances,
        const std::vector<int>& objects,
        double threshold,
        std::vector<int>& assignments
@@ -1099,12 +1102,12 @@ class SingleLinkage : public linkage_algorithm_heaptrix::single_linkage<Distance
       }
 };
 
-template <class DistanceMatrix>
-class CompleteLinkage : public linkage_algorithm_heaptrix::complete_linkage<DistanceMatrix> 
+template <class Distance>
+class CompleteLinkage : public linkage_algorithm_heaptrix::complete_linkage<Distance> 
 {
    int operator()
       (
-       const DistanceMatrix& distances,
+       SparseMatrix<Distance>& distances,
        const std::vector<int>& objects,
        double threshold,
        std::vector<int>& assignments
diff --git a/src/main.cpp b/src/main.cpp
index cdfd931..1794659 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -5,200 +5,35 @@
 // Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
 //
 // *******************************************************************************************
-
-#include <fstream>
-#include <vector>
-#include <memory>
-#include <numeric>
-#include <iostream>
-#include <chrono>
-#include <stdexcept>
-
-#include "linkage_heaptrix.h"
-#include "uclust.h"
-#include "set_cover.h"
-#include "single_bfs.h"
-#include "cd_hit.h"
-#include "leiden.h"
-
 #include "console.h"
 #include "log.h"
 
-#define VAL(str) #str
-#define TOSTRING(str) VAL(str)
-#include "version.h"
+#include <vector>
+#include <stdexcept>
 
 using namespace std;
 
-#define debug(x) std::cerr << __FILE__ << " (" << __LINE__ << ") " << #x << " == " << (x) << std::endl
-
-
 int main(int argc, char** argv) 
 {
-   
-	Log::getInstance(Log::LEVEL_NORMAL).enable();
-
 	try {
-	   LOG_NORMAL << "Clusty" << endl
-		   << "  version " << VERSION
-#ifdef GIT_COMMIT
-		   << "-" << TOSTRING(GIT_COMMIT)
-#endif
-		   << " (" << DATE << ")" << endl << endl;
-
-	Console console;
-
-	if (!console.parse(argc, argv)) {
-		console.printUsage();
-		return 0;
-	}
-
-	if (console.verbose) {
-		Log::getInstance(Log::LEVEL_VERBOSE).enable();
-	}
-
-	shared_ptr<SparseMatrix> dists;
-	std::shared_ptr<IClustering<decltype(*dists)>> clustering;
-
-	switch (console.algo)
-	{
-	case Algo::SingleLinkage:
-		clustering = make_shared<SingleLinkageBFS<decltype(*dists)>>(); break;
-	case Algo::CompleteLinkage:
-		clustering = make_shared<CompleteLinkage<decltype(*dists)>>(); break;
-	case Algo::UClust:
-		clustering = make_shared<UClust<decltype(*dists)>>(); break;
-	case Algo::SetCover:
-		clustering = make_shared<SetCover<decltype(*dists)>>(); break;
-	case Algo::CdHit:
-		clustering = make_shared<CdHit<decltype(*dists)>>(); break;
-	case Algo::Leiden:
-		clustering = make_shared<Leiden<decltype(*dists)>>(console.leidenParams); break;
-	default:
-		throw std::runtime_error("Unkown clustering algorithm"); 
-	}
 
-	LOG_NORMAL << "Loading pairwise distances from " << console.distancesFile << "... ";
-	auto t = std::chrono::high_resolution_clock::now();
-	
-	vector<char> filebuf(128ULL << 20);  // 128MB buffer
-	ifstream ifs;
-	ifs.rdbuf()->pubsetbuf(filebuf.data(), filebuf.size());
-	ifs.open(console.distancesFile, ios_base::binary);
-	
-	if (!ifs) {
-		throw std::runtime_error("Unable to open distance file");
-	}
-
-	if (console.numericIds) {
-		dists = make_shared<SparseMatrixNumbered>();
-	}
-	else {
-		dists = make_shared<SparseMatrixNamed>();
-	}
-
-	map<DistanceSpecification, distance_transformation_t> transforms {
-		{ DistanceSpecification::Distance, [](double d) { return d; } },
-		{ DistanceSpecification::Similarity,		[](double d) { return 1.0 - d; } },
-		{ DistanceSpecification::PercentSimilarity, [](double d) { return 1.0 - d * 0.01; } },
-	};
-
-	size_t n_total_dists = dists->load(ifs, console.idColumns, console.distanceColumn,
-		transforms[console.distanceSpecification], console.columns2filters);
-	auto dt = std::chrono::high_resolution_clock::now() - t;
-
-	ifs.close();
-	LOG_NORMAL << endl
-		<< "  input graph: " << dists->num_input_objects() << " nodes, " << n_total_dists << " edges" << endl
-		<< "  filtered graph: " << dists->num_objects() << " nodes, " << dists->num_elements() << " edges" << endl
-		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
-
-	vector<string> names;
-	vector<int> objects(dists->num_objects());
-	std::iota(objects.begin(), objects.end(), 0);
-
-	if (!console.objectsFile.empty()) {
-		LOG_NORMAL << "Loading objects from " << console.objectsFile << "... ";
-		ifs.open(console.objectsFile);
-		
-		if (ifs) {
-			t = std::chrono::high_resolution_clock::now();
-			string token;
-			token.reserve(16 << 10); // 16k buffer
-			
-			// omit header
-			getline(ifs, token);
-			auto is_sep = [](char c) {return c == ',' || c == '\t' || c == '\r' || c == '\t'; };
+		Console console;
+		Params params;
 
-			while (getline(ifs, token)) {
-				auto it = find_if(token.begin(), token.end(), is_sep);
-				if (it != token.begin()) {
-					names.emplace_back(token.begin(), it);
-				}
-			}
+		std::vector<int> objects;
+		std::vector<string> names;
+		std::vector<int> assignments;
 
-			ifs.close();
-			LOG_NORMAL << endl;
-
-			// reorder sequences according to the objects file 
-			if (auto m = dynamic_pointer_cast<SparseMatrixNamed>(dists)) {
-				int obj_id = 0;
-				for (const string& name : names) {
-					int local_id = m->get_id(name.c_str());
-					if (local_id != -1) {
-						objects[obj_id++] = local_id;
-					}
-				}
-			}
-			else {
-				auto q = dynamic_pointer_cast<SparseMatrixNumbered>(dists);
-				int obj_id = 0;
-				for (size_t i = 0; i < names.size(); ++i) {
-					int local_id = q->get_local_id(i);
-					if (local_id != -1) {
-						objects[obj_id++] = local_id;
-					}
-				}
-			}
-
-			dt = std::chrono::high_resolution_clock::now() - t;
-			LOG_NORMAL << "  total objects: " << names.size() << endl
-				<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
+		if (!console.init(argc, argv, params)) {
+			return 0;
 		}
-		else {
-			throw std::runtime_error("Unable to open objects file");
-		}
-	}
 	
-	LOG_NORMAL << "Clustering (algorithm: " << Console::algo2str(console.algo) << ")... ";
-	vector<int> assignments;
-	t = std::chrono::high_resolution_clock::now();
+		std::unique_ptr<Graph> graph = console.loadGraph(params);
 
-	double threshold = std::nexttoward(std::numeric_limits<double>::max(), 0.0);
-	int n_clusters = (*clustering)(*dists, objects, threshold, assignments);
-	dt = std::chrono::high_resolution_clock::now() - t;
-	LOG_NORMAL << endl
-		<< "  objects: " << dists->num_objects() << ", clusters: " << n_clusters << endl
-		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
-	
-	LOG_NORMAL << "Saving clusters... ";
-	t = std::chrono::high_resolution_clock::now();
+		console.loadObjects(params, *graph, objects, names);
+		console.doClustering(params, *graph, objects, assignments);
+		console.saveAssignments(params, *graph, names, assignments);
 
-	char sep = console.outputCSV ? ',' : '\t';
-
-	ofstream ofs(console.output);
-	int n_total_clusters = 0;
-	if (console.outputRepresentatives) {
-		n_total_clusters = dists->saveRepresentatives(ofs, names, assignments, sep);
-	}
-	else {
-		n_total_clusters = dists->saveAssignments(ofs, names, assignments, sep);
-	}
-
-	dt = std::chrono::high_resolution_clock::now() - t;
-	LOG_NORMAL << endl
-		<< "  total clusters (including singletons): " << n_total_clusters << endl
-		<< "  time [s]: " << chrono::duration<double>(dt).count() << endl;
    }
    catch  (const std::string & message)
    {
@@ -208,7 +43,7 @@ int main(int argc, char** argv)
    catch (const std::exception & exc)
    {
 	   LOG_NORMAL << exc.what() << std::endl;
-	  return -1;
+	   return -1;
    }
    catch (...)
    {
diff --git a/src/params.cpp b/src/params.cpp
new file mode 100644
index 0000000..15e3b8e
--- /dev/null
+++ b/src/params.cpp
@@ -0,0 +1,117 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+
+#include "params.h"
+#include "distances.h"
+#include "log.h"
+#include "leiden.h"
+
+#include <vector>
+#include <fstream>
+#include <iostream>
+
+using namespace std;
+
+void Params::printUsage() const {
+	LOG_NORMAL << "Usage:" << endl
+		<< "clusty [options] <distances> <assignments>" << endl << endl
+		<< "Parameters:" << endl
+		<< "  <distances> - input TSV/CSV table with pairwise distances" << endl
+		<< "  <assignments> - output TSV/CSV table with assignments" << endl << endl
+		<< "Options:" << endl
+		<< "  " + PARAM_FILE_OBJECTS + " <string> - optional TSV/CSV file with object names in the first column sorted decreasingly w.r.t. representativness" << endl
+		<< "  " + PARAM_ALGO + " <single | complete | uclust | set-cover | cd-hit | leiden> - clustering algorithm:" << endl
+		<< "    * single     - single linkage (connected component, MMseqs mode 1)" << endl
+		<< "    * complete   - complete linkage" << endl
+		<< "    * uclust     - UCLUST" << endl
+		<< "    * set-cover  - greedy set cover (MMseqs mode 0)" << endl
+		<< "    * cd-hit     - CD-HIT (greedy incremental; MMseqs mode 2)" << endl
+		<< "    * leiden     - Leiden algorithm" << endl << endl
+
+		<< "  " + PARAM_ID_COLUMNS + " <column-name1> <column-name2> - names of columns with sequence identifiers (default: two first columns)" << endl
+		<< "  " + PARAM_DISTANCE_COLUMN + " <column-name> - name of the column with pairwise distances (or similarities; default: third column)" << endl
+		<< "  " + FLAG_SIMILARITY + " - use similarity instead of distances (has to be in [0,1] interval; default: false)" << endl
+		<< "  " + FLAG_PERCENT_SIMILARITY + " - use percent similarity (has to be in [0,100] interval; default: false)" << endl
+		<< "  " + PARAM_MIN + " <column-name> <real-threshold> - accept pairwise connections with values greater or equal given threshold in a specified column" << endl
+		<< "  " + PARAM_MAX + " <column-name> <real-threshold> - accept pairwise connections with values lower or equal given threshold in a specified column" << endl
+		<< "  " + FLAG_NUMERIC_IDS + " - use when sequences in the distances file are represented by numbers (can be mapped to string ids by the object file)" << endl
+		<< "  " + FLAG_OUT_REPRESENTATIVES + " - output a representative object for each cluster instead of a cluster numerical identifier (default: " << std::boolalpha << outputRepresentatives << ")" << endl
+		<< "  " + FLAG_OUT_CSV + " - output a CSV table instead of a default TSV (default: " << std::boolalpha << outputCSV << ")"
+
+#ifndef NO_LEIDEN
+		<< endl << endl
+		<< "  " + PARAM_LEIDEN_RESOLUTION + " - resolution parameter for Leiden algorithm (default: " << leidenParams.resolution << ")" << endl
+		<< "  " + PARAM_LEIDEN_BETA + " - beta parameter for Leiden algorithm (default: " << leidenParams.beta << ")" << endl
+		<< "  " + PARAM_LEIDEN_RESOLUTION + " - number of interations for Leiden algorithm (default: " << leidenParams.numIterations << ")"
+#endif
+		<< endl << endl;
+}
+
+
+bool Params::parse(int argc, char** argv) {
+
+	vector<string> args;
+	for (int i = 1; i < argc; ++i) {
+		args.emplace_back(argv[i]);
+	}
+
+	if (args.size() < 2) {
+		return false;
+	}
+
+	std::string tmp;
+	findOption(args, PARAM_ALGO, tmp);
+	if (tmp.length()) {
+		algo = str2algo(tmp);
+	}
+
+	findOption(args, PARAM_FILE_OBJECTS, objectsFile);
+
+	findOption(args, PARAM_ID_COLUMNS, idColumns.first, idColumns.second);
+	numericIds = findSwitch(args, FLAG_NUMERIC_IDS);
+
+	findOption(args, PARAM_DISTANCE_COLUMN, distanceColumn);
+
+	bool use_similarity = findSwitch(args, FLAG_SIMILARITY);
+	bool use_percent_similarity = findSwitch(args, FLAG_PERCENT_SIMILARITY);
+
+	if (use_percent_similarity) {
+		distanceSpecification = DistanceSpecification::PercentSimilarity;
+	}
+	else if (use_similarity) {
+		distanceSpecification = DistanceSpecification::Similarity;
+	}
+
+	string column;
+	double value;
+	while (findOption(args, PARAM_MIN, column, value)) {
+		columns2filters[column].min = std::max(value, columns2filters[column].min);
+	}
+	while (findOption(args, PARAM_MAX, column, value)) {
+		columns2filters[column].max = std::min(value, columns2filters[column].max);
+	}
+
+	outputRepresentatives = findSwitch(args, FLAG_OUT_REPRESENTATIVES);
+	outputCSV = findSwitch(args, FLAG_OUT_CSV);
+
+	// leiden parameters
+	findOption(args, PARAM_LEIDEN_RESOLUTION, leidenParams.resolution);
+	findOption(args, PARAM_LEIDEN_BETA, leidenParams.beta);
+	findOption(args, PARAM_LEIDEN_ITERATIONS, leidenParams.numIterations);
+
+	verbose = findSwitch(args, FLAG_VERBOSE);
+
+	if (args.size() == 2) {
+		distancesFile = args[0];
+		output = args[1];
+		return true;
+	}
+
+	return false;
+}
+
diff --git a/src/params.h b/src/params.h
new file mode 100644
index 0000000..e862112
--- /dev/null
+++ b/src/params.h
@@ -0,0 +1,151 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+#pragma once
+#include "graph.h"
+#include "leiden.h"
+
+#include <string>
+#include <stdexcept>
+#include <sstream>
+#include <map>
+
+enum class Algo {
+	SingleLinkage,
+	CompleteLinkage,
+	UClust,
+	SetCover,
+	CdHit,
+	Leiden
+};
+
+enum class DistanceSpecification {
+	Distance,
+	Similarity,
+	PercentSimilarity
+};
+
+class Params {
+	const std::string PARAM_ALGO{ "--algo" };
+
+	const std::string PARAM_FILE_OBJECTS{ "--objects-file" };
+
+	const std::string PARAM_ID_COLUMNS{ "--id-cols" };
+	const std::string FLAG_NUMERIC_IDS{ "--numeric-ids" };
+	const std::string PARAM_DISTANCE_COLUMN{ "--distance-col" };
+	const std::string FLAG_SIMILARITY{ "--similarity" };
+	const std::string FLAG_PERCENT_SIMILARITY{ "--percent-similarity" };
+
+	const std::string PARAM_MAX{ "--max" };
+	const std::string PARAM_MIN{ "--min" };
+
+	const std::string FLAG_OUT_REPRESENTATIVES{ "--out-representatives" };
+	const std::string FLAG_OUT_CSV{ "--out-csv" };
+
+	const std::string PARAM_LEIDEN_RESOLUTION{ "--leiden-resolution" };
+	const std::string PARAM_LEIDEN_BETA{ "--leiden-beta" };
+	const std::string PARAM_LEIDEN_ITERATIONS{ "--leiden-iterations" };
+
+	const std::string FLAG_VERBOSE{ "-v" };
+
+public:
+	static Algo str2algo(const std::string& str)
+	{
+		if (str == "single") { return Algo::SingleLinkage; }
+		else if (str == "complete") { return Algo::CompleteLinkage; }
+		else if (str == "uclust") { return Algo::UClust; }
+		else if (str == "set-cover") { return Algo::SetCover; }
+		else if (str == "cd-hit") { return Algo::CdHit; }
+		else if (str == "leiden") { return Algo::Leiden; }
+
+		else { throw std::runtime_error("Unkown clustering algorithm"); }
+	}
+
+	static std::string algo2str(Algo algo) {
+		switch (algo) {
+		case Algo::SingleLinkage: return "single";
+		case Algo::CompleteLinkage: return "complete";
+		case Algo::UClust: return "uclust";
+		case Algo::SetCover: return "set-cover";
+		case Algo::Leiden: return "leiden";
+		case Algo::CdHit: return "cd-hit";
+		default: throw std::runtime_error("Unkown clustering algorithm");
+		}
+	}
+
+
+public:
+
+	std::string objectsFile;
+	std::string distancesFile;
+	std::string output;
+
+	Algo algo{ Algo::SingleLinkage };
+
+	std::pair<std::string, std::string> idColumns;
+	bool numericIds{ false };
+
+	std::string distanceColumn;
+	DistanceSpecification distanceSpecification{ DistanceSpecification::Distance };
+
+	std::map<std::string, ColumnFilter> columns2filters;
+	bool outputRepresentatives{ false };
+	bool outputCSV{ false };
+
+	LeidenParams leidenParams;
+
+	bool verbose{ false };
+
+	void printUsage() const;
+	bool parse(int argc, char** argv);
+
+	bool findSwitch(std::vector<std::string>& params, const std::string& name) {
+		auto it = find(params.begin(), params.end(), name); // verbose mode
+		if (it != params.end()) {
+			params.erase(it);
+			return true;
+		}
+
+		return false;
+	}
+
+	template <typename T>
+	bool findOption(std::vector<std::string>& params, const std::string& name, T& v) {
+		if (params.size() < 2) {
+			return false;
+		}
+
+		auto stop = std::prev(params.end());
+		auto it = find(params.begin(), stop, name); // verbose mode
+		if (it != stop) {
+			std::istringstream iss(*std::next(it));
+			if (iss >> v) {
+				params.erase(it, it + 2);
+				return true;
+			}
+		}
+		return false;
+	}
+
+	template <typename T, typename U>
+	bool findOption(std::vector<std::string>& params, const std::string& name, T& value1, U& value2) {
+		if (params.size() < 3) {
+			return false;
+		}
+
+		auto stop = std::prev(params.end(), 2);
+		auto it = find(params.begin(), stop, name); // verbose mode
+		if (it != stop) {
+			if (std::istringstream(*std::next(it)) >> value1
+				&& std::istringstream(*std::next(it, 2)) >> value2) {
+				params.erase(it, it + 3);
+				return true;
+			}
+		}
+		return false;
+	}
+};
diff --git a/src/set_cover.h b/src/set_cover.h
index fe338cd..3a57b7b 100644
--- a/src/set_cover.h
+++ b/src/set_cover.h
@@ -18,12 +18,12 @@
 
 
 /** MMseqs0 clustering algorithm */
-template <class DistanceMatrix>
-class SetCover : public IClustering<DistanceMatrix> 
+template <class Distance>
+class SetCover : public IClustering<Distance> 
 {
 public:
 	int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) override
@@ -37,7 +37,7 @@ class SetCover : public IClustering<DistanceMatrix>
 		for (int i = 0; i < nObjects; ++i) {
 			int obj = objects[i];
 			obj2connections[i].first = obj;
-			obj2connections[i].second = distances.get_num_neighbours(obj);
+			obj2connections[i].second = distances.num_neighbours(obj);
 		}
 
 		std::stable_sort(obj2connections.begin(), obj2connections.end(), [](const auto& a, const auto& b) { return a.second > b.second; });
@@ -53,9 +53,9 @@ class SetCover : public IClustering<DistanceMatrix>
 				assignments[obj] = cluster_number; // seed of a new cluster ... 
 				
 				// ... and its neighbours:
-				for (const dist_t* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
-					auto other = edge->u.s.hi;
-					if (edge->d <= threshold && assignments[other] == NO_ASSIGNMENT) {
+				for (const Distance* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
+					auto other = edge->get_id();
+					if (edge->get_d() <= threshold && assignments[other] == NO_ASSIGNMENT) {
 						assignments[other] = cluster_number;
 					}
 				}
diff --git a/src/single_bfs.h b/src/single_bfs.h
index c7e8b94..b3aa32c 100644
--- a/src/single_bfs.h
+++ b/src/single_bfs.h
@@ -8,37 +8,25 @@
 //
 // *******************************************************************************************
 
+#include "distances.h"
+#include "clustering.h"
+
 #include <vector>
 #include <algorithm>
 #include <deque>
 #include <utility>
 
-#include "distances.h"
-#include "clustering.h"
-
-#define debug(x) std::cerr << __FILE__ << " (" << __LINE__ << ") " << #x << " == " << (x) << std::endl
-
-std::ostream & operator<< (std::ostream & s, const std::deque<int> & q)
-{
-	s << "[ ";
-	for (const auto n : q)
-	  s << n << " ";
-	return s << "]";
-}
-
 
 /** MMseqs1 clustering algorithm
     based on connected component search in an undirected graph
 */
-template <class DistanceMatrix>
-class SingleLinkageBFS : public IClustering<DistanceMatrix>
+template <class Distance>
+class SingleLinkageBFS : public IClustering<Distance>
 {
 public:
 	
-	/** @param[out] assignments The row is a complete clustering for a threshold. Each value in a row denotes a group an object has been assgned to.
-	 @return number of elaborated clusters */
 	int operator() (
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		const double threshold,
 		std::vector<int>& assignments) override
@@ -76,9 +64,9 @@ class SingleLinkageBFS : public IClustering<DistanceMatrix>
 					if (assignments[node] == NO_ASSIGNMENT) {
 						assignments[node] = cluster_number; // seed of a new cluster 
 
-						for (const dist_t* edge = distances.begin(node); edge < distances.end(node); ++edge) {
-							auto other = edge->u.s.hi;
-							if (edge->d <= threshold && assignments[other] == NO_ASSIGNMENT) {
+						for (const Distance* edge = distances.begin(node); edge < distances.end(node); ++edge) {
+							auto other = edge->get_id();
+							if (edge->get_d() <= threshold && assignments[other] == NO_ASSIGNMENT) {
 								Q.push_back(other);
 							}
 						}
diff --git a/src/sparse_matrix.h b/src/sparse_matrix.h
new file mode 100644
index 0000000..b1952e5
--- /dev/null
+++ b/src/sparse_matrix.h
@@ -0,0 +1,39 @@
+// *******************************************************************************************
+// This file is a part of Clusty software distributed under GNU GPL 3 license.
+// The homepage of the Clusty project is https://github.com/refresh-bio/Clusty
+//
+// Copyright(C) 2024-2024, A.Gudys, K.Siminski, S.Deorowicz
+//
+// *******************************************************************************************
+#pragma once
+#include "distances.h"
+
+#include <vector>
+
+
+/*********************************************************************************************************************/
+template <class Distance>
+class SparseMatrix : public IMatrix
+{
+public:
+
+	size_t n_elements{ 0 };
+
+	std::vector<std::vector<Distance>> distances;
+
+	SparseMatrix() {}
+
+	virtual ~SparseMatrix() {}
+
+	size_t num_objects() const { return distances.size(); }
+
+	size_t num_elements() const { return n_elements; }
+
+	size_t num_neighbours(int i) const { return distances[i].size(); }
+
+	const Distance* begin(int row_id) const { return distances[row_id].data(); }
+	const Distance* end(int row_id) const { return distances[row_id].data() + distances[row_id].size(); }
+
+	void clear_row(int row_id) { std::vector<Distance>().swap(distances[row_id]); }
+};
+
diff --git a/src/uclust.h b/src/uclust.h
index 563cda8..06a269c 100644
--- a/src/uclust.h
+++ b/src/uclust.h
@@ -17,12 +17,12 @@
 #include <vector>
 #include <unordered_map>
 
-template <class DistanceMatrix>
-class UClust : public IClustering<DistanceMatrix> {
+template <class Distance>
+class UClust : public IClustering<Distance> {
 public:
 
 	int operator()(
-		const DistanceMatrix& distances,
+		SparseMatrix<Distance>& distances,
 		const std::vector<int>& objects,
 		double threshold,
 		std::vector<int>& assignments) override {
@@ -41,20 +41,20 @@ class UClust : public IClustering<DistanceMatrix> {
 		for (int i = 1; i < n_objects; ++i) {
 			obj = objects[i];
 
-			dist_t max_edge{ 0, dist_t::MAX };
-			const dist_t* closest = &max_edge;
+			Distance max_edge;
+			const Distance* closest = &max_edge;
 			
 			// select closest seed among neighbours
-			for (const dist_t* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
+			for (const Distance* edge = distances.begin(obj); edge < distances.end(obj); ++edge) {
 				
-				if (seeds2clusters.count(edge->u.s.hi) && edge->d < closest->d) {
+				if (seeds2clusters.count(edge->get_id()) && edge->get_d() < closest->get_d()) {
 					closest = edge;
 				}
 			}
 			
 			// if distance to seed is below threshold
-			if (closest->d <= threshold) {
-				assignments[obj] = seeds2clusters[closest->u.s.hi];
+			if (closest->get_d() <= threshold) {
+				assignments[obj] = seeds2clusters[closest->get_id()];
 			}
 			else {
 				int cluster_id = seeds2clusters.size();
diff --git a/src/version.h b/src/version.h
index 4c58389..9700723 100644
--- a/src/version.h
+++ b/src/version.h
@@ -8,12 +8,18 @@
 //
 // *******************************************************************************************
 
-#define VERSION		"1.0.3"
-#define DATE		"2024-09-12"
+#define VERSION		"1.1.0"
+#define DATE		"2024-09-30"
 
 
 /********* Version history *********
 
+ 1.1.0 (2024-09-30)
+ Memory optimizatons:
+* Row identifier removed from dist_t structure.
+* Object identifiers in complete linkage stored as 32-bit integers.
+* For some clustering algorithms only edges are stored (without distances). 
+
  1.0.3 (2024-09-12)
  Fixes in building scripts.